def get_nodes_func(tf_config: BaseTerraformConfig, infraenv_config: InfraEnvConfig):
    # NOTE: `nodes_data` and `self` are free variables captured from the enclosing scope
    if "nodes" in nodes_data:
        return nodes_data["nodes"]

    nodes_data["configs"] = infraenv_config, tf_config

    net_asset = LibvirtNetworkAssets()
    tf_config.net_asset = net_asset.get()
    nodes_data["net_asset"] = net_asset

    controller = TerraformController(tf_config, entity_config=infraenv_config)
    nodes = Nodes(controller)
    nodes_data["nodes"] = nodes

    nodes.prepare_nodes()

    interfaces = self.nat_interfaces(tf_config)
    nat = NatController(interfaces, NatController.get_namespace_index(interfaces[0]))
    nat.add_nat_rules()
    nodes_data["nat"] = nat

    return nodes
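
# `get_nodes_func` above references `nodes_data` and `self` as free variables, so it
# only works as a closure defined inside a fixture method. A minimal sketch of a
# plausible enclosing fixture; the names shown here (`ExampleTest`,
# `infraenv_nodes_factory`) and the teardown are illustrative assumptions, not the
# project's actual code:
import pytest


class ExampleTest:  # hypothetical host class that would provide self.nat_interfaces
    @pytest.fixture
    def infraenv_nodes_factory(self):
        nodes_data = {}  # captured by get_nodes_func; caches the created Nodes

        def get_nodes_func(tf_config, infraenv_config):
            ...  # the body shown above, reading and writing nodes_data

        yield get_nodes_func

        # hypothetical teardown: destroy whatever the factory created, if anything
        if "nodes" in nodes_data:
            nodes_data["nodes"].destroy_all_nodes()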
def kube_api_test_prepare_late_binding_infraenv(
    self, kube_api_context: KubeAPIContext, nodes: Nodes, infraenv_config: InfraEnvConfig
):
    api_client = kube_api_context.api_client
    spoke_namespace = kube_api_context.spoke_namespace

    infraenv_name = infraenv_config.entity_name.get()
    infraenv_config.iso_download_path = utils.get_iso_download_path(infraenv_name)
    nodes.prepare_nodes()

    secret = Secret(api_client, f"{infraenv_name}-secret", spoke_namespace)
    secret.create(pull_secret=infraenv_config.pull_secret)

    ignition_config_override = None

    infra_env = InfraEnv(api_client, f"{infraenv_name}-infra-env", spoke_namespace)
    infra_env.create(
        cluster_deployment=None,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=None,
        ssh_pub_key=infraenv_config.ssh_public_key,
    )

    agents = self.start_nodes(nodes, infra_env, infraenv_config, infraenv_config.is_static_ip)

    log.info("Waiting for agent status verification")
    Agent.wait_for_agents_to_be_ready_for_install(agents)

    return infra_env
def prepare_infraenv_nodes(
    self, infraenv_nodes: Nodes, infra_env_configuration: InfraEnvConfig
) -> Nodes:
    try:
        yield infraenv_nodes
    finally:
        if global_variables.test_teardown:
            log.info("--- TEARDOWN --- node controller\n")
            infraenv_nodes.destroy_all_nodes()
            log.info(
                f"--- TEARDOWN --- deleting iso file from: {infra_env_configuration.iso_download_path}\n"
            )
            utils.run_command(
                f"rm -f {infra_env_configuration.iso_download_path}", shell=True
            )
def prepare_nodes(self, nodes: Nodes, cluster_configuration: ClusterConfig) -> Nodes:
    try:
        nodes.prepare_nodes()
        yield nodes
    finally:
        if global_variables.test_teardown:
            log.info("--- TEARDOWN --- node controller\n")
            nodes.destroy_all_nodes()
            log.info(
                f"--- TEARDOWN --- deleting iso file from: {cluster_configuration.iso_download_path}\n"
            )
            utils.run_command(
                f"rm -f {cluster_configuration.iso_download_path}", shell=True
            )
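
# Both teardown helpers above follow the pytest yield-fixture pattern: setup runs
# before `yield`, teardown runs in the `finally` block even when the test fails.
# A minimal consuming-test sketch, assuming `prepare_nodes` is registered with
# @pytest.fixture in the enclosing class (test name and assertion are illustrative):
def test_nodes_are_provisioned(self, prepare_nodes: Nodes):
    # `prepare_nodes` here is the Nodes object yielded by the fixture above
    prepare_nodes.start_all()
    assert len(prepare_nodes) > 0  # hypothetical sanity check for illustration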
def kube_api_test_prepare_late_binding_infraenv(
    kube_api_context, nodes: Nodes, infraenv_config: InfraEnvConfig, *, is_ipv4=True
):
    infraenv_name = infraenv_config.entity_name.get()

    secret = Secret(
        kube_api_client=kube_api_context.api_client,
        name=f"{infraenv_name}-secret",
        namespace=global_variables.spoke_namespace,
    )
    secret.create(pull_secret=infraenv_config.pull_secret)

    ignition_config_override = None

    infra_env = InfraEnv(
        kube_api_client=kube_api_context.api_client,
        name=f"{infraenv_name}-infra-env",
        namespace=global_variables.spoke_namespace,
    )
    infra_env.create(
        cluster_deployment=None,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=None,
        ssh_pub_key=infraenv_config.ssh_public_key,
    )

    infra_env.status()
    download_iso_from_infra_env(infra_env, infraenv_config.iso_download_path)

    log.info("iso downloaded, starting nodes")
    nodes.start_all()

    log.info("waiting for host agent")
    agents = infra_env.wait_for_agents(len(nodes))
    for agent in agents:
        agent.approve()
        set_agent_hostname(nodes[0], agent, is_ipv4)  # Currently only supports single node

    log.info("Waiting for agent status verification")
    Agent.wait_for_agents_to_be_ready_for_install(agents)

    return infra_env
def start_nodes(cls, nodes: Nodes, infra_env: InfraEnv, entity_config: BaseEntityConfig) -> List[Agent]:
    infra_env.status()  # wait until the infra-env has a status (i.e. until the resource has been processed)
    cls.download_iso_from_infra_env(infra_env, entity_config.iso_download_path)

    log.info("iso downloaded, starting nodes")
    nodes.controller.log_configuration()
    log.info(f"Entity configuration {entity_config}")

    nodes.start_all(check_ips=not (entity_config.is_static_ip and entity_config.is_ipv6))

    log.info("waiting for host agent")
    agents = infra_env.wait_for_agents(len(nodes))
    node_list = nodes.controller.list_nodes()

    for agent in agents:
        agent.approve()
        cls._set_host_name_from_node(node_list, agent, entity_config.is_ipv4)

    return agents
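
# A minimal usage sketch for `start_nodes`, mirroring the late-binding flow above.
# `KubeAPIFlow` is an assumed stand-in for whatever class hosts start_nodes:
def _example_boot_and_wait(nodes: Nodes, infra_env: InfraEnv, config: BaseEntityConfig) -> List[Agent]:
    agents = KubeAPIFlow.start_nodes(nodes, infra_env, config)  # boots VMs and approves agents
    Agent.wait_for_agents_to_be_ready_for_install(agents)  # block until agents are install-ready
    return agents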
def _collect_virsh_logs(cls, nodes: Nodes, log_dir_name):
    log.info("Collecting virsh logs\n")
    os.makedirs(log_dir_name, exist_ok=True)
    virsh_log_path = os.path.join(log_dir_name, "libvirt_logs")
    os.makedirs(virsh_log_path, exist_ok=False)

    libvirt_list_path = os.path.join(virsh_log_path, "virsh_list")
    utils.run_command(f"virsh list --all >> {libvirt_list_path}", shell=True)

    libvirt_net_list_path = os.path.join(virsh_log_path, "virsh_net_list")
    utils.run_command(f"virsh net-list --all >> {libvirt_net_list_path}", shell=True)

    network_name = nodes.get_cluster_network()
    virsh_leases_path = os.path.join(virsh_log_path, "net_dhcp_leases")
    utils.run_command(f"virsh net-dhcp-leases {network_name} >> {virsh_leases_path}", shell=True)

    messages_log_path = os.path.join(virsh_log_path, "messages.log")
    try:
        shutil.copy("/var/log/messages", messages_log_path)
    except FileNotFoundError:
        log.warning("Failed to copy /var/log/messages, file does not exist")

    qemu_libvirt_path = os.path.join(virsh_log_path, "qemu_libvirt_logs")
    os.makedirs(qemu_libvirt_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}.log", f"{qemu_libvirt_path}/{node.name}-qemu.log")
        except FileNotFoundError:
            log.warning(f"Failed to copy {node.name} qemu log, file does not exist")

    console_log_path = os.path.join(virsh_log_path, "console_logs")
    os.makedirs(console_log_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}-console.log", f"{console_log_path}/{node.name}-console.log")
        except FileNotFoundError:
            log.warning(f"Failed to copy {node.name} console log, file does not exist")

    libvirtd_log_path = os.path.join(virsh_log_path, "libvirtd_journal")
    utils.run_command(
        f'journalctl --since "{nodes.setup_time}" '
        f"-u libvirtd -D /run/log/journal >> {libvirtd_log_path}",
        shell=True,
    )
def infraenv_nodes(self, infraenv_controller: NodeController) -> Nodes:
    return Nodes(infraenv_controller)
def nodes(self, controller: NodeController) -> Nodes:
    return Nodes(controller)
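
# `nodes` and `infraenv_nodes` are thin fixture wrappers that adapt a NodeController
# into the higher-level Nodes API. A hedged sketch of a test consuming the `nodes`
# fixture (the test name and assertion are illustrative assumptions):
def test_controller_wiring(self, nodes: Nodes):
    nodes.prepare_nodes()  # provisions VMs through the wrapped controller
    assert len(nodes) > 0  # hypothetical sanity check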
def kube_api_test(
    self,
    kube_api_context: KubeAPIContext,
    nodes: Nodes,
    cluster_config: ClusterConfig,
    prepared_controller_configuration: BaseNodeConfig,
    infra_env_configuration: BaseInfraEnvConfig,
    proxy_server: Optional[Callable] = None,
    *,
    is_disconnected: bool = False,
):
    cluster_name = cluster_config.cluster_name.get()
    api_client = kube_api_context.api_client
    spoke_namespace = kube_api_context.spoke_namespace

    # TODO resolve it from the service if the node controller doesn't have this information
    #  (please see cluster.get_primary_machine_cidr())
    agent_cluster_install = AgentClusterInstall(
        api_client, f"{cluster_name}-agent-cluster-install", spoke_namespace
    )

    secret = Secret(api_client, f"{cluster_name}-secret", spoke_namespace)
    secret.create(pull_secret=cluster_config.pull_secret)

    cluster_deployment = ClusterDeployment(api_client, cluster_name, spoke_namespace)
    cluster_deployment.create(agent_cluster_install_ref=agent_cluster_install.ref, secret=secret)

    proxy = self.setup_proxy(nodes, cluster_config, proxy_server)

    if is_disconnected:
        log.info("getting ignition and install config override for disconnected install")
        ca_bundle = self.get_ca_bundle_from_hub(spoke_namespace)
        self.patch_install_config_with_ca_bundle(cluster_deployment, ca_bundle)
        ignition_config_override = self.get_ignition_config_override(ca_bundle)
    else:
        ignition_config_override = None

    infra_env = InfraEnv(api_client, f"{cluster_name}-infra-env", spoke_namespace)
    infraenv = infra_env.create(
        cluster_deployment, secret, proxy, ignition_config_override, ssh_pub_key=cluster_config.ssh_public_key
    )
    cluster_config.iso_download_path = utils.get_iso_download_path(infraenv.get("metadata", {}).get("name"))
    nodes.prepare_nodes()

    agent_cluster_install.create(
        cluster_deployment_ref=cluster_deployment.ref,
        image_set_ref=self.deploy_image_set(cluster_name, api_client),
        cluster_cidr=cluster_config.cluster_networks[0].cidr,
        host_prefix=cluster_config.cluster_networks[0].host_prefix,
        service_network=cluster_config.service_networks[0].cidr,
        ssh_pub_key=cluster_config.ssh_public_key,
        hyperthreading=cluster_config.hyperthreading,
        control_plane_agents=nodes.masters_count,
        worker_agents=nodes.workers_count,
        proxy=proxy.as_dict() if proxy else {},
    )

    agent_cluster_install.wait_to_be_ready(ready=False)

    if infra_env_configuration.is_static_ip:
        self.apply_static_network_config(kube_api_context, nodes, cluster_name)

    agents = self.start_nodes(nodes, infra_env, cluster_config, infra_env_configuration.is_static_ip)

    if len(nodes) == 1:
        # for a single node, set the CIDR and take the actual IP from the host;
        # the VIPs are the IP of the host
        self._set_agent_cluster_install_machine_cidr(agent_cluster_install, nodes)
        # wait until the IP is set for the node and read it from its inventory
        self.set_single_node_ip(cluster_deployment, nodes)
        api_vip = ingress_vip = get_ip_for_single_node(cluster_deployment, nodes.is_ipv4)
    else:
        # for multi-node, allocate 2 addresses at a safe distance from the beginning
        # of the available address block to leave enough addresses for workers
        access_vips = nodes.controller.get_ingress_and_api_vips()
        api_vip = access_vips["api_vip"]
        ingress_vip = access_vips["ingress_vip"]

    # patch the ACI with the VIPs; the CIDR will be derived from the range
    agent_cluster_install.set_api_vip(api_vip)
    agent_cluster_install.set_ingress_vip(ingress_vip)

    nodes.controller.set_dns(api_ip=api_vip, ingress_ip=ingress_vip)

    log.info("Waiting for install")
    self._wait_for_install(agent_cluster_install, agents)
def capi_test(
    self,
    kube_api_context: KubeAPIContext,
    nodes: Nodes,
    cluster_config: ClusterConfig,
    is_static_ip: bool,
    proxy_server: Optional[Callable] = None,
    *,
    is_disconnected: bool = False,
):
    cluster_name = cluster_config.cluster_name.get()
    api_client = kube_api_context.api_client
    spoke_namespace = kube_api_context.spoke_namespace
    cluster_config.iso_download_path = utils.get_iso_download_path(cluster_name)
    nodes.prepare_nodes()

    secret = Secret(api_client, f"{cluster_name}-secret", spoke_namespace)
    secret.create(pull_secret=cluster_config.pull_secret)

    if is_disconnected:
        log.info("getting ignition and install config override for disconnected install")
        ca_bundle = self.get_ca_bundle_from_hub(spoke_namespace)
        ignition_config_override = self.get_ignition_config_override(ca_bundle)
    else:
        ignition_config_override = None

    proxy = self.setup_proxy(nodes, cluster_config, proxy_server)

    infra_env = InfraEnv(api_client, f"{cluster_name}-infra-env", spoke_namespace)
    infra_env.create(
        cluster_deployment=None,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=proxy,
        ssh_pub_key=cluster_config.ssh_public_key,
    )
    self.start_nodes(nodes, infra_env, cluster_config, is_static_ip)

    hypershift = HyperShift(name=cluster_name, kube_api_client=api_client)

    with utils.pull_secret_file() as ps:
        with tempfile.NamedTemporaryFile(mode="w") as f:
            f.write(cluster_config.ssh_public_key)
            f.flush()
            ssh_public_key_file = f.name
            hypershift.create(
                pull_secret_file=ps,
                agent_namespace=spoke_namespace,
                provider_image=os.environ.get("PROVIDER_IMAGE", ""),
                hypershift_cpo_image=os.environ.get("HYPERSHIFT_IMAGE", ""),
                release_image=os.environ.get("OPENSHIFT_INSTALL_RELEASE_IMAGE", ""),
                ssh_key=ssh_public_key_file,
            )

    hypershift.wait_for_control_plane_ready()

    # WORKAROUND for ovn on minikube
    secret = Secret(api_client, "ovn-master-metrics-cert", hypershift.namespace)
    secret.create_with_data(secret_data={"ca_cert": "dummy data, we only need this secret to exist"})

    cluster_deployment = ClusterDeployment(api_client, cluster_name, f"clusters-{cluster_name}")

    def _cluster_deployment_installed() -> bool:
        return cluster_deployment.get().get("spec", {}).get("installed")

    waiting.wait(
        _cluster_deployment_installed,
        sleep_seconds=1,
        timeout_seconds=60,
        waiting_for="clusterDeployment to get created",
        expected_exceptions=Exception,
    )
    hypershift.wait_for_control_plane_ready()
    self.set_node_count_and_wait_for_ready_nodes(cluster_deployment, hypershift, spoke_namespace, node_count=1)
    self.set_node_count_and_wait_for_ready_nodes(cluster_deployment, hypershift, spoke_namespace, node_count=2)
    self.scale_down_nodepool_and_wait_for_unbounded_agent(
        cluster_deployment, hypershift, spoke_namespace, node_count=1
    )
def kube_api_test(
    kube_api_context,
    nodes: Nodes,
    cluster_config: ClusterConfig,
    proxy_server=None,
    *,
    is_ipv4=True,
    is_disconnected=False,
):
    cluster_name = cluster_config.cluster_name.get()

    # TODO resolve it from the service if the node controller doesn't have this information
    #  (please see cluster.get_primary_machine_cidr())
    machine_cidr = nodes.controller.get_primary_machine_cidr()

    agent_cluster_install = AgentClusterInstall(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-agent-cluster-install",
        namespace=global_variables.spoke_namespace,
    )

    secret = Secret(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-secret",
        namespace=global_variables.spoke_namespace,
    )
    secret.create(pull_secret=cluster_config.pull_secret)

    cluster_deployment = ClusterDeployment(
        kube_api_client=kube_api_context.api_client,
        name=cluster_name,
        namespace=global_variables.spoke_namespace,
    )
    cluster_deployment.create(
        agent_cluster_install_ref=agent_cluster_install.ref,
        secret=secret,
    )

    agent_cluster_install.create(
        cluster_deployment_ref=cluster_deployment.ref,
        image_set_ref=deploy_image_set(cluster_name, kube_api_context),
        cluster_cidr=cluster_config.cluster_networks[0].cidr,
        host_prefix=cluster_config.cluster_networks[0].host_prefix,
        service_network=cluster_config.service_networks[0].cidr,
        ssh_pub_key=cluster_config.ssh_public_key,
        hyperthreading=cluster_config.hyperthreading,
        control_plane_agents=nodes.controller.params.master_count,
        worker_agents=nodes.controller.params.worker_count,
        machine_cidr=machine_cidr,
    )
    agent_cluster_install.wait_to_be_ready(False)

    if is_disconnected:
        log.info("getting ignition and install config override for disconnected install")
        ca_bundle = get_ca_bundle_from_hub()
        patch_install_config_with_ca_bundle(cluster_deployment, ca_bundle)
        ignition_config_override = get_ignition_config_override(ca_bundle)
    else:
        ignition_config_override = None

    proxy = setup_proxy(cluster_config, machine_cidr, cluster_name, proxy_server)

    infra_env = InfraEnv(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-infra-env",
        namespace=global_variables.spoke_namespace,
    )
    infra_env.create(
        cluster_deployment=cluster_deployment,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=proxy,
        ssh_pub_key=cluster_config.ssh_public_key,
    )
    infra_env.status()
    download_iso_from_infra_env(infra_env, cluster_config.iso_download_path)

    log.info("iso downloaded, starting nodes")
    nodes.start_all()

    log.info("waiting for host agent")
    agents = cluster_deployment.wait_for_agents(len(nodes))
    for agent in agents:
        agent.approve()
        set_agent_hostname(nodes[0], agent, is_ipv4)  # Currently only supports single node

    if len(nodes) == 1:
        set_single_node_ip(cluster_deployment, nodes, is_ipv4)

    log.info("Waiting for agent status verification")
    Agent.wait_for_agents_to_install(agents)

    agent_cluster_install.wait_to_be_ready(True)

    log.info("waiting for agent-cluster-install to be in installing state")
    agent_cluster_install.wait_to_be_installing()

    try:
        log.info("installation started, waiting for completion")
        agent_cluster_install.wait_to_be_installed()
        log.info("installation completed successfully")
    except Exception:
        log.exception("Failure during kube-api installation flow:")
        collect_debug_info_from_cluster(cluster_deployment, agent_cluster_install)
def capi_test(
    kube_api_context,
    nodes: Nodes,
    cluster_config: ClusterConfig,
    proxy_server=None,
    *,
    is_ipv4=True,
    is_disconnected=False,
):
    cluster_name = cluster_config.cluster_name.get()

    # TODO resolve it from the service if the node controller doesn't have this information
    #  (please see cluster.get_primary_machine_cidr())
    machine_cidr = nodes.controller.get_primary_machine_cidr()

    secret = Secret(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-secret",
        namespace=global_variables.spoke_namespace,
    )
    secret.create(pull_secret=cluster_config.pull_secret)

    if is_disconnected:
        log.info("getting ignition and install config override for disconnected install")
        ca_bundle = get_ca_bundle_from_hub()
        ignition_config_override = get_ignition_config_override(ca_bundle)
    else:
        ignition_config_override = None

    proxy = setup_proxy(cluster_config, machine_cidr, cluster_name, proxy_server)

    infra_env = InfraEnv(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-infra-env",
        namespace=global_variables.spoke_namespace,
    )
    infra_env.create(
        cluster_deployment=None,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=proxy,
        ssh_pub_key=cluster_config.ssh_public_key,
    )
    infra_env.status()
    download_iso_from_infra_env(infra_env, cluster_config.iso_download_path)

    log.info("iso downloaded, starting nodes")
    nodes.start_all()

    log.info("waiting for host agent")
    agents = infra_env.wait_for_agents(len(nodes))
    for agent in agents:
        agent.approve()
        set_agent_hostname(nodes[0], agent, is_ipv4)

    hypershift = HyperShift(name=cluster_name)

    with utils.pull_secret_file() as ps:
        with tempfile.NamedTemporaryFile(mode="w") as f:
            f.write(cluster_config.ssh_public_key)
            f.flush()
            ssh_public_key_file = f.name
            hypershift.create(pull_secret_file=ps, ssh_key=ssh_public_key_file)

    cluster_deployment = ClusterDeployment(
        kube_api_client=kube_api_context.api_client,
        name=cluster_name,
        namespace=f"clusters-{cluster_name}",
    )

    def _cluster_deployment_installed() -> bool:
        return cluster_deployment.get().get("spec", {}).get("installed")

    waiting.wait(
        _cluster_deployment_installed,
        sleep_seconds=1,
        timeout_seconds=60,
        waiting_for="clusterDeployment to get created",
        expected_exceptions=Exception,
    )

    set_node_count_and_wait_for_ready_nodes(cluster_deployment, hypershift, kube_api_context, 1)
    set_node_count_and_wait_for_ready_nodes(cluster_deployment, hypershift, kube_api_context, 2)