def waiting_for_installation_completion(
    self,
    controller: NodeController,
    cluster_configuration: ClusterConfig,
    skip_logs=False,
):
    """Wait until all operators are available, then collect logs.

    The first address of the first entry in ``controller.master_ips`` is
    written to /etc/hosts as the API VIP for the cluster, after which
    ``self.all_operators_available`` is polled every 20 seconds for up to
    one hour.

    Args:
        controller: Node controller exposing ``master_ips``.
        cluster_configuration: Supplies the cluster name and base DNS domain.
        skip_logs: When true, logs are not collected after a successful
            wait. Logs are always collected when the wait fails.

    Raises:
        Exception: Whatever the /etc/hosts configuration or the wait raised;
            it is logged, logs are collected, and it is re-raised unchanged.
    """
    first_master_ip = controller.master_ips[0][0]

    try:
        log.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(
            cluster_name=cluster_configuration.cluster_name.get(),
            base_dns_domain=cluster_configuration.base_dns_domain,
            api_vip=first_master_ip,
        )

        log.info("Waiting for installation to complete...")
        waiting.wait(
            self.all_operators_available,
            sleep_seconds=20,
            timeout_seconds=60 * 60,
            waiting_for="all operators to get up",
        )
        log.info("Installation completed successfully!")
    except Exception:
        log.exception("An unexpected error has occurred while waiting for installation to complete")
        # A failed installation is exactly when logs matter most, so the
        # caller's skip_logs preference is ignored on this path.
        self.log_collection(first_master_ip)
        raise

    # Reached only on success (the handler above always re-raises).
    if skip_logs:
        return
    self.log_collection(first_master_ip)
def update_oc_config(nodes, cluster):
    """Point the local environment at ``cluster``'s API endpoint.

    Exports ``KUBECONFIG`` from ``cluster.kubeconfig_path`` and registers the
    API address in /etc/hosts. With exactly one master the address is the
    node IP resolved on the primary machine CIDR; otherwise it is the
    ``api_vip`` reported by the nodes' controller.

    Args:
        nodes: Object exposing ``masters_count`` and ``controller``.
        cluster: Object exposing the kubeconfig path, name, id, API client
            and the single-node IP lookup helpers.
    """
    os.environ["KUBECONFIG"] = cluster.kubeconfig_path

    if nodes.masters_count != 1:
        # Multi-master: the controller knows the virtual IPs.
        api_address = nodes.controller.get_ingress_and_api_vips()["api_vip"]
    else:
        # Single node: there is no VIP, use the node's own address.
        primary_cidr = cluster.get_primary_machine_cidr()
        api_address = cluster.get_ip_for_single_node(
            cluster.api_client,
            cluster.id,
            primary_cidr,
        )

    utils.config_etc_hosts(
        cluster_name=cluster.name,
        base_dns_domain=global_variables.base_dns_domain,
        api_vip=api_address,
    )
def waiting_for_installation_completion(controller):
    """Wait for all operators to come up, collecting logs afterwards.

    The first address of the first entry in ``controller.master_ips`` is
    registered in /etc/hosts as the API VIP, then ``all_operators_up`` is
    polled every 20 seconds for up to one hour. Log collection runs whether
    the wait succeeds or raises.

    Args:
        controller: Object exposing ``master_ips``, ``cluster_name`` and
            ``cluster_domain``.
    """
    master_address = controller.master_ips[0][0]

    try:
        logging.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(
            cluster_name=controller.cluster_name,
            base_dns_domain=controller.cluster_domain,
            api_vip=master_address,
        )

        logging.info("Waiting for installation to complete...")
        waiting.wait(
            all_operators_up,
            sleep_seconds=20,
            timeout_seconds=60 * 60,
            waiting_for="all operators to get up",
        )
        logging.info("Installation completed successfully!")
    finally:
        # Runs on both success and failure; any exception from the body
        # still propagates once collection has finished.
        log_collection(master_address)
def _must_gather_kube_api(cluster_name, cluster_deployment, agent_cluster_install, output_folder):
    """Run must-gather for a cluster described by kube-API resources.

    Downloads the kubeconfig to
    ``<output_folder>/kubeconfig/<cluster_name>_kubeconfig.yaml``, registers
    the API address in /etc/hosts and then invokes ``download_must_gather``.

    Args:
        cluster_name: Name used for the kubeconfig file and the hosts entry.
        cluster_deployment: Resource whose ``get()["spec"]["baseDomain"]``
            supplies the base DNS domain.
        agent_cluster_install: Resource providing the install spec and the
            kubeconfig download.
        output_folder: Destination directory for the gathered data.
    """
    kubeconfig_file = os.path.join(output_folder, "kubeconfig", f"{cluster_name}_kubeconfig.yaml")

    install_spec = agent_cluster_install.get_spec()
    agent_cluster_install.download_kubeconfig(kubeconfig_path=kubeconfig_file)
    log.info("Agent cluster install spec %s", install_spec)

    # controlPlaneAgents defaults to 3 when the spec does not state it.
    provision_requirements = install_spec.get("provisionRequirements", {})
    control_plane_agents = provision_requirements.get("controlPlaneAgents", 3)

    if control_plane_agents != 1:
        # The spec is fetched again here, exactly as the original code did.
        api_address = agent_cluster_install.get_spec()["apiVIP"]
    else:
        # In case of single node we should set the node IP and not a VIP.
        machine_cidr = install_spec["networking"]["machineNetwork"][0]["cidr"]
        api_address = get_ip_for_single_node(cluster_deployment, is_cidr_is_ipv4(machine_cidr))

    base_domain = cluster_deployment.get()["spec"]["baseDomain"]
    config_etc_hosts(cluster_name, base_domain, api_address)
    download_must_gather(kubeconfig_file, output_folder)
def download_logs(
    client: InventoryClient,
    cluster: dict,
    dest: str,
    must_gather: bool,
    update_by_events: bool = False,
    retry_interval: int = RETRY_INTERVAL,
):
    """Download the available artifacts of ``cluster`` into a folder under ``dest``.

    The output folder is recreated and then filled, step by step, with:
    metadata, metrics, ignition/install-config files, manifests, per-host
    ignitions, infra-env and cluster events, the cluster logs archive, the
    no-ingress kubeconfig and, optionally, must-gather output. Most steps
    run inside their own ``SuppressAndLog`` block (presumably suppressing
    and logging the listed exception types, so one failing download does
    not abort the rest; confirm against that helper).

    Args:
        client: Inventory client used for every download.
        cluster: Cluster description. Reads ``id``, ``status``, ``name``,
            ``base_dns_domain`` and ``hosts``; ``hosts`` is filled in from
            the service when missing or empty (this mutates the dict).
        dest: Base directory under which the cluster folder is created.
        must_gather: When true, also configure /etc/hosts for the API VIP
            and run must-gather with the no-ingress kubeconfig.
        update_by_events: Forwarded to ``is_update_needed``.
        retry_interval: Seconds to sleep between cluster-log download
            attempts whose verification failed.

    Returns:
        None. Returns early, touching nothing, when ``is_update_needed``
        reports that the existing folder is up to date.
    """
    # Treat a missing, None or empty host list the same way: ask the service.
    if not cluster.get("hosts"):
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    recreate_folder(output_folder)
    cluster_files_folder = os.path.join(output_folder, "cluster_files")
    recreate_folder(cluster_files_folder)

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, "metadata.json"))

        with SuppressAndLog(requests.exceptions.RequestException, ConnectionError, KeyboardInterrupt):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in (
            "bootstrap.ign",
            "master.ign",
            "worker.ign",
            "install-config.yaml",
        ):
            with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                client.download_and_save_file(
                    cluster["id"],
                    cluster_file,
                    os.path.join(cluster_files_folder, cluster_file),
                )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            download_manifests(client, cluster["id"], output_folder)

        # Several hosts may share one infra-env; fetch each infra-env's
        # events only once.
        seen_infra_envs = set()
        for host in cluster["hosts"]:
            host_id, infra_env_id = host["id"], host["infra_env_id"]

            with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                client.download_host_ignition(infra_env_id, host_id, cluster_files_folder)

            if infra_env_id not in seen_infra_envs:
                seen_infra_envs.add(infra_env_id)
                with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                    client.download_infraenv_events(
                        infra_env_id,
                        get_infraenv_events_path(infra_env_id, output_folder),
                    )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            client.download_cluster_events(cluster["id"], get_cluster_events_path(cluster, output_folder))
            # Ship the events viewer page that lives next to this module.
            shutil.copy2(
                os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"),
                output_folder,
            )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.CONFIGURING], 2
            )
            are_masters_in_join_or_done_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.JOINED, HostsProgressStages.DONE], 2
            )
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_or_done_state else MAX_RETRIES
            is_installed = cluster["status"] == ClusterStatus.INSTALLED
            is_controller_expected = is_installed or are_masters_in_configuring_state
            min_number_of_logs = min_number_of_log_files(cluster, is_controller_expected)

            # The archive path does not change between attempts.
            cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

            for attempt in range(max_retries):
                # Drop the archive left by a previous, unverified attempt.
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster["id"], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar,
                        min_number_of_logs,
                        installation_success=is_installed,
                        check_oc=are_masters_in_join_or_done_state,
                    )
                    break
                except AssertionError as ex:
                    log.warning("Cluster logs verification failed: %s", ex)

                    # Skip sleeping on the last retry. The bound must be the
                    # loop's own limit: comparing against MAX_RETRIES was
                    # wrong whenever MUST_GATHER_MAX_RETRIES was selected.
                    if attempt < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)
            # If every attempt fails verification the last archive stays in
            # place and no error is raised from this loop.

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster["id"], kubeconfig_path)

            # must-gather needs the kubeconfig downloaded just above, so it
            # lives in the same block and is skipped if that download fails.
            if must_gather:
                config_etc_hosts(
                    cluster["name"],
                    cluster["base_dns_domain"],
                    client.get_api_vip(cluster, cluster["id"]),
                )
                download_must_gather(kubeconfig_path, output_folder)
    finally:
        # Always make whatever was downloaded readable and traversable.
        run_command(f"chmod -R ugo+rx '{output_folder}'")