def log_collection(self, master_ip: Optional[str]):
    """
    Gather diagnostics about the installation that has just run.

    Three collection steps are attempted in order: sosreport, installer-gather
    and must-gather. Each one runs inside its own SuppressAndLog(Exception)
    block (presumably so that one failing step does not prevent the next ones;
    confirm against SuppressAndLog).

    @param master_ip The IP address of the master node. Used to SSH into the node when doing installer gather.
                     When not given, installer gather log collection is skipped.
    """
    # An exception that is currently being handled means we were called from a failure path.
    active_exception_type = sys.exc_info()[0]
    outcome = "successful" if active_exception_type is None else "failed"
    log.info(f"Collecting logs after a {outcome} installation")

    with SuppressAndLog(Exception):
        log.info("Gathering sosreport data from host...")
        gather_sosreport_data(output_dir=IBIP_DIR)

    # installer-gather needs a node to SSH into, so it only runs when an IP was supplied.
    if master_ip is not None:
        with SuppressAndLog(Exception):
            log.info("Gathering information via installer-gather...")
            utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
            self.installer_gather(
                ip=master_ip,
                ssh_key=consts.DEFAULT_SSH_PRIVATE_KEY_PATH,
                out_dir=INSTALLER_GATHER_DIR,
            )

    with SuppressAndLog(Exception):
        log.info("Gathering information via must-gather...")
        download_must_gather(KUBE_CONFIG, IBIP_DIR)
def is_update_needed(output_folder: str, update_on_events_update: bool, client: InventoryClient, cluster: dict):
    """
    Decide whether the logs stored in output_folder should be downloaded again.

    @param output_folder Folder holding the previously downloaded logs of the cluster.
    @param update_on_events_update When False, an existing output folder is never refreshed.
    @param client Client used to download the latest cluster events.
    @param cluster Cluster dictionary; its "id" entry is used for the events download.
    @return True when the folder is missing, when no events file was stored in it, or when
            the freshly downloaded events differ from the stored ones (the stored events file
            is deleted in that last case). False otherwise.
    """
    if not os.path.isdir(output_folder):
        return True

    if not update_on_events_update:
        return False

    destination_event_file_path = get_cluster_events_path(cluster, output_folder)

    # The folder can exist without an events file in it. filecmp.cmp() would raise
    # FileNotFoundError in that case, so treat it as "nothing to compare against".
    if not os.path.isfile(destination_event_file_path):
        log.info(f"update needed, no previous events file found at {destination_event_file_path}")
        return True

    # The temporary file is closed (and removed) by the context manager, no explicit close() needed.
    with tempfile.NamedTemporaryFile() as latest_event_tp:
        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster["id"], latest_event_tp.name)

        # NOTE(review): if the download above failed, the temporary file is presumably still empty
        # and will normally differ from the stored events, which results in an update - confirm
        # this is the desired outcome.
        events_unchanged = filecmp.cmp(destination_event_file_path, latest_event_tp.name)

    if events_unchanged:
        log.info(f"no new events found for {destination_event_file_path}")
        return False

    log.info(f"update needed, new events found, deleting {destination_event_file_path}")
    os.remove(destination_event_file_path)
    return True
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
    ipxe_server,
):
    """
    Generator that creates a Cluster for a test, yields it, and tears it down afterwards.

    Setup optionally configures a proxy server and, when global_variables.ipxe_boot is set,
    starts an iPXE server controller and points the cluster network at its URL through a
    LibvirtController.

    Teardown collects logs when the test failed and, if global_variables.test_teardown is set,
    cancels a running installation (failed tests only), deregisters the infra-env and deletes
    the cluster.

    @param ipxe_server Callable that builds the iPXE server controller (called with name and api_client).
    """
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")

    cluster = Cluster(
        nodes=prepare_nodes_network,
        infra_env_config=infra_env_configuration,
        config=cluster_configuration,
        api_client=api_client,
    )

    proxy_required = self._does_need_proxy_server(prepare_nodes_network)
    if proxy_required:
        self.__set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    if global_variables.ipxe_boot:
        infra_env = cluster.generate_infra_env()
        controller = ipxe_server(name="ipxe_controller", api_client=cluster.api_client)
        controller.start(infra_env_id=infra_env.id, cluster_name=cluster.name)

        ipxe_server_url = f"http://{consts.DEFAULT_IPXE_SERVER_IP}:{consts.DEFAULT_IPXE_SERVER_PORT}/{cluster.name}"
        cluster_network = cluster.nodes.get_cluster_network()
        LibvirtController(
            config=cluster.nodes.controller,
            entity_config=cluster_configuration,
        ).set_ipxe_url(network_name=cluster_network, ipxe_url=ipxe_server_url)

    yield cluster

    # ---- teardown ----
    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        # Only a failed test can leave an installation in progress that needs cancelling.
        if global_variables.test_teardown and (cluster.is_installing() or cluster.is_finalizing()):
            cluster.cancel_install()

    if not global_variables.test_teardown:
        return

    with SuppressAndLog(ApiException):
        cluster.deregister_infraenv()

    with suppress(ApiException):
        log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
        cluster.delete()
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
    ipxe_server: Callable,
    tang_server: Callable,
):
    """
    Generator that creates a Cluster for a test, yields it, and tears it down afterwards.

    Setup starts a tang server when the configured disk encryption mode is TANG, optionally
    configures a proxy server and, when global_variables.ipxe_boot is set, runs an iPXE server
    controller and updates cluster_configuration.iso_download_path.

    Teardown collects logs when the test failed and, if global_variables.test_teardown is set,
    cancels a running installation (failed tests only), deregisters the infra-env and deletes
    the cluster.

    @param ipxe_server Callable that builds the iPXE server controller (called with name and api_client).
    @param tang_server Callable handed to self._start_tang_server together with the cluster configuration.
    """
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")

    uses_tang = cluster_configuration.disk_encryption_mode == consts.DiskEncryptionMode.TANG
    if uses_tang:
        self._start_tang_server(tang_server, cluster_configuration)

    cluster = Cluster(
        nodes=prepare_nodes_network,
        infra_env_config=infra_env_configuration,
        config=cluster_configuration,
        api_client=api_client,
    )

    proxy_required = self._does_need_proxy_server(prepare_nodes_network)
    if proxy_required:
        self.__set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    if global_variables.ipxe_boot:
        infra_env = cluster.generate_infra_env()
        controller = ipxe_server(name="ipxe_controller", api_client=cluster.api_client)
        controller.run(infra_env_id=infra_env.id, cluster_name=cluster.name)

        entity_name = infra_env_configuration.entity_name.get()
        cluster_configuration.iso_download_path = utils.get_iso_download_path(entity_name)

    yield cluster

    # ---- teardown ----
    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        # Only a failed test can leave an installation in progress that needs cancelling.
        if global_variables.test_teardown and (cluster.is_installing() or cluster.is_finalizing()):
            cluster.cancel_install()

    if not global_variables.test_teardown:
        return

    with SuppressAndLog(ApiException):
        cluster.deregister_infraenv()

    with suppress(ApiException):
        log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
        cluster.delete()
def main():
    """
    Command line entry point: convert collected logs and service events into JUnit XML reports.

    Arguments:
        --src  Logs dir source.
        --dst  Junit XML report destination (required). The directory, including any missing
               parent directories, is created when it does not exist.

    The two conversions run independently of each other, each inside its own
    SuppressAndLog(BaseException) block.
    """
    parser = ArgumentParser(description="Logs junit parser")
    parser.add_argument("--src", help="Logs dir source", type=str)
    # Without --dst, Path(None) below raises a TypeError; let argparse report the usage error instead.
    parser.add_argument("--dst", help="Junit XML report destination", type=str, required=True)
    args = parser.parse_args()

    report_dir = Path(args.dst)
    # parents=True so that a nested destination such as "reports/junit" can be created in one go.
    report_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): BaseException also covers KeyboardInterrupt/SystemExit. Kept as in the original
    # since the conversion looks like a deliberate best-effort step - confirm this is intended.
    with SuppressAndLog(BaseException):
        log.info(f"Parsing logs from `{args.src}` to `{report_dir}`")
        LogsConverter.export_service_logs_to_junit_suites(Path(args.src), report_dir)

    with SuppressAndLog(BaseException):
        log.info(f"Parsing service events from `{args.src}` to `{report_dir}`")
        EventsConverter.export_service_events_to_junit_suite(Path(args.src), report_dir)
def download_logs_kube_api(
    api_client: ApiClient,
    cluster_name: str,
    namespace: str,
    dest: str,
    must_gather: bool,
    management_kubeconfig: str,
):
    """
    Collect debug information for a cluster managed through the kube API into dest/<cluster_name>.

    @param api_client Kube API client used to build the ClusterDeployment / AgentClusterInstall objects.
    @param cluster_name Name of the ClusterDeployment; also the name of the output sub-folder.
    @param namespace Namespace of the cluster resources. A namespace starting with "clusters"
                     selects the hypershift dump instead of the regular must-gather.
    @param dest Parent folder of the output folder.
    @param must_gather When True, must-gather (or the hypershift dump) is collected as well.
    @param management_kubeconfig Passed to HyperShift.dump for the hypershift case.
    """
    cluster_deployment = ClusterDeployment(kube_api_client=api_client, name=cluster_name, namespace=namespace)
    install_ref_name = cluster_deployment.get()["spec"]["clusterInstallRef"]["name"]
    agent_cluster_install = AgentClusterInstall(
        kube_api_client=api_client,
        name=install_ref_name,
        namespace=namespace,
    )

    output_folder = os.path.join(dest, f"{cluster_name}")
    recreate_folder(output_folder)

    try:
        with SuppressAndLog(requests.exceptions.RequestException, ConnectionError):
            collect_debug_info_from_cluster(cluster_deployment, agent_cluster_install, output_folder)

        if not must_gather:
            # The finally clause below still runs on this early exit.
            return

        recreate_folder(os.path.join(output_folder, "must-gather"))
        with SuppressAndLog(Exception):
            if not namespace.startswith("clusters"):
                _must_gather_kube_api(cluster_name, cluster_deployment, agent_cluster_install, output_folder)
            else:
                # in case of hypershift
                log.info("Dumping hypershift files")
                HyperShift(name=cluster_name).dump(os.path.join(output_folder, "dump"), management_kubeconfig)
    finally:
        # Make whatever was collected readable, regardless of how the collection ended.
        run_command(f"chmod -R ugo+rx '{output_folder}'")
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
):
    """
    Generator that creates a Cluster for a test, yields it, and tears it down afterwards.

    Setup optionally configures a proxy server. Teardown collects logs when the test failed
    and, if global_variables.test_teardown is set, cancels a running installation (failed
    tests only), deregisters the infra-env and deletes the cluster.
    """
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")

    cluster = Cluster(
        nodes=prepare_nodes_network,
        infra_env_config=infra_env_configuration,
        config=cluster_configuration,
        api_client=api_client,
    )

    proxy_required = self._does_need_proxy_server(prepare_nodes_network)
    if proxy_required:
        self._set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    yield cluster

    # ---- teardown ----
    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        # Only a failed test can leave an installation in progress that needs cancelling.
        if global_variables.test_teardown and (cluster.is_installing() or cluster.is_finalizing()):
            cluster.cancel_install()

    if not global_variables.test_teardown:
        return

    with SuppressAndLog(ApiException):
        cluster.deregister_infraenv()

    with suppress(ApiException):
        log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
        cluster.delete()
def infra_env(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    proxy_server,
    prepare_infraenv_nodes_network: Nodes,
    infra_env_configuration: InfraEnvConfig,
):
    """
    Generator that creates an InfraEnv for a test, yields it, and deregisters it afterwards.

    Deregistration only happens when global_variables.test_teardown is set; an ApiException
    raised by it is handled by SuppressAndLog.
    """
    log.debug(f"--- SETUP --- Creating InfraEnv for test: {request.node.name}\n")

    infra_env = InfraEnv(
        nodes=prepare_infraenv_nodes_network,
        config=infra_env_configuration,
        api_client=api_client,
    )
    yield infra_env

    log.info("--- TEARDOWN --- Infra env\n")
    if not global_variables.test_teardown:
        return

    with SuppressAndLog(ApiException):
        infra_env.deregister()
def delete_dnsmasq_conf_file(self, cluster_name):
    """
    Remove the NetworkManager dnsmasq configuration file that belongs to cluster_name.

    A missing file is not an error: FileNotFoundError is handled by SuppressAndLog.

    @param cluster_name Cluster name used to build the "openshift-<cluster_name>.conf" file name.
    """
    fname = f"/etc/NetworkManager/dnsmasq.d/openshift-{cluster_name}.conf"

    with SuppressAndLog(FileNotFoundError):
        log.info(f"--- TEARDOWN --- deleting dnsmasq file: {fname}\n")
        os.remove(fname)
def download_logs(
    client: InventoryClient,
    cluster: dict,
    dest: str,
    must_gather: bool,
    update_by_events: bool = False,
    retry_interval: int = RETRY_INTERVAL,
):
    """
    Download the artifacts of a cluster (metadata, metrics, cluster files, manifests, host
    ignitions, events, cluster logs, kubeconfig and optionally must-gather) into a folder under dest.

    Most download steps run inside their own SuppressAndLog block, presumably so that one failing
    step does not stop the remaining ones (confirm against SuppressAndLog). The output folder is
    always made readable at the end, whether or not the collection succeeded.

    @param client Client used for all downloads.
    @param cluster Cluster dictionary. Its "hosts" entry is filled in from the service when it is
                   missing or empty.
    @param dest Parent folder under which the cluster's output folder is created.
    @param must_gather When True, must-gather is collected after the kubeconfig was downloaded.
    @param update_by_events Forwarded to is_update_needed to decide whether an existing output
                            folder has to be refreshed.
    @param retry_interval Seconds to wait between cluster-logs download attempts.
    """
    # Also covers a "hosts" entry that is present but None.
    if not cluster.get("hosts"):
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    cluster_files_folder = os.path.join(output_folder, "cluster_files")
    recreate_folder(output_folder)
    recreate_folder(cluster_files_folder)

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, "metadata.json"))

        with SuppressAndLog(requests.exceptions.RequestException, ConnectionError, KeyboardInterrupt):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in (
            "bootstrap.ign",
            "master.ign",
            "worker.ign",
            "install-config.yaml",
        ):
            with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                client.download_and_save_file(
                    cluster["id"], cluster_file, os.path.join(cluster_files_folder, cluster_file)
                )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            download_manifests(client, cluster["id"], output_folder)

        # Several hosts may share an infra-env; its events only need to be downloaded once.
        seen_infra_env_ids = set()
        for host in cluster["hosts"]:
            host_id, infra_env_id = host["id"], host["infra_env_id"]

            with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                client.download_host_ignition(infra_env_id, host_id, cluster_files_folder)

            if infra_env_id not in seen_infra_env_ids:
                seen_infra_env_ids.add(infra_env_id)
                with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
                    client.download_infraenv_events(
                        infra_env_id, get_infraenv_events_path(infra_env_id, output_folder)
                    )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            client.download_cluster_events(cluster["id"], get_cluster_events_path(cluster, output_folder))
            shutil.copy2(
                os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"),
                output_folder,
            )

        with SuppressAndLog(assisted_service_client.rest.ApiException, KeyboardInterrupt):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.CONFIGURING], 2
            )
            are_masters_in_join_or_done_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.JOINED, HostsProgressStages.DONE], 2
            )
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_or_done_state else MAX_RETRIES
            is_installed = cluster["status"] == ClusterStatus.INSTALLED
            is_controller_expected = is_installed or are_masters_in_configuring_state
            min_number_of_logs = min_number_of_log_files(cluster, is_controller_expected)

            # The tar path does not change between attempts.
            cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

            for attempt in range(max_retries):
                # Start every attempt from a clean slate.
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster["id"], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar,
                        min_number_of_logs,
                        installation_success=is_installed,
                        check_oc=are_masters_in_join_or_done_state,
                    )
                except AssertionError as ex:
                    log.warning("Cluster logs verification failed: %s", ex)

                    # Skip sleeping on last retry. The bound must be the one the loop iterates
                    # over (max_retries), which is not necessarily MAX_RETRIES.
                    if attempt < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)
                else:
                    break

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster["id"], kubeconfig_path)

            # must-gather uses the kubeconfig downloaded just above, so it lives in the same block.
            if must_gather:
                config_etc_hosts(
                    cluster["name"],
                    cluster["base_dns_domain"],
                    client.get_api_vip(cluster, cluster["id"]),
                )
                download_must_gather(kubeconfig_path, output_folder)
    finally:
        # Make whatever was collected readable, regardless of how the collection ended.
        run_command(f"chmod -R ugo+rx '{output_folder}'")