def is_update_needed(output_folder: str, update_on_events_update: bool, client: InventoryClient, cluster: dict):
    if not os.path.isdir(output_folder):
        return True

    if not update_on_events_update:
        return False

    destination_event_file_path = get_cluster_events_path(cluster, output_folder)

    with tempfile.NamedTemporaryFile() as latest_event_tp:
        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'], latest_event_tp.name)

        if filecmp.cmp(destination_event_file_path, latest_event_tp.name):
            latest_event_tp.close()
            log.info("no new events found for {}".format(destination_event_file_path))
            need_update = False
        else:
            log.info("update needed, new events found, deleting {}".format(destination_event_file_path))
            os.remove(destination_event_file_path)
            latest_event_tp.close()
            need_update = True

    return need_update
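# Hedged usage sketch (not part of the original module): is_update_needed() is the guard
# that decides whether an existing logs folder should be refreshed because new cluster
# events have appeared. cluster_get()/to_dict() mirror the swagger-generated client used
# elsewhere in this file, but the exact call shape here is an assumption for illustration.
def _example_refresh_cluster_logs(client: InventoryClient, cluster_id: str, dest: str = "/tmp/assisted-logs"):
    """Re-download logs for a cluster only when its events have changed."""
    cluster = client.cluster_get(cluster_id).to_dict()  # assumed: swagger model exposes to_dict()
    output_folder = get_logs_output_folder(dest, cluster)
    if is_update_needed(output_folder, update_on_events_update=True, client=client, cluster=cluster):
        download_logs(client, cluster, dest, must_gather=False, update_by_events=True)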
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool,
                  retry_interval: int = RETRY_INTERVAL):
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

        with suppress(assisted_service_client.rest.ApiException):
            client.download_ignition_files(cluster['id'], os.path.join(output_folder, "cluster_files"))

        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppress(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host_id, os.path.join(output_folder, "cluster_files"))

        with suppress(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'],
                                           os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

        with suppress(assisted_service_client.rest.ApiException):
            for i in range(MAX_RETRIES):
                cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)

                min_number_of_logs = len(cluster['hosts']) + 1 if cluster['status'] == ClusterStatus.INSTALLED \
                    else len(cluster['hosts'])

                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs,
                                         cluster['status'] == ClusterStatus.INSTALLED)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on last retry
                    if i < MAX_RETRIES - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppress(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster))
            download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))

    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
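# Hedged sketch: the download-then-verify loop above is an instance of a generic
# "retry until a verifier stops raising AssertionError" pattern. The helper below is
# an illustration of that pattern only; it is not a function from this module.
def _retry_until_verified(action, verifier, max_retries: int, retry_interval: int) -> bool:
    """Run action(), then verifier(); retry up to max_retries times on AssertionError."""
    for attempt in range(max_retries):
        action()
        try:
            verifier()
            return True
        except AssertionError as ex:
            log.warning(f"Verification failed: {ex}")
            # Skip sleeping on the last retry
            if attempt < max_retries - 1:
                time.sleep(retry_interval)
    return False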
def write_metadata_file(client: InventoryClient, cluster: dict, file_name: str):
    d = {'cluster': cluster}
    d.update(client.get_versions())

    with suppress(KeyError):
        d['link'] = f"{get_ui_url_from_api_url(client.inventory_url)}/clusters/{cluster['id']}"

    with open(file_name, 'w') as metadata_file:
        json.dump(d, metadata_file, sort_keys=True, indent=4)
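# Illustrative sketch only: a reader for the file produced by write_metadata_file().
# The key layout ('cluster', optional 'link', plus whatever client.get_versions()
# returns) follows from the function above; anything beyond that is an assumption.
def _example_read_metadata(file_name: str) -> dict:
    """Load the metadata JSON and return the stored cluster id and UI link (if any)."""
    with open(file_name) as metadata_file:
        metadata = json.load(metadata_file)
    return {
        'cluster_id': metadata['cluster']['id'],
        'ui_link': metadata.get('link'),  # present only when the UI URL could be derived
    }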
def _update_day2_config(self, api_client: InventoryClient, cluster_id: str):
    day2_cluster: models.cluster.Cluster = api_client.cluster_get(cluster_id)

    self.update_config(
        openshift_version=day2_cluster.openshift_version,
        cluster_name=ClusterName(day2_cluster.name),
        additional_ntp_source=day2_cluster.additional_ntp_source,
        user_managed_networking=day2_cluster.user_managed_networking,
        high_availability_mode=day2_cluster.high_availability_mode,
        olm_operators=day2_cluster.monitored_operators,
        base_dns_domain=day2_cluster.base_dns_domain,
        vip_dhcp_allocation=day2_cluster.vip_dhcp_allocation,
    )
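# Hedged sketch: _update_day2_config() relies on self.update_config() overwriting only
# the fields passed as keyword arguments. A minimal stand-in could look like the stub
# below; the project's real config class and update_config() may differ.
class _ExampleConfig:
    def update_config(self, **kwargs):
        # Overwrite only the attributes explicitly provided, leaving the rest untouched.
        for key, value in kwargs.items():
            setattr(self, key, value)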
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool):
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))
    write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

    with suppress(assisted_service_client.rest.ApiException):
        client.download_ignition_files(cluster['id'], os.path.join(output_folder, "cluster_files"))

    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_events(cluster['id'],
                                       os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
        shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_logs(cluster['id'],
                                     os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar"))

    kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

    with suppress(assisted_service_client.rest.ApiException):
        client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

    if must_gather:
        recreate_folder(os.path.join(output_folder, "must-gather"))
        config_etc_hosts(cluster['name'], cluster['base_dns_domain'], cluster['api_vip'])
        download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))

    run_command("chmod -R ugo+rx '%s'" % output_folder)
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool,
                  update_by_events: bool = False, retry_interval: int = RETRY_INTERVAL, pull_secret=""):
    if "hosts" not in cluster or len(cluster["hosts"]) == 0:
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)

    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

        with suppressAndLog(AssertionError, ConnectionError, requests.exceptions.ConnectionError):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in ("bootstrap.ign", "master.ign", "worker.ign", "install-config.yaml"):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_and_save_file(cluster['id'], cluster_file,
                                              os.path.join(output_folder, "cluster_files", cluster_file))

        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host_id, os.path.join(output_folder, "cluster_files"))

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'], get_cluster_events_path(cluster, output_folder))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

        with suppressAndLog(assisted_service_client.rest.ApiException):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.JOINED], 2)
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_state else MAX_RETRIES
            is_controller_expected = cluster['status'] == ClusterStatus.INSTALLED or are_masters_in_configuring_state
            min_number_of_logs = min_number_of_log_files(cluster, is_controller_expected)

            for i in range(max_retries):
                cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)

                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs,
                                         installation_success=(cluster['status'] == ClusterStatus.INSTALLED),
                                         check_oc=are_masters_in_join_state)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on last retry
                    if i < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster, pull_secret))
            download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))

    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
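# Hedged driver sketch, not the project's CLI entry point: iterate over clusters known
# to the inventory and collect logs for each. clusters_list() is an assumed client
# method; the real entry point may gather clusters differently.
def _example_download_all(client: InventoryClient, dest: str, must_gather: bool = False):
    for cluster in client.clusters_list():  # assumed to return a list of cluster dicts
        try:
            download_logs(client, cluster, dest, must_gather, update_by_events=True)
        except Exception:
            # Keep collecting logs for the remaining clusters even if one fails.
            log.exception(f"Failed to download logs for cluster {cluster['id']}")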