def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    log.info("Waiting %s till cluster finished installation", timeout)
    # TODO: Change host validation for only previous known hosts
    try:
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=len(cluster.hosts),
            statuses=[consts.NodesStatus.INSTALLED],
            timeout=timeout,
            interval=60,
        )
        utils.wait_till_cluster_is_in_status(
            client=client,
            cluster_id=cluster.id,
            statuses=[consts.ClusterStatus.INSTALLED],
            timeout=consts.CLUSTER_INSTALLATION_TIMEOUT
            if cluster.high_availability_mode == "Full"
            else consts.CLUSTER_INSTALLATION_TIMEOUT * 2,
        )
    finally:
        output_folder = f'build/{cluster.id}'
        utils.recreate_folder(output_folder)
        download_logs_from_all_hosts(client=client, cluster_id=cluster.id,
                                     output_folder=output_folder)
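
# Illustrative sketch (hypothetical helper, not the project's code): the
# utils.wait_till_* calls above presumably follow this generic poll-until-status
# pattern; poll_until and get_status are invented names.
import time


def poll_until(get_status, expected_statuses, timeout, interval=60):
    """Poll get_status() until it returns an expected status or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        status = get_status()
        if status in expected_statuses:
            return status
        time.sleep(interval)
    raise TimeoutError(f"status not in {expected_statuses} after {timeout}s")
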
def execute_day2_flow(cluster_id, args, day2_type_flag, has_ipv4):
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    client = assisted_service_api.create_client(
        url=utils.get_assisted_service_url_by_args(args=args))
    cluster = client.cluster_get(cluster_id=cluster_id)
    cluster_name = cluster.name
    openshift_version = cluster.openshift_version
    api_vip_dnsname = "api." + cluster_name + "." + cluster.base_dns_domain
    api_vip_ip = cluster.api_vip
    terraform_cluster_dir_prefix = cluster_name
    if day2_type_flag == "ocp":
        terraform_cluster_dir_prefix = "test-infra-cluster-assisted-installer"
    else:
        cluster_id = str(uuid.uuid4())
        copy_proxy_from_cluster = cluster
        cluster = client.create_day2_cluster(
            cluster_name + "-day2", cluster_id,
            **_day2_cluster_create_params(openshift_version, api_vip_dnsname))
        set_cluster_pull_secret(client, cluster_id, args.pull_secret)
        set_cluster_proxy(client, cluster_id, copy_proxy_from_cluster, args)
    config_etc_hosts(api_vip_ip, api_vip_dnsname)
    image_path = os.path.join(consts.IMAGE_FOLDER,
                              f'{args.namespace}-installer-image.iso')
    client.generate_and_download_image(
        cluster_id=cluster.id,
        image_path=image_path,
        ssh_key=args.ssh_key,
    )
    day2_nodes_flow(client, terraform_cluster_dir_prefix, cluster, has_ipv4,
                    args.number_of_day2_workers, api_vip_ip, api_vip_dnsname,
                    args.namespace, args.install_cluster, day2_type_flag)
def waiting_for_installation_completion(controller):
    vm_ip = controller.master_ips[0][0]
    try:
        logging.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(cluster_name=controller.cluster_name,
                               base_dns_domain=controller.cluster_domain,
                               api_vip=vm_ip)
        logging.info("Waiting for installation to complete...")
        waiting.wait(all_operators_up,
                     sleep_seconds=20,
                     timeout_seconds=60 * 60,
                     waiting_for="all operators to get up")
        logging.info("Installation completed successfully!")
    finally:
        logging.info("Gathering sosreport data from host...")
        node = Nodes(controller, private_ssh_key_path=SSH_KEY)[0]
        gather_sosreport_data(node)
        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(ip=vm_ip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)
        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
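
# Minimal usage sketch of the third-party `waiting` library used above: wait()
# polls the predicate until it returns truthy and raises waiting.TimeoutExpired
# on timeout. wait_for_file and its arguments are invented for illustration.
import os

import waiting


def wait_for_file(path, timeout_seconds=300):
    waiting.wait(lambda: os.path.exists(path),
                 sleep_seconds=5,
                 timeout_seconds=timeout_seconds,
                 waiting_for=f"{path} to appear")
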
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    # TODO: Change host validation for only previous known hosts
    try:
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=len(cluster.hosts),
            statuses=[consts.NodesStatus.INSTALLED],
            timeout=timeout,
            interval=60,
        )
        utils.wait_till_all_operators_are_in_status(
            client=client,
            cluster_id=cluster.id,
            operators_count=len(cluster.monitored_operators),
            operator_types=[OperatorType.BUILTIN, OperatorType.OLM],
            statuses=[consts.OperatorStatus.AVAILABLE, consts.OperatorStatus.FAILED],
            timeout=consts.CLUSTER_INSTALLATION_TIMEOUT,
            fall_on_error_status=False,
        )
        utils.wait_till_cluster_is_in_status(
            client=client,
            cluster_id=cluster.id,
            statuses=[consts.ClusterStatus.INSTALLED],
            timeout=consts.CLUSTER_INSTALLATION_TIMEOUT
            if cluster.high_availability_mode == "Full"
            else consts.CLUSTER_INSTALLATION_TIMEOUT * 2,
            break_statuses=[consts.ClusterStatus.ERROR],
        )
    finally:
        output_folder = f'build/{cluster.id}'
        utils.recreate_folder(output_folder)
        download_logs_from_all_hosts(client=client, cluster_id=cluster.id,
                                     output_folder=output_folder)
def _create_tf_folder(cls, cluster_name: str, platform: str):
    tf_folder = utils.get_tf_folder(cluster_name)
    logging.info("Creating %s as terraform folder", tf_folder)
    utils.recreate_folder(tf_folder)
    utils.copy_template_tree(
        tf_folder, none_platform_mode=(platform == consts.Platforms.NONE))
    return tf_folder
def _create_tf_folder(self, name: str, platform: str):
    tf_folder = utils.get_tf_folder(name)
    logging.info("Creating %s as terraform folder", tf_folder)
    utils.recreate_folder(tf_folder)
    utils.copy_template_tree(
        tf_folder,
        none_platform_mode=(platform == consts.Platforms.NONE),
        is_infra_env=isinstance(self._entity_config, BaseInfraEnvConfig))
    return tf_folder
def log_collection(vm_ip):
    etype, _value, _tb = sys.exc_info()
    logging.info(
        f"Collecting logs after a {('failed', 'successful')[etype is None]} installation"
    )
    try:
        logging.info("Gathering sosreport data from host...")
        gather_sosreport_data(output_dir=IBIP_DIR, private_ssh_key_path=SSH_KEY)
    except Exception:
        logging.exception("sosreport gathering failed!")
    try:
        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(ip=vm_ip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)
    except Exception:
        logging.exception("installer-gather failed!")
    try:
        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
    except Exception:
        logging.exception("must-gather failed!")
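
# Illustrative sketch (hypothetical helper, not from the source): log_collection
# above wraps each gathering step in its own try/except so that one failure does
# not abort the remaining steps; the same pattern factored into a small runner.
import logging


def run_best_effort(steps):
    """Run each (description, callable) step, logging failures instead of raising."""
    for description, step in steps:
        try:
            logging.info("Running: %s", description)
            step()
        except Exception:
            logging.exception("%s failed!", description)
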
def execute_day2_flow(cluster_id, args, day2_type_flag, has_ipv6):
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    client = ClientFactory.create_client(
        url=utils.get_assisted_service_url_by_args(args=args),
        offline_token=utils.get_env("OFFLINE_TOKEN"))
    cluster = client.cluster_get(cluster_id=cluster_id)
    cluster_name = cluster.name
    openshift_version = cluster.openshift_version
    api_vip_dnsname = "api." + cluster_name + "." + cluster.base_dns_domain
    api_vip_ip = cluster.api_vip
    terraform_cluster_dir_prefix = cluster_name
    if day2_type_flag == "ocp":
        terraform_cluster_dir_prefix = f"{consts.CLUSTER_PREFIX}-{consts.DEFAULT_NAMESPACE}"
    else:
        cluster_id = str(uuid.uuid4())
        copy_proxy_from_cluster = cluster
        cluster = client.create_day2_cluster(
            cluster_name + "-day2", cluster_id,
            **_day2_cluster_create_params(openshift_version, api_vip_dnsname))
        set_cluster_pull_secret(client, cluster_id, args.pull_secret)
        set_cluster_proxy(client, cluster_id, copy_proxy_from_cluster, args)
    config_etc_hosts(api_vip_ip, api_vip_dnsname)
    image_path = os.path.join(consts.IMAGE_FOLDER,
                              f'{args.namespace}-installer-image.iso')
    tf_folder = os.path.join(
        utils.get_tf_folder(terraform_cluster_dir_prefix, args.namespace),
        consts.Platforms.BARE_METAL)
    set_day2_tf_configuration(tf_folder, args.number_of_day2_workers,
                              api_vip_ip, api_vip_dnsname)
    static_network_config = None
    if args.with_static_network_config:
        static_network_config = static_network.generate_day2_static_network_data_from_tf(
            tf_folder, args.number_of_day2_workers)
    client.generate_and_download_image(
        cluster_id=cluster.id,
        image_path=image_path,
        ssh_key=args.ssh_key,
        static_network_config=static_network_config)
    day2_nodes_flow(
        client,
        terraform_cluster_dir_prefix,
        tf_folder,
        cluster,
        has_ipv6,
        args.number_of_day2_workers,
        api_vip_ip,
        api_vip_dnsname,
        args.install_cluster,
        day2_type_flag,
        args.with_static_network_config,
        cluster_name,
    )
def setup_files_and_folders(args, net_asset, cluster_name):
    logging.info("Creating needed files and folders")
    utils.recreate_folder(consts.BASE_IMAGE_FOLDER, force_recreate=False)
    utils.recreate_folder(IBIP_DIR, with_chmod=False, force_recreate=True)
    shutil.copy(os.path.join(RESOURCES_DIR, INSTALL_CONFIG_FILE_NAME), IBIP_DIR)
    fill_install_config(args.pull_secret, args.ssh_key, net_asset, cluster_name)
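
# Illustrative sketch, assuming recreate_folder's semantics from its call sites:
# force_recreate wipes an existing folder first, with_chmod opens up permissions.
# This is a guess at the helper's behavior, not the project's implementation.
import os
import shutil


def recreate_folder_sketch(path, with_chmod=True, force_recreate=True):
    if os.path.isdir(path) and force_recreate:
        shutil.rmtree(path)  # start from a clean folder
    os.makedirs(path, exist_ok=True)
    if with_chmod:
        os.chmod(path, 0o777)  # make the folder writable for all users
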
def prepare_nodes(self):
    logging.info("Preparing nodes")
    self.destroy_all_nodes()
    if not os.path.exists(self.image_path):
        utils.recreate_folder(os.path.dirname(self.image_path), force_recreate=False)
        # if the image file does not exist, create a dummy one
        utils.touch(self.image_path)
    self.params.running = False
    self._create_nodes()
def gather_sosreport_data(output_dir: str):
    sosreport_output = os.path.join(output_dir, "sosreport")
    recreate_folder(sosreport_output)
    controller = LibvirtController(config=TerraformConfig(),
                                   entity_config=ClusterConfig())
    run_concurrently(
        jobs=[(gather_sosreport_from_node, node, sosreport_output)
              for node in controller.list_nodes()],
        timeout=60 * 20,
    )
def gather_sosreport_data(output_dir: str,
                          private_ssh_key_path: str = private_ssh_key_path_default):
    sosreport_output = os.path.join(output_dir, "sosreport")
    recreate_folder(sosreport_output)
    controller = LibvirtController(private_ssh_key_path=private_ssh_key_path)
    run_concurrently(
        jobs=[(gather_sosreport_from_node, node, sosreport_output)
              for node in controller.list_nodes()],
        timeout=60 * 20,
    )
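
# Illustrative stdlib-only sketch (hypothetical): run_concurrently(jobs=...) above
# receives (callable, *args) tuples; a ThreadPoolExecutor gives the same fan-out,
# with a timeout applied per job.
from concurrent.futures import ThreadPoolExecutor


def run_concurrently_sketch(jobs, timeout):
    """Run (func, *args) tuples in parallel and re-raise the first job failure."""
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(func, *func_args) for func, *func_args in jobs]
        for future in futures:
            future.result(timeout=timeout)  # re-raises any exception from the job
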
def _collect_journalctl(nodes: Nodes, log_dir_name):
    logging.info('Collecting journalctl\n')
    infra_utils.recreate_folder(log_dir_name, with_chmod=False,
                                force_recreate=False)
    journal_ctl_path = Path(log_dir_name) / 'nodes_journalctl'
    infra_utils.recreate_folder(journal_ctl_path, with_chmod=False)
    for node in nodes:
        try:
            node.run_command(f'sudo journalctl >> /tmp/{node.name}-journalctl')
            journal_path = journal_ctl_path / node.name
            node.download_file(f'/tmp/{node.name}-journalctl', str(journal_path))
        except (RuntimeError, TimeoutError, SSHException):
            logging.info(f'Could not collect journalctl for {node.name}')
def download_image(self, iso_download_path=None):
    iso_download_path = iso_download_path or self._config.iso_download_path
    # ensure the parent directory exists before downloading
    if not os.path.exists(iso_download_path):
        utils.recreate_folder(os.path.dirname(iso_download_path),
                              force_recreate=False)
    self.api_client.download_infraenv_image(
        infraenv_id=self.id,
        image_path=iso_download_path,
    )
def download_image(self, iso_download_path: str = None) -> Path:
    iso_download_url = self.get_details().download_url
    iso_download_path = iso_download_path or self._config.iso_download_path
    # ensure the parent directory exists before downloading
    if not os.path.exists(iso_download_path):
        utils.recreate_folder(os.path.dirname(iso_download_path),
                              force_recreate=False)
    log.info(f"Downloading image {iso_download_url} to {iso_download_path}")
    return utils.download_file(iso_download_url, iso_download_path)
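
# Illustrative sketch, assuming utils.download_file streams the image over HTTP.
# The requests calls are real API; the helper itself is an assumption.
from pathlib import Path

import requests


def download_file_sketch(url: str, dest: str, chunk_size: int = 1024 * 1024) -> Path:
    """Stream url to dest in chunks so a multi-GB ISO is never held in memory."""
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(dest, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    return Path(dest)
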
def execute_day1_flow(cluster_name):
    client = None
    cluster = {}
    if args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]
    if not args.vm_network_cidr:
        net_cidr = IPNetwork('192.168.126.0/24')
        net_cidr += args.ns_index
        args.vm_network_cidr = str(net_cidr)
    if not args.vm_network_cidr6:
        net_cidr = IPNetwork('1001:db8::/120')
        net_cidr += args.ns_index
        args.vm_network_cidr6 = str(net_cidr)
    if not args.network_bridge:
        args.network_bridge = f'tt{args.ns_index}'
    image_path = None
    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        client = assisted_service_api.create_client(
            url=utils.get_assisted_service_url_by_args(args=args))
        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)
        else:
            cluster = client.create_cluster(cluster_name,
                                            ssh_public_key=args.ssh_key,
                                            **_cluster_create_params())
        image_path = os.path.join(consts.IMAGE_FOLDER,
                                  f'{args.namespace}-installer-image.iso')
        client.generate_and_download_image(
            cluster_id=cluster.id,
            image_path=image_path,
            ssh_key=args.ssh_key,
        )
    # ISO-only mode: the cluster comes up and the ISO is downloaded, but no VM is created
    if not args.iso_only:
        try:
            nodes_flow(client, cluster_name, cluster, args.image or image_path)
        finally:
            if not image_path or args.keep_iso:
                return
            log.info('deleting iso: %s', image_path)
            os.unlink(image_path)
    return cluster.id
def collect_debug_info_from_cluster(cluster_deployment, agent_cluster_install):
    cluster_name = cluster_deployment.ref.name
    output_folder = f'build/{cluster_name}'
    recreate_folder(output_folder)
    aci = agent_cluster_install.get()
    debug_info = aci['status']['debugInfo']
    try:
        log.info("Collecting debugInfo (events/logs) from cluster")
        fetch_url_and_write_to_file('eventsURL', 'events.json', debug_info, output_folder)
        fetch_url_and_write_to_file('logsURL', 'logs.tar', debug_info, output_folder)
    except Exception as err:
        log.warning(f"Failed to collect debug info for cluster {cluster_name} ({err})")
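
# Illustrative sketch of what fetch_url_and_write_to_file might do, inferred from
# the call sites above (a debugInfo URL key, a target file name, the status dict,
# and an output folder); the implementation is an assumption.
import os

import requests


def fetch_url_and_write_to_file_sketch(url_key, file_name, debug_info, output_folder):
    response = requests.get(debug_info[url_key], timeout=60)
    response.raise_for_status()
    with open(os.path.join(output_folder, file_name), "wb") as f:
        f.write(response.content)
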
def execute_day1_flow():
    client, cluster = try_get_cluster()
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'
    if cluster:
        args.base_dns_domain = cluster.base_dns_domain
        cluster_name = cluster.name
    elif args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]
    log.info('Cluster name: %s', cluster_name)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    image_path = args.image or os.path.join(
        consts.IMAGE_FOLDER, f'{args.namespace}-installer-image.iso')
    set_tf_config(cluster_name)
    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        if not client:
            client = ClientFactory.create_client(
                url=utils.get_assisted_service_url_by_args(args=args),
                offline_token=utils.get_env("OFFLINE_TOKEN"))
        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)
        else:
            cluster = client.create_cluster(cluster_name,
                                            ssh_public_key=args.ssh_key,
                                            **_cluster_create_params(client))
        static_network_config = apply_static_network_config(
            cluster_name=cluster_name,
            kube_client=None,
        )
        client.generate_and_download_image(
            cluster_id=cluster.id,
            image_path=image_path,
            image_type=args.iso_image_type,
            ssh_key=args.ssh_key,
            static_network_config=static_network_config,
        )
    # ISO-only mode: the cluster comes up and the ISO is downloaded, but no VM is created
    if not args.iso_only:
        run_nodes_flow(client, cluster_name, cluster, machine_net, image_path)
    return cluster.id if cluster else None
def set_tf_config(cluster_name):
    nodes_details = _create_node_details(cluster_name)
    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    utils.recreate_folder(tf_folder)
    utils.copy_template_tree(tf_folder, is_none_platform_mode())
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    default_image_path = os.path.join(consts.IMAGE_FOLDER,
                                      f'{args.namespace}-installer-image.iso')
    fill_tfvars(image_path=args.image or default_image_path,
                storage_path=args.storage_path,
                master_count=args.master_count,
                nodes_details=nodes_details,
                tf_folder=tf_folder,
                machine_net=machine_net)
def download_logs(client: InventoryClient, cluster: dict, dest: str,
                  must_gather: bool, retry_interval: int = RETRY_INTERVAL):
    output_folder = get_logs_output_folder(dest, cluster)
    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return
    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))
    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))
        with suppress(assisted_service_client.rest.ApiException):
            client.download_ignition_files(
                cluster['id'], os.path.join(output_folder, "cluster_files"))
        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppress(assisted_service_client.rest.ApiException):
                client.download_host_ignition(
                    cluster['id'], host_id, os.path.join(output_folder, "cluster_files"))
        with suppress(assisted_service_client.rest.ApiException):
            client.download_cluster_events(
                cluster['id'],
                os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                      "events.html"), output_folder)
        with suppress(assisted_service_client.rest.ApiException):
            for i in range(MAX_RETRIES):
                cluster_logs_tar = os.path.join(output_folder,
                                                f"cluster_{cluster['id']}_logs.tar")
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)
                client.download_cluster_logs(cluster['id'], cluster_logs_tar)
                min_number_of_logs = (len(cluster['hosts']) + 1
                                      if cluster['status'] == ClusterStatus.INSTALLED
                                      else len(cluster['hosts']))
                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs,
                                         cluster['status'] == ClusterStatus.INSTALLED)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")
                    # Skip sleeping on the last retry
                    if i < MAX_RETRIES - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)
        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")
        with suppress(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)
        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster))
            download_must_gather(kubeconfig_path,
                                 os.path.join(output_folder, "must-gather"))
    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
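
# Illustrative sketch, assuming verify_logs_uploaded merely checks that the tar
# contains at least the expected number of log files; raising AssertionError is
# what drives the retry loop above. The real check may inspect the contents too.
import tarfile


def verify_logs_uploaded_sketch(tar_path, min_number_of_logs, installation_success=False):
    # installation_success is accepted for signature parity; unused in this sketch
    with tarfile.open(tar_path) as tar:
        log_files = [m for m in tar.getmembers() if m.isfile()]
    assert len(log_files) >= min_number_of_logs, (
        f"found {len(log_files)} log files in {tar_path}, "
        f"expected at least {min_number_of_logs}")
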
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool):
    output_folder = get_logs_output_folder(dest, cluster)
    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return
    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))
    write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))
    with suppress(assisted_service_client.rest.ApiException):
        client.download_ignition_files(
            cluster['id'], os.path.join(output_folder, "cluster_files"))
    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_events(
            cluster['id'],
            os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
        shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "events.html"), output_folder)
    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_logs(
            cluster['id'],
            os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar"))
    kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")
    with suppress(assisted_service_client.rest.ApiException):
        client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)
    if must_gather:
        recreate_folder(os.path.join(output_folder, "must-gather"))
        config_etc_hosts(cluster['name'], cluster['base_dns_domain'], cluster['api_vip'])
        download_must_gather(kubeconfig_path,
                             os.path.join(output_folder, "must-gather"))
    run_command("chmod -R ugo+rx '%s'" % output_folder)
def execute_kube_api_flow():
    log.info("Executing kube-api flow")
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    kube_client = create_kube_api_client()
    cluster_deployment = ClusterDeployment(kube_api_client=kube_client,
                                           name=cluster_name,
                                           namespace=args.namespace)
    set_tf_config(cluster_name)
    secret = Secret(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    secret.apply(pull_secret=args.pull_secret)
    ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
    ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES
    api_vip, ingress_vip = "", ""
    if args.master_count > 1:
        api_vip, ingress_vip = _get_vips_ips(machine_net)
    cluster_deployment.apply(
        platform=Platform(
            api_vip=api_vip,
            ingress_vip=ingress_vip,
        ),
        install_strategy=InstallStrategy(
            host_prefix=args.host_prefix if ipv4 else args.host_prefix6,
            machine_cidr=get_machine_cidr_from_machine_net(machine_net),
            cluster_cidr=args.cluster_network if ipv4 else args.cluster_network6,
            service_cidr=args.service_network if ipv4 else args.service_network6,
            ssh_public_key=args.ssh_key,
            control_plane_agents=args.master_count,
            worker_agents=args.number_of_workers,
        ),
        secret=secret,
        base_domain=args.base_dns_domain,
    )
    cluster_deployment.wait_for_state(consts.ClusterStatus.INSUFFICIENT)
    apply_static_network_config(
        cluster_name=cluster_name,
        kube_client=kube_client,
    )
    image_path = os.path.join(consts.IMAGE_FOLDER,
                              f'{args.namespace}-installer-image.iso')
    log.info("Creating installenv")
    http_proxy, https_proxy, no_proxy = _get_http_proxy_params(ipv4=ipv4, ipv6=ipv6)
    install_env = InfraEnv(kube_api_client=kube_client,
                           name=f"{cluster_name}-install-env",
                           namespace=args.namespace)
    install_env.apply(cluster_deployment=cluster_deployment,
                      secret=secret,
                      proxy=Proxy(http_proxy=http_proxy,
                                  https_proxy=https_proxy,
                                  no_proxy=no_proxy))
    install_env.status()
    image_url = install_env.get_iso_download_url()
    utils.download_iso(image_url, image_path)
    try:
        nodes_flow_kube_api(cluster_name, machine_net, cluster_deployment)
    finally:
        if not image_path or args.keep_iso:
            return
        log.info('deleting iso: %s', image_path)
        os.unlink(image_path)
def nodes_flow(client, cluster_name, cluster, image_path):
    nodes_details = _create_node_details(cluster_name)
    if cluster:
        nodes_details["cluster_inventory_id"] = cluster.id
    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    utils.recreate_folder(tf_folder)
    copy_tree(consts.TF_TEMPLATE, tf_folder)
    tf = terraform_utils.TerraformUtils(working_dir=tf_folder)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    create_nodes_and_wait_till_registered(cluster_name=cluster_name,
                                          inventory_client=client,
                                          cluster=cluster,
                                          image_path=image_path,
                                          storage_path=args.storage_path,
                                          master_count=args.master_count,
                                          nodes_details=nodes_details,
                                          tf=tf,
                                          machine_net=machine_net)
    if client:
        cluster_info = client.cluster_get(cluster.id)
        macs = utils.get_libvirt_nodes_macs(nodes_details["libvirt_network_name"])
        if not (cluster_info.api_vip and cluster_info.ingress_vip):
            utils.wait_till_hosts_with_macs_are_in_status(
                client=client,
                cluster_id=cluster.id,
                macs=macs,
                statuses=[
                    consts.NodesStatus.INSUFFICIENT,
                    consts.NodesStatus.PENDING_FOR_INPUT,
                ],
            )
            if args.vip_dhcp_allocation:
                set_cluster_machine_cidr(client, cluster.id, machine_net)
            else:
                set_cluster_vips(client, cluster.id, machine_net)
        else:
            log.info("VIPs already configured")
        network_name = nodes_details["libvirt_network_name"]
        if machine_net.has_ip_v4:
            libvirt_nodes = utils.get_libvirt_nodes_mac_role_ip_and_name(network_name)
            update_hostnames = False
        else:
            log.warning("Working around libvirt/Terraform not setting hostnames "
                        "of IPv6-only hosts")
            libvirt_nodes = _get_libvirt_nodes_from_tf_state(network_name, tf.get_state())
            update_hostnames = True
        update_hosts(client, cluster.id, libvirt_nodes, update_hostnames)
        utils.wait_till_hosts_with_macs_are_in_status(
            client=client,
            cluster_id=cluster.id,
            macs=macs,
            statuses=[consts.NodesStatus.KNOWN],
        )
        if args.install_cluster:
            time.sleep(10)
            install_cluster.run_install_flow(
                client=client,
                cluster_id=cluster.id,
                kubeconfig_path=consts.DEFAULT_CLUSTER_KUBECONFIG_PATH,
                pull_secret=args.pull_secret,
                tf=tf)
            # Validate DNS domains resolvability
            validate_dns(client, cluster.id)
            if args.wait_for_cvo:
                cluster_info = client.cluster_get(cluster.id)
                log.info("Start waiting till CVO status is available")
                config_etc_hosts(cluster_info.name,
                                 cluster_info.base_dns_domain,
                                 cluster_info.api_vip)
                utils.wait_for_cvo_available()
def download_logs(client: InventoryClient, cluster: dict, dest: str,
                  must_gather: bool, update_by_events: bool = False,
                  retry_interval: int = RETRY_INTERVAL, pull_secret=""):
    if "hosts" not in cluster or len(cluster["hosts"]) == 0:
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])
    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return
    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))
    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))
        with suppressAndLog(AssertionError, ConnectionError,
                            requests.exceptions.ConnectionError):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))
        for cluster_file in ("bootstrap.ign", "master.ign", "worker.ign",
                             "install-config.yaml"):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_and_save_file(
                    cluster['id'], cluster_file,
                    os.path.join(output_folder, "cluster_files", cluster_file))
        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_host_ignition(
                    cluster['id'], host_id, os.path.join(output_folder, "cluster_files"))
        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'],
                                           get_cluster_events_path(cluster, output_folder))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                      "events.html"), output_folder)
        with suppressAndLog(assisted_service_client.rest.ApiException):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.JOINED], 2)
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_state else MAX_RETRIES
            is_controller_expected = (cluster['status'] == ClusterStatus.INSTALLED
                                      or are_masters_in_configuring_state)
            min_number_of_logs = min_number_of_log_files(cluster, is_controller_expected)
            for i in range(max_retries):
                cluster_logs_tar = os.path.join(output_folder,
                                                f"cluster_{cluster['id']}_logs.tar")
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)
                client.download_cluster_logs(cluster['id'], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar, min_number_of_logs,
                        installation_success=(cluster['status'] == ClusterStatus.INSTALLED),
                        check_oc=are_masters_in_join_state)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")
                    # Skip sleeping on the last retry
                    if i < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)
        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")
        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)
        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster,
                                                                     pull_secret))
            download_must_gather(kubeconfig_path,
                                 os.path.join(output_folder, "must-gather"))
    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
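
# Illustrative sketch of a suppressAndLog-style context manager: behaves like
# contextlib.suppress but logs the swallowed exception so the failure stays
# visible; the project's own implementation may differ.
import logging
from contextlib import contextmanager


@contextmanager
def suppress_and_log(*exceptions):
    try:
        yield
    except exceptions:
        logging.exception("suppressed failure")
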
def execute_kube_api_flow():
    log.info("Executing kube-api flow")
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    kube_client = create_kube_api_client()
    cluster_deployment = ClusterDeployment(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    set_tf_config(cluster_name)
    secret = Secret(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    secret.apply(pull_secret=args.pull_secret)
    image_set = ClusterImageSet(
        kube_api_client=kube_client,
        name=f"{cluster_name}-image-set",
        namespace=args.namespace,
    )
    release_image = utils.get_env('OPENSHIFT_INSTALL_RELEASE_IMAGE',
                                  utils.get_openshift_release_image("4.8"))
    image_set.apply(releaseImage=release_image)
    ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
    ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES
    api_vip, ingress_vip = "", ""
    if args.master_count > 1:
        api_vip, ingress_vip = _get_vips_ips(machine_net)
    agent_cluster_install = AgentClusterInstall(
        kube_api_client=kube_client,
        name=f'{cluster_name}-agent-cluster-install',
        namespace=args.namespace,
    )
    image_set_ref = ClusterImageSetReference(name=f'{cluster_name}-image-set')
    cluster_deployment.apply(
        secret=secret,
        base_domain=args.base_dns_domain,
        agent_cluster_install_ref=agent_cluster_install.ref,
    )
    agent_cluster_install.apply(
        cluster_deployment_ref=cluster_deployment.ref,
        api_vip=api_vip,
        ingress_vip=ingress_vip,
        image_set_ref=image_set_ref,
        cluster_cidr=args.cluster_network if ipv4 else args.cluster_network6,
        host_prefix=args.host_prefix if ipv4 else args.host_prefix6,
        service_network=args.service_network if ipv4 else args.service_network6,
        ssh_pub_key=args.ssh_key,
        control_plane_agents=args.master_count,
        worker_agents=args.number_of_workers,
        machine_cidr=get_machine_cidr_from_machine_net(machine_net),
    )
    agent_cluster_install.wait_to_be_ready(False)
    apply_static_network_config(
        cluster_name=cluster_name,
        kube_client=kube_client,
    )
    image_path = os.path.join(consts.IMAGE_FOLDER,
                              f'{args.namespace}-installer-image.iso')
    log.info("Creating infraEnv")
    http_proxy, https_proxy, no_proxy = _get_http_proxy_params(ipv4=ipv4, ipv6=ipv6)
    infra_env = InfraEnv(
        kube_api_client=kube_client,
        name=f"{cluster_name}-infra-env",
        namespace=args.namespace,
    )
    infra_env.apply(
        cluster_deployment=cluster_deployment,
        secret=secret,
        proxy=Proxy(http_proxy=http_proxy,
                    https_proxy=https_proxy,
                    no_proxy=no_proxy),
        ssh_pub_key=args.ssh_key,
        nmstate_label=cluster_name,
    )
    infra_env.status()
    image_url = infra_env.get_iso_download_url()
    utils.download_iso(image_url, image_path)
    try:
        nodes_flow_kube_api(cluster_name, machine_net, cluster_deployment,
                            agent_cluster_install)
    finally:
        if not image_path or args.keep_iso:
            return
        log.info('deleting iso: %s', image_path)
        os.unlink(image_path)
def _create_tf_folder(self):
    tf_folder = utils.get_tf_folder(self.cluster_name)
    logging.info("Creating %s as terraform folder", tf_folder)
    utils.recreate_folder(tf_folder)
    copy_tree(consts.TF_TEMPLATE, tf_folder)
    return tf_folder
def _create_tf_folder(self):
    tf_folder = utils.get_tf_folder(self.cluster_name)
    logging.info("Creating %s as terraform folder", tf_folder)
    utils.recreate_folder(tf_folder)
    utils.copy_template_tree(tf_folder)
    return tf_folder
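
# Illustrative sketch, assuming copy_template_tree copies a terraform template
# directory into the target folder; shutil.copytree(..., dirs_exist_ok=True)
# gives equivalent behavior on Python 3.8+. The template path is invented.
import shutil


def copy_template_tree_sketch(tf_folder, template_dir="terraform_files"):
    shutil.copytree(template_dir, tf_folder, dirs_exist_ok=True)
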
def execute_day1_flow(cluster_name):
    client = None
    cluster = {}
    if args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]
    if not args.vm_network_cidr:
        net_cidr = IPNetwork('192.168.126.0/24')
        net_cidr += args.ns_index
        args.vm_network_cidr = str(net_cidr)
    if not args.vm_network_cidr6:
        net_cidr = IPNetwork('1001:db8::/120')
        net_cidr += args.ns_index
        args.vm_network_cidr6 = str(net_cidr)
    if not args.network_bridge:
        args.network_bridge = f'tt{args.ns_index}'
    set_tf_config(cluster_name)
    image_path = None
    image_url = None
    image_type = args.iso_image_type
    kube_client = None
    cluster_deployment = None
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        client = assisted_service_api.create_client(
            url=utils.get_assisted_service_url_by_args(args=args))
        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)
        elif args.kube_api:
            kube_client = create_kube_api_client(
                str(pathlib.Path("~/.kube/config").expanduser()))
            cluster_deployment = ClusterDeployment(kube_api_client=kube_client,
                                                   name=cluster_name,
                                                   namespace=args.namespace)
            secret = Secret(
                kube_api_client=kube_client,
                name=cluster_name,
                namespace=args.namespace,
            )
            with contextlib.suppress(ApiException):
                secret.delete()
            secret.create(pull_secret=args.pull_secret)
            ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
            ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES
            api_vip, ingress_vip = "", ""
            with contextlib.suppress(ApiException):
                cluster_deployment.delete()
            cluster_deployment.create(
                platform=Platform(
                    api_vip=api_vip,
                    ingress_vip=ingress_vip,
                ),
                install_strategy=InstallStrategy(
                    host_prefix=args.host_prefix if ipv4 else args.host_prefix6,
                    machine_cidr=machine_net.machine_cidr_addresses[0],
                    cluster_cidr=args.cluster_network if ipv4 else args.cluster_network6,
                    service_cidr=args.service_network if ipv4 else args.service_network6,
                    ssh_public_key=args.ssh_key,
                    control_plane_agents=args.master_count,
                    worker_agents=args.number_of_workers,
                ),
                secret=secret,
                base_domain=args.base_dns_domain,
            )
            cluster_deployment.wait_for_state("insufficient")
            http_proxy, https_proxy, no_proxy = _get_http_proxy_params(ipv4=ipv4,
                                                                       ipv6=ipv6)
            install_env = InstallEnv(kube_api_client=kube_client,
                                     name=f"{cluster_name}-install-env",
                                     namespace=args.namespace)
            with contextlib.suppress(ApiException):
                install_env.delete()
            install_env.create(cluster_deployment=cluster_deployment,
                               secret=secret,
                               proxy=Proxy(http_proxy=http_proxy,
                                           https_proxy=https_proxy,
                                           no_proxy=no_proxy))
            install_env.status()
            image_url = install_env.get_iso_download_url()
            cluster = client.cluster_get(cluster_id=install_env.get_cluster_id())
        else:
            cluster = client.create_cluster(cluster_name,
                                            ssh_public_key=args.ssh_key,
                                            **_cluster_create_params())
        image_path = os.path.join(consts.IMAGE_FOLDER,
                                  f'{args.namespace}-installer-image.iso')
        if args.with_static_network_config:
            tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
            static_network_config = static_network.generate_static_network_data_from_tf(
                tf_folder)
        else:
            static_network_config = None
        if image_url is not None:
            utils.download_iso(image_url, image_path)
        else:
            client.generate_and_download_image(
                cluster_id=cluster.id,
                image_path=image_path,
                image_type=image_type,
                ssh_key=args.ssh_key,
                static_network_config=static_network_config,
            )
    # ISO-only mode: the cluster comes up and the ISO is downloaded, but no VM is created
    if not args.iso_only:
        try:
            nodes_flow(client, cluster_name, cluster, machine_net, kube_client,
                       cluster_deployment)
        finally:
            if not image_path or args.keep_iso:
                return
            log.info('deleting iso: %s', image_path)
            os.unlink(image_path)
    return cluster.id