Exemple #1
0
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    """Wait for all hosts, then the cluster, to reach the INSTALLED status.

    Host logs are downloaded into ``build/<cluster.id>`` afterwards, whether
    the waits succeeded or raised.

    :param client: API client forwarded to the wait and log-download helpers.
    :param cluster: cluster object; ``id``, ``hosts`` and
        ``high_availability_mode`` are read from it.
    :param timeout: seconds allowed for the hosts wait (default: two hours).
    """
    log.info("Waiting %s till cluster finished installation", timeout)
    # TODO: Change host validation for only previous known hosts
    try:
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=len(cluster.hosts),
            statuses=[consts.NodesStatus.INSTALLED],
            timeout=timeout,
            interval=60,
        )

        # Anything other than the "Full" HA mode gets twice the base timeout.
        if cluster.high_availability_mode == "Full":
            cluster_timeout = consts.CLUSTER_INSTALLATION_TIMEOUT
        else:
            cluster_timeout = consts.CLUSTER_INSTALLATION_TIMEOUT * 2

        utils.wait_till_cluster_is_in_status(
            client=client,
            cluster_id=cluster.id,
            statuses=[consts.ClusterStatus.INSTALLED],
            timeout=cluster_timeout,
        )
    finally:
        # Collect host logs regardless of the outcome of the waits above.
        logs_dir = f'build/{cluster.id}'
        utils.recreate_folder(logs_dir)
        download_logs_from_all_hosts(
            client=client, cluster_id=cluster.id, output_folder=logs_dir)
Exemple #2
0
def execute_day2_flow(cluster_id, args, day2_type_flag, has_ipv4):
    """Prepare a day2 cluster, download its discovery ISO and run the day2 nodes flow.

    When ``day2_type_flag`` is ``"ocp"`` the existing cluster is reused;
    otherwise a new ``<name>-day2`` cluster is created with a fresh UUID and
    given the pull secret and proxy settings.

    :param cluster_id: id of the existing (day1) cluster to look up.
    :param args: parsed arguments; ``pull_secret``, ``namespace``, ``ssh_key``,
        ``number_of_day2_workers`` and ``install_cluster`` are read.
    :param day2_type_flag: ``"ocp"`` or any other value (see above).
    :param has_ipv4: forwarded unchanged to ``day2_nodes_flow``.
    """
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)

    service_url = utils.get_assisted_service_url_by_args(args=args)
    client = assisted_service_api.create_client(url=service_url)
    cluster = client.cluster_get(cluster_id=cluster_id)

    cluster_name = cluster.name
    openshift_version = cluster.openshift_version
    api_vip_dnsname = ".".join(("api", cluster_name, cluster.base_dns_domain))
    api_vip_ip = cluster.api_vip

    if day2_type_flag == "ocp":
        terraform_cluster_dir_prefix = "test-infra-cluster-assisted-installer"
    else:
        terraform_cluster_dir_prefix = cluster_name
        cluster_id = str(uuid.uuid4())
        # Keep a handle on the original cluster: its proxy settings are
        # passed to set_cluster_proxy for the new day2 cluster.
        source_cluster = cluster
        day2_params = _day2_cluster_create_params(openshift_version,
                                                  api_vip_dnsname)
        cluster = client.create_day2_cluster(cluster_name + "-day2",
                                             cluster_id, **day2_params)
        set_cluster_pull_secret(client, cluster_id, args.pull_secret)
        set_cluster_proxy(client, cluster_id, source_cluster, args)

    config_etc_hosts(api_vip_ip, api_vip_dnsname)

    iso_name = f'{args.namespace}-installer-image.iso'
    image_path = os.path.join(consts.IMAGE_FOLDER, iso_name)
    client.generate_and_download_image(cluster_id=cluster.id,
                                       image_path=image_path,
                                       ssh_key=args.ssh_key)

    day2_nodes_flow(
        client,
        terraform_cluster_dir_prefix,
        cluster,
        has_ipv4,
        args.number_of_day2_workers,
        api_vip_ip,
        api_vip_dnsname,
        args.namespace,
        args.install_cluster,
        day2_type_flag,
    )
Exemple #3
0
def waiting_for_installation_completion(controller):
    """Wait until all operators are up, then gather diagnostics.

    The diagnostics (sosreport, installer-gather, must-gather) are collected
    in a ``finally`` clause, so they run whether the wait succeeded or raised.

    :param controller: node controller; ``master_ips``, ``cluster_name`` and
        ``cluster_domain`` are read from it.
    """
    master_ip = controller.master_ips[0][0]

    try:
        logging.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(
            cluster_name=controller.cluster_name,
            base_dns_domain=controller.cluster_domain,
            api_vip=master_ip,
        )

        logging.info("Waiting for installation to complete...")
        waiting.wait(
            all_operators_up,
            sleep_seconds=20,
            timeout_seconds=60 * 60,
            waiting_for="all operators to get up",
        )
        logging.info("Installation completed successfully!")
    finally:
        logging.info("Gathering sosreport data from host...")
        first_node = Nodes(controller, private_ssh_key_path=SSH_KEY)[0]
        gather_sosreport_data(first_node)

        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(
            ip=master_ip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)

        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    """Wait for hosts, monitored operators and the cluster to finish installing.

    Host logs are downloaded into ``build/<cluster.id>`` afterwards, whether
    the waits succeeded or raised.

    :param client: API client forwarded to the wait and log-download helpers.
    :param cluster: cluster object; ``id``, ``hosts``, ``monitored_operators``
        and ``high_availability_mode`` are read from it.
    :param timeout: seconds allowed for the hosts wait (default: two hours).
    """
    # TODO: Change host validation for only previous known hosts
    try:
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=len(cluster.hosts),
            statuses=[consts.NodesStatus.INSTALLED],
            timeout=timeout,
            interval=60,
        )

        # Both AVAILABLE and FAILED are accepted as statuses to wait for here.
        accepted_operator_statuses = [
            consts.OperatorStatus.AVAILABLE,
            consts.OperatorStatus.FAILED,
        ]
        utils.wait_till_all_operators_are_in_status(
            client=client,
            cluster_id=cluster.id,
            operators_count=len(cluster.monitored_operators),
            operator_types=[OperatorType.BUILTIN, OperatorType.OLM],
            statuses=accepted_operator_statuses,
            timeout=consts.CLUSTER_INSTALLATION_TIMEOUT,
            fall_on_error_status=False,
        )

        # Anything other than the "Full" HA mode gets twice the base timeout.
        if cluster.high_availability_mode == "Full":
            cluster_timeout = consts.CLUSTER_INSTALLATION_TIMEOUT
        else:
            cluster_timeout = consts.CLUSTER_INSTALLATION_TIMEOUT * 2

        utils.wait_till_cluster_is_in_status(
            client=client,
            cluster_id=cluster.id,
            statuses=[consts.ClusterStatus.INSTALLED],
            timeout=cluster_timeout,
            break_statuses=[consts.ClusterStatus.ERROR],
        )
    finally:
        # Collect host logs regardless of the outcome of the waits above.
        logs_dir = f'build/{cluster.id}'
        utils.recreate_folder(logs_dir)
        download_logs_from_all_hosts(
            client=client, cluster_id=cluster.id, output_folder=logs_dir)
Exemple #5
0
 def _create_tf_folder(cls, cluster_name: str, platform: str):
     """Recreate the terraform folder of *cluster_name* and fill it from the template tree.

     :param cluster_name: name used to resolve the terraform folder path.
     :param platform: compared against ``consts.Platforms.NONE`` to select
         the none-platform template mode.
     :return: the terraform folder path.
     """
     folder = utils.get_tf_folder(cluster_name)
     logging.info("Creating %s as terraform folder", folder)
     utils.recreate_folder(folder)

     is_none_platform = platform == consts.Platforms.NONE
     utils.copy_template_tree(folder, none_platform_mode=is_none_platform)
     return folder
Exemple #6
0
 def _create_tf_folder(self, name: str, platform: str):
     """Recreate the terraform folder of *name* and fill it from the template tree.

     :param name: name used to resolve the terraform folder path.
     :param platform: compared against ``consts.Platforms.NONE`` to select
         the none-platform template mode.
     :return: the terraform folder path.
     """
     folder = utils.get_tf_folder(name)
     logging.info("Creating %s as terraform folder", folder)
     utils.recreate_folder(folder)

     is_none_platform = platform == consts.Platforms.NONE
     # The infra-env template variant is chosen from the entity config type.
     for_infra_env = isinstance(self._entity_config, BaseInfraEnvConfig)
     utils.copy_template_tree(
         folder,
         none_platform_mode=is_none_platform,
         is_infra_env=for_infra_env,
     )
     return folder
def log_collection(vm_ip):
    """Gather sosreport, installer-gather and must-gather data, best effort.

    Each gathering step is wrapped in its own ``try``/``except Exception`` so
    a failure in one step is logged and does not prevent the following steps.

    :param vm_ip: IP address passed to ``installer_gather``.
    """
    # An exception currently being handled means we are collecting logs
    # after a failed installation.
    outcome = "successful" if sys.exc_info()[0] is None else "failed"
    logging.info(f"Collecting logs after a {outcome} installation")

    try:
        logging.info("Gathering sosreport data from host...")
        gather_sosreport_data(
            output_dir=IBIP_DIR, private_ssh_key_path=SSH_KEY)
    except Exception:
        logging.exception("sosreport gathering failed!")

    # NOTE(review): the result of this call is discarded; it looks like a
    # stray call rather than an actual retry of the step below - confirm
    # against the definition of utils.retry before removing it.
    utils.retry()
    try:
        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(
            ip=vm_ip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)
    except Exception:
        logging.exception("installer-gather failed!")

    try:
        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
    except Exception:
        logging.exception("must-gather failed!")
Exemple #8
0
def execute_day2_flow(cluster_id, args, day2_type_flag, has_ipv6):
    """Prepare a day2 cluster, its terraform config and ISO, then run the day2 nodes flow.

    When ``day2_type_flag`` is ``"ocp"`` the existing cluster is reused;
    otherwise a new ``<name>-day2`` cluster is created with a fresh UUID and
    given the pull secret and proxy settings.

    :param cluster_id: id of the existing (day1) cluster to look up.
    :param args: parsed arguments; ``pull_secret``, ``namespace``, ``ssh_key``,
        ``number_of_day2_workers``, ``with_static_network_config`` and
        ``install_cluster`` are read.
    :param day2_type_flag: ``"ocp"`` or any other value (see above).
    :param has_ipv6: forwarded unchanged to ``day2_nodes_flow``.
    """
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)

    service_url = utils.get_assisted_service_url_by_args(args=args)
    client = ClientFactory.create_client(
        url=service_url, offline_token=utils.get_env("OFFLINE_TOKEN"))

    cluster = client.cluster_get(cluster_id=cluster_id)
    cluster_name = cluster.name
    openshift_version = cluster.openshift_version
    api_vip_dnsname = ".".join(("api", cluster_name, cluster.base_dns_domain))
    api_vip_ip = cluster.api_vip

    if day2_type_flag == "ocp":
        terraform_cluster_dir_prefix = f"{consts.CLUSTER_PREFIX}-{consts.DEFAULT_NAMESPACE}"
    else:
        terraform_cluster_dir_prefix = cluster_name
        cluster_id = str(uuid.uuid4())
        # Keep a handle on the original cluster: its proxy settings are
        # passed to set_cluster_proxy for the new day2 cluster.
        source_cluster = cluster
        day2_params = _day2_cluster_create_params(openshift_version,
                                                  api_vip_dnsname)
        cluster = client.create_day2_cluster(cluster_name + "-day2",
                                             cluster_id, **day2_params)
        set_cluster_pull_secret(client, cluster_id, args.pull_secret)
        set_cluster_proxy(client, cluster_id, source_cluster, args)

    config_etc_hosts(api_vip_ip, api_vip_dnsname)

    iso_name = f'{args.namespace}-installer-image.iso'
    image_path = os.path.join(consts.IMAGE_FOLDER, iso_name)

    tf_root = utils.get_tf_folder(terraform_cluster_dir_prefix, args.namespace)
    tf_folder = os.path.join(tf_root, consts.Platforms.BARE_METAL)
    set_day2_tf_configuration(tf_folder, args.number_of_day2_workers,
                              api_vip_ip, api_vip_dnsname)

    if args.with_static_network_config:
        static_network_config = static_network.generate_day2_static_network_data_from_tf(
            tf_folder, args.number_of_day2_workers)
    else:
        static_network_config = None

    client.generate_and_download_image(
        cluster_id=cluster.id,
        image_path=image_path,
        ssh_key=args.ssh_key,
        static_network_config=static_network_config,
    )

    day2_nodes_flow(
        client,
        terraform_cluster_dir_prefix,
        tf_folder,
        cluster,
        has_ipv6,
        args.number_of_day2_workers,
        api_vip_ip,
        api_vip_dnsname,
        args.install_cluster,
        day2_type_flag,
        args.with_static_network_config,
        cluster_name,
    )
def setup_files_and_folders(args, net_asset, cluster_name):
    """Create the working folders and seed the install-config file.

    :param args: parsed arguments; ``pull_secret`` and ``ssh_key`` are read.
    :param net_asset: forwarded to ``fill_install_config``.
    :param cluster_name: forwarded to ``fill_install_config``.
    """
    logging.info("Creating needed files and folders")
    utils.recreate_folder(consts.BASE_IMAGE_FOLDER, force_recreate=False)
    utils.recreate_folder(IBIP_DIR, with_chmod=False, force_recreate=True)

    # Copy the install-config file from the resources dir, then fill it in.
    install_config_source = os.path.join(RESOURCES_DIR,
                                         INSTALL_CONFIG_FILE_NAME)
    shutil.copy(install_config_source, IBIP_DIR)
    fill_install_config(
        args.pull_secret, args.ssh_key, net_asset, cluster_name)
Exemple #10
0
 def prepare_nodes(self):
     """Destroy any existing nodes and create fresh ones.

     If no file exists at ``self.image_path`` yet, its parent folder is
     ensured and an empty placeholder file is touched before node creation.
     """
     logging.info("Preparing nodes")
     self.destroy_all_nodes()

     image_missing = not os.path.exists(self.image_path)
     if image_missing:
         image_dir = os.path.dirname(self.image_path)
         utils.recreate_folder(image_dir, force_recreate=False)
         # if file not exist lets create dummy
         utils.touch(self.image_path)

     self.params.running = False
     self._create_nodes()
def gather_sosreport_data(output_dir: str):
    """Gather sosreport data from every libvirt node concurrently.

    :param output_dir: parent directory; results go to ``<output_dir>/sosreport``,
        which is recreated first.
    """
    target_dir = os.path.join(output_dir, "sosreport")
    recreate_folder(target_dir)

    controller = LibvirtController(config=TerraformConfig(),
                                   entity_config=ClusterConfig())

    # One job tuple (callable, node, output dir) per node.
    jobs = [
        (gather_sosreport_from_node, node, target_dir)
        for node in controller.list_nodes()
    ]
    run_concurrently(jobs=jobs, timeout=60 * 20)
def gather_sosreport_data(output_dir: str,
                          private_ssh_key_path: str=private_ssh_key_path_default):
    """Gather sosreport data from every libvirt node concurrently.

    :param output_dir: parent directory; results go to ``<output_dir>/sosreport``,
        which is recreated first.
    :param private_ssh_key_path: SSH private key path handed to the
        ``LibvirtController``.
    """
    target_dir = os.path.join(output_dir, "sosreport")
    recreate_folder(target_dir)

    controller = LibvirtController(private_ssh_key_path=private_ssh_key_path)

    # One job tuple (callable, node, output dir) per node.
    jobs = [
        (gather_sosreport_from_node, node, target_dir)
        for node in controller.list_nodes()
    ]
    run_concurrently(jobs=jobs, timeout=60 * 20)
 def _collect_journalctl(nodes: Nodes, log_dir_name):
     """Dump ``journalctl`` on each node and download it under *log_dir_name*.

     Output files are stored as ``<log_dir_name>/nodes_journalctl/<node name>``.
     Nodes that raise ``RuntimeError``, ``TimeoutError`` or ``SSHException``
     are logged and skipped.

     :param nodes: iterable of nodes exposing ``name``, ``run_command`` and
         ``download_file``.
     :param log_dir_name: base log directory.
     """
     logging.info('Collecting journalctl\n')
     infra_utils.recreate_folder(log_dir_name, with_chmod=False, force_recreate=False)
     journal_dir = Path(log_dir_name) / 'nodes_journalctl'
     infra_utils.recreate_folder(journal_dir, with_chmod=False)

     for node in nodes:
         try:
             # The dump is appended (>>) to a per-node file on the node itself.
             node.run_command(f'sudo journalctl >> /tmp/{node.name}-journalctl')
             local_copy = journal_dir / node.name
             node.download_file(f'/tmp/{node.name}-journalctl', str(local_copy))
         except (RuntimeError, TimeoutError, SSHException):
             logging.info(f'Could not collect journalctl for {node.name}')
Exemple #14
0
    def download_image(self, iso_download_path=None):
        """Download this infra-env's image through the API client.

        :param iso_download_path: destination file; falls back to
            ``self._config.iso_download_path`` when falsy.
        """
        if not iso_download_path:
            iso_download_path = self._config.iso_download_path

        # ensure file path exists before downloading
        if not os.path.exists(iso_download_path):
            parent_dir = os.path.dirname(iso_download_path)
            utils.recreate_folder(parent_dir, force_recreate=False)

        self.api_client.download_infraenv_image(
            infraenv_id=self.id, image_path=iso_download_path)
    def download_image(self, iso_download_path: str = None) -> Path:
        """Download the image from the ``download_url`` reported by ``get_details()``.

        :param iso_download_path: destination file; falls back to
            ``self._config.iso_download_path`` when falsy.
        :return: whatever ``utils.download_file`` returns for the download.
        """
        source_url = self.get_details().download_url
        if not iso_download_path:
            iso_download_path = self._config.iso_download_path

        # ensure file path exists before downloading
        if not os.path.exists(iso_download_path):
            parent_dir = os.path.dirname(iso_download_path)
            utils.recreate_folder(parent_dir, force_recreate=False)

        log.info(f"Downloading image {source_url} to {iso_download_path}")
        return utils.download_file(source_url, iso_download_path)
def execute_day1_flow(cluster_name):
    """Run the day1 flow: fill in networking defaults, obtain an ISO, run the nodes flow.

    Missing network arguments on the module-level ``args`` are filled in from
    ``args.ns_index``. Unless ``args.image`` is given, a cluster is fetched
    (``args.cluster_id``) or created and its discovery ISO is downloaded.
    Unless ``args.iso_only`` is set, ``nodes_flow`` is then run; afterwards
    the downloaded ISO is removed unless ``args.keep_iso`` is set.

    :param cluster_name: name used when creating the cluster and for the
        nodes flow.
    :return: the cluster id, or ``None`` when no cluster was fetched or
        created (i.e. a pre-built ``args.image`` was used).
    :raises Exception: whatever ``nodes_flow`` raises is propagated to the
        caller after the ISO clean-up.
    """
    client = None
    cluster = {}
    if args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]

    if not args.vm_network_cidr:
        net_cidr = IPNetwork('192.168.126.0/24')
        net_cidr += args.ns_index
        args.vm_network_cidr = str(net_cidr)

    if not args.vm_network_cidr6:
        net_cidr = IPNetwork('1001:db8::/120')
        net_cidr += args.ns_index
        args.vm_network_cidr6 = str(net_cidr)

    if not args.network_bridge:
        args.network_bridge = f'tt{args.ns_index}'

    image_path = None

    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        client = assisted_service_api.create_client(
            url=utils.get_assisted_service_url_by_args(args=args))
        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)
        else:
            cluster = client.create_cluster(cluster_name,
                                            ssh_public_key=args.ssh_key,
                                            **_cluster_create_params())

        image_path = os.path.join(consts.IMAGE_FOLDER,
                                  f'{args.namespace}-installer-image.iso')
        client.generate_and_download_image(
            cluster_id=cluster.id,
            image_path=image_path,
            ssh_key=args.ssh_key,
        )

    # Iso only, cluster will be up and iso downloaded but vm will not be created
    if not args.iso_only:
        try:
            nodes_flow(client, cluster_name, cluster, args.image or image_path)
        finally:
            # Deliberately no `return` in this clause: returning from
            # `finally` would discard an exception raised by nodes_flow and
            # make the function return None instead of the cluster id.
            if image_path and not args.keep_iso:
                log.info('deleting iso: %s', image_path)
                os.unlink(image_path)

    # `cluster` is still the empty placeholder when a pre-built image was used.
    return cluster.id if cluster else None
def collect_debug_info_from_cluster(cluster_deployment, agent_cluster_install):
    """Save the cluster's events and logs under ``build/<cluster name>``.

    The URLs are taken from ``status.debugInfo`` of the agent cluster install.
    Download failures are logged as warnings, not raised.

    :param cluster_deployment: object whose ``ref.name`` is the cluster name.
    :param agent_cluster_install: object whose ``get()`` returns the resource
        containing ``status.debugInfo``.
    """
    cluster_name = cluster_deployment.ref.name
    output_folder = f'build/{cluster_name}'
    recreate_folder(output_folder)
    debug_info = agent_cluster_install.get()['status']['debugInfo']

    # (key in debugInfo, file name to write) - fetched in this order.
    downloads = (
        ('eventsURL', 'events.json'),
        ('logsURL', 'logs.tar'),
    )
    try:
        log.info("Collecting debugInfo (events/logs) from cluster")
        for url_key, file_name in downloads:
            fetch_url_and_write_to_file(url_key, file_name, debug_info, output_folder)
    except Exception as err:
        log.warning(f"Failed to collect debug info for cluster {cluster_name} ({err})")
def execute_day1_flow():
    """Run the day1 flow: resolve the cluster, obtain an ISO, run the nodes flow.

    An already existing cluster (from ``try_get_cluster``) supplies the
    cluster name and base DNS domain. Unless ``args.image`` is given, a
    cluster is fetched or created and its discovery ISO is downloaded.
    Unless ``args.iso_only`` is set, ``run_nodes_flow`` is executed.

    :return: the cluster id, or ``None`` when there is no cluster.
    """
    client, cluster = try_get_cluster()
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'

    if cluster:
        # An existing cluster dictates both the DNS domain and the name.
        args.base_dns_domain = cluster.base_dns_domain
        cluster_name = cluster.name
    elif args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]

    log.info('Cluster name: %s', cluster_name)

    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)

    if args.image:
        image_path = args.image
    else:
        image_path = os.path.join(consts.IMAGE_FOLDER,
                                  f'{args.namespace}-installer-image.iso')
    set_tf_config(cluster_name)

    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        if not client:
            service_url = utils.get_assisted_service_url_by_args(args=args)
            client = ClientFactory.create_client(
                url=service_url, offline_token=utils.get_env("OFFLINE_TOKEN"))

        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)
        else:
            create_params = _cluster_create_params(client)
            cluster = client.create_cluster(
                cluster_name, ssh_public_key=args.ssh_key, **create_params)

        static_network_config = apply_static_network_config(
            cluster_name=cluster_name, kube_client=None)

        client.generate_and_download_image(
            cluster_id=cluster.id,
            image_path=image_path,
            image_type=args.iso_image_type,
            ssh_key=args.ssh_key,
            static_network_config=static_network_config,
        )

    # Iso only, cluster will be up and iso downloaded but vm will not be created
    if not args.iso_only:
        run_nodes_flow(client, cluster_name, cluster, machine_net, image_path)

    if cluster:
        return cluster.id
    return None
def set_tf_config(cluster_name):
    """Recreate the cluster's terraform folder and write its tfvars.

    The folder is filled from the template tree and the tfvars are built
    from the module-level ``args``.

    :param cluster_name: cluster name used for node details and the folder path.
    """
    nodes_details = _create_node_details(cluster_name)

    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    utils.recreate_folder(tf_folder)
    utils.copy_template_tree(tf_folder, is_none_platform_mode())

    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)

    # An explicitly supplied image wins over the per-namespace default ISO.
    iso_name = f'{args.namespace}-installer-image.iso'
    default_image_path = os.path.join(consts.IMAGE_FOLDER, iso_name)
    image_path = args.image or default_image_path

    fill_tfvars(
        image_path=image_path,
        storage_path=args.storage_path,
        master_count=args.master_count,
        nodes_details=nodes_details,
        tf_folder=tf_folder,
        machine_net=machine_net,
    )
Exemple #20
0
def download_logs(client: InventoryClient, cluster: dict, dest: str,
                  must_gather: bool, retry_interval: int = RETRY_INTERVAL):
    """Download the available artifacts of *cluster* into its logs folder.

    Nothing is done when the output folder already exists. API errors
    (``ApiException``) on individual downloads are suppressed so the remaining
    artifacts are still collected. The folder is made readable and traversable
    for everyone at the end, even if a step raised.

    :param client: inventory client used for all downloads.
    :param cluster: cluster dict; ``id``, ``hosts``, ``status``, ``name`` and
        ``base_dns_domain`` are read.
    :param dest: base destination passed to ``get_logs_output_folder``.
    :param must_gather: when true, also run must-gather using the downloaded
        kubeconfig.
    :param retry_interval: seconds to sleep between cluster-logs download
        attempts when verification fails.
    """
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return

    cluster_files_dir = os.path.join(output_folder, "cluster_files")
    recreate_folder(output_folder)
    recreate_folder(cluster_files_dir)

    try:
        # NOTE(review): 'metdata.json' looks like a typo for 'metadata.json';
        # kept as-is because consumers may rely on the current file name.
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metdata.json'))

        with suppress(assisted_service_client.rest.ApiException):
            client.download_ignition_files(cluster['id'], cluster_files_dir)

        for host in cluster['hosts']:
            with suppress(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host['id'], cluster_files_dir)

        with suppress(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'], os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

        with suppress(assisted_service_client.rest.ApiException):
            # These values do not change between attempts, so compute them once.
            cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")
            is_installed = cluster['status'] == ClusterStatus.INSTALLED
            # An installed cluster requires one more log than its host count.
            min_number_of_logs = len(cluster['hosts']) + 1 if is_installed else len(cluster['hosts'])

            for attempt in range(MAX_RETRIES):
                # Start every attempt from a clean slate.
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)

                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs, is_installed)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on last retry
                    if attempt < MAX_RETRIES - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppress(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

            if must_gather:
                must_gather_dir = os.path.join(output_folder, "must-gather")
                recreate_folder(must_gather_dir)
                config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                                 helper_cluster.get_api_vip_from_cluster(client, cluster))
                download_must_gather(kubeconfig_path, must_gather_dir)
    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
Exemple #21
0
def download_logs(client: InventoryClient, cluster: dict, dest: str,
                  must_gather: bool):
    """Download the available artifacts of *cluster* into its logs folder.

    Nothing is done when the output folder already exists. API errors
    (``ApiException``) on individual downloads are suppressed so the remaining
    artifacts are still collected.

    :param client: inventory client used for all downloads.
    :param cluster: cluster dict; ``id``, ``name``, ``base_dns_domain`` and
        ``api_vip`` are read.
    :param dest: base destination passed to ``get_logs_output_folder``.
    :param must_gather: when true, also run must-gather using the downloaded
        kubeconfig.
    """
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(
            f"Skipping. The logs directory {output_folder} already exists.")
        return

    cluster_files_dir = os.path.join(output_folder, "cluster_files")
    recreate_folder(output_folder)
    recreate_folder(cluster_files_dir)

    metadata_path = os.path.join(output_folder, 'metdata.json')
    write_metadata_file(client, cluster, metadata_path)

    with suppress(assisted_service_client.rest.ApiException):
        client.download_ignition_files(cluster['id'], cluster_files_dir)

    with suppress(assisted_service_client.rest.ApiException):
        events_path = os.path.join(output_folder,
                                   f"cluster_{cluster['id']}_events.json")
        client.download_cluster_events(cluster['id'], events_path)
        # Copy the events.html that sits next to this module into the folder.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        shutil.copy2(os.path.join(module_dir, "events.html"), output_folder)

    with suppress(assisted_service_client.rest.ApiException):
        logs_tar = os.path.join(output_folder,
                                f"cluster_{cluster['id']}_logs.tar")
        client.download_cluster_logs(cluster['id'], logs_tar)

    kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

    with suppress(assisted_service_client.rest.ApiException):
        client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

        if must_gather:
            must_gather_dir = os.path.join(output_folder, "must-gather")
            recreate_folder(must_gather_dir)
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             cluster['api_vip'])
            download_must_gather(kubeconfig_path, must_gather_dir)

    run_command(f"chmod -R ugo+rx '{output_folder}'")
def execute_kube_api_flow():
    """Deploy a cluster through the kube-api resources and run the nodes flow.

    Steps, all driven by the module-level ``args``:

    1. Prepare the image folder, machine network and terraform config.
    2. Apply the pull-secret ``Secret`` and the ``ClusterDeployment``, then
       wait for the deployment to reach the INSUFFICIENT state.
    3. Apply the static network config and the ``InfraEnv``, then download
       the ISO it advertises.
    4. Run ``nodes_flow_kube_api``; afterwards delete the ISO unless
       ``args.keep_iso`` is set.

    :raises Exception: whatever ``nodes_flow_kube_api`` raises is propagated
        to the caller after the ISO clean-up.
    """
    log.info("Executing kube-api flow")
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    kube_client = create_kube_api_client()
    cluster_deployment = ClusterDeployment(kube_api_client=kube_client,
                                           name=cluster_name,
                                           namespace=args.namespace)
    set_tf_config(cluster_name)

    secret = Secret(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    secret.apply(pull_secret=args.pull_secret)

    ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
    ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES
    # VIPs are only looked up when there is more than one master.
    api_vip, ingress_vip = "", ""
    if args.master_count > 1:
        api_vip, ingress_vip = _get_vips_ips(machine_net)

    cluster_deployment.apply(
        platform=Platform(
            api_vip=api_vip,
            ingress_vip=ingress_vip,
        ),
        install_strategy=InstallStrategy(
            # The IPv4 settings are used when IPv4 is enabled, IPv6 otherwise.
            host_prefix=args.host_prefix if ipv4 else args.host_prefix6,
            machine_cidr=get_machine_cidr_from_machine_net(machine_net),
            cluster_cidr=args.cluster_network
            if ipv4 else args.cluster_network6,
            service_cidr=args.service_network
            if ipv4 else args.service_network6,
            ssh_public_key=args.ssh_key,
            control_plane_agents=args.master_count,
            worker_agents=args.number_of_workers,
        ),
        secret=secret,
        base_domain=args.base_dns_domain,
    )
    cluster_deployment.wait_for_state(consts.ClusterStatus.INSUFFICIENT)
    apply_static_network_config(
        cluster_name=cluster_name,
        kube_client=kube_client,
    )

    image_path = os.path.join(consts.IMAGE_FOLDER,
                              f'{args.namespace}-installer-image.iso')

    log.info("Creating installenv")
    http_proxy, https_proxy, no_proxy = _get_http_proxy_params(ipv4=ipv4,
                                                               ipv6=ipv6)
    install_env = InfraEnv(kube_api_client=kube_client,
                           name=f"{cluster_name}-install-env",
                           namespace=args.namespace)
    install_env.apply(cluster_deployment=cluster_deployment,
                      secret=secret,
                      proxy=Proxy(http_proxy=http_proxy,
                                  https_proxy=https_proxy,
                                  no_proxy=no_proxy))
    install_env.status()
    image_url = install_env.get_iso_download_url()
    utils.download_iso(image_url, image_path)
    try:
        nodes_flow_kube_api(cluster_name, machine_net, cluster_deployment)
    finally:
        # Deliberately no `return` in this clause: returning from `finally`
        # would discard an exception raised by nodes_flow_kube_api.
        if image_path and not args.keep_iso:
            log.info('deleting iso: %s', image_path)
            os.unlink(image_path)
Exemple #23
0
def nodes_flow(client, cluster_name, cluster, image_path):
    """Create the nodes with terraform and, with a client, drive them to installation.

    Without a client only the nodes are created. With a client the flow
    configures VIPs (or the machine CIDR) when they are not yet set, updates
    the hosts, waits for them to become KNOWN and, when
    ``args.install_cluster`` is set, installs the cluster and optionally
    waits for CVO.

    :param client: inventory client, or a falsy value to stop after node
        creation.
    :param cluster_name: name used for node details and the terraform folder.
    :param cluster: cluster object (``id`` is read), or a falsy value.
    :param image_path: ISO path handed to node creation.
    """
    nodes_details = _create_node_details(cluster_name)
    if cluster:
        nodes_details["cluster_inventory_id"] = cluster.id

    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    utils.recreate_folder(tf_folder)
    copy_tree(consts.TF_TEMPLATE, tf_folder)
    tf = terraform_utils.TerraformUtils(working_dir=tf_folder)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)

    create_nodes_and_wait_till_registered(
        cluster_name=cluster_name,
        inventory_client=client,
        cluster=cluster,
        image_path=image_path,
        storage_path=args.storage_path,
        master_count=args.master_count,
        nodes_details=nodes_details,
        tf=tf,
        machine_net=machine_net,
    )

    # Everything below talks to the service; nothing more to do without it.
    if not client:
        return

    cluster_info = client.cluster_get(cluster.id)
    macs = utils.get_libvirt_nodes_macs(nodes_details["libvirt_network_name"])

    if cluster_info.api_vip and cluster_info.ingress_vip:
        log.info("VIPs already configured")
    else:
        # Wait for the hosts to report one of these statuses before
        # setting the machine CIDR or the VIPs.
        utils.wait_till_hosts_with_macs_are_in_status(
            client=client,
            cluster_id=cluster.id,
            macs=macs,
            statuses=[
                consts.NodesStatus.INSUFFICIENT,
                consts.NodesStatus.PENDING_FOR_INPUT,
            ],
        )

        if args.vip_dhcp_allocation:
            set_cluster_machine_cidr(client, cluster.id, machine_net)
        else:
            set_cluster_vips(client, cluster.id, machine_net)

    network_name = nodes_details["libvirt_network_name"]
    if machine_net.has_ip_v4:
        update_hostnames = False
        libvirt_nodes = utils.get_libvirt_nodes_mac_role_ip_and_name(
            network_name)
    else:
        log.warning(
            "Work around libvirt for Terrafrom not setting hostnames of IPv6-only hosts"
        )
        update_hostnames = True
        libvirt_nodes = _get_libvirt_nodes_from_tf_state(
            network_name, tf.get_state())

    update_hosts(client, cluster.id, libvirt_nodes, update_hostnames)
    utils.wait_till_hosts_with_macs_are_in_status(
        client=client,
        cluster_id=cluster.id,
        macs=macs,
        statuses=[consts.NodesStatus.KNOWN],
    )

    if not args.install_cluster:
        return

    time.sleep(10)
    install_cluster.run_install_flow(
        client=client,
        cluster_id=cluster.id,
        kubeconfig_path=consts.DEFAULT_CLUSTER_KUBECONFIG_PATH,
        pull_secret=args.pull_secret,
        tf=tf,
    )
    # Validate DNS domains resolvability
    validate_dns(client, cluster.id)

    if not args.wait_for_cvo:
        return

    cluster_info = client.cluster_get(cluster.id)
    log.info("Start waiting till CVO status is available")
    config_etc_hosts(cluster_info.name,
                     cluster_info.base_dns_domain,
                     cluster_info.api_vip)
    utils.wait_for_cvo_available()
Exemple #24
0
def download_logs(client: InventoryClient,
                  cluster: dict,
                  dest: str,
                  must_gather: bool,
                  update_by_events: bool = False,
                  retry_interval: int = RETRY_INTERVAL,
                  pull_secret=""):
    """Collect the logs and installation artifacts of ``cluster`` under ``dest``.

    The following items are downloaded, each step being best-effort (API
    errors are logged and suppressed by ``suppressAndLog``): a metadata file,
    service metrics, the cluster ignition/install-config files, per-host
    ignitions, cluster events, the cluster logs tarball (verified and retried)
    and the no-ingress kubeconfig. Optionally a must-gather is collected too.
    The output folder is always made readable/traversable at the end.

    Args:
        client: API client used for every download.
        cluster: Cluster description; the keys ``id``, ``status``, ``name``
            and ``base_dns_domain`` are read. ``hosts`` is filled in from the
            service when it is missing or empty (the dict is mutated).
        dest: Base destination, passed to ``get_logs_output_folder``.
        must_gather: When true, also run ``download_must_gather`` once the
            kubeconfig was downloaded.
        update_by_events: Forwarded to ``is_update_needed``.
        retry_interval: Seconds to sleep between cluster-logs download retries.
        pull_secret: Forwarded to ``helper_cluster.get_api_vip_from_cluster``.

    Returns:
        None. Returns early, without touching the folder, when
        ``is_update_needed`` reports that no update is required.
    """
    # The caller may hand over a cluster without host details; fetch them.
    if "hosts" not in cluster or not cluster["hosts"]:
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    cluster_files_folder = os.path.join(output_folder, "cluster_files")
    recreate_folder(output_folder)
    recreate_folder(cluster_files_folder)

    try:
        write_metadata_file(client, cluster,
                            os.path.join(output_folder, 'metadata.json'))

        with suppressAndLog(AssertionError, ConnectionError,
                            requests.exceptions.ConnectionError):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in ("bootstrap.ign", "master.ign", "worker.ign",
                             "install-config.yaml"):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_and_save_file(
                    cluster['id'], cluster_file,
                    os.path.join(cluster_files_folder, cluster_file))

        for host in cluster['hosts']:
            host_id = host['id']
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host_id,
                                              cluster_files_folder)

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(
                cluster['id'], get_cluster_events_path(cluster, output_folder))
            # Ship the events viewer page that lives next to this module.
            shutil.copy2(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "events.html"), output_folder)

        with suppressAndLog(assisted_service_client.rest.ApiException):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.JOINED], 2)
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_state else MAX_RETRIES
            is_installed = cluster['status'] == ClusterStatus.INSTALLED
            is_controller_expected = is_installed or are_masters_in_configuring_state
            min_number_of_logs = min_number_of_log_files(
                cluster, is_controller_expected)

            # The tarball path does not change between attempts.
            cluster_logs_tar = os.path.join(
                output_folder, f"cluster_{cluster['id']}_logs.tar")

            for attempt in range(max_retries):
                # Drop the tarball of a previous (failed) attempt, if any.
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar,
                        min_number_of_logs,
                        installation_success=is_installed,
                        check_oc=are_masters_in_join_state)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on last retry. Compare against the
                    # effective retry budget of this loop, not MAX_RETRIES.
                    if attempt < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'],
                                                  kubeconfig_path)

            # must-gather needs the kubeconfig, hence it lives in this block:
            # a failed kubeconfig download skips it.
            if must_gather:
                must_gather_folder = os.path.join(output_folder, "must-gather")
                recreate_folder(must_gather_folder)
                config_etc_hosts(
                    cluster['name'], cluster['base_dns_domain'],
                    helper_cluster.get_api_vip_from_cluster(
                        client, cluster, pull_secret))
                download_must_gather(kubeconfig_path, must_gather_folder)

    finally:
        # Always leave whatever was collected readable and traversable.
        run_command(f"chmod -R ugo+rx '{output_folder}'")
Exemple #25
0
def execute_kube_api_flow():
    """Deploy a cluster by applying the kube-API resources and booting nodes.

    Reads the module-level ``args``. Applies, in order, a pull-secret
    ``Secret``, a ``ClusterImageSet``, a ``ClusterDeployment``, an
    ``AgentClusterInstall`` and an ``InfraEnv``; downloads the discovery ISO
    published by the InfraEnv and hands over to ``nodes_flow_kube_api``.

    The downloaded ISO is deleted afterwards unless ``args.keep_iso`` is set.
    Any exception raised by ``nodes_flow_kube_api`` propagates to the caller.
    """
    log.info("Executing kube-api flow")
    cluster_name = f'{args.cluster_name or consts.CLUSTER_PREFIX}-{args.namespace}'
    utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)
    kube_client = create_kube_api_client()
    cluster_deployment = ClusterDeployment(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    set_tf_config(cluster_name)

    secret = Secret(
        kube_api_client=kube_client,
        name=cluster_name,
        namespace=args.namespace,
    )
    secret.apply(pull_secret=args.pull_secret)

    image_set = ClusterImageSet(
        kube_api_client=kube_client,
        name=f"{cluster_name}-image-set",
        namespace=args.namespace,
    )
    release_image = utils.get_env(
        'OPENSHIFT_INSTALL_RELEASE_IMAGE',
        utils.get_openshift_release_image("4.8"),
    )
    image_set.apply(releaseImage=release_image)

    ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
    ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES

    # VIPs are only looked up when there is more than one master.
    api_vip, ingress_vip = "", ""
    if args.master_count > 1:
        api_vip, ingress_vip = _get_vips_ips(machine_net)

    agent_cluster_install = AgentClusterInstall(
        kube_api_client=kube_client,
        name=f'{cluster_name}-agent-cluster-install',
        namespace=args.namespace,
    )

    image_set_ref = ClusterImageSetReference(name=f'{cluster_name}-image-set')
    cluster_deployment.apply(
        secret=secret,
        base_domain=args.base_dns_domain,
        agent_cluster_install_ref=agent_cluster_install.ref,
    )

    # IPv4 settings win whenever IPv4 is enabled; otherwise use the IPv6 ones.
    agent_cluster_install.apply(
        cluster_deployment_ref=cluster_deployment.ref,
        api_vip=api_vip,
        ingress_vip=ingress_vip,
        image_set_ref=image_set_ref,
        cluster_cidr=args.cluster_network if ipv4 else args.cluster_network6,
        host_prefix=args.host_prefix if ipv4 else args.host_prefix6,
        service_network=args.service_network if ipv4 else args.service_network6,
        ssh_pub_key=args.ssh_key,
        control_plane_agents=args.master_count,
        worker_agents=args.number_of_workers,
        machine_cidr=get_machine_cidr_from_machine_net(machine_net),
    )
    agent_cluster_install.wait_to_be_ready(False)

    apply_static_network_config(
        cluster_name=cluster_name,
        kube_client=kube_client,
    )

    image_path = os.path.join(
        consts.IMAGE_FOLDER,
        f'{args.namespace}-installer-image.iso',
    )

    log.info("Creating infraEnv")
    http_proxy, https_proxy, no_proxy = _get_http_proxy_params(ipv4=ipv4, ipv6=ipv6)
    infra_env = InfraEnv(
        kube_api_client=kube_client,
        name=f"{cluster_name}-infra-env",
        namespace=args.namespace,
    )
    infra_env.apply(
        cluster_deployment=cluster_deployment,
        secret=secret,
        proxy=Proxy(
            http_proxy=http_proxy,
            https_proxy=https_proxy,
            no_proxy=no_proxy,
        ),
        ssh_pub_key=args.ssh_key,
        nmstate_label=cluster_name,
    )
    infra_env.status()
    image_url = infra_env.get_iso_download_url()
    utils.download_iso(image_url, image_path)
    try:
        nodes_flow_kube_api(cluster_name, machine_net, cluster_deployment,
                            agent_cluster_install)
    finally:
        # Deliberately no `return` here: returning from a `finally` clause
        # would discard an exception raised by the nodes flow.
        if not args.keep_iso:
            log.info('deleting iso: %s', image_path)
            os.unlink(image_path)
 def _create_tf_folder(self):
     """Prepare the terraform folder of this cluster and return its path.

     The folder is resolved from ``self.cluster_name`` with
     ``utils.get_tf_folder``, recreated with ``utils.recreate_folder`` and
     then populated by copying ``consts.TF_TEMPLATE`` into it.
     """
     folder = utils.get_tf_folder(self.cluster_name)
     logging.info("Creating %s as terraform folder", folder)

     utils.recreate_folder(folder)
     copy_tree(consts.TF_TEMPLATE, folder)

     return folder
Exemple #27
0
 def _create_tf_folder(self):
     """Prepare the terraform folder of this cluster and return its path.

     The folder is resolved from ``self.cluster_name`` with
     ``utils.get_tf_folder``, recreated with ``utils.recreate_folder`` and
     then populated through ``utils.copy_template_tree``.
     """
     folder = utils.get_tf_folder(self.cluster_name)
     logging.info("Creating %s as terraform folder", folder)

     utils.recreate_folder(folder)
     utils.copy_template_tree(folder)

     return folder
Exemple #28
0
def execute_day1_flow(cluster_name):
    """Run the day-1 flow: obtain a cluster and its ISO, then boot the nodes.

    Reads and partially fills in the module-level ``args`` (base DNS domain,
    default VM network CIDRs and bridge name). Unless ``args.image`` is given,
    a service client is created and the cluster is obtained in one of three
    ways: fetched by ``args.cluster_id``, created through kube-API resources
    (``args.kube_api``), or created through the REST client. The discovery ISO
    is then downloaded to ``consts.IMAGE_FOLDER``.

    Unless ``args.iso_only`` is set, ``nodes_flow`` is executed and the ISO is
    deleted afterwards (kept when ``args.keep_iso`` is set). Exceptions raised
    by ``nodes_flow`` propagate to the caller.

    Args:
        cluster_name: Name used for the terraform config and for the cluster
            and kube-API resources created here.

    Returns:
        The id of the cluster, or ``None`` when no cluster was obtained
        (``args.image`` was supplied, so ``cluster`` stays an empty dict).
    """
    client = None
    cluster = {}
    if args.managed_dns_domains:
        args.base_dns_domain = args.managed_dns_domains.split(":")[0]

    # Default networks are shifted by the namespace index
    # (presumably so that each namespace gets its own subnet - TODO confirm).
    if not args.vm_network_cidr:
        net_cidr = IPNetwork('192.168.126.0/24')
        net_cidr += args.ns_index
        args.vm_network_cidr = str(net_cidr)

    if not args.vm_network_cidr6:
        net_cidr = IPNetwork('1001:db8::/120')
        net_cidr += args.ns_index
        args.vm_network_cidr6 = str(net_cidr)

    if not args.network_bridge:
        args.network_bridge = f'tt{args.ns_index}'

    set_tf_config(cluster_name)
    image_path = None
    image_url = None
    image_type = args.iso_image_type
    kube_client = None
    cluster_deployment = None

    machine_net = MachineNetwork(args.ipv4, args.ipv6, args.vm_network_cidr,
                                 args.vm_network_cidr6, args.ns_index)

    if not args.image:
        utils.recreate_folder(consts.IMAGE_FOLDER, force_recreate=False)
        client = assisted_service_api.create_client(
            url=utils.get_assisted_service_url_by_args(args=args))
        if args.cluster_id:
            cluster = client.cluster_get(cluster_id=args.cluster_id)

        elif args.kube_api:
            kube_client = create_kube_api_client(
                str(pathlib.Path("~/.kube/config").expanduser()))
            cluster_deployment = ClusterDeployment(kube_api_client=kube_client,
                                                   name=cluster_name,
                                                   namespace=args.namespace)

            secret = Secret(
                kube_api_client=kube_client,
                name=cluster_name,
                namespace=args.namespace,
            )
            # Delete-then-create: a leftover resource from a previous run is
            # removed first; a failing delete is ignored.
            with contextlib.suppress(ApiException):
                secret.delete()

            secret.create(pull_secret=args.pull_secret)

            ipv4 = args.ipv4 and args.ipv4.lower() in MachineNetwork.YES_VALUES
            ipv6 = args.ipv6 and args.ipv6.lower() in MachineNetwork.YES_VALUES
            api_vip, ingress_vip = "", ""

            with contextlib.suppress(ApiException):
                cluster_deployment.delete()

            # IPv4 settings win whenever IPv4 is enabled; otherwise IPv6 ones.
            cluster_deployment.create(
                platform=Platform(
                    api_vip=api_vip,
                    ingress_vip=ingress_vip,
                ),
                install_strategy=InstallStrategy(
                    host_prefix=args.host_prefix
                    if ipv4 else args.host_prefix6,
                    machine_cidr=machine_net.machine_cidr_addresses[0],
                    cluster_cidr=args.cluster_network
                    if ipv4 else args.cluster_network6,
                    service_cidr=args.service_network
                    if ipv4 else args.service_network6,
                    ssh_public_key=args.ssh_key,
                    control_plane_agents=args.master_count,
                    worker_agents=args.number_of_workers,
                ),
                secret=secret,
                base_domain=args.base_dns_domain,
            )
            cluster_deployment.wait_for_state("insufficient")

            http_proxy, https_proxy, no_proxy = _get_http_proxy_params(
                ipv4=ipv4, ipv6=ipv6)
            install_env = InstallEnv(kube_api_client=kube_client,
                                     name=f"{cluster_name}-install-env",
                                     namespace=args.namespace)
            with contextlib.suppress(ApiException):
                install_env.delete()

            install_env.create(cluster_deployment=cluster_deployment,
                               secret=secret,
                               proxy=Proxy(http_proxy=http_proxy,
                                           https_proxy=https_proxy,
                                           no_proxy=no_proxy))
            install_env.status()
            image_url = install_env.get_iso_download_url()
            cluster = client.cluster_get(
                cluster_id=install_env.get_cluster_id())

        else:
            cluster = client.create_cluster(cluster_name,
                                            ssh_public_key=args.ssh_key,
                                            **_cluster_create_params())

        image_path = os.path.join(consts.IMAGE_FOLDER,
                                  f'{args.namespace}-installer-image.iso')

        if args.with_static_network_config:
            tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
            static_network_config = static_network.generate_static_network_data_from_tf(
                tf_folder)
        else:
            static_network_config = None

        # The kube-API path publishes a download URL; otherwise the service
        # is asked to generate the image.
        if image_url is not None:
            utils.download_iso(image_url, image_path)
        else:
            client.generate_and_download_image(
                cluster_id=cluster.id,
                image_path=image_path,
                image_type=image_type,
                ssh_key=args.ssh_key,
                static_network_config=static_network_config,
            )

    # Iso only, cluster will be up and iso downloaded but vm will not be created
    if not args.iso_only:
        try:
            nodes_flow(client, cluster_name, cluster, machine_net, kube_client,
                       cluster_deployment)
        finally:
            # Deliberately no `return` here: returning from a `finally`
            # clause would discard an exception raised by the nodes flow and
            # drop the cluster id from the result.
            if image_path and not args.keep_iso:
                log.info('deleting iso: %s', image_path)
                os.unlink(image_path)

    # `cluster` is still the empty-dict placeholder when args.image was given.
    return getattr(cluster, "id", None)