Example #1
0
    def __post_init__(self):
        super().__post_init__()
        if self.cluster_name is None or isinstance(self.cluster_name, str):
            self.cluster_name = ClusterName()  # todo rm cluster_name after removing config.cluster_name dependencies
        self.entity_name = self.cluster_name
        if self.kubeconfig_path is None:
            self.kubeconfig_path = utils.get_kubeconfig_path(self.cluster_name.get())
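A minimal, self-contained sketch of the same __post_init__ defaulting pattern; ClusterConfigSketch, generate_cluster_name, and the /tmp path are illustrative stand-ins, not names from the repo (the real code delegates to ClusterName() and utils.get_kubeconfig_path()):

from dataclasses import dataclass
from typing import Optional
import uuid


def generate_cluster_name() -> str:
    # illustrative stand-in for ClusterName()
    return f"test-infra-cluster-{uuid.uuid4().hex[:8]}"


@dataclass
class ClusterConfigSketch:
    cluster_name: Optional[str] = None
    kubeconfig_path: Optional[str] = None

    def __post_init__(self):
        # default any field the caller left unset, as the example above does
        if self.cluster_name is None:
            self.cluster_name = generate_cluster_name()
        if self.kubeconfig_path is None:
            self.kubeconfig_path = f"/tmp/{self.cluster_name}/kubeconfig"


print(ClusterConfigSketch().cluster_name)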
Example #2
0
    def download_kubeconfig(self, kube_api_client: ApiClient) -> str:
        log.info(f"Downloading kubeconfig for HyperShift cluster {self.name}")
        kubeconfig_data = (Secret(
            kube_api_client=kube_api_client,
            namespace=f"clusters-{self.name}",
            name="admin-kubeconfig",
        ).get().data["kubeconfig"])
        hypershift_kubeconfig_path = utils.get_kubeconfig_path(
            self.name) + "-hypershift"

        with open(hypershift_kubeconfig_path, "wt") as kubeconfig_file:
            kubeconfig_file.write(b64decode(kubeconfig_data).decode())
            kubeconfig_file.flush()
        self.kubeconfig_path = hypershift_kubeconfig_path
        return self.kubeconfig_path
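For context, the same secret read can be done with the raw kubernetes client. This is only a sketch of what the Secret wrapper above appears to do; the kubeconfig paths and the cluster name "my-cluster" are placeholders:

from base64 import b64decode
from kubernetes import client, config

# placeholders: the management cluster's kubeconfig and the hosted cluster name
management_api = config.new_client_from_config("/path/to/management-kubeconfig")
secret = client.CoreV1Api(management_api).read_namespaced_secret(
    name="admin-kubeconfig", namespace="clusters-my-cluster"
)
with open("/tmp/my-cluster-kubeconfig-hypershift", "w") as kubeconfig_file:
    kubeconfig_file.write(b64decode(secret.data["kubeconfig"]).decode())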
Example #3
0
def nodes_flow_kube_api(cluster_name, machine_net, cluster_deployment, agent_cluster_install):
    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    baremetal_template = os.path.join(tf_folder, consts.Platforms.BARE_METAL)

    nodes_details = utils.get_tfvars(baremetal_template)
    tf = terraform_utils.TerraformUtils(working_dir=baremetal_template)
    is_ipv4 = machine_net.has_ip_v4 or not machine_net.has_ip_v6
    nodes_number = args.master_count + args.number_of_workers

    create_nodes_and_wait_till_registered(
        inventory_client=None,
        cluster=None,
        nodes_details=nodes_details,
        tf=tf,
        is_ipv4=is_ipv4,
        nodes_number=nodes_number,
        cluster_deployment=cluster_deployment,
    )

    if args.master_count == 1:
        set_single_node_ip(
            client=None,
            cluster_id=None,
            main_cidr=args.vm_network_cidr if is_ipv4 else args.vm_network_cidr6,
            is_ipv4=is_ipv4,
            cluster_deployment=cluster_deployment,
            tf=tf,
        )
    else:
        log.info("VIPs already configured")

    kubeapi_utils.set_agents_hostnames(
        cluster_deployment=cluster_deployment,
        is_ipv4=is_ipv4,
        static_network_mode=args.with_static_network_config,
        tf=tf,
        nodes_number=nodes_number,
    )

    if args.install_cluster:
        install_cluster.run_installation_flow_kube_api(
            cluster_deployment=cluster_deployment,
            agent_cluster_install=agent_cluster_install,
            nodes_number=nodes_number,
            kubeconfig_path=utils.get_kubeconfig_path(cluster_name)
        )
Example #4
0
    @property
    def kubeconfig_path(self) -> str:
        if self._kubeconfig_path == "":
            log.info(
                f"Downloading kubeconfig for HyperShift cluster {self.name}")
            kubeconfig_data = (Secret(
                kube_api_client=self.management_kube_api_client,
                namespace=f"clusters-{self.name}",
                name="admin-kubeconfig",
            ).get().data["kubeconfig"])
            hypershift_kubeconfig_path = utils.get_kubeconfig_path(
                self.name) + "-hypershift"

            log.info(f"Kubeconfig path {hypershift_kubeconfig_path}")
            with open(hypershift_kubeconfig_path, "wt") as kubeconfig_file:
                kubeconfig_file.write(b64decode(kubeconfig_data).decode())
                kubeconfig_file.flush()
            self._kubeconfig_path = hypershift_kubeconfig_path
        return self._kubeconfig_path
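The accessor above follows a lazy-caching pattern: compute the path once, store it on the instance, and return the cached value on later accesses. A stripped-down sketch (names here are illustrative, not from the repo):

class KubeconfigHolder:
    def __init__(self):
        self._kubeconfig_path = ""

    @property
    def kubeconfig_path(self) -> str:
        if self._kubeconfig_path == "":
            # in the real code this downloads and writes the kubeconfig file
            self._kubeconfig_path = "/tmp/example-kubeconfig"
        return self._kubeconfig_path


holder = KubeconfigHolder()
assert holder.kubeconfig_path == holder.kubeconfig_path  # second access hits the cache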
Example #5
0
    def start_install_and_wait_for_installed(self):
        cluster_name = self.config.day1_cluster_name
        # Running twice as a workaround for an issue with terraform not spawning a new node on first apply.
        for _ in range(2):
            with utils.file_lock_context():
                utils.run_command(
                    f"make _apply_terraform CLUSTER_NAME={cluster_name} PLATFORM={consts.Platforms.BARE_METAL}"
                )
        time.sleep(5)

        num_nodes_to_wait = self.config.day2_workers_count
        installed_status = consts.NodesStatus.DAY2_INSTALLED

        tfvars = utils.get_tfvars(self.config.tf_folder)
        tf_network_name = tfvars["libvirt_network_name"]

        config = TerraformConfig()
        config.nodes_count = num_nodes_to_wait
        libvirt_controller = LibvirtController(config=config,
                                               entity_config=ClusterConfig())
        libvirt_controller.wait_till_nodes_are_ready(
            network_name=tf_network_name)

        # Wait for day2 nodes
        waiting.wait(
            lambda: self.are_libvirt_nodes_in_cluster_hosts(),
            timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
            sleep_seconds=10,
            waiting_for="Nodes to be registered in inventory service",
        )
        self.set_nodes_hostnames_if_needed(tf_network_name)
        wait_till_all_hosts_are_in_status(
            client=self.api_client,
            cluster_id=self.config.cluster_id,
            nodes_count=self.config.day2_workers_count,
            statuses=[consts.NodesStatus.KNOWN],
            interval=30,
        )

        # Start day2 nodes installation
        log.info("Start installing all known nodes in the cluster %s",
                 self.config.cluster_id)
        kubeconfig = utils.get_kubeconfig_path(self.config.day1_cluster_name)
        ocp_ready_nodes = self.get_ocp_cluster_ready_nodes_num(kubeconfig)
        hosts = self.api_client.get_cluster_hosts(self.config.cluster_id)
        for host in hosts:
            if host["status"] == "known":
                self.api_client.install_day2_host(self.config.infra_env_id, host["id"])

        log.info(
            "Waiting until all nodes of cluster %s have been installed (reached added-to-existing-cluster)",
            self.config.cluster_id,
        )
        wait_till_all_hosts_are_in_status(
            client=self.api_client,
            cluster_id=self.config.cluster_id,
            nodes_count=num_nodes_to_wait,
            statuses=[installed_status],
            interval=30,
        )

        log.info(
            "Waiting until the installed nodes have actually been added to the OCP cluster"
        )
        waiting.wait(
            lambda: self.wait_nodes_join_ocp_cluster(
                ocp_ready_nodes, self.config.day2_workers_count, kubeconfig),
            timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
            sleep_seconds=30,
            waiting_for="Day2 nodes to be added to OCP cluster",
            expected_exceptions=Exception,
        )
        log.info("%d worker nodes were successfully added to OCP cluster",
                 self.config.day2_workers_count)
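The polling in this example relies on the waiting package's waiting.wait(predicate, ...) API. A minimal sketch of that retry pattern, with an illustrative predicate and placeholder timeouts rather than the repo's constants:

import waiting


def day2_hosts_installed() -> bool:
    # placeholder predicate; the real code polls the assisted-service API
    return True


waiting.wait(
    day2_hosts_installed,
    timeout_seconds=600,
    sleep_seconds=30,
    waiting_for="day2 hosts to reach added-to-existing-cluster",
    expected_exceptions=Exception,
)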
Example #6
0
def day2_nodes_flow(client, terraform_cluster_dir_prefix, tf_folder, cluster,
                    has_ipv_6, num_worker_nodes, install_cluster_flag,
                    day2_type_flag, with_static_network_config,
                    base_cluster_name):
    tf_network_name, total_num_nodes = get_network_num_nodes_from_tf(tf_folder)

    # Running twice as a workaround for an issue with terraform not spawning a new node on first apply.
    for _ in range(2):
        with utils.file_lock_context():
            utils.run_command(
                f'make _apply_terraform CLUSTER_NAME={terraform_cluster_dir_prefix} PLATFORM={consts.Platforms.BARE_METAL}'
            )
    time.sleep(5)

    if day2_type_flag == "ocp":
        num_nodes_to_wait = total_num_nodes
        installed_status = consts.NodesStatus.INSTALLED
    else:
        num_nodes_to_wait = num_worker_nodes
        installed_status = consts.NodesStatus.DAY2_INSTALLED

    wait_till_nodes_are_ready(nodes_count=num_nodes_to_wait,
                              network_name=tf_network_name)

    waiting.wait(
        lambda: are_libvirt_nodes_in_cluster_hosts(client, cluster.id,
                                                   num_nodes_to_wait),
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=10,
        waiting_for="Nodes to be registered in inventory service",
    )

    set_nodes_hostnames_if_needed(client, tf_folder,
                                  with_static_network_config, has_ipv_6,
                                  tf_network_name, cluster.id)

    wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.KNOWN],
        interval=30,
    )

    if install_cluster_flag:
        log.info("Start installing all known nodes in the cluster %s",
                 cluster.id)
        kubeconfig = utils.get_kubeconfig_path(base_cluster_name)
        ocp_orig_ready_nodes = get_ocp_cluster_ready_nodes_num(kubeconfig)
        hosts = client.get_cluster_hosts(cluster.id)
        for host in hosts:
            if host["status"] == "known":
                client.install_day2_host(cluster.id, host["id"])

        log.info(
            "Waiting until all nodes of cluster %s have been installed (reached added-to-existing-cluster state)",
            cluster.id)
        wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=num_nodes_to_wait,
            statuses=[installed_status],
            interval=30,
        )

        log.info(
            "Waiting until the installed nodes have actually been added to the OCP cluster"
        )
        waiting.wait(lambda: wait_nodes_join_ocp_cluster(
            ocp_orig_ready_nodes, num_worker_nodes, day2_type_flag, kubeconfig
        ),
                     timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
                     sleep_seconds=30,
                     waiting_for="Day2 nodes to be added to OCP cluster",
                     expected_exceptions=Exception)
        log.info("%d worker nodes were successfully added to OCP cluster",
                 num_worker_nodes)
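get_ocp_cluster_ready_nodes_num(kubeconfig) is used here to snapshot the Ready node count before day2 installation. One possible implementation with the kubernetes client, shown only as a sketch (the repo's helper may differ):

from kubernetes import client, config


def count_ready_nodes(kubeconfig_path: str) -> int:
    # count nodes whose Ready condition is True in the cluster behind kubeconfig_path
    api = client.CoreV1Api(config.new_client_from_config(kubeconfig_path))
    ready = 0
    for node in api.list_node().items:
        for condition in node.status.conditions or []:
            if condition.type == "Ready" and condition.status == "True":
                ready += 1
    return ready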
Example #7
0
def nodes_flow(
        client,
        cluster_name,
        cluster,
        machine_net,
        cluster_deployment=None,
        agent_cluster_install=None,
):
    tf_folder = utils.get_tf_folder(cluster_name, args.namespace)
    nodes_details = utils.get_tfvars(tf_folder)
    if cluster:
        nodes_details["cluster_inventory_id"] = cluster.id
        utils.set_tfvars(tf_folder, nodes_details)

    tf = terraform_utils.TerraformUtils(working_dir=tf_folder)
    is_ipv4 = machine_net.has_ip_v4 or not machine_net.has_ip_v6
    nodes_number = args.master_count + args.number_of_workers

    create_nodes_and_wait_till_registered(
        inventory_client=client,
        cluster=cluster,
        nodes_details=nodes_details,
        tf=tf,
        is_ipv4=is_ipv4,
        nodes_number=nodes_number,
        cluster_deployment=cluster_deployment,
    )

    main_cidr = args.vm_network_cidr if is_ipv4 else args.vm_network_cidr6
    secondary_cidr = machine_net.provisioning_cidr_v4 if is_ipv4 else machine_net.provisioning_cidr_v6

    if client:
        cluster_info = client.cluster_get(cluster.id)
        macs = get_libvirt_nodes_macs(nodes_details["libvirt_network_name"])
        if is_none_platform_mode():
            macs += get_libvirt_nodes_macs(nodes_details["libvirt_secondary_network_name"])

        if not (cluster_info.api_vip and cluster_info.ingress_vip):
            if not args.kube_api:
                wait_till_hosts_with_macs_are_in_status(
                    client=client,
                    cluster_id=cluster.id,
                    macs=macs,
                    statuses=[
                        consts.NodesStatus.INSUFFICIENT,
                        consts.NodesStatus.PENDING_FOR_INPUT,
                        consts.NodesStatus.KNOWN
                    ],
                )

            if args.master_count == 1:
                set_single_node_ip(
                    client=client,
                    cluster_id=cluster.id,
                    main_cidr=main_cidr,
                    is_ipv4=is_ipv4,
                    cluster_deployment=cluster_deployment,
                    tf=tf,
                )
                if not args.kube_api:
                    set_cluster_machine_cidr(
                        client=client,
                        cluster_id=cluster.id,
                        machine_net=machine_net,
                        set_vip_dhcp_allocation=False,
                    )
            elif is_none_platform_mode():
                pass
            elif args.vip_dhcp_allocation and not args.kube_api:
                set_cluster_machine_cidr(client, cluster.id, machine_net)
            else:
                set_cluster_vips(client, cluster.id, machine_net)
        else:
            log.info("VIPs already configured")

        if args.kube_api:
            kubeapi_utils.set_agents_hostnames(
                cluster_deployment=cluster_deployment,
                is_ipv4=is_ipv4,
                static_network_mode=args.with_static_network_config,
                tf=tf,
                nodes_number=nodes_number,
            )
        else:
            set_hosts_roles(
                client=client,
                cluster=cluster,
                nodes_details=nodes_details,
                machine_net=machine_net,
                tf=tf,
                master_count=args.master_count,
                static_network_mode=args.with_static_network_config,
            )

        if is_none_platform_mode() and args.master_count > 1:
            master_ips = (
                helper_cluster.Cluster.get_master_ips(client, cluster.id, main_cidr)
                + helper_cluster.Cluster.get_master_ips(client, cluster.id, secondary_cidr)
            )
            worker_ips = (
                helper_cluster.Cluster.get_worker_ips(client, cluster.id, main_cidr)
                + helper_cluster.Cluster.get_worker_ips(client, cluster.id, secondary_cidr)
            )
            if not worker_ips:
                worker_ips = master_ips
            load_balancer_ip = _get_host_ip_from_cidr(
                machine_net.cidr_v6 if machine_net.has_ip_v6 and not machine_net.has_ip_v4 else machine_net.cidr_v4)
            lb_controller = LoadBalancerController(tf)
            lb_controller.set_load_balancing_config(load_balancer_ip, master_ips, worker_ips)

        if not args.kube_api:
            wait_till_hosts_with_macs_are_in_status(
                client=client,
                cluster_id=cluster.id,
                macs=macs,
                statuses=[consts.NodesStatus.KNOWN],
            )

            if args.vip_dhcp_allocation:
                vips_info = helper_cluster.Cluster.get_vips_from_cluster(client, cluster.id)
                tf.set_new_vips(api_vip=vips_info["api_vip"], ingress_vip=vips_info["ingress_vip"])

        if args.install_cluster:
            install_cluster.run_install_flow(
                client=client,
                cluster_id=cluster.id,
                kubeconfig_path=utils.get_kubeconfig_path(cluster_name),
                pull_secret=args.pull_secret,
                tf=tf,
                cluster_deployment=cluster_deployment,
                agent_cluster_install=agent_cluster_install,
                nodes_number=nodes_number,
            )
            # Validate DNS domains resolvability
            validate_dns(client, cluster.id)