def nodes_flow(client, cluster_name, cluster):
    """Create the nodes of a cluster and prepare its hosts for installation.

    Node details are built from ``cluster_name``; when ``cluster`` is truthy
    its id is stored in them under ``"cluster_inventory_id"``. The nodes are
    then created through ``create_nodes_and_wait_till_registered``.

    When ``client`` is truthy the flow continues: it requests a wait for all
    hosts on ``INSUFFICIENT``, sets the cluster VIPs and the host roles,
    requests a wait on ``KNOWN``, prints the hosts and, if
    ``args.install_cluster`` is truthy, sleeps 10 seconds and runs the
    install flow.

    Image, storage path, node counts, the install switch and the pull secret
    are read from the module-level ``args`` object.

    Args:
        client: Inventory client; a falsy value stops the flow once the
            nodes have been created.
        cluster_name: Name handed to ``_create_node_details``.
        cluster: Cluster object exposing ``id``; may be falsy when there is
            no client.
    """
    nodes_details = _create_node_details(cluster_name)
    if cluster:
        nodes_details["cluster_inventory_id"] = cluster.id

    create_nodes_and_wait_till_registered(
        inventory_client=client,
        cluster=cluster,
        image_path=args.image or consts.IMAGE_PATH,
        storage_path=args.storage_path,
        master_count=args.master_count,
        nodes_details=nodes_details,
    )

    # Every remaining step goes through the inventory client.
    if not client:
        return

    nodes_count = args.master_count + args.number_of_workers
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=nodes_count,
        statuses=[consts.NodesStatus.INSUFFICIENT],
    )
    set_cluster_vips(client, cluster.id)
    set_hosts_roles(client, cluster.id)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=nodes_count,
        statuses=[consts.NodesStatus.KNOWN],
    )

    log.info("Printing after setting roles")
    hosts_after_roles = client.get_cluster_hosts(cluster.id)
    pprint.pprint(hosts_after_roles)

    if not args.install_cluster:
        return

    time.sleep(10)
    install_cluster.run_install_flow(
        client=client,
        cluster_id=cluster.id,
        kubeconfig_path=consts.DEFAULT_CLUSTER_KUBECONFIG_PATH,
        pull_secret=args.pull_secret,
    )
def _install_cluster(client, cluster):
    """Start the installation of ``cluster`` and wait on its hosts.

    The object returned by ``client.install_cluster`` supplies both the
    cluster id and the host count for the follow-up call to
    ``utils.wait_till_all_hosts_are_in_status``, which is asked for the
    ``INSTALLING`` status with a 30 second interval.

    Args:
        client: Inventory client exposing ``install_cluster``.
        cluster: Cluster object exposing ``id``.
    """
    started_cluster = client.install_cluster(cluster_id=cluster.id)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=started_cluster.id,
        nodes_count=len(started_cluster.hosts),
        statuses=[consts.NodesStatus.INSTALLING],
        interval=30,
    )
def wait_until_hosts_are_discovered(cluster_id, api_client):
    """Request a wait until the cluster's hosts have been discovered.

    Delegates to ``utils.wait_till_all_hosts_are_in_status`` with the host
    count taken from ``env_variables['NUM_NODES']`` and two accepted
    statuses: ``PENDING_FOR_INPUT`` and ``KNOWN``.

    Args:
        cluster_id: Id of the cluster whose hosts are checked.
        api_client: Client forwarded to the helper.
    """
    expected_hosts = env_variables['NUM_NODES']
    discovered_statuses = [
        consts.NodesStatus.PENDING_FOR_INPUT,
        consts.NodesStatus.KNOWN,
    ]
    utils.wait_till_all_hosts_are_in_status(
        client=api_client,
        cluster_id=cluster_id,
        nodes_count=expected_hosts,
        statuses=discovered_statuses,
    )
def day2_nodes_flow(
        client,
        cluster_name,
        cluster,
        image_path,
        num_worker_nodes,
        api_vip_ip,
        api_vip_dnsname,
        namespace,
        install_cluster_flag,
):
    """Provision day-2 worker nodes and optionally install them.

    Steps, in order:
      1. Apply the day-2 terraform configuration and run
         ``make _apply_terraform`` under ``utils.file_lock_context()``.
      2. Sleep 5 seconds, then wait for the libvirt nodes to be ready.
      3. Poll until ``utils.are_libvirt_nodes_in_cluster_hosts`` is truthy,
         then request a wait for the hosts on ``KNOWN``.
      4. If ``install_cluster_flag`` is truthy: configure ``/etc/hosts`` via
         ``config_etc_hosts``, record the current number of ready OCP nodes,
         start the day-2 installation, request a wait on ``DAY2_INSTALLED``
         and finally poll ``wait_nodes_join_ocp_cluster``.

    Args:
        client: Inventory client.
        cluster_name: Name passed to terraform configuration and ``make``.
        cluster: Cluster object exposing ``id``.
        image_path: Accepted for interface compatibility; not used here.
        num_worker_nodes: Number of worker nodes being added.
        api_vip_ip: API VIP address.
        api_vip_dnsname: DNS name of the API VIP.
        namespace: Namespace forwarded to ``apply_day2_tf_configuration``.
        install_cluster_flag: When truthy, also install the new nodes.
    """
    tf_network_name, total_num_nodes = apply_day2_tf_configuration(
        cluster_name, num_worker_nodes, api_vip_ip, api_vip_dnsname, namespace)

    with utils.file_lock_context():
        utils.run_command(f'make _apply_terraform CLUSTER_NAME={cluster_name}')
    time.sleep(5)

    utils.wait_till_nodes_are_ready(
        nodes_count=total_num_nodes,
        network_name=tf_network_name,
    )

    def nodes_registered():
        return utils.are_libvirt_nodes_in_cluster_hosts(
            client, cluster.id, num_worker_nodes)

    waiting.wait(
        nodes_registered,
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=10,
        waiting_for="Nodes to be registered in inventory service",
    )
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.KNOWN],
        interval=30,
    )

    if not install_cluster_flag:
        return

    log.info("Start installing all known nodes in the cluster %s", cluster.id)
    config_etc_hosts(api_vip_ip, api_vip_dnsname)
    # Baseline taken before installation starts; passed to the join check below.
    ocp_orig_ready_nodes = get_ocp_cluster_ready_nodes_num()
    client.install_day2_cluster(cluster.id)

    log.info(
        "Start waiting until all nodes of cluster %s have been installed( reached added-to-existing-clustertate)",
        cluster.id)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.DAY2_INSTALLED],
        interval=30,
    )

    log.info(
        "Start waiting until installed nodes has actually been added to the OCP cluster"
    )

    def nodes_joined():
        return wait_nodes_join_ocp_cluster(ocp_orig_ready_nodes, num_worker_nodes)

    waiting.wait(
        nodes_joined,
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=30,
        waiting_for="Day2 nodes to be added to OCP cluster",
    )
    log.info("%d worker nodes were successfully added to OCP cluster", num_worker_nodes)
def wait_for_nodes_to_install(cluster_id, api_client, timeout=consts.CLUSTER_INSTALLATION_TIMEOUT):
    """Request a wait until the cluster's hosts are installed.

    Delegates to ``utils.wait_till_all_hosts_are_in_status`` with the host
    count taken from ``env_variables['NUM_NODES']``.

    Args:
        cluster_id: Id of the cluster whose hosts are checked.
        api_client: Client forwarded to the helper.
        timeout: Timeout forwarded to the helper; defaults to
            ``consts.CLUSTER_INSTALLATION_TIMEOUT``.
    """
    utils.wait_till_all_hosts_are_in_status(
        client=api_client,
        cluster_id=cluster_id,
        # This is a per-host wait, so the status comes from NodesStatus;
        # ClusterStatus is meant for cluster-level waits.
        statuses=[consts.NodesStatus.INSTALLED],
        nodes_count=env_variables['NUM_NODES'],
        timeout=timeout,
    )
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    """Wait for the hosts, then the cluster, to reach ``INSTALLED``.

    First asks ``utils.wait_till_all_hosts_are_in_status`` for the host
    status ``INSTALLED`` (using ``timeout`` and a 60 second interval), then
    asks ``utils.wait_till_cluster_is_in_status`` for the cluster status
    ``INSTALLED``.

    Args:
        client: Inventory client.
        cluster: Cluster object exposing ``id`` and ``hosts``.
        timeout: Timeout for the host wait; defaults to ``60 * 60 * 2``.
    """
    log.info("Waiting %s till cluster finished installation", timeout)

    host_count = len(cluster.hosts)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=host_count,
        statuses=[consts.NodesStatus.INSTALLED],
        timeout=timeout,
        interval=60,
    )

    utils.wait_till_cluster_is_in_status(
        client=client,
        cluster_id=cluster.id,
        statuses=[consts.ClusterStatus.INSTALLED],
    )
def run_install_flow(client, cluster_id, kubeconfig_path):
    """Install a cluster and download its kubeconfig.

    Fetches the cluster first (the result is discarded, so this only serves
    as an existence check if ``client.cluster_get`` raises for unknown ids -
    presumably it does; verify), starts the installation, requests a wait
    for the hosts on ``INSTALLING`` and downloads the kubeconfig.

    Args:
        client: Inventory client.
        cluster_id: Id of the cluster to install.
        kubeconfig_path: Destination passed to ``client.download_kubeconfig``.
    """
    print("Verifying cluster exists")
    client.cluster_get(cluster_id)

    print("Install cluster", cluster_id)
    installing_cluster = client.install_cluster(cluster_id=cluster_id)
    # NOTE(review): this call passes a singular ``status`` keyword - confirm
    # it matches the current signature of the helper.
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster_id,
        nodes_count=len(installing_cluster.hosts),
        status=consts.NodesStatus.INSTALLING,
    )

    print("Download kubeconfig")
    client.download_kubeconfig(
        cluster_id=cluster_id,
        kubeconfig_path=kubeconfig_path,
    )
def create_nodes_and_wait_till_registered(
        inventory_client,
        cluster,
        image_path,
        storage_path,
        master_count,
        nodes_details,
):
    """Create the nodes and, given a client, wait for their registration.

    The expected node count is ``master_count`` plus
    ``nodes_details["worker_count"]``. After creating the nodes and waiting
    for them to be ready, the function returns early when
    ``inventory_client`` is falsy. Otherwise it polls every 5 seconds until
    the client reports at least that many cluster hosts, prints them, and
    requests a wait for the hosts on ``KNOWN``.

    Args:
        inventory_client: Inventory client, or a falsy value to skip the
            registration wait.
        cluster: Cluster object exposing ``id``; only used with a client.
        image_path: Image path passed to ``create_nodes``.
        storage_path: Storage path passed to ``create_nodes``.
        master_count: Number of master nodes.
        nodes_details: Mapping that must contain ``"worker_count"`` and
            ``"cluster_name"``.
    """
    nodes_count = master_count + nodes_details["worker_count"]

    create_nodes(
        image_path,
        storage_path=storage_path,
        master_count=master_count,
        nodes_details=nodes_details,
    )
    utils.wait_till_nodes_are_ready(
        nodes_count=nodes_count,
        cluster_name=nodes_details["cluster_name"],
    )

    if not inventory_client:
        log.info("No inventory url, will not wait till nodes registration")
        return

    log.info("Wait till nodes will be registered")

    def enough_hosts_registered():
        registered = inventory_client.get_cluster_hosts(cluster.id)
        return len(registered) >= nodes_count

    waiting.wait(
        enough_hosts_registered,
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=5,
        waiting_for="Nodes to be registered in inventory service",
    )

    log.info("Registered nodes are:")
    pprint.pprint(inventory_client.get_cluster_hosts(cluster.id))

    # NOTE(review): this call passes a singular ``status`` keyword - confirm
    # it matches the current signature of the helper.
    utils.wait_till_all_hosts_are_in_status(
        client=inventory_client,
        cluster_id=cluster.id,
        nodes_count=nodes_count,
        status=consts.NodesStatus.KNOWN,
    )
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    """Wait for the hosts, then the cluster, to reach ``INSTALLED``.

    The host wait uses ``timeout`` and a 60 second interval; the cluster
    wait uses ``consts.CLUSTER_INSTALLATION_TIMEOUT``.

    Args:
        client: Inventory client.
        cluster: Cluster object exposing ``id`` and ``hosts``.
        timeout: Timeout for the host wait; defaults to ``60 * 60 * 2``.
    """
    log.info("Waiting %s till cluster finished installation", timeout)

    # TODO: Change host validation for only previous known hosts
    host_count = len(cluster.hosts)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=host_count,
        statuses=[consts.NodesStatus.INSTALLED],
        timeout=timeout,
        interval=60,
    )

    utils.wait_till_cluster_is_in_status(
        client=client,
        cluster_id=cluster.id,
        statuses=[consts.ClusterStatus.INSTALLED],
        timeout=consts.CLUSTER_INSTALLATION_TIMEOUT,
    )
def _install_cluster(client, cluster):
    """Start the installation of ``cluster`` and wait for it to get going.

    The object returned by ``client.install_cluster`` supplies the cluster id
    and host count for two follow-up waits: the cluster on ``INSTALLING``
    (bounded by ``consts.START_CLUSTER_INSTALLATION_TIMEOUT``), then the
    hosts on ``INSTALLING`` or ``INSTALLING_IN_PROGRESS`` with a 30 second
    interval.

    Args:
        client: Inventory client exposing ``install_cluster``.
        cluster: Cluster object exposing ``id``.
    """
    started_cluster = client.install_cluster(cluster_id=cluster.id)

    utils.wait_till_cluster_is_in_status(
        client=client,
        cluster_id=started_cluster.id,
        timeout=consts.START_CLUSTER_INSTALLATION_TIMEOUT,
        statuses=[consts.ClusterStatus.INSTALLING],
    )

    host_count = len(started_cluster.hosts)
    installing_statuses = [
        consts.NodesStatus.INSTALLING,
        consts.NodesStatus.INSTALLING_IN_PROGRESS,
    ]
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=started_cluster.id,
        nodes_count=host_count,
        statuses=installing_statuses,
        interval=30,
    )
def nodes_flow(client, cluster_name, cluster):
    """Create the nodes of a cluster and assign roles to its hosts.

    Node details are built from ``cluster_name``; when ``cluster`` is truthy
    its id is stored in them under ``"cluster_inventory_id"``. The nodes are
    then created through ``create_nodes_and_wait_till_registered``.

    When ``client`` is truthy the host roles are set, a wait for the hosts
    on ``KNOWN`` is requested, the hosts are printed and, if
    ``args.install_cluster`` is truthy, the install flow is run.

    Image, storage path, node counts and the install switch are read from
    the module-level ``args`` object.

    Args:
        client: Inventory client; a falsy value stops the flow once the
            nodes have been created.
        cluster_name: Name handed to ``_create_node_details``.
        cluster: Cluster object exposing ``id``; may be falsy when there is
            no client.
    """
    nodes_details = _create_node_details(cluster_name)
    if cluster:
        nodes_details["cluster_inventory_id"] = cluster.id

    create_nodes_and_wait_till_registered(
        inventory_client=client,
        cluster=cluster,
        image_path=args.image or consts.IMAGE_PATH,
        storage_path=args.storage_path,
        master_count=args.master_count,
        nodes_details=nodes_details,
    )

    # Every remaining step goes through the inventory client.
    if not client:
        return

    set_hosts_roles(client, cluster.id)
    nodes_count = args.master_count + args.number_of_workers
    # NOTE(review): this call passes a singular ``status`` keyword - confirm
    # it matches the current signature of the helper.
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=nodes_count,
        status=consts.NodesStatus.KNOWN,
    )

    log.info("Printing after setting roles")
    hosts_after_roles = client.get_cluster_hosts(cluster.id)
    pprint.pprint(hosts_after_roles)

    if not args.install_cluster:
        return

    install_cluster.run_install_flow(
        client,
        cluster.id,
        consts.DEFAULT_CLUSTER_KUBECONFIG_PATH,
    )
def wait_till_installed(client, cluster, timeout=60 * 60 * 2):
    """Wait for hosts and cluster to reach ``INSTALLED``; always fetch logs.

    The host wait uses ``timeout`` and a 60 second interval; the cluster
    wait uses ``consts.CLUSTER_INSTALLATION_TIMEOUT``. Whether or not the
    waits raise, the folder ``build/<cluster.id>`` is recreated and
    ``download_logs_from_all_hosts`` is called with it.

    Args:
        client: Inventory client.
        cluster: Cluster object exposing ``id`` and ``hosts``.
        timeout: Timeout for the host wait; defaults to ``60 * 60 * 2``.
    """
    log.info("Waiting %s till cluster finished installation", timeout)

    # TODO: Change host validation for only previous known hosts
    try:
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=len(cluster.hosts),
            statuses=[consts.NodesStatus.INSTALLED],
            timeout=timeout,
            interval=60,
        )
        utils.wait_till_cluster_is_in_status(
            client=client,
            cluster_id=cluster.id,
            statuses=[consts.ClusterStatus.INSTALLED],
            timeout=consts.CLUSTER_INSTALLATION_TIMEOUT,
        )
    finally:
        # Runs on success and on failure alike.
        output_folder = f'build/{cluster.id}'
        utils.recreate_folder(output_folder)
        download_logs_from_all_hosts(
            client=client,
            cluster_id=cluster.id,
            output_folder=output_folder,
        )
def day2_nodes_flow(
        client,
        cluster_name,
        cluster,
        image_path,
        num_worker_nodes,
        api_vip_ip,
        api_vip_dnsname,
        namespace,
        install_cluster_flag,
):
    """Provision day-2 worker nodes and optionally install them.

    Applies the day-2 terraform configuration, runs ``make _apply_terraform``
    under ``utils.file_lock_context()``, sleeps 5 seconds and waits for the
    libvirt nodes to be ready. It then polls until
    ``utils.are_libvirt_nodes_in_cluster_hosts`` is truthy and requests a
    wait for the hosts on ``KNOWN``. If ``install_cluster_flag`` is truthy
    the day-2 installation is started and a wait on ``DAY2_INSTALLED`` is
    requested.

    Args:
        client: Inventory client.
        cluster_name: Name passed to terraform configuration and ``make``.
        cluster: Cluster object exposing ``id``.
        image_path: Accepted for interface compatibility; not used here.
        num_worker_nodes: Number of worker nodes being added.
        api_vip_ip: API VIP address.
        api_vip_dnsname: DNS name of the API VIP.
        namespace: Namespace forwarded to ``apply_day2_tf_configuration``.
        install_cluster_flag: When truthy, also install the new nodes.
    """
    tf_network_name, total_num_nodes = apply_day2_tf_configuration(
        cluster_name, num_worker_nodes, api_vip_ip, api_vip_dnsname, namespace)

    with utils.file_lock_context():
        utils.run_command(f'make _apply_terraform CLUSTER_NAME={cluster_name}')
    time.sleep(5)

    utils.wait_till_nodes_are_ready(
        nodes_count=total_num_nodes,
        network_name=tf_network_name,
    )

    def nodes_registered():
        return utils.are_libvirt_nodes_in_cluster_hosts(
            client, cluster.id, num_worker_nodes)

    waiting.wait(
        nodes_registered,
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=10,
        waiting_for="Nodes to be registered in inventory service",
    )
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.KNOWN],
        interval=30,
    )

    if not install_cluster_flag:
        return

    client.install_day2_cluster(cluster.id)
    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.DAY2_INSTALLED],
        interval=30,
    )
def is_hosts_in_wrong_boot_order(cluster_id, api_client):
    """Return the result of ``utils.wait_till_all_hosts_are_in_status()``.

    NOTE(review): neither ``cluster_id`` nor ``api_client`` is used, and the
    helper is called without any arguments - confirm this is intentional.

    Args:
        cluster_id: Currently unused.
        api_client: Currently unused.

    Returns:
        Whatever the helper returns.
    """
    outcome = utils.wait_till_all_hosts_are_in_status()
    return outcome