Example #1
 def _add_flannel_cni(self):
     template_file = os.path.join(self.capz_flannel_dir,
                                  "flannel/kube-flannel.yaml.j2")
     context = {
         "cluster_network_subnet": self.deployer.cluster_network_subnet,
         "flannel_mode": self.opts.flannel_mode
     }
     kube_flannel = "/tmp/kube-flannel.yaml"
     utils.render_template(template_file, kube_flannel, context)
     server_core_tag = "windowsservercore-%s" % (
         self.opts.base_container_image_tag)
     mode = "overlay"
     if self.opts.flannel_mode == constants.FLANNEL_MODE_L2BRIDGE:
         mode = "l2bridge"
     context = {
         "server_core_tag": server_core_tag,
         "container_runtime": self.opts.container_runtime,
         "mode": mode
     }
     kube_flannel_windows = "/tmp/kube-flannel-windows.yaml"
     searchpath = os.path.join(self.capz_flannel_dir, "flannel")
     utils.render_template("kube-flannel-windows.yaml.j2",
                           kube_flannel_windows, context, searchpath)
     cmd = [self.kubectl, "apply", "-f", kube_flannel]
     utils.retry_on_error()(utils.run_shell_cmd)(cmd)
     cmd = [self.kubectl, "apply", "-f", kube_flannel_windows]
     utils.retry_on_error()(utils.run_shell_cmd)(cmd)
     if self.opts.flannel_mode == constants.FLANNEL_MODE_OVERLAY:
         self._set_vxlan_devices_mtu()
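The utils.render_template helper used above is not part of the listing. A minimal sketch of what it plausibly does, assuming Jinja2 (the signature is inferred from the two call sites; treat it as hypothetical):

 import os

 from jinja2 import Environment, FileSystemLoader

 def render_template(template_file, output_file, context, searchpath="/"):
     # Absolute template paths are split into a search path plus file name,
     # matching the first call site; the second call site passes them
     # separately.
     if os.path.isabs(template_file):
         searchpath, template_file = os.path.split(template_file)
     env = Environment(loader=FileSystemLoader(searchpath))
     with open(output_file, "w") as f:
         f.write(env.get_template(template_file).render(**context))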
Example #2
 def _create_bootstrap_azure_vm(self):
     self.logging.info("Setting up the bootstrap Azure VM")
     vm_nic = self._create_bootstrap_vm_nic()
     vm_parameters = {
         "location": self.azure_location,
         "os_profile": {
             "computer_name": self.bootstrap_vm_name,
             "admin_username": "******",
             "linux_configuration": {
                 "disable_password_authentication": True,
                 "ssh": {
                     "public_keys": [{
                         "key_data":
                         os.environ["AZURE_SSH_PUBLIC_KEY"],
                         "path":
                         "/home/capi/.ssh/authorized_keys"
                     }]
                 }
             }
         },
         "hardware_profile": {
             "vm_size": self.bootstrap_vm_size
         },
         "storage_profile": {
             "image_reference": {
                 "publisher": "Canonical",
                 "offer": "0001-com-ubuntu-server-focal",
                 "sku": "20_04-lts-gen2",
                 "version": "latest"
             },
         },
         "network_profile": {
             "network_interfaces": [{
                 "id": vm_nic.id
             }]
         }
     }
     self.logging.info("Creating bootstrap VM")
     utils.retry_on_error()(
         self.compute_client.virtual_machines.begin_create_or_update)(
             self.cluster_name, self.bootstrap_vm_name,
             vm_parameters).result()
     # Poll until provisioning settles; the polled VM object is the one
     # returned to the caller.
     vm = self._wait_for_bootstrap_vm()
     ip_config = utils.retry_on_error()(
         self.network_client.network_interfaces.get)(
             self.cluster_name, vm_nic.name).ip_configurations[0]
     bootstrap_vm_private_ip = ip_config.private_ip_address
     public_ip = utils.retry_on_error()(
         self.network_client.public_ip_addresses.get)(
             self.cluster_name, self.bootstrap_vm_public_ip_name)
     bootstrap_vm_public_ip = public_ip.ip_address
     self.logging.info("Waiting for bootstrap VM SSH port to be reachable")
     utils.wait_for_port_connectivity(bootstrap_vm_public_ip, 22)
     self.logging.info("Finished setting up the bootstrap VM")
     return {
         'private_ip': bootstrap_vm_private_ip,
         'public_ip': bootstrap_vm_public_ip,
         'vm': vm,
     }
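utils.wait_for_port_connectivity is likewise assumed rather than shown. A socket-based sketch that matches the call above (name and default timeout are hypothetical):

 import socket
 import time

 def wait_for_port_connectivity(address, port, max_wait=300):
     # Poll the TCP port until it accepts a connection or max_wait elapses.
     deadline = time.time() + max_wait
     while time.time() < deadline:
         try:
             with socket.create_connection((address, port), timeout=5):
                 return
         except OSError:
             time.sleep(5)
     raise TimeoutError(
         "Port %s on %s not reachable after %ss" % (port, address, max_wait))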
Example #3
 def _set_vxlan_devices_mtu(self):
     self.logging.info(
         "Set the proper MTU for the k8s master vxlan devices")
     ssh_key_path = (os.environ.get("SSH_KEY")
                     or os.path.join(os.environ.get("HOME"), ".ssh/id_rsa"))
     # run_shell_cmd is assumed to execute via a shell, so the "<"
     # redirection feeds the local script to the remote "sudo bash -s".
     utils.retry_on_error()(utils.run_shell_cmd)([
         "ssh", "-o", "StrictHostKeyChecking=no", "-o",
         "UserKnownHostsFile=/dev/null", "-i", ssh_key_path,
         "capi@%s" % self.deployer.master_public_address,
         "'sudo bash -s' < %s" % os.path.join(
             self.e2e_runner_dir, "scripts/set-vxlan-devices-mtu.sh")
     ])
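Every snippet wraps its calls in utils.retry_on_error()(...). A plausible pair of helpers, assuming tenacity for the retry policy (hypothetical; note that run_shell_cmd must go through a shell for the "<" redirection above to work):

 import subprocess

 from tenacity import retry, stop_after_attempt, wait_exponential

 def retry_on_error(max_attempts=5, max_sleep_seconds=30):
     # Returns a decorator that retries the wrapped callable on any
     # exception, re-raising the last one.
     return retry(stop=stop_after_attempt(max_attempts),
                  wait=wait_exponential(max=max_sleep_seconds),
                  reraise=True)

 def run_shell_cmd(cmd, sensitive=False):
     # Joins the argument list and executes it via the shell; "sensitive"
     # would suppress logging of the command in the real helper.
     result = subprocess.run(" ".join(cmd), shell=True,
                             capture_output=True, check=True)
     return result.stdout, result.stderr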
Example #4
 def _get_agents_private_addresses(self, operating_system):
     cmd = [
         self.kubectl, "get", "nodes", "--kubeconfig",
         self.capz_kubeconfig_path, "-o", "yaml"
     ]
     output, _ = utils.retry_on_error()(utils.run_shell_cmd)(cmd,
                                                             sensitive=True)
     addresses = []
     nodes = yaml.safe_load(output)
     for node in nodes['items']:
         node_os = node['status']['nodeInfo']['operatingSystem']
         if node_os != operating_system:
             continue
         try:
             node_addresses = [
                 n['address'] for n in node['status']['addresses']
                 if n['type'] == 'InternalIP'
             ]
         except Exception as ex:
             self.logging.warning(
                 "Cannot find private address for node %s. Exception "
                 "details: %s. Skipping", node["metadata"]["name"], ex)
             continue
         # pick the first node internal address
         addresses.append(node_addresses[0])
     return addresses
Example #5
 def enable_ip_forwarding(self):
     self.logging.info("Enabling IP forwarding for the cluster VMs")
     vm_nics = utils.retry_on_error()(
         self.network_client.network_interfaces.list)(self.cluster_name)
     for nic in vm_nics:
         if nic.name == self.bootstrap_vm_nic_name:
             continue
         if nic.enable_ip_forwarding:
             self.logging.info("IP forwarding is already enabled on nic %s",
                               nic.name)
             continue
         self.logging.info("Enabling IP forwarding on nic %s", nic.name)
         nic_parameters = nic.as_dict()
         nic_parameters["enable_ip_forwarding"] = True
         utils.retry_on_error()(
             self.network_client.network_interfaces.begin_create_or_update)(
                 self.cluster_name, nic.name, nic_parameters).result()
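The compute_client, network_client, and resource_mgmt_client attributes are never constructed in these excerpts. A plausible setup, assuming the azure-mgmt-* SDKs and azure-identity (the actual credential handling is not shown):

 import os

 from azure.identity import DefaultAzureCredential
 from azure.mgmt.compute import ComputeManagementClient
 from azure.mgmt.network import NetworkManagementClient
 from azure.mgmt.resource import ResourceManagementClient

 credential = DefaultAzureCredential()
 subscription_id = os.environ["AZURE_SUBSCRIPTION_ID"]

 # Management clients matching the attribute names used in the snippets.
 compute_client = ComputeManagementClient(credential, subscription_id)
 network_client = NetworkManagementClient(credential, subscription_id)
 resource_mgmt_client = ResourceManagementClient(credential, subscription_id)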
Example #6
 def _add_kube_proxy_windows(self):
     template_file = os.path.join(self.capz_flannel_dir,
                                  "kube-proxy/kube-proxy-windows.yaml.j2")
     server_core_tag = "windowsservercore-%s" % (
         self.opts.base_container_image_tag)
     enable_ipv6dualstack = str(self.opts.enable_ipv6dualstack).lower()
     context = {
         "kubernetes_version": self.kubernetes_version,
         "server_core_tag": server_core_tag,
         "enable_win_dsr": str(self.opts.enable_win_dsr).lower(),
         "enable_ipv6dualstack": enable_ipv6dualstack,
         "flannel_mode": self.opts.flannel_mode
     }
     output_file = "/tmp/kube-proxy-windows.yaml"
     utils.render_template(template_file, output_file, context)
     cmd = [self.kubectl, "apply", "-f", output_file]
     utils.retry_on_error()(utils.run_shell_cmd)(cmd)
Example #7
 def master_public_address(self):
     cmd = [
         self.kubectl, "get", "cluster", "--kubeconfig",
         self.mgmt_kubeconfig_path, self.cluster_name, "-o",
         "custom-columns=MASTER_ADDRESS:.spec.controlPlaneEndpoint.host",
         "--no-headers"
     ]
     output, _ = utils.retry_on_error()(utils.run_shell_cmd)(cmd)
     return output.decode().strip()
Example #8
 def _create_bootstrap_vm_public_ip(self):
     self.logging.info("Creating bootstrap VM public IP")
     public_ip_parameters = {
         "location": self.azure_location,
         "public_ip_address_version": "IPV4"
     }
     return utils.retry_on_error()(
         self.network_client.public_ip_addresses.begin_create_or_update)(
             self.cluster_name, self.bootstrap_vm_public_ip_name,
             public_ip_parameters).result()
Example #9
 def connect_agents_to_controlplane_subnet(self):
     self.logging.info("Connecting agents VMs to the control-plane subnet")
     control_plane_subnet = utils.retry_on_error()(
         self.network_client.subnets.get)(
             self.cluster_name, "{}-vnet".format(self.cluster_name),
             "{}-controlplane-subnet".format(self.cluster_name))
     subnet_id = control_plane_subnet.id
     for vm in self._get_agents_vms():
         self.logging.info("Connecting VM {}".format(vm.name))
         nic_id = vm.network_profile.network_interfaces[0].id
         vm_nic = self._get_vm_nic(nic_id)
         nic_address = vm_nic.ip_configurations[0].private_ip_address
         route = self._get_vm_route(nic_address)
         self.logging.info("Shutting down VM")
         utils.retry_on_error()(
             self.compute_client.virtual_machines.begin_deallocate)(
                 self.cluster_name, vm.name).wait()
         self.logging.info("Updating VM NIC subnet")
         nic_parameters = vm_nic.as_dict()
         nic_model = net_models.NetworkInterface(**nic_parameters)
         nic_model.ip_configurations[0]['subnet']['id'] = subnet_id
         utils.retry_on_error()(
             self.network_client.network_interfaces.begin_create_or_update)(
                 self.cluster_name, vm_nic.name, nic_model).wait()
         self.logging.info("Starting VM")
         utils.retry_on_error()(
             self.compute_client.virtual_machines.begin_start)(
                 self.cluster_name, vm.name).wait()
         self.logging.info("Updating the node routetable")
         route_params = route.as_dict()
         vm_nic = self._get_vm_nic(nic_id)  # Refresh NIC info
         nic_address = vm_nic.ip_configurations[0].private_ip_address
         route_params["next_hop_ip_address"] = nic_address
         utils.retry_on_error()(
             self.network_client.routes.begin_create_or_update)(
                 self.cluster_name,
                 "{}-node-routetable".format(self.cluster_name), route.name,
                 route_params).wait()
         self.logging.info(
             "Waiting until VM address is refreshed in the CAPZ cluster")
         for attempt in Retrying(stop=stop_after_delay(10 * 60),
                                 wait=wait_exponential(max=30),
                                 reraise=True):
             with attempt:
                 addresses = self._get_agents_private_addresses("windows")
                 assert nic_address in addresses
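The Retrying loop above, like the ones in the later examples, matches the tenacity API, and net_models matches the Azure network models module; the implied imports would be:

 from azure.mgmt.network import models as net_models
 from tenacity import (Retrying, retry_if_exception_type, stop_after_delay,
                       wait_exponential)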
Example #10
 def _create_capz_cluster(self):
     bootstrap_vm_address = "{}:8081".format(self.bootstrap_vm_private_ip)
     context = {
         "cluster_name": self.cluster_name,
         "cluster_network_subnet": self.cluster_network_subnet,
         "azure_location": self.azure_location,
         "azure_subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
         "azure_tenant_id": os.environ["AZURE_TENANT_ID"],
         "azure_client_id": os.environ["AZURE_CLIENT_ID"],
         "azure_client_secret": os.environ["AZURE_CLIENT_SECRET"],
         "azure_ssh_public_key": os.environ["AZURE_SSH_PUBLIC_KEY"],
         "azure_ssh_public_key_b64": os.environ["AZURE_SSH_PUBLIC_KEY_B64"],
         "master_vm_size": self.master_vm_size,
         "win_minion_count": self.win_minion_count,
         "win_minion_size": self.win_minion_size,
         "win_minion_image_type": self.win_minion_image_type,
         "bootstrap_vm_address": bootstrap_vm_address,
         "ci_version": self.ci_version,
         "flannel_mode": self.flannel_mode,
         "container_runtime": self.container_runtime,
         "k8s_bins": "k8sbins" in self.bins_built,
         "sdn_cni_bins": "sdncnibins" in self.bins_built,
         "containerd_bins": "containerdbins" in self.bins_built,
         "containerd_shim_bins": "containerdshim" in self.bins_built,
     }
     if self.win_minion_image_type == constants.SHARED_IMAGE_GALLERY_TYPE:
         parsed = self._parse_win_minion_image_gallery()
         context["win_minion_image_rg"] = parsed["resource_group"]
         context["win_minion_image_gallery"] = parsed["gallery_name"]
         context["win_minion_image_definition"] = parsed["image_definition"]
         context["win_minion_image_version"] = parsed["image_version"]
     elif self.win_minion_image_type == constants.MANAGED_IMAGE_TYPE:
         context["win_minion_image_id"] = self.win_minion_image_id
     self.logging.info("Create CAPZ cluster")
     output_file = "/tmp/capz-cluster.yaml"
     utils.render_template("cluster.yaml.j2", output_file, context,
                           self.capz_dir)
     utils.retry_on_error()(utils.run_shell_cmd)([
         self.kubectl, "apply", "--kubeconfig", self.mgmt_kubeconfig_path,
         "-f", output_file
     ])
Example #11
 def _prepull_images(self, timeout=3600):
     prepull_yaml_path = "/tmp/prepull-windows-images.yaml"
     utils.download_file(self.opts.prepull_yaml, prepull_yaml_path)
     self.logging.info("Starting Windows images pre-pull")
     utils.retry_on_error()(utils.run_shell_cmd)(
         [self.kubectl, "apply", "-f", prepull_yaml_path])
     self.logging.info(
         "Waiting up to %.2f minutes to pre-pull Windows container images",
         timeout / 60.0)
     cmd = [self.kubectl, "get", "-o", "yaml", "-f", prepull_yaml_path]
     for attempt in Retrying(stop=stop_after_delay(timeout),
                             wait=wait_exponential(max=30),
                             retry=retry_if_exception_type(AssertionError),
                             reraise=True):
         with attempt:
             output, _ = utils.run_shell_cmd(cmd, sensitive=True)
             ds = yaml.safe_load(output.decode())
             ready_nr = ds["status"]["numberReady"]
             desired_ready_nr = ds["status"]["desiredNumberScheduled"]
             assert ready_nr == desired_ready_nr
     self.logging.info("Windows images successfully pre-pulled")
     self.logging.info("Cleaning up")
     utils.retry_on_error()(utils.run_shell_cmd)(
         [self.kubectl, "delete", "--wait", "-f", prepull_yaml_path])
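utils.download_file is another assumed helper. A minimal sketch using requests (hypothetical name and chunk size):

 import requests

 def download_file(url, dest_path):
     # Stream the URL to disk so large files are not loaded into memory.
     with requests.get(url, stream=True, timeout=60) as resp:
         resp.raise_for_status()
         with open(dest_path, "wb") as f:
             for chunk in resp.iter_content(chunk_size=8192):
                 f.write(chunk)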
Example #12
 def _create_bootstrap_vm_nic(self):
     self.logging.info("Creating bootstrap VM NIC")
     public_ip = self._create_bootstrap_vm_public_ip()
     control_plane_subnet = utils.retry_on_error()(
         self.network_client.subnets.get)(
             self.cluster_name, "%s-vnet" % self.cluster_name,
             "%s-controlplane-subnet" % self.cluster_name)
     nic_parameters = {
         "location":
         self.azure_location,
         "ip_configurations": [{
             "name": "%s-ipconfig" % self.bootstrap_vm_nic_name,
             "subnet": {
                 "id": control_plane_subnet.id
             },
             "public_ip_address": {
                 "id": public_ip.id
             }
         }]
     }
     return utils.retry_on_error()(
         self.network_client.network_interfaces.begin_create_or_update)(
             self.cluster_name, self.bootstrap_vm_nic_name,
             nic_parameters).result()
Example #13
 def _create_resource_group(self):
     self.logging.info("Creating Azure resource group")
     resource_group_params = {
         'location': self.azure_location,
         'tags': self.resource_group_tags,
     }
     self.resource_mgmt_client.resource_groups.create_or_update(
         self.cluster_name, resource_group_params)
     for attempt in Retrying(stop=stop_after_delay(600),
                             wait=wait_exponential(max=30),
                             retry=retry_if_exception_type(AssertionError),
                             reraise=True):
         with attempt:
             rg = utils.retry_on_error()(
                 self.resource_mgmt_client.resource_groups.get)(
                     self.cluster_name)
             assert rg.properties.provisioning_state == "Succeeded"
Example #14
 def _create_node_subnet(self):
     self.logging.info("Creating Azure vNET node subnet")
     nsg = self._create_node_secgroup()
     route_table = self._node_route_table
     subnet_params = {
         "address_prefix": self.node_subnet_cidr_block,
         "network_security_group": {
             "id": nsg.id
         },
         "route_table": {
             "id": route_table.id
         },
     }
     return utils.retry_on_error()(
         self.network_client.subnets.begin_create_or_update)(
             self.cluster_name, "{}-vnet".format(self.cluster_name),
             "{}-node-subnet".format(self.cluster_name),
             subnet_params).result()
Example #15
 def _validate_k8s_api_versions(self):
     self.logging.info("Validating K8s API versions")
     output, _ = utils.retry_on_error()(
         utils.run_shell_cmd)([self.kubectl, "get", "nodes", "-o", "yaml"])
     nodes = yaml.safe_load(output.decode())
     for node in nodes["items"]:
         node_name = node["metadata"]["name"]
         node_info = node["status"]["nodeInfo"]
         if node_info["kubeletVersion"] != self.ci_version:
             raise Exception(
                 "Wrong kubelet version on node %s. "
                 "Expected %s, but found %s" %
                 (node_name, self.ci_version, node_info["kubeletVersion"]))
         if node_info["kubeProxyVersion"] != self.ci_version:
             raise Exception(
                 "Wrong kube-proxy version on node %s. "
                 "Expected %s, but found %s" %
                 (node_name, self.ci_version, node_info["kubeProxyVersion"]))
Example #16
 def _wait_for_bootstrap_vm(self, timeout=900):
     self.logging.info("Waiting up to %.2f minutes for VM %s to provision",
                       timeout / 60.0, self.bootstrap_vm_name)
     valid_vm_states = ["Creating", "Updating", "Succeeded"]
     for attempt in Retrying(stop=stop_after_delay(timeout),
                             wait=wait_exponential(max=30),
                             retry=retry_if_exception_type(AssertionError),
                             reraise=True):
         with attempt:
             vm = utils.retry_on_error()(
                 self.compute_client.virtual_machines.get)(
                     self.cluster_name, self.bootstrap_vm_name)
             if vm.provisioning_state not in valid_vm_states:
                 err_msg = 'VM "{}" entered invalid state: "{}"'.format(
                     self.bootstrap_vm_name, vm.provisioning_state)
                 self.logging.error(err_msg)
                 raise azure_exceptions.AzureError(err_msg)
             assert vm.provisioning_state == "Succeeded"
     return vm
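The azure_exceptions alias used above is presumably the azure.core exceptions module, which provides AzureError:

 from azure.core import exceptions as azure_exceptions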
Example #17
 def cleanup_bootstrap_vm(self):
     self.logging.info("Cleaning up the bootstrap VM")
     self.logging.info("Deleting bootstrap VM")
     utils.retry_on_error()(
         self.compute_client.virtual_machines.begin_delete)(
             self.cluster_name, self.bootstrap_vm_name).wait()
     self.logging.info("Deleting bootstrap VM NIC")
     utils.retry_on_error()(
         self.network_client.network_interfaces.begin_delete)(
             self.cluster_name, self.bootstrap_vm_nic_name).wait()
     self.logging.info("Deleting bootstrap VM public IP")
     utils.retry_on_error()(
         self.network_client.public_ip_addresses.begin_delete)(
             self.cluster_name, self.bootstrap_vm_public_ip_name).wait()
Example #18
 def _wait_for_control_plane(self, timeout=2700):
     self.logging.info(
         "Waiting up to %.2f minutes for the control-plane to be ready.",
         timeout / 60.0)
     machines_list_cmd = [
         self.kubectl, "get", "machine", "--kubeconfig",
         self.mgmt_kubeconfig_path,
         "--output=custom-columns=NAME:.metadata.name", "--no-headers"
     ]
     control_plane_name_prefix = "{}-control-plane".format(
         self.cluster_name)
     for attempt in Retrying(stop=stop_after_delay(timeout),
                             wait=wait_exponential(max=30),
                             retry=retry_if_exception_type(AssertionError),
                             reraise=True):
         with attempt:
             output, _ = utils.retry_on_error()(utils.run_shell_cmd)(
                 machines_list_cmd, sensitive=True)
             machines = output.decode().strip().split('\n')
             control_plane_machines = [
                 m for m in machines
                 if m.startswith(control_plane_name_prefix)
             ]
             assert len(control_plane_machines) > 0
             control_plane_ready = True
             for control_plane_machine in control_plane_machines:
                 try:
                     status_phase = self._get_mgmt_capz_machine_phase(
                         control_plane_machine)
                 except Exception:
                     control_plane_ready = False
                     break
                 if status_phase != "Running":
                     control_plane_ready = False
                     break
             assert control_plane_ready
     self.logging.info("Control-plane is ready")
Example #19
 def _validate_k8s_api_container_images(self):
     self.logging.info("Validating K8s API container images")
     output, _ = utils.retry_on_error()(utils.run_shell_cmd)([
         self.kubectl, "get", "nodes", "-o", "yaml", "-l",
         "kubernetes.io/os=linux"
     ])
     nodes = yaml.safe_load(output.decode())
     images_tag = self.ci_version.replace("+", "_").strip("v")
     name_regex = re.compile(r"^(k8s.gcr.io/kube-.*):v(.*)$")
     for node in nodes["items"]:
         non_ci_images_names = []
         for image in node["status"]["images"]:
             # Evaluate the regex once per image name.
             for name in image["names"]:
                 match = name_regex.match(name)
                 if match and match.group(2) != images_tag:
                     non_ci_images_names.append(name)
             if len(non_ci_images_names) > 0:
                 self.logging.error(
                     "Found the following non-CI images %s on the "
                     "node %s.", non_ci_images_names,
                     node["metadata"]["name"])
                 raise Exception("Found non-CI container images on "
                                 "node %s" % node["metadata"]["name"])
Example #20
 def _wait_for_ready_pods(self):
     self.logging.info("Waiting for all the pods to be ready")
     utils.retry_on_error()(utils.run_shell_cmd)([
         self.kubectl, "wait", "--for=condition=Ready", "--timeout", "30m",
         "pods", "--all", "--all-namespaces"
     ])