def createPod(self, node, pool, label):
    """Create a long-lived sleeper pod in a new restricted namespace.

    :param node: the Node being launched; passed to createNamespace,
                 which records the namespace in node.external_id.
    :param pool: the pool name, used as the namespace prefix.
    :param label: label config supplying the pod name, image and
                  image pull policy.
    :returns: the resource dict from createNamespace with a 'pod' key
              added holding the pod name.
    :raises exceptions.LaunchNodepoolException: if the pod does not
            reach the Running phase within 300 seconds.
    """
    resource = self.createNamespace(node, pool, restricted_access=True)
    namespace = resource['namespace']
    pod_body = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': label.name
        },
        'spec': {
            'containers': [{
                'name': label.name,
                'image': label.image,
                'imagePullPolicy': label.image_pull,
                # Keep the container alive indefinitely; work is run
                # by exec-ing into the pod.
                'command': ["/bin/bash", "-c", "--"],
                'args': ["while true; do sleep 30; done;"],
                'workingDir': '/tmp'
            }],
            # restartPolicy is a PodSpec field. It previously sat at
            # the top level of the manifest, where the API server does
            # not honor it.
            'restartPolicy': 'Never',
        },
    }
    self.k8s_client.create_namespaced_pod(namespace, pod_body)
    # Poll once per second for up to 300 seconds. The for/else form
    # raises only when the loop exhausts without the pod Running; the
    # old `retry == 299` check also fired on a successful break in the
    # final iteration.
    for _ in range(300):
        pod = self.k8s_client.read_namespaced_pod(label.name, namespace)
        if pod.status.phase == "Running":
            break
        self.log.debug("%s: pod status is %s", namespace,
                       pod.status.phase)
        time.sleep(1)
    else:
        raise exceptions.LaunchNodepoolException(
            "%s: pod failed to initialize (%s)" % (
                namespace, pod.status.phase))
    resource["pod"] = label.name
    return resource
def waitForPod(self, project, pod_name):
    """Block until the named pod reaches the Running phase.

    Polls the pod once per second for up to 300 seconds.

    :param project: the namespace/project holding the pod.
    :param pod_name: the name of the pod to wait for.
    :raises exceptions.LaunchNodepoolException: if the pod is not
            Running after 300 polls.
    """
    for _ in range(300):
        pod = self.k8s_client.read_namespaced_pod(pod_name, project)
        if pod.status.phase == "Running":
            # Success: return immediately. The old `retry == 299`
            # post-check also raised when the pod became Running on
            # the final iteration.
            return
        self.log.debug("%s: pod status is %s", project, pod.status.phase)
        time.sleep(1)
    raise exceptions.LaunchNodepoolException(
        "%s: pod failed to initialize (%s)" % (project, pod.status.phase))
def createPod(self, project, label):
    """Create a long-lived sleeper pod in an existing project.

    :param project: the namespace/project to create the pod in.
    :param label: label config supplying name, image, pull policy and
                  optional cpu/memory resource settings.
    :raises exceptions.LaunchNodepoolException: if the pod does not
            reach the Running phase within 300 seconds.
    """
    spec_body = {
        'name': label.name,
        'image': label.image,
        'imagePullPolicy': label.image_pull,
        # Keep the container alive indefinitely; work is run by
        # exec-ing into the pod.
        'command': ["/bin/bash", "-c", "--"],
        'args': ["while true; do sleep 30; done;"],
        'workingDir': '/tmp',
    }
    if label.cpu or label.memory:
        spec_body['resources'] = {}
        # Apply the same values to both requests and limits.
        for rtype in ('requests', 'limits'):
            rbody = {}
            if label.cpu:
                rbody['cpu'] = int(label.cpu)
            if label.memory:
                rbody['memory'] = '%dMi' % int(label.memory)
            spec_body['resources'][rtype] = rbody
    pod_body = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': label.name
        },
        'spec': {
            'containers': [spec_body],
            # restartPolicy is a PodSpec field. It previously sat at
            # the top level of the manifest, where the API server does
            # not honor it.
            'restartPolicy': 'Never',
        },
    }
    self.k8s_client.create_namespaced_pod(project, pod_body)
    # for/else: raise only when all 300 polls fail. The old
    # `retry == 299` check also fired on a successful break in the
    # final iteration.
    for _ in range(300):
        pod = self.k8s_client.read_namespaced_pod(label.name, project)
        if pod.status.phase == "Running":
            break
        self.log.debug("%s: pod status is %s", project, pod.status.phase)
        time.sleep(1)
    else:
        raise exceptions.LaunchNodepoolException(
            "%s: pod failed to initialize (%s)" % (
                project, pod.status.phase))
def _launchNode(self):
    """Boot the OpenStack server for self.node and wait for it.

    Resolves the image to boot from (a nodepool-managed diskimage
    upload or an unmanaged cloud image), creates the server, records
    connection details on the node, waits for the server to go
    ACTIVE, then optionally scans the node for SSH host keys. The
    node record is checkpoint-saved to ZooKeeper after each stage so
    partial progress survives a launcher restart.

    :raises exceptions.LaunchNodepoolException: no current image
            upload exists for the configured diskimage.
    :raises exceptions.LaunchStatusException: the server did not
            reach ACTIVE.
    :raises exceptions.LaunchNetworkException: no usable IP was found
            on the server.
    :raises exceptions.LaunchKeyscanException: host keys were expected
            but could not be gathered.
    """
    if self.label.diskimage:
        diskimage = self.provider_config.diskimages[
            self.label.diskimage.name]
    else:
        diskimage = None
    if diskimage:
        # launch using diskimage
        cloud_image = self.handler.zk.getMostRecentImageUpload(
            diskimage.name, self.provider_config.name)
        if not cloud_image:
            raise exceptions.LaunchNodepoolException(
                "Unable to find current cloud image %s in %s" %
                (diskimage.name, self.provider_config.name)
            )
        config_drive = diskimage.config_drive
        image_external = dict(id=cloud_image.external_id)
        # image_id encodes the full ZK upload path plus upload id so
        # the exact build can be traced back from the node record.
        image_id = "{path}/{upload_id}".format(
            path=self.handler.zk._imageUploadPath(
                cloud_image.image_name,
                cloud_image.build_id,
                cloud_image.provider_name),
            upload_id=cloud_image.id)
        image_name = diskimage.name
        username = cloud_image.username
        connection_type = diskimage.connection_type
        connection_port = diskimage.connection_port
    else:
        # launch using unmanaged cloud image
        config_drive = self.label.cloud_image.config_drive
        # external may be a dict (e.g. {'id': ...}) per the cloud
        # image config; passed straight through to createServer.
        image_external = self.label.cloud_image.external
        image_id = self.label.cloud_image.name
        image_name = self.label.cloud_image.name
        username = self.label.cloud_image.username
        connection_type = self.label.cloud_image.connection_type
        connection_port = self.label.cloud_image.connection_port
    hostname = self.provider_config.hostname_format.format(
        label=self.label, provider=self.provider_config, node=self.node
    )
    self.log.info("Creating server with hostname %s in %s from image %s "
                  "for node id: %s" % (hostname,
                                       self.provider_config.name,
                                       image_name,
                                       self.node.id))
    # NOTE: We store the node ID in the server metadata to use for leaked
    # instance detection. We cannot use the external server ID for this
    # because that isn't available in ZooKeeper until after the server is
    # active, which could cause a race in leak detection.
    server = self.handler.manager.createServer(
        hostname,
        image=image_external,
        min_ram=self.label.min_ram,
        flavor_name=self.label.flavor_name,
        key_name=self.label.key_name,
        az=self.node.az,
        config_drive=config_drive,
        nodepool_node_id=self.node.id,
        nodepool_node_label=self.node.type[0],
        nodepool_image_name=image_name,
        networks=self.pool.networks,
        security_groups=self.pool.security_groups,
        boot_from_volume=self.label.boot_from_volume,
        volume_size=self.label.volume_size)
    self.node.external_id = server.id
    self.node.hostname = hostname
    self.node.image_id = image_id
    if username:
        self.node.username = username
    self.node.connection_type = connection_type
    self.node.connection_port = connection_port
    # Checkpoint save the updated node info
    self.zk.storeNode(self.node)
    self.log.debug("Waiting for server %s for node id: %s" %
                   (server.id, self.node.id))
    server = self.handler.manager.waitForServer(
        server, self.provider_config.launch_timeout,
        auto_ip=self.pool.auto_floating_ip)
    if server.status != 'ACTIVE':
        raise exceptions.LaunchStatusException("Server %s for node id: %s "
                                               "status: %s" %
                                               (server.id, self.node.id,
                                                server.status))
    # If we didn't specify an AZ, set it to the one chosen by Nova.
    # Do this after we are done waiting since AZ may not be available
    # immediately after the create request.
    if not self.node.az:
        self.node.az = server.location.zone
    # Use private_ipv4 on clouds where no public IP is configured.
    if self.pool.use_private_ip:
        # NOTE(review): this reads server.private_ipv4 while the node
        # fields below use server.private_v4 — confirm the server
        # object actually exposes both attribute spellings.
        interface_ip = server.private_ipv4
        interface_type = 'private'
    else:
        interface_ip = server.interface_ip
        interface_type = 'public'
    if not interface_ip:
        self.log.debug(
            "Server data for failed IP: %s" % pprint.pformat(
                server))
        raise exceptions.LaunchNetworkException(
            "Unable to find %s IP of server" % (interface_type))
    self.node.interface_ip = interface_ip
    self.node.public_ipv4 = server.public_v4
    self.node.public_ipv6 = server.public_v6
    self.node.private_ipv4 = server.private_v4
    # devstack-gate multi-node depends on private_v4 being populated
    # with something. On clouds that don't have a private address, use
    # the public.
    if not self.node.private_ipv4:
        self.node.private_ipv4 = server.public_v4
    # Checkpoint save the updated node info
    self.zk.storeNode(self.node)
    self.log.debug(
        "Node %s is running [region: %s, az: %s, ip: %s ipv4: %s, "
        "ipv6: %s]" %
        (self.node.id, self.node.region, self.node.az,
         self.node.interface_ip, self.node.public_ipv4,
         self.node.public_ipv6))
    # wait and scan the new node and record in ZooKeeper
    host_keys = []
    if self.pool.host_key_checking:
        try:
            self.log.debug(
                "Gathering host keys for node %s", self.node.id)
            # only gather host keys if the connection type is ssh
            gather_host_keys = connection_type == 'ssh'
            host_keys = utils.nodescan(
                interface_ip,
                timeout=self.provider_config.boot_timeout,
                gather_hostkeys=gather_host_keys,
                port=connection_port)
            if gather_host_keys and not host_keys:
                raise exceptions.LaunchKeyscanException(
                    "Unable to gather host keys")
        except exceptions.ConnectionTimeoutException:
            # Dump the console log to aid debugging before re-raising.
            self._logConsole(self.node.external_id, self.node.hostname)
            raise
    self.node.host_keys = host_keys
    self.zk.storeNode(self.node)
def createNamespace(self, node, pool, restricted_access=False):
    """Create a namespace with a service account, role and binding.

    :param node: the Node being launched; its id names the namespace
                 and is recorded in node.external_id for cleanup.
    :param pool: the pool name, used as the namespace prefix.
    :param restricted_access: when True, grant only pod get/list plus
                              exec/log access instead of the full
                              project role.
    :returns: a resource dict (name, namespace, host, skiptls, token,
              ca_crt, user) describing how to connect to the
              namespace.
    :raises exceptions.LaunchNodepoolException: if the service account
            token is not created within 30 seconds.
    """
    name = node.id
    namespace = "%s-%s" % (pool, name)
    user = "******"
    self.log.debug("%s: creating namespace" % namespace)
    # Create the namespace. The node id is stored as a label so leaked
    # namespaces can be matched back to their node: ObjectMeta has no
    # 'nodepool_node_id' field, so as a bare metadata key it would be
    # rejected or silently dropped by the API server.
    ns_body = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {
            'name': namespace,
            'labels': {
                'nodepool_node_id': name
            }
        }
    }
    proj = self.k8s_client.create_namespace(ns_body)
    node.external_id = namespace

    # Create the service account
    sa_body = {
        'apiVersion': 'v1',
        'kind': 'ServiceAccount',
        'metadata': {
            'name': user
        }
    }
    self.k8s_client.create_namespaced_service_account(namespace, sa_body)

    # Wait for the token to be created; the secret holding it is
    # provisioned asynchronously by the token controller.
    for retry in range(30):
        sa = self.k8s_client.read_namespaced_service_account(
            user, namespace)
        ca_crt = None
        token = None
        if sa.secrets:
            for secret_obj in sa.secrets:
                secret = self.k8s_client.read_namespaced_secret(
                    secret_obj.name, namespace)
                ca_crt = secret.data.get('ca.crt')
                token = secret.data.get('token')
                if token and ca_crt:
                    break
        if token and ca_crt:
            break
        time.sleep(1)
    if not token or not ca_crt:
        raise exceptions.LaunchNodepoolException(
            "%s: couldn't find token for service account %s" %
            (namespace, sa))

    # Create service account role
    all_verbs = [
        "create", "delete", "get", "list", "patch", "update", "watch"
    ]
    # NOTE(review): rbac.authorization.k8s.io/v1beta1 was removed in
    # Kubernetes 1.22 — confirm the target cluster version or migrate
    # these bodies to rbac.authorization.k8s.io/v1.
    if restricted_access:
        role_name = "zuul-restricted"
        role_body = {
            'kind': 'Role',
            'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
            'metadata': {
                'name': role_name,
            },
            'rules': [{
                'apiGroups': [""],
                'resources': ["pods"],
                'verbs': ["get", "list"],
            }, {
                'apiGroups': [""],
                'resources': ["pods/exec"],
                'verbs': all_verbs
            }, {
                'apiGroups': [""],
                'resources': ["pods/logs"],
                'verbs': all_verbs
            }]
        }
    else:
        role_name = "zuul"
        role_body = {
            'kind': 'Role',
            'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
            'metadata': {
                'name': role_name,
            },
            'rules': [{
                'apiGroups': [""],
                'resources': [
                    "pods", "pods/exec", "pods/log",
                    "services", "endpoints", "crontabs", "jobs",
                    "deployments", "replicasets", "configmaps",
                    "secrets"
                ],
                'verbs': all_verbs,
            }]
        }
    self.rbac_client.create_namespaced_role(namespace, role_body)

    # Give service account admin access
    role_binding_body = {
        'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
        'kind': 'RoleBinding',
        'metadata': {
            'name': 'zuul-role'
        },
        'roleRef': {
            'apiGroup': 'rbac.authorization.k8s.io',
            'kind': 'Role',
            'name': role_name,
        },
        'subjects': [{
            'kind': 'ServiceAccount',
            'name': user,
            'namespace': namespace,
        }],
        'userNames': ['system:serviceaccount:%s:zuul-worker' % namespace]
    }
    self.rbac_client.create_namespaced_role_binding(
        namespace, role_binding_body)

    resource = {
        'name': proj.metadata.name,
        'namespace': namespace,
        'host': self.k8s_client.api_client.configuration.host,
        'skiptls': not self.k8s_client.api_client.configuration.verify_ssl,
        'token': token,
        'ca_crt': ca_crt,
        'user': user,
    }
    self.log.info("%s: namespace created" % namespace)
    return resource
def prepareProject(self, project):
    """Provision a service account with admin rights in a project.

    Creates the service account, waits for OpenShift to mint its
    token secret, then binds the account to the project's admin role.

    :param project: the name of an existing project/namespace.
    :returns: a resource dict (namespace, host, skiptls, token, user)
              describing how to connect to the project.
    :raises exceptions.LaunchNodepoolException: if no token appears
            within 30 seconds.
    """
    user = "******"

    # Create the service account in the project.
    self.k8s_client.create_namespaced_service_account(
        project, {
            'apiVersion': 'v1',
            'kind': 'ServiceAccount',
            'metadata': {
                'name': user
            }
        })

    def _token_of(account):
        # Scan the account's secrets for the OpenShift token
        # annotation; return the first one found, else None.
        found = None
        for ref in (account.secrets or []):
            data = self.k8s_client.read_namespaced_secret(
                ref.name, project)
            found = data.metadata.annotations.get(
                'openshift.io/token-secret.value')
            if found:
                break
        return found

    # The token secret is created asynchronously; poll for up to 30s.
    sa = None
    token = None
    remaining = 30
    while remaining and not token:
        sa = self.k8s_client.read_namespaced_service_account(
            user, project)
        token = _token_of(sa)
        if not token:
            time.sleep(1)
        remaining -= 1
    if not token:
        raise exceptions.LaunchNodepoolException(
            "%s: couldn't find token for service account %s" % (
                project, sa))

    # Give service account admin access
    binding = {
        'apiVersion': 'v1',
        'kind': 'RoleBinding',
        'metadata': {
            'name': 'admin-0'
        },
        'roleRef': {
            'name': 'admin'
        },
        'subjects': [{
            'kind': 'ServiceAccount',
            'name': user,
            'namespace': project,
        }],
        'userNames': ['system:serviceaccount:%s:zuul-worker' % project]
    }
    try:
        self.os_client.create_namespaced_role_binding(project, binding)
    except ValueError:
        # The client can fail to deserialize an otherwise successful
        # response; see
        # https://github.com/ansible/ansible/issues/36939
        pass

    resource = {
        'namespace': project,
        'host': self.os_client.api_client.configuration.host,
        'skiptls': not self.os_client.api_client.configuration.verify_ssl,
        'token': token,
        'user': user,
    }
    self.log.info("%s: project created" % project)
    return resource