def _configure_namespace(provider_config):
    """Make sure the namespace named in the provider config exists.

    The namespace is looked up by name. When the lookup returns no match it
    is created. When the lookup raises ``ApiException`` the existence check
    is skipped (a warning is logged) and the configured name is returned
    as-is.

    Args:
        provider_config: Provider configuration mapping; must contain a
            "namespace" entry.

    Returns:
        The namespace name taken from ``provider_config``.

    Raises:
        ValueError: If ``provider_config`` has no "namespace" entry.
    """
    namespace_field = "namespace"
    if namespace_field not in provider_config:
        raise ValueError("Must specify namespace in Kubernetes config.")

    namespace = provider_config[namespace_field]
    name_selector = "metadata.name={}".format(namespace)
    try:
        listing = core_api().list_namespace(field_selector=name_selector)
        matches = listing.items
    except ApiException:
        # The lookup failed, so carry on without verifying the namespace.
        logger.warning(log_prefix +
                       not_checking_msg(namespace_field, namespace))
        return namespace

    if len(matches) == 0:
        # Nothing with that name yet: create it.
        logger.info(log_prefix + not_found_msg(namespace_field, namespace))
        new_namespace = client.V1Namespace(
            metadata=client.V1ObjectMeta(name=namespace))
        core_api().create_namespace(new_namespace)
        logger.info(log_prefix + created_msg(namespace_field, namespace))
        return namespace

    # The selector filters on an exact name, so expect a single match.
    assert len(matches) == 1
    logger.info(log_prefix + using_existing_msg(namespace_field, namespace))
    return namespace
def _configure_services(namespace, provider_config):
    """Create or update every service listed in the provider config.

    For each entry under "services": default its metadata namespace to
    ``namespace`` (or reject a mismatching one), then create the service if
    no service with that name exists, or patch it if one exists and differs
    from the configured entry.

    Args:
        namespace: Namespace the services must live in.
        provider_config: Provider configuration mapping; the "services"
            entry is optional.

    Raises:
        InvalidNamespaceError: If a service declares a namespace other than
            ``namespace``.
    """
    service_field = "services"
    if service_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(service_field))
        return

    for service in provider_config[service_field]:
        metadata = service["metadata"]
        if "namespace" not in metadata:
            metadata["namespace"] = namespace
        elif metadata["namespace"] != namespace:
            raise InvalidNamespaceError(service_field, namespace)

        name = metadata["name"]
        field_selector = "metadata.name={}".format(name)
        # Kept in its own variable so the list being iterated is not rebound.
        existing_services = core_api().list_namespaced_service(
            namespace, field_selector=field_selector).items

        if len(existing_services) == 0:
            logger.info(log_prefix + not_found_msg("service", name))
            core_api().create_namespaced_service(namespace, service)
            logger.info(log_prefix + created_msg("service", name))
            continue

        # The selector filters on an exact name, so expect a single match.
        assert len(existing_services) == 1
        existing_service = existing_services[0]
        # NOTE(review): this compares the configured entry with the object
        # returned by the API client; confirm the two are comparable.
        if service == existing_service:
            logger.info(log_prefix + using_existing_msg("service", name))
            # Skip only this service; the remaining entries still need to be
            # processed (previously a `return` here dropped them).
            continue

        logger.info(log_prefix + updating_existing_msg("service", name))
        core_api().patch_namespaced_service(name, namespace, service)
def terminate_node(self, node_id):
    """Terminate a node's pod and remove its same-named service and ingress.

    The pod is terminated through ``self.terminate_pods``. Deleting the
    service and the ingress is best-effort: an ``ApiException`` never
    propagates, but any failure other than HTTP 404 is logged as a warning
    instead of being silently discarded.

    Args:
        node_id: Name of the pod; also used as the name of the service and
            ingress to delete.
    """
    # delete raycluster pod
    logger.info(log_prefix + "calling terminate_node")
    self.terminate_pods([node_id])

    try:
        core_api().delete_namespaced_service(node_id, self.namespace)
    except ApiException as e:
        # A 404 just means there was nothing to delete for this node.
        if getattr(e, "status", None) != 404:
            logger.warning(log_prefix +
                           "failed to delete service {}: {}".format(
                               node_id, e))

    try:
        extensions_beta_api().delete_namespaced_ingress(
            node_id,
            self.namespace,
        )
    except ApiException as e:
        if getattr(e, "status", None) != 404:
            logger.warning(log_prefix +
                           "failed to delete ingress {}: {}".format(
                               node_id, e))
def _configure_autoscaler_service_account(namespace, provider_config):
    """Create the autoscaler service account unless one already exists.

    Does nothing (beyond logging) when the provider config has no
    "autoscaler_service_account" entry. Otherwise the account's metadata
    namespace is defaulted to ``namespace``, and the account is created only
    if no service account with the same name is found in that namespace.

    Args:
        namespace: Namespace the service account must live in.
        provider_config: Provider configuration mapping.

    Raises:
        InvalidNamespaceError: If the account declares a namespace other
            than ``namespace``.
    """
    account_field = "autoscaler_service_account"
    if account_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(account_field))
        return

    account = provider_config[account_field]
    metadata = account["metadata"]
    if "namespace" not in metadata:
        metadata["namespace"] = namespace
    elif metadata["namespace"] != namespace:
        raise InvalidNamespaceError(account_field, namespace)

    name = metadata["name"]
    name_selector = "metadata.name={}".format(name)
    matches = core_api().list_namespaced_service_account(
        namespace, field_selector=name_selector).items

    if len(matches) == 0:
        # No account with that name yet: create it.
        logger.info(log_prefix + not_found_msg(account_field, name))
        core_api().create_namespaced_service_account(namespace, account)
        logger.info(log_prefix + created_msg(account_field, name))
        return

    # The selector filters on an exact name, so expect a single match.
    assert len(matches) == 1
    logger.info(log_prefix + using_existing_msg(account_field, name))
def non_terminated_nodes(self, tag_filters):
    """Return the names of this cluster's pods that match ``tag_filters``.

    Pods are listed in ``self.namespace`` with a label selector built from
    ``tag_filters`` plus the cluster-name tag, and a field selector that
    excludes the phases listed below.

    Args:
        tag_filters: Mapping of tag name to required value. It is not
            modified; the cluster-name tag is added to a copy.

    Returns:
        A list of pod names.
    """
    # Match pods that are in the 'Pending' or 'Running' phase.
    # Unfortunately there is no OR operator in field selectors, so we
    # have to match on NOT any of the other phases.
    field_selector = ",".join([
        "status.phase!=Failed",
        "status.phase!=Unknown",
        "status.phase!=Succeeded",
        "status.phase!=Terminating",
    ])

    # Work on a copy so the caller's dict does not silently gain the
    # cluster-name tag.
    filters = dict(tag_filters)
    filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name
    label_selector = to_label_selector(filters)

    pod_list = core_api().list_namespaced_pod(
        self.namespace,
        field_selector=field_selector,
        label_selector=label_selector)
    return [pod.metadata.name for pod in pod_list.items]
def set_node_tags(self, node_id, tags):
    """Merge ``tags`` into the labels of pod ``node_id`` and patch the pod.

    Args:
        node_id: Name of the pod in ``self.namespace``.
        tags: Mapping of label name to value to add or overwrite.
    """
    node_pod = core_api().read_namespaced_pod(node_id, self.namespace)
    labels = node_pod.metadata.labels
    labels.update(tags)
    # Send the modified pod object back as the patch body.
    core_api().patch_namespaced_pod(node_id, self.namespace, node_pod)
def internal_ip(self, node_id):
    """Return the ``pod_ip`` reported in the status of pod ``node_id``."""
    node_pod = core_api().read_namespaced_pod(node_id, self.namespace)
    pod_status = node_pod.status
    return pod_status.pod_ip
def node_tags(self, node_id):
    """Return the labels stored in the metadata of pod ``node_id``."""
    node_pod = core_api().read_namespaced_pod(node_id, self.namespace)
    pod_metadata = node_pod.metadata
    return pod_metadata.labels
def is_terminated(self, node_id):
    """Return True when pod ``node_id`` is neither Running nor Pending."""
    node_pod = core_api().read_namespaced_pod(node_id, self.namespace)
    phase = node_pod.status.phase
    still_alive = phase in ["Running", "Pending"]
    return not still_alive
def is_running(self, node_id):
    """Return True when the phase of pod ``node_id`` is "Running"."""
    node_pod = core_api().read_namespaced_pod(node_id, self.namespace)
    phase = node_pod.status.phase
    return phase == "Running"
def create_node(self, node_config, tags, count):
    """Create ``count`` nodes by creating or extending the raycluster object.

    If no raycluster custom object named ``self.cluster_name`` can be
    fetched, a new one is created with a single extension; otherwise a new
    extension is appended to the existing object and the object is patched.
    Afterwards, when the node config carries a "service" (and optionally an
    "ingress") spec, one service and one ingress are created per new node
    name, each with an owner reference to the raycluster object.

    Args:
        node_config: Node configuration mapping. The pod spec is read from
            its "pod" entry when present, else the mapping itself is used
            as the pod spec. It is deep-copied, so it is not modified.
        tags: Labels for the new pods. NOTE: updated in place with the
            cluster-name tag, and used directly as the pod labels when the
            pod spec declares none.
        count: Number of nodes requested; forwarded to
            ``generate_cluster_pod_meta``.

    Returns:
        None.
    """
    # create a raycluster
    # Deep copy: the pod spec's nested "metadata" dict is written to below,
    # and a shallow copy would leak those writes into the caller's config.
    conf = copy.deepcopy(node_config)
    pod_spec = conf.get("pod", conf)
    service_spec = conf.get("service")
    ingress_spec = conf.get("ingress")

    tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name
    pod_spec["metadata"]["namespace"] = self.namespace
    if "labels" in pod_spec["metadata"]:
        pod_spec["metadata"]["labels"].update(tags)
    else:
        pod_spec["metadata"]["labels"] = tags

    # Presumably filled in by generate_cluster_pod_meta with the names of
    # the nodes it defines -- confirm against that helper.
    new_node_names = []

    # init exist_cluster to None
    exist_cluster = None
    try:
        exist_cluster = custom_object_api().get_namespaced_custom_object(
            group=CRD_RAY_GROUP,
            version=CRD_RAY_VERSION,
            namespace=self.namespace,
            plural=CRD_RAY_PLURAL,
            name=self.cluster_name)
        logger.info(log_prefix + "calling get_namespaced_custom_object "
                    "(cluster={}).".format(exist_cluster))
    except ApiException:
        # Any API failure here is treated as "the cluster does not exist".
        pass

    body = {}
    raycluster = None
    if exist_cluster is None:
        extensions = []
        extension = self.generate_cluster_pod_meta(count, pod_spec, tags,
                                                   new_node_names)
        extensions.append(extension)
        # NOTE(review): generate_cluster_meta receives `body`; presumably it
        # populates the object's top-level fields -- confirm.
        spec = self.generate_cluster_meta(body, pod_spec)
        spec['extensions'] = extensions
        body['spec'] = spec
        logger.info(log_prefix + "calling create_namespaced_custom_object")
        raycluster = custom_object_api().create_namespaced_custom_object(
            group=CRD_RAY_GROUP,
            version=CRD_RAY_VERSION,
            namespace=self.namespace,
            plural=CRD_RAY_PLURAL,
            body=body,
            pretty='true')
    else:
        extensions = exist_cluster['spec']['extensions']
        extensions.append(
            self.generate_cluster_pod_meta(count, pod_spec, tags,
                                           new_node_names))
        exist_cluster['spec']['extensions'] = extensions
        body = exist_cluster
        logger.info(log_prefix + "calling patch_namespaced_custom_object")
        raycluster = custom_object_api().patch_namespaced_custom_object(
            group=CRD_RAY_GROUP,
            version=CRD_RAY_VERSION,
            namespace=self.namespace,
            plural=CRD_RAY_PLURAL,
            name=self.cluster_name,
            body=body)

    new_svcs = []
    if service_spec is not None:
        logger.info(log_prefix + "calling create_namespaced_service "
                    "(count={}).".format(count))

        for new_node_name in new_node_names:
            # Each node gets its own copy of the service template, named
            # after the node and selecting only that node's component.
            service_spec_copy = copy.deepcopy(service_spec)
            metadata = service_spec_copy.get("metadata", {})
            metadata["name"] = new_node_name
            service_spec_copy["metadata"] = metadata
            append_owner_reference(body={
                'name': self.cluster_name,
                'uid': raycluster['metadata']['uid']
            }, obj=service_spec_copy)
            service_spec_copy["spec"]["selector"] = {
                "raycluster.component": new_node_name
            }
            svc = core_api().create_namespaced_service(
                self.namespace, service_spec_copy)
            new_svcs.append(svc)

    if ingress_spec is not None:
        logger.info(log_prefix + "calling create_namespaced_ingress "
                    "(count={}).".format(count))
        # One ingress per service created above, sharing the service's name.
        for new_svc in new_svcs:
            ingress_spec_copy = copy.deepcopy(ingress_spec)
            metadata = ingress_spec_copy.get("metadata", {})
            metadata["name"] = new_svc.metadata.name
            ingress_spec_copy["metadata"] = metadata
            append_owner_reference(body={
                'name': self.cluster_name,
                'uid': raycluster['metadata']['uid']
            }, obj=ingress_spec_copy)
            ingress_spec_copy = _add_service_name_to_service_port(
                ingress_spec_copy, new_svc.metadata.name)
            extensions_beta_api().create_namespaced_ingress(
                self.namespace, ingress_spec_copy)