Example #1
0
def _configure_namespace(provider_config):
    """Ensure the namespace named in the provider config exists.

    The namespace name is read from ``provider_config["namespace"]``. The
    cluster is queried for a namespace with that name; if none is found, one
    is created. If the query itself fails with ``ApiException`` the check is
    skipped (a warning is logged) and the name is returned as-is.

    Args:
        provider_config: Mapping that must contain a ``"namespace"`` key.

    Returns:
        The configured namespace name.

    Raises:
        ValueError: If ``"namespace"`` is missing from ``provider_config``.
    """
    field = "namespace"
    if field not in provider_config:
        raise ValueError("Must specify namespace in Kubernetes config.")
    name = provider_config[field]

    selector = "metadata.name={}".format(name)
    try:
        matches = core_api().list_namespace(field_selector=selector).items
    except ApiException:
        # Listing failed, so existence cannot be verified; carry on with the
        # configured name and let later calls surface any real problem.
        logger.warning(log_prefix + not_checking_msg(field, name))
        return name

    if not matches:
        # Nothing by that name yet: create it.
        logger.info(log_prefix + not_found_msg(field, name))
        new_namespace = client.V1Namespace(
            metadata=client.V1ObjectMeta(name=name))
        core_api().create_namespace(new_namespace)
        logger.info(log_prefix + created_msg(field, name))
        return name

    # The selector is on an exact name, so at most one match is expected.
    assert len(matches) == 1
    logger.info(log_prefix + using_existing_msg(field, name))
    return name
Example #2
0
def _configure_services(namespace, provider_config):
    """Create or update every service listed in the provider config.

    Each entry of ``provider_config["services"]`` is given the cluster
    namespace if it does not declare one, then looked up by name. Missing
    services are created; existing ones that differ from the config are
    patched; matching ones are left alone. All entries are processed.

    Args:
        namespace: Namespace the services must live in.
        provider_config: Provider configuration mapping; the ``"services"``
            key is optional.

    Raises:
        InvalidNamespaceError: If a service declares a namespace other than
            ``namespace``.
    """
    service_field = "services"
    if service_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(service_field))
        return

    for service in provider_config[service_field]:
        metadata = service["metadata"]
        if "namespace" not in metadata:
            metadata["namespace"] = namespace
        elif metadata["namespace"] != namespace:
            raise InvalidNamespaceError(service_field, namespace)

        name = metadata["name"]
        field_selector = "metadata.name={}".format(name)
        # Use a distinct name for the lookup result so the list being
        # iterated is not shadowed.
        existing_services = core_api().list_namespaced_service(
            namespace, field_selector=field_selector).items

        if not existing_services:
            logger.info(log_prefix + not_found_msg("service", name))
            core_api().create_namespaced_service(namespace, service)
            logger.info(log_prefix + created_msg("service", name))
            continue

        assert len(existing_services) == 1
        # NOTE(review): `service` comes from the config while the listed item
        # comes from the API client; if those are different types this
        # equality never holds and the service is always patched -- confirm.
        if service == existing_services[0]:
            logger.info(log_prefix + using_existing_msg("service", name))
            # Move on to the next service; returning here would leave the
            # remaining services unconfigured.
            continue

        logger.info(log_prefix + updating_existing_msg("service", name))
        core_api().patch_namespaced_service(name, namespace, service)
Example #3
0
    def terminate_node(self, node_id):
        """Terminate a node and remove the service and ingress named after it.

        The pod is terminated via ``self.terminate_pods``. Deleting the
        service and the ingress is best-effort: an ``ApiException`` from
        either call is logged at debug level and otherwise ignored, so a node
        without a service or ingress still terminates cleanly.

        Args:
            node_id: Name of the pod; also used as the name of the service
                and ingress to delete.
        """
        logger.info(log_prefix + "calling terminate_node")
        # Delete the raycluster pod.
        self.terminate_pods([node_id])

        try:
            core_api().delete_namespaced_service(node_id, self.namespace)
        except ApiException as e:
            # Best-effort cleanup: record the failure instead of hiding it.
            logger.debug(log_prefix +
                         "could not delete service {}: {}".format(node_id, e))
        try:
            extensions_beta_api().delete_namespaced_ingress(
                node_id,
                self.namespace,
            )
        except ApiException as e:
            logger.debug(log_prefix +
                         "could not delete ingress {}: {}".format(node_id, e))
Example #4
0
def _configure_autoscaler_service_account(namespace, provider_config):
    """Create the autoscaler service account unless it already exists.

    The account definition is read from
    ``provider_config["autoscaler_service_account"]``. If it declares no
    namespace, ``namespace`` is written into its metadata. An existing
    account with the same name is reused without modification.

    Args:
        namespace: Namespace the service account must live in.
        provider_config: Provider configuration mapping; the
            ``"autoscaler_service_account"`` key is optional.

    Raises:
        InvalidNamespaceError: If the account declares a namespace other
            than ``namespace``.
    """
    field = "autoscaler_service_account"
    if field not in provider_config:
        logger.info(log_prefix + not_provided_msg(field))
        return

    account = provider_config[field]
    metadata = account["metadata"]
    # Fill in the namespace when absent; otherwise it must already agree.
    if metadata.setdefault("namespace", namespace) != namespace:
        raise InvalidNamespaceError(field, namespace)

    name = metadata["name"]
    selector = "metadata.name={}".format(name)
    found = core_api().list_namespaced_service_account(
        namespace, field_selector=selector).items

    if not found:
        logger.info(log_prefix + not_found_msg(field, name))
        core_api().create_namespaced_service_account(namespace, account)
        logger.info(log_prefix + created_msg(field, name))
        return

    # The selector is on an exact name, so at most one match is expected.
    assert len(found) == 1
    logger.info(log_prefix + using_existing_msg(field, name))
Example #5
0
    def non_terminated_nodes(self, tag_filters):
        """Return the names of this cluster's pods that have not terminated.

        Args:
            tag_filters: Mapping of tag name to required value. It is not
                modified; the cluster-name tag is added to a private copy.

        Returns:
            A list of pod names in ``self.namespace`` that match the filters
            and are not in an excluded phase.
        """
        # Match pods that are in the 'Pending' or 'Running' phase.
        # Unfortunately there is no OR operator in field selectors, so we
        # have to match on NOT any of the other phases.
        field_selector = ",".join([
            "status.phase!=Failed",
            "status.phase!=Unknown",
            "status.phase!=Succeeded",
            "status.phase!=Terminating",
        ])

        # Copy before adding the cluster tag so the caller's dict is left
        # untouched.
        filters = dict(tag_filters)
        filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name
        label_selector = to_label_selector(filters)
        pod_list = core_api().list_namespaced_pod(
            self.namespace,
            field_selector=field_selector,
            label_selector=label_selector)

        return [pod.metadata.name for pod in pod_list.items]
Example #6
0
 def set_node_tags(self, node_id, tags):
     """Merge ``tags`` into the labels of the pod named ``node_id``.

     The pod is read, its labels are updated in place, and the result is
     sent back as a patch.

     Args:
         node_id: Name of the pod in ``self.namespace``.
         tags: Mapping of label name to value to set on the pod.
     """
     pod = core_api().read_namespaced_pod(node_id, self.namespace)
     # A pod without any labels reports None rather than an empty dict.
     if pod.metadata.labels is None:
         pod.metadata.labels = {}
     pod.metadata.labels.update(tags)
     core_api().patch_namespaced_pod(node_id, self.namespace, pod)
Example #7
0
 def internal_ip(self, node_id):
     """Return the ``pod_ip`` from the status of the pod named ``node_id``."""
     pod_status = core_api().read_namespaced_pod(
         node_id, self.namespace).status
     return pod_status.pod_ip
Example #8
0
 def node_tags(self, node_id):
     """Return the labels from the metadata of the pod named ``node_id``."""
     pod_metadata = core_api().read_namespaced_pod(
         node_id, self.namespace).metadata
     return pod_metadata.labels
Example #9
0
 def is_terminated(self, node_id):
     """Return True unless the pod's phase is "Running" or "Pending"."""
     phase = core_api().read_namespaced_pod(
         node_id, self.namespace).status.phase
     still_alive = phase == "Running" or phase == "Pending"
     return not still_alive
Example #10
0
 def is_running(self, node_id):
     """Return True when the pod named ``node_id`` is in phase "Running"."""
     phase = core_api().read_namespaced_pod(
         node_id, self.namespace).status.phase
     return phase == "Running"
Example #11
0
    def create_node(self, node_config, tags, count):
        """Add nodes to the cluster's custom object, plus services/ingresses.

        Looks up the custom object named ``self.cluster_name``. If it is not
        found, a new one is created with a single extension; otherwise a new
        extension is appended to the existing object and the object is
        patched. Afterwards, when the node config has a ``"service"`` entry,
        one service is created per new node name, and when it also has an
        ``"ingress"`` entry, one ingress is created per service created here.

        Args:
            node_config: Node configuration. May contain ``"pod"``,
                ``"service"`` and ``"ingress"`` entries; without ``"pod"``
                the whole config is treated as the pod spec.
            tags: Labels for the new pods. Modified in place: the
                cluster-name tag is added.
            count: Passed to ``generate_cluster_pod_meta`` and reported in
                the log messages (presumably the number of nodes to add --
                confirm against that helper).

        Returns:
            None.

        NOTE(review): ``node_config.copy()`` is used below; if that is a
        shallow copy (as for a plain dict) the nested ``metadata`` written
        here is shared with the caller's ``node_config`` -- confirm whether
        that is intended.
        """
        # create a raycluster
        conf = node_config.copy()
        # Fall back to the whole config when there is no "pod" entry.
        pod_spec = conf.get("pod", conf)
        service_spec = conf.get("service")
        ingress_spec = conf.get("ingress")
        tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name
        pod_spec["metadata"]["namespace"] = self.namespace
        # Merge the tags into existing labels, or use them as the labels.
        if "labels" in pod_spec["metadata"]:
            pod_spec["metadata"]["labels"].update(tags)
        else:
            pod_spec["metadata"]["labels"] = tags

        # Handed to generate_cluster_pod_meta below; presumably that helper
        # fills it with the names of the nodes it defines -- TODO confirm.
        new_node_names = []

        # init exist_cluster to None
        exist_cluster = None
        try:
            exist_cluster = custom_object_api().get_namespaced_custom_object(
                group=CRD_RAY_GROUP,
                version=CRD_RAY_VERSION,
                namespace=self.namespace,
                plural=CRD_RAY_PLURAL,
                name=self.cluster_name)
            logger.info(log_prefix + "calling get_namespaced_custom_object "
                        "(cluster={}).".format(exist_cluster))
        except ApiException:
            # Any API error is treated as "cluster does not exist yet".
            # NOTE(review): this also hides errors other than not-found.
            pass

        body = {}
        raycluster = None
        if exist_cluster is None:
            # No existing cluster object: build one with a single extension.
            extensions = []
            extension = self.generate_cluster_pod_meta(count, pod_spec, tags,
                                                       new_node_names)
            extensions.append(extension)
            # generate_cluster_meta receives `body`; presumably it populates
            # the top-level fields of the object -- TODO confirm.
            spec = self.generate_cluster_meta(body, pod_spec)
            spec['extensions'] = extensions
            body['spec'] = spec
            logger.info(log_prefix + "calling create_namespaced_custom_object")
            raycluster = custom_object_api().create_namespaced_custom_object(
                group=CRD_RAY_GROUP,
                version=CRD_RAY_VERSION,
                namespace=self.namespace,
                plural=CRD_RAY_PLURAL,
                body=body,
                pretty='true')
        else:
            # Existing cluster object: append one more extension and patch.
            extensions = exist_cluster['spec']['extensions']
            extensions.append(
                self.generate_cluster_pod_meta(count, pod_spec, tags,
                                               new_node_names))
            exist_cluster['spec']['extensions'] = extensions
            body = exist_cluster
            logger.info(log_prefix + "calling patch_namespaced_custom_object")
            raycluster = custom_object_api().patch_namespaced_custom_object(
                group=CRD_RAY_GROUP,
                version=CRD_RAY_VERSION,
                namespace=self.namespace,
                plural=CRD_RAY_PLURAL,
                name=self.cluster_name,
                body=body)

        # Services created below; the ingress step iterates over these.
        new_svcs = []
        if service_spec is not None:
            logger.info(log_prefix + "calling create_namespaced_service "
                        "(count={}).".format(count))

            for new_node_name in new_node_names:
                # Work on a deep copy so each service gets its own spec.
                service_spec_copy = copy.deepcopy(service_spec)
                metadata = service_spec_copy.get("metadata", {})
                # The service is named after the node.
                metadata["name"] = new_node_name
                service_spec_copy["metadata"] = metadata
                # Reference the cluster object (by name and uid) as owner.
                append_owner_reference(body={
                    'name': self.cluster_name,
                    'uid': raycluster['metadata']['uid']
                },
                                       obj=service_spec_copy)
                service_spec_copy["spec"]["selector"] = {
                    "raycluster.component": new_node_name
                }
                svc = core_api().create_namespaced_service(
                    self.namespace, service_spec_copy)
                new_svcs.append(svc)

        if ingress_spec is not None:
            logger.info(log_prefix + "calling create_namespaced_ingress "
                        "(count={}).".format(count))
            # One ingress per service created above; none are created when
            # no service spec was given, since new_svcs is then empty.
            for new_svc in new_svcs:
                ingress_spec_copy = copy.deepcopy(ingress_spec)
                metadata = ingress_spec_copy.get("metadata", {})
                # The ingress takes the same name as its service.
                metadata["name"] = new_svc.metadata.name
                ingress_spec_copy["metadata"] = metadata
                append_owner_reference(body={
                    'name': self.cluster_name,
                    'uid': raycluster['metadata']['uid']
                },
                                       obj=ingress_spec_copy)
                ingress_spec_copy = _add_service_name_to_service_port(
                    ingress_spec_copy, new_svc.metadata.name)
                extensions_beta_api().create_namespaced_ingress(
                    self.namespace, ingress_spec_copy)