Example #1
    def get_affinity(self):
        """Determine the affinity term for the build pod.

        There are two affinity strategies; which one is used depends on how
        the BinderHub is configured.

        In the default setup the affinity of each build pod is an "anti-affinity"
        which causes the pods to prefer to be scheduled on separate nodes.

        In a setup with docker-in-docker enabled, pods for a particular
        repository prefer to be scheduled on the same node in order to reuse
        the Docker layer cache of previous builds.
        """
        resp = self.api.list_namespaced_pod(
            self.namespace,
            label_selector="component=dind,app=binder",
            _request_timeout=KUBE_REQUEST_TIMEOUT,
            _preload_content=False,
        )
        dind_pods = json.loads(resp.read())

        if self.sticky_builds and dind_pods:
            node_names = [
                pod["spec"]["nodeName"] for pod in dind_pods["items"]
            ]
            ranked_nodes = rendezvous_rank(node_names, self.repo_url)
            best_node_name = ranked_nodes[0]

            affinity = client.V1Affinity(node_affinity=client.V1NodeAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    client.V1PreferredSchedulingTerm(
                        weight=100,
                        preference=client.V1NodeSelectorTerm(
                            match_expressions=[
                                client.V1NodeSelectorRequirement(
                                    key="kubernetes.io/hostname",
                                    operator="In",
                                    values=[best_node_name],
                                )
                            ]),
                    )
                ]))

        else:
            affinity = client.V1Affinity(
                pod_anti_affinity=client.V1PodAntiAffinity(
                    preferred_during_scheduling_ignored_during_execution=[
                        client.V1WeightedPodAffinityTerm(
                            weight=100,
                            pod_affinity_term=client.V1PodAffinityTerm(
                                topology_key="kubernetes.io/hostname",
                                label_selector=client.V1LabelSelector(
                                    match_labels=dict(
                                        component=self._component_label)),
                            ),
                        )
                    ]))

        return affinity
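Example 1 leans on a `rendezvous_rank` helper that is not shown here. A minimal sketch of rendezvous (highest-random-weight) hashing, assuming the helper only needs to order node names deterministically for a given repository URL; the hash choice is illustrative, not BinderHub's actual implementation:

import hashlib

def rendezvous_rank(node_names, key):
    """Order nodes by the hash of (node, key), highest first.

    The same key always yields the same ranking, so builds for one
    repository keep landing on the same node for as long as it exists.
    """
    return sorted(
        node_names,
        key=lambda node: hashlib.sha256(
            "{}-{}".format(node, key).encode()).hexdigest(),
        reverse=True,
    )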
Example #2
def create_affinity_term(item):
    return client.V1WeightedPodAffinityTerm(
        weight=item['weight'],
        pod_affinity_term=client.V1PodAffinityTerm(
            topology_key=item['label'],
            namespaces=item['namespaces'],
            label_selector=client.V1LabelSelector(match_expressions=[
                create_affinity_selector(s) for s in item['selectors']
            ]),
        ))
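Example 2 delegates to a `create_affinity_selector` helper that is not included. A plausible sketch, assuming each entry in `item['selectors']` is a dict carrying `key`, `operator`, and `values`; those field names are an assumption, not the original API:

def create_affinity_selector(selector):
    # Hypothetical helper: turn a plain dict such as
    # {'key': 'app', 'operator': 'In', 'values': ['db']} into a
    # label-selector requirement. 'values' must be omitted for the
    # Exists/DoesNotExist operators, hence .get().
    return client.V1LabelSelectorRequirement(
        key=selector['key'],
        operator=selector['operator'],
        values=selector.get('values'),
    )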
Example #3
    def deploy(self, image, name, ns, port, replicas=1, svc_type="NodePort", traffic_policy="Local", cluster_ip=None, ipv6=False):
        """
        Creates a deployment and corresponding service with the given
        parameters.
        """
        # Use a pod anti-affinity so that the scheduler prefers deploying the
        # pods on different nodes. This makes our tests more reliable, since
        # some tests expect pods to be scheduled to different nodes.
        selector = {'matchLabels': {'app': name}}
        terms = [client.V1WeightedPodAffinityTerm(
            pod_affinity_term=client.V1PodAffinityTerm(
                label_selector=selector,
                topology_key="kubernetes.io/hostname"),
            weight=100,
            )]
        anti_aff = client.V1PodAntiAffinity(
                preferred_during_scheduling_ignored_during_execution=terms)

        # Run a deployment with <replicas> copies of <image>, with the
        # pods labelled with "app": <name>.
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=name),
            spec=client.V1DeploymentSpec(
                replicas=replicas,
                selector=selector,
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels={"app": name}),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(pod_anti_affinity=anti_aff),
                        containers=[
                            client.V1Container(
                                name=name,
                                image=image,
                                ports=[client.V1ContainerPort(
                                    container_port=port)]),
                        ]))))

        # Create the deployment.
        api_response = client.AppsV1Api().create_namespaced_deployment(
            body=deployment,
            namespace=ns)
        logger.debug("Deployment created. status='%s'" % str(api_response.status))

        # Create a service called <name> whose endpoints are the pods
        # with "app": <name>; i.e. those just created above.
        self.create_service(name, name, ns, port, svc_type, traffic_policy, ipv6=ipv6)
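A hypothetical call to the method above, assuming `deployer` is an instance of the surrounding test class; the image, name, and namespace are illustrative only:

# Run three nginx replicas; the preferred anti-affinity makes the
# scheduler place each replica on a different node when capacity allows.
deployer.deploy(image="nginx:1.25", name="nginx-test", ns="default",
                port=80, replicas=3)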
Example #4
 def _generate_affinity(self):
     return k8s_client.V1Affinity(
         node_affinity=k8s_client.V1NodeAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PreferredSchedulingTerm(
                     weight=1,
                     preference=k8s_client.V1NodeSelectorTerm(
                         match_expressions=[
                             k8s_client.V1NodeSelectorRequirement(
                                 key="some_node_label",
                                 operator="In",
                                 values=[
                                     "possible-label-value-1",
                                     "possible-label-value-2",
                                 ],
                             )
                         ]),
                 )
             ],
             required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(
                 node_selector_terms=[
                     k8s_client.V1NodeSelectorTerm(match_expressions=[
                         k8s_client.V1NodeSelectorRequirement(
                             key="some_node_label",
                             operator="In",
                             values=[
                                 "required-label-value-1",
                                 "required-label-value-2",
                             ],
                         )
                     ]),
                 ]),
         ),
         pod_affinity=k8s_client.V1PodAffinity(
             required_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PodAffinityTerm(
                     label_selector=k8s_client.V1LabelSelector(
                         match_labels={
                             "some-pod-label-key": "some-pod-label-value"
                         }),
                     namespaces=["namespace-a", "namespace-b"],
                     topology_key="key-1",
                 )
             ]),
         pod_anti_affinity=k8s_client.V1PodAntiAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1WeightedPodAffinityTerm(
                     weight=1,
                     pod_affinity_term=k8s_client.V1PodAffinityTerm(
                         label_selector=k8s_client.V1LabelSelector(
                             match_expressions=[
                                 k8s_client.V1LabelSelectorRequirement(
                                     key="some_pod_label",
                                     operator="NotIn",
                                     values=[
                                         "forbidden-label-value-1",
                                         "forbidden-label-value-2",
                                     ],
                                 )
                             ]),
                         namespaces=["namespace-c"],
                         topology_key="key-2",
                     ),
                 )
             ]),
     )
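The combined rules from `_generate_affinity` drop straight into a pod spec. A minimal sketch, assuming it is called from inside the same class and with placeholder container details:

pod = k8s_client.V1Pod(
    metadata=k8s_client.V1ObjectMeta(name="affinity-demo"),
    spec=k8s_client.V1PodSpec(
        affinity=self._generate_affinity(),  # node, pod, and anti-pod rules
        containers=[k8s_client.V1Container(name="app", image="app:latest")],
    ),
)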
Example #5
    def submit(self):
        """Submit a image spec to openshift's s2i and wait for completion """
        volume_mounts = [
            client.V1VolumeMount(mount_path="/var/run/docker.sock",
                                 name="docker-socket")
        ]
        docker_socket_path = urlparse(self.docker_host).path
        volumes = [
            client.V1Volume(name="docker-socket",
                            host_path=client.V1HostPathVolumeSource(
                                path=docker_socket_path, type='Socket'))
        ]

        if self.push_secret:
            volume_mounts.append(
                client.V1VolumeMount(mount_path="/root/.docker",
                                     name='docker-push-secret'))
            volumes.append(
                client.V1Volume(name='docker-push-secret',
                                secret=client.V1SecretVolumeSource(
                                    secret_name=self.push_secret)))

        env = []
        if self.git_credentials:
            env.append(
                client.V1EnvVar(name='GIT_CREDENTIAL_ENV',
                                value=self.git_credentials))

        component_label = "binderhub-build"
        self.pod = client.V1Pod(
            metadata=client.V1ObjectMeta(
                name=self.name,
                labels={
                    "name": self.name,
                    "component": component_label,
                },
                annotations={
                    "binder-repo": self.repo_url,
                },
            ),
            spec=client.V1PodSpec(
                containers=[
                    client.V1Container(
                        image=self.build_image,
                        name="builder",
                        args=self.get_cmd(),
                        volume_mounts=volume_mounts,
                        resources=client.V1ResourceRequirements(
                            limits={'memory': self.memory_limit},
                            requests={'memory': self.memory_limit}),
                        env=env)
                ],
                tolerations=[
                    client.V1Toleration(
                        key='hub.jupyter.org/dedicated',
                        operator='Equal',
                        value='user',
                        effect='NoSchedule',
                    ),
                    # GKE currently does not permit creating taints on a node pool
                    # with a `/` in the key field
                    client.V1Toleration(
                        key='hub.jupyter.org_dedicated',
                        operator='Equal',
                        value='user',
                        effect='NoSchedule',
                    ),
                ],
                node_selector=self.node_selector,
                volumes=volumes,
                restart_policy="Never",
                affinity=client.V1Affinity(
                    pod_anti_affinity=client.V1PodAntiAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                weight=100,
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    topology_key="kubernetes.io/hostname",
                                    label_selector=client.V1LabelSelector(
                                        match_labels=dict(
                                            component=component_label))))
                        ]))))

        try:
            ret = self.api.create_namespaced_pod(self.namespace, self.pod)
        except client.rest.ApiException as e:
            if e.status == 409:
                # Someone else created it!
                app_log.info("Build %s already running", self.name)
            else:
                raise
        else:
            app_log.info("Started build %s", self.name)

        app_log.info("Watching build pod %s", self.name)
        while not self.stop_event.is_set():
            w = watch.Watch()
            try:
                for f in w.stream(
                        self.api.list_namespaced_pod,
                        self.namespace,
                        label_selector="name={}".format(self.name),
                        timeout_seconds=30,
                ):
                    if f['type'] == 'DELETED':
                        self.progress('pod.phasechange', 'Deleted')
                        return
                    self.pod = f['object']
                    if not self.stop_event.is_set():
                        self.progress('pod.phasechange', self.pod.status.phase)
                    if self.pod.status.phase == 'Succeeded':
                        self.cleanup()
                    elif self.pod.status.phase == 'Failed':
                        self.cleanup()
            except Exception as e:
                app_log.exception("Error in watch stream for %s", self.name)
                raise
            finally:
                w.stop()
            if self.stop_event.is_set():
                app_log.info("Stopping watch of %s", self.name)
                return
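A hypothetical driver for Example 5 (the `build` object and its fields are assumed from context): `submit()` re-checks `stop_event` every time the 30-second watch stream times out, so setting the event from another thread stops the watch within roughly one window.

import threading

worker = threading.Thread(target=build.submit, daemon=True)
worker.start()
# ... later, e.g. when the requesting client disconnects:
build.stop_event.set()
worker.join()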
Example #6
def update_deploy_v2():
    data = json.loads(request.get_data().decode('UTF-8'))
    current_app.logger.debug("接受到的数据:{}".format(data))
    namespace = handle_input(data.get('namespace'))
    deploy_name = handle_input(data.get('deploy_name'))
    action = handle_input(data.get('action'))

    image = None
    replicas = None
    toleration = None
    pod_anti_affinity = None
    pod_affinity = None
    node_affinity = None
    labels = None
    if action == "add_pod_anti_affinity":
        print("正在运行{}操作".format(action))
        affinity = handle_input(data.get('pod_anti_affinity'))
        affinity_type = handle_input(affinity.get('type'))

        labelSelector = handle_input(affinity.get('labelSelector'))
        key = handle_input(affinity.get('key'))
        value = handle_input(affinity.get('value'))

        topologyKey = handle_input(affinity.get('topologyKey'))
        if affinity_type == "required":
            if labelSelector == "matchExpressions":
                if not isinstance(value, list):
                    value = [value]
                operator = handle_input(affinity.get('operator'))
                # Exists/DoesNotExist operators must not carry values.
                if operator != 'In' and operator != 'NotIn':
                    value = None
                print(value)
                label_selector = client.V1LabelSelector(match_expressions=[
                    client.V1LabelSelectorRequirement(
                        key=key, operator=operator, values=value)
                ])
            elif labelSelector == "matchLabels":
                if isinstance(value, list):
                    return jsonify(
                        {"error": "values cannot be a list with {}".format(labelSelector)})
                label_selector = client.V1LabelSelector(
                    match_labels={key: value})
            else:
                return jsonify(
                    {"error": "unsupported labelSelector: {}".format(labelSelector)})
            pod_anti_affinity = client.V1PodAntiAffinity(
                required_during_scheduling_ignored_during_execution=[
                    client.V1PodAffinityTerm(label_selector=label_selector,
                                             topology_key=topologyKey)
                ])
            print("Adding pod anti-affinity: {}".format(pod_anti_affinity))
        elif affinity_type == "preferred":
            weight = string_to_int(handle_input(affinity.get('weight')))
            if weight is None:
                return jsonify(
                    {"error": "weight is required for the {} type".format(affinity_type)})

            if labelSelector == "matchExpressions":
                if not isinstance(value, list):
                    value = [value]

                operator = handle_input(affinity.get('operator'))
                if operator != 'In' and operator != 'NotIn':
                    value = None
                label_selector = client.V1LabelSelector(match_expressions=[
                    client.V1LabelSelectorRequirement(
                        key=key, operator=operator, values=value)
                ])
            elif labelSelector == "matchLabels":
                if isinstance(value, list):
                    return jsonify(
                        {"error": "values cannot be a list with {}".format(labelSelector)})
                label_selector = client.V1LabelSelector(
                    match_labels={key: value})
            else:
                return jsonify(
                    {"error": "unsupported labelSelector: {}".format(labelSelector)})
            pod_anti_affinity = client.V1PodAntiAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    client.V1WeightedPodAffinityTerm(
                        pod_affinity_term=client.V1PodAffinityTerm(
                            label_selector=label_selector,
                            topology_key=topologyKey),
                        weight=weight)
                ])
            print("添加的互斥调度为:{}".format(pod_anti_affinity))
        else:
            return jsonify({"error": "不支持{}这种调度".format(affinity_type)})
    elif action == "delete_pod_anti_affinity":
        print("正在运行{}操作".format(action))
        pass
    elif action == "add_node_affinity":
        current_app.logger.debug("正在运行{}操作".format(action))
        affinity = handle_input(data.get('node_affinity'))
        node_affinity_type = handle_input(affinity.get('type'))

        nodeSelector = handle_input(affinity.get('nodeSelector'))
        key = handle_input(affinity.get('key'))
        value = handle_input(affinity.get('value'))
        operator = handle_input(affinity.get('operator'))
        values = []
        if operator == 'Exists' or operator == 'DoesNotExist':
            # These operators must not carry values.
            values = None
        else:
            if not isinstance(value, list):
                values.append(value)
            else:
                values = value

        if node_affinity_type == "preferred":
            weight = string_to_int(handle_input(affinity.get('weight')))
            if weight is None:
                return simple_error_handle(
                    "weight is required for the {} type".format(node_affinity_type))
            preferred_term = []
            if nodeSelector == "matchExpressions":
                match_expressions = []
                expression = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_expressions.append(expression)
                preference = client.V1NodeSelectorTerm(
                    match_expressions=match_expressions)
            # nodeSelector == "matchFields"
            else:
                match_fields = []
                field = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_fields.append(field)
                preference = client.V1NodeSelectorTerm(
                    match_fields=match_fields)
            term = client.V1PreferredSchedulingTerm(
                weight=weight,
                preference=preference,
            )
            preferred_term.append(term)
            node_affinity = client.V1NodeAffinity(
                # pass the preferred terms list directly
                preferred_during_scheduling_ignored_during_execution=preferred_term)
        elif node_affinity_type == "required":
            current_app.logger.debug(
                "node_affinity_type:{}".format(node_affinity_type))
            node_selector_terms = []
            if nodeSelector == "matchExpressions":
                match_expressions = []
                expression = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_expressions.append(expression)
                term = client.V1NodeSelectorTerm(
                    match_expressions=match_expressions)
            else:
                match_fields = []
                field = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_fields.append(field)

                term = client.V1NodeSelectorTerm(match_fields=match_fields)
            node_selector_terms.append(term)
            node_affinity = client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=client.
                V1NodeSelector(node_selector_terms=node_selector_terms))
        else:
            return simple_error_handle("不支持{}这种调度".format(node_affinity_type))
    elif action == "delete_node_affinity":
        print("正在运行{}操作".format(action))
        pass
    elif action == "add_toleration":
        print("正在运行{}操作".format(action))
        t = handle_input(data.get("toleration"))
        print(type(toleration), toleration)

        effect = t.get('effect')
        key = t.get('key')
        operator = t.get('operator')
        value = t.get('value')
        toleration_seconds = handle_toleraion_seconds(
            t.get('toleration_seconds'))
        print("toleration_seconds:{}".format(toleration_seconds))
        # V1Toleration objects are always truthy, so validate the inputs
        # instead of the constructed object.
        if not (key and operator):
            msg = "{} requires a toleration (effect, key, operator, value)".format(action)
            return jsonify({"error": msg})
        toleration = client.V1Toleration(effect=effect,
                                         key=key,
                                         operator=operator,
                                         toleration_seconds=toleration_seconds,
                                         value=value)
        print(toleration)
    elif action == "delete_toleration":
        print("正在运行{}操作".format(action))
        t = handle_input(data.get("toleration"))
        effect = handle_toleration_item(t.get('effect'))
        key = handle_toleration_item(t.get('key'))
        operator = handle_toleration_item(t.get('operator'))
        value = handle_toleration_item(t.get('value'))
        toleration_seconds = handle_toleraion_seconds(
            t.get('toleration_seconds'))
        print("toleration_seconds:{}".format(toleration_seconds))

        # Validate inputs up front; a constructed V1Toleration is always truthy.
        if not (key and operator):
            msg = "{} requires a toleration (effect, key, operator, value)".format(action)
            return jsonify({"error": msg})
        toleration = client.V1Toleration(effect=effect,
                                         key=key,
                                         operator=operator,
                                         toleration_seconds=toleration_seconds,
                                         value=value)
    elif action == "add_pod_affinity":
        pass
    elif action == "delete_pod_affinity":
        pass
    elif action == "update_replicas":
        replicas = handle_input(data.get('replicas'))
        if not replicas:
            msg = "{}需要提供replicas".format(action)
            return jsonify({"error": msg})
    elif action == "update_image":
        project = handle_input(data.get('project'))
        env = handle_input(data.get('env'))
        imageRepo = handle_input(data.get('imageRepo'))
        imageName = handle_input(data.get('imageName'))
        imageTag = handle_input(data.get('imageTag'))
        if (imageRepo is not None and project is not None and env is not None
                and imageName is not None and imageTag is not None):
            image = "{}/{}-{}/{}:{}".format(imageRepo, project, env, imageName,
                                            imageTag)
        print("image value: {}".format(image))
        if not image:
            msg = "{} requires image".format(action)
            return jsonify({"error": msg})
    elif action == "add_labels":
        pass
    elif action == "delete_labels":
        pass
    else:
        msg = "the {} action is not supported yet".format(action)
        print(msg)
        return jsonify({"error": msg})
    return update_deployment_v2(deploy_name=deploy_name, namespace=namespace,
                                action=action, image=image, replicas=replicas,
                                toleration=toleration, node_affinity=node_affinity,
                                pod_anti_affinity=pod_anti_affinity,
                                pod_affinity=pod_affinity, labels=labels)
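A hypothetical request body for the `add_pod_anti_affinity` action in Example 6; the field names come from the parsing code above, the values are illustrative:

payload = {
    "namespace": "default",
    "deploy_name": "web",
    "action": "add_pod_anti_affinity",
    "pod_anti_affinity": {
        "type": "preferred",             # or "required"
        "weight": "100",                 # parsed with string_to_int()
        "labelSelector": "matchLabels",  # or "matchExpressions"
        "key": "app",
        "value": "web",
        "topologyKey": "kubernetes.io/hostname",
    },
}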
Example #7
    def spawn(self, taskdef: TaskDefinition) -> KubernetesTask:
        try:
            self.emit_sync('prepare', taskdef=taskdef)

            volumes, mounts = create_volumes(taskdef.volumes)

            # container definition
            container = client.V1Container(
                name=taskdef.id,
                image=taskdef.image,
                env=self.create_env(taskdef),
                ports=self.create_ports(taskdef),
                image_pull_policy='Always',  # taskdef field??
                resources=client.V1ResourceRequirements(
                    requests={
                        'cpu': str(taskdef.cpu or '0'),
                        'memory': str(taskdef.memory or '0'),
                    },
                    limits={
                        'cpu': str(taskdef.cpu_limit or '0'),
                        'memory': str(taskdef.memory_limit or '0'),
                    },
                ),
                volume_mounts=mounts,
            )

            labels = {
                LABEL_TASK_ID: taskdef.id,
                LABEL_PARENT_ID: taskdef.parent,
                **taskdef.meta,
            }

            affinity = None

            if (taskdef.affinity is not None) and (taskdef.affinity != {}):
                affinity_label = {}
                if taskdef.affinity.get("label"):
                    affinity_label[taskdef.affinity["label"][
                        "key"]] = taskdef.affinity["label"]["value"]
                else:
                    affinity_label[
                        "cowait_default_affinity_key"] = "cowait_default_affinity_value"

                if taskdef.affinity["type"] == 'spread':
                    aff_def = client.V1PodAntiAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.
                                    V1LabelSelector(match_expressions=[
                                        client.V1LabelSelectorRequirement(
                                            key=list(affinity_label.keys())[0],
                                            operator="In",
                                            values=[
                                                list(affinity_label.values())
                                                [0]
                                            ],
                                        )
                                    ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                elif taskdef.affinity["type"] == 'group':
                    aff_def = client.V1PodAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.
                                    V1LabelSelector(match_expressions=[
                                        client.V1LabelSelectorRequirement(
                                            key=list(affinity_label.keys())[0],
                                            operator="In",
                                            values=[
                                                list(affinity_label.values())
                                                [0]
                                            ],
                                        )
                                    ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                else:
                    aff_def = None

                affinity = client.V1Affinity(
                    pod_anti_affinity=aff_def) if aff_def else None
                labels[list(affinity_label.keys())[0]] = list(
                    affinity_label.values())[0]

            pod = self.core.create_namespaced_pod(
                namespace=self.namespace,
                body=client.V1Pod(
                    metadata=client.V1ObjectMeta(
                        name=taskdef.id,
                        namespace=self.namespace,
                        labels=labels,
                    ),
                    spec=client.V1PodSpec(
                        hostname=taskdef.id,
                        restart_policy='Never',
                        image_pull_secrets=self.get_pull_secrets(),
                        volumes=volumes,
                        affinity=affinity,
                        containers=[container],
                        service_account_name=self.service_account,
                    ),
                ),
            )

            # wrap & return task
            # print('~~ created kubenetes pod', pod.metadata.name)
            task = KubernetesTask(self, taskdef, pod)
            self.emit_sync('spawn', task=task)
            return task

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
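The shape of `taskdef.affinity` is only implied by the parsing in Example 7; a hypothetical value reconstructed from it:

# "spread" becomes pod anti-affinity (tasks repel each other across
# nodes); "group" becomes pod affinity (tasks sharing the label
# co-locate). The label is also applied to the pod so peers can match it.
taskdef.affinity = {
    "type": "spread",
    "label": {"key": "my-affinity-key", "value": "batch-42"},
}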
Example #8
def create_run_pod(k8s_settings, run_context):
    run_id = run_context.id
    run_name = run_context.run.to_json()["name"]

    labels = {
        "run-name": run_name,
        "run": run_id,
    }
    env = get_run_pod_env_vars(run_context)
    node_topology_key = "kubernetes.io/hostname"
    # NOTE(taylor): preference to run on nodes with other runs
    pod_affinities = [
        k8s_client.V1WeightedPodAffinityTerm(
            weight=50,
            pod_affinity_term=k8s_client.V1PodAffinityTerm(
                label_selector=k8s_client.V1LabelSelector(match_labels={
                    "type": "run",
                }, ),
                topology_key=node_topology_key,
            ),
        ),
    ]
    volumes = []
    volume_mounts = []
    experiment_id = run_context.experiment
    if experiment_id:
        labels.update({"experiment": experiment_id})
        # NOTE(taylor): highest preference to run on nodes with runs in the same experiment
        pod_affinities.append(
            k8s_client.V1WeightedPodAffinityTerm(
                weight=100,
                pod_affinity_term=k8s_client.V1PodAffinityTerm(
                    label_selector=k8s_client.V1LabelSelector(match_labels={
                        "type": "run",
                        "experiment": experiment_id,
                    }),
                    topology_key=node_topology_key,
                ),
            ))

    unacceptable_node_group_types = ["system"]
    requests = k8s_settings.resources.get("requests") or {}
    limits = k8s_settings.resources.get("limits") or {}
    # NOTE(taylor): Preventing GPU-less jobs from running on GPU nodes forces the cluster autoscaler to scale up
    # CPU nodes. This prevents a situation where the GPU nodes are not scaled down because they are occupied by
    # CPU workloads. The cluster autoscaler does not know that it should create CPU nodes when the GPUs are unused.
    # TODO(taylor): This could cause unexpected behavior if the cluster has no CPU nodes. Running CPU jobs on GPU
    # nodes could also be an opportunity for more efficient resource utilization, but is avoided for now because the
    # workloads cannot be migrated onto CPU nodes by the cluster autoscaler as mentioned above.
    # NOTE(taylor): Applying a NoSchedule taint to GPU nodes is another way to achieve this behavior, but does not work as
    # well out of the box with clusters that orchestrate doesn't provision. Applying a PreferNoSchedule
    # taint to GPU nodes does not resolve the workload migration issue when there are no CPU nodes.
    if all(
            float(group.get("nvidia.com/gpu", 0)) == 0
            for group in (requests, limits)):
        unacceptable_node_group_types.append("gpu")

    node_affinity = k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(
            node_selector_terms=[
                k8s_client.V1NodeSelectorTerm(match_expressions=[
                    k8s_client.V1NodeSelectorRequirement(
                        key="orchestrate.sigopt.com/node-group-type",
                        operator="NotIn",
                        values=unacceptable_node_group_types,
                    )
                ])
            ]))
    pod_affinity = k8s_client.V1PodAffinity(
        preferred_during_scheduling_ignored_during_execution=pod_affinities)

    pod = k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            owner_references=k8s_settings.owner_references,
            labels={
                "type": "run",
                **labels,
            },
            name=run_name,
        ),
        spec=k8s_client.V1PodSpec(
            affinity=k8s_client.V1Affinity(
                node_affinity=node_affinity,
                pod_affinity=pod_affinity,
            ),
            containers=[
                k8s_client.V1Container(
                    name="model-runner",
                    image=k8s_settings.image,
                    resources=k8s_client.V1ResourceRequirements(
                        **k8s_settings.resources),
                    image_pull_policy="Always",
                    command=[],
                    args=k8s_settings.args,
                    env=env,
                    volume_mounts=volume_mounts,
                    tty=True,
                ),
            ],
            volumes=volumes,
            restart_policy="Never",
        ),
    )
    k8s_settings.api.create_namespaced_pod(k8s_settings.namespace, pod)
    return pod
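A hypothetical `k8s_settings.resources` value for Example 8; without an `nvidia.com/gpu` entry, the `NotIn` expression above keeps the pod off both the `system` and `gpu` node groups:

k8s_settings.resources = {
    "requests": {"cpu": "500m", "memory": "1Gi"},
    "limits": {"cpu": "1", "memory": "2Gi"},
    # no "nvidia.com/gpu" key, so "gpu" is added to the excluded
    # node-group types and CPU nodes are scaled up instead
}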
Example #9
def apply_rekcurd_to_kubernetes(project_id: int,
                                application_id: str,
                                service_level: str,
                                version: str,
                                insecure_host: str,
                                insecure_port: int,
                                replicas_default: int,
                                replicas_minimum: int,
                                replicas_maximum: int,
                                autoscale_cpu_threshold: str,
                                policy_max_surge: int,
                                policy_max_unavailable: int,
                                policy_wait_seconds: int,
                                container_image: str,
                                resource_request_cpu: str,
                                resource_request_memory: str,
                                resource_limit_cpu: str,
                                resource_limit_memory: str,
                                commit_message: str,
                                service_model_assignment: int,
                                service_git_url: str = "",
                                service_git_branch: str = "",
                                service_boot_script: str = "",
                                debug_mode: bool = False,
                                service_id: str = None,
                                is_creation_mode: bool = False,
                                display_name: str = None,
                                description: str = None,
                                kubernetes_models=None,
                                **kwargs) -> str:
    """
    kubectl apply
    :param project_id:
    :param application_id:
    :param service_level:
    :param version:
    :param insecure_host:
    :param insecure_port:
    :param replicas_default:
    :param replicas_minimum:
    :param replicas_maximum:
    :param autoscale_cpu_threshold:
    :param policy_max_surge:
    :param policy_max_unavailable:
    :param policy_wait_seconds:
    :param container_image:
    :param resource_request_cpu:
    :param resource_request_memory:
    :param resource_limit_cpu:
    :param resource_limit_memory:
    :param commit_message:
    :param service_model_assignment:
    :param service_git_url:
    :param service_git_branch:
    :param service_boot_script:
    :param debug_mode:
    :param service_id:
    :param is_creation_mode:
    :param display_name:
    :param description:
    :param kubernetes_models:
    :param kwargs:
    :return:
    """
    __num_retry = 5
    progress_deadline_seconds = \
        int(__num_retry*policy_wait_seconds*replicas_maximum/(policy_max_surge+policy_max_unavailable))
    if service_id is None:
        is_creation_mode = True
        service_id = uuid.uuid4().hex
    if kubernetes_models is None:
        kubernetes_models = db.session.query(KubernetesModel).filter(
            KubernetesModel.project_id == project_id).all()
    data_server_model: DataServerModel = db.session.query(
        DataServerModel).filter(
            DataServerModel.project_id == project_id).first_or_404()
    application_model: ApplicationModel = db.session.query(
        ApplicationModel).filter(
            ApplicationModel.application_id == application_id).first_or_404()
    application_name = application_model.application_name
    model_model: ModelModel = db.session.query(ModelModel).filter(
        ModelModel.model_id == service_model_assignment).first_or_404()

    from kubernetes import client
    try:
        git_secret = load_secret(project_id, application_id, service_level,
                                 GIT_SECRET_PREFIX)
    except:
        git_secret = None
    volume_mounts = dict()
    volumes = dict()
    if git_secret:
        connector_name = "sec-git-name"
        secret_name = "sec-{}-{}".format(GIT_SECRET_PREFIX, application_id)
        volume_mounts = {
            'volume_mounts': [
                client.V1VolumeMount(name=connector_name,
                                     mount_path=GIT_SSH_MOUNT_DIR,
                                     read_only=True)
            ]
        }
        volumes = {
            'volumes': [
                client.V1Volume(name=connector_name,
                                secret=client.V1SecretVolumeSource(
                                    secret_name=secret_name,
                                    items=[
                                        client.V1KeyToPath(key=GIT_ID_RSA,
                                                           path=GIT_ID_RSA,
                                                           mode=GIT_SSH_MODE),
                                        client.V1KeyToPath(key=GIT_CONFIG,
                                                           path=GIT_CONFIG,
                                                           mode=GIT_SSH_MODE)
                                    ]))
            ]
        }

    for kubernetes_model in kubernetes_models:
        full_config_path = get_full_config_path(kubernetes_model.config_path)
        from kubernetes import config
        config.load_kube_config(full_config_path)

        pod_env = [
            client.V1EnvVar(name="REKCURD_SERVICE_UPDATE_FLAG",
                            value=commit_message),
            client.V1EnvVar(name="REKCURD_KUBERNETES_MODE", value="True"),
            client.V1EnvVar(name="REKCURD_DEBUG_MODE", value=str(debug_mode)),
            client.V1EnvVar(name="REKCURD_APPLICATION_NAME",
                            value=application_name),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_HOST",
                            value=insecure_host),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_PORT",
                            value=str(insecure_port)),
            client.V1EnvVar(name="REKCURD_SERVICE_ID", value=service_id),
            client.V1EnvVar(name="REKCURD_SERVICE_LEVEL", value=service_level),
            client.V1EnvVar(name="REKCURD_GRPC_PROTO_VERSION", value=version),
            client.V1EnvVar(name="REKCURD_MODEL_MODE",
                            value=data_server_model.data_server_mode.value),
            client.V1EnvVar(name="REKCURD_MODEL_FILE_PATH",
                            value=model_model.filepath),
            client.V1EnvVar(name="REKCURD_CEPH_ACCESS_KEY",
                            value=str(data_server_model.ceph_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_SECRET_KEY",
                            value=str(data_server_model.ceph_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_HOST",
                            value=str(data_server_model.ceph_host or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_PORT",
                            value=str(data_server_model.ceph_port or "1234")),
            client.V1EnvVar(name="REKCURD_CEPH_IS_SECURE",
                            value=str(data_server_model.ceph_is_secure
                                      or "False")),
            client.V1EnvVar(name="REKCURD_CEPH_BUCKET_NAME",
                            value=str(data_server_model.ceph_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_ACCESS_KEY",
                            value=str(data_server_model.aws_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_SECRET_KEY",
                            value=str(data_server_model.aws_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_BUCKET_NAME",
                            value=str(data_server_model.aws_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_ACCESS_KEY",
                            value=str(data_server_model.gcs_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_SECRET_KEY",
                            value=str(data_server_model.gcs_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_BUCKET_NAME",
                            value=str(data_server_model.gcs_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_URL",
                            value=service_git_url),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_BRANCH",
                            value=service_git_branch),
            client.V1EnvVar(name="REKCURD_SERVICE_BOOT_SHELL",
                            value=service_boot_script),
        ]
        """Namespace registration."""
        core_v1_api = client.CoreV1Api()
        try:
            core_v1_api.read_namespace(name=service_level)
        except:
            api.logger.info("\"{}\" namespace created".format(service_level))
            v1_namespace = client.V1Namespace(
                api_version="v1",
                kind="Namespace",
                metadata=client.V1ObjectMeta(name=service_level))
            core_v1_api.create_namespace(body=v1_namespace)
        """Create/patch Deployment."""
        v1_deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name="deploy-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1DeploymentSpec(
                min_ready_seconds=policy_wait_seconds,
                progress_deadline_seconds=progress_deadline_seconds,
                replicas=replicas_default,
                revision_history_limit=3,
                selector=client.V1LabelSelector(
                    match_labels={"sel": service_id}),
                strategy=client.V1DeploymentStrategy(
                    type="RollingUpdate",
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=policy_max_surge,
                        max_unavailable=policy_max_unavailable)),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={
                            "rekcurd-worker": "True",
                            "id": application_id,
                            "name": application_name,
                            "sel": service_id
                        }),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(
                            pod_anti_affinity=client.V1PodAntiAffinity(
                                preferred_during_scheduling_ignored_during_execution=[
                                    client.V1WeightedPodAffinityTerm(
                                        pod_affinity_term=client.V1PodAffinityTerm(
                                            label_selector=client.V1LabelSelector(
                                                match_expressions=[
                                                    # Match pods of the same service
                                                    # via the "sel" label.
                                                    client.V1LabelSelectorRequirement(
                                                        key="sel",
                                                        operator="In",
                                                        values=[service_id])
                                                ]),
                                            topology_key="kubernetes.io/hostname"),
                                        weight=100)
                                ])),
                        containers=[
                            client.V1Container(
                                env=pod_env,
                                image=container_image,
                                image_pull_policy="Always",
                                name=service_id,
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=insecure_port)
                                ],
                                resources=client.V1ResourceRequirements(
                                    limits={
                                        "cpu": str(resource_limit_cpu),
                                        "memory": resource_limit_memory
                                    },
                                    requests={
                                        "cpu": str(resource_request_cpu),
                                        "memory": resource_request_memory
                                    }),
                                security_context=client.V1SecurityContext(
                                    privileged=True),
                                **volume_mounts)
                        ],
                        node_selector={"host": service_level},
                        **volumes))))
        apps_v1_api = client.AppsV1Api()
        if is_creation_mode:
            api.logger.info("Deployment created.")
            apps_v1_api.create_namespaced_deployment(body=v1_deployment,
                                                     namespace=service_level)
        else:
            api.logger.info("Deployment patched.")
            apps_v1_api.patch_namespaced_deployment(
                body=v1_deployment,
                name="deploy-{0}".format(service_id),
                namespace=service_level)
        """Create/patch Service."""
        v1_service = client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(name="svc-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1ServiceSpec(
                ports=[
                    client.V1ServicePort(name="grpc-backend",
                                         port=insecure_port,
                                         protocol="TCP",
                                         target_port=insecure_port)
                ],
                selector={"sel": service_id}))
        core_v1_api = client.CoreV1Api()
        if is_creation_mode:
            api.logger.info("Service created.")
            core_v1_api.create_namespaced_service(namespace=service_level,
                                                  body=v1_service)
        else:
            api.logger.info("Service patched.")
            core_v1_api.patch_namespaced_service(
                namespace=service_level,
                name="svc-{0}".format(service_id),
                body=v1_service)
        """Create/patch Autoscaler."""
        v1_horizontal_pod_autoscaler = client.V1HorizontalPodAutoscaler(
            api_version="autoscaling/v1",
            kind="HorizontalPodAutoscaler",
            metadata=client.V1ObjectMeta(name="hpa-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1HorizontalPodAutoscalerSpec(
                max_replicas=replicas_maximum,
                min_replicas=replicas_minimum,
                scale_target_ref=client.V1CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name="deploy-{0}".format(service_id)),
                target_cpu_utilization_percentage=autoscale_cpu_threshold))
        autoscaling_v1_api = client.AutoscalingV1Api()
        if is_creation_mode:
            api.logger.info("Autoscaler created.")
            autoscaling_v1_api.create_namespaced_horizontal_pod_autoscaler(
                namespace=service_level, body=v1_horizontal_pod_autoscaler)
        else:
            api.logger.info("Autoscaler patched.")
            autoscaling_v1_api.patch_namespaced_horizontal_pod_autoscaler(
                namespace=service_level,
                name="hpa-{0}".format(service_id),
                body=v1_horizontal_pod_autoscaler)
        """Create Istio ingress if this is the first application."""
        custom_object_api = client.CustomObjectsApi()
        try:
            custom_object_api.get_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                name="ing-vs-{0}".format(application_id),
            )
        except:
            ingress_virtual_service_body = {
                "apiVersion": "networking.istio.io/v1alpha3",
                "kind": "VirtualService",
                "metadata": {
                    "labels": {
                        "rekcurd-worker": "True",
                        "id": application_id,
                        "name": application_name
                    },
                    "name": "ing-vs-{0}".format(application_id),
                    "namespace": service_level
                },
                "spec": {
                    "hosts": ["*"],
                    "gateways": ["rekcurd-ingress-gateway"],
                    "http": [{
                        "match": [{
                            "headers": {
                                "x-rekcurd-application-name": {
                                    "exact": application_name
                                },
                                "x-rekcurd-sevice-level": {
                                    "exact": service_level
                                },
                                "x-rekcurd-grpc-version": {
                                    "exact": version
                                },
                            }
                        }],
                        "route": [{
                            "destination": {
                                "port": {
                                    "number": insecure_port
                                },
                                "host": "svc-{0}".format(service_id)
                            },
                            "weight": 100
                        }],
                        "retries": {
                            "attempts": 25,
                            "perTryTimeout": "1s"
                        }
                    }]
                }
            }
            api.logger.info("Istio created.")
            custom_object_api.create_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                body=ingress_virtual_service_body)
        """Add service model."""
        if is_creation_mode:
            if display_name is None:
                display_name = "{0}-{1}".format(service_level, service_id)
            service_model = ServiceModel(service_id=service_id,
                                         application_id=application_id,
                                         display_name=display_name,
                                         description=description,
                                         service_level=service_level,
                                         version=version,
                                         model_id=service_model_assignment,
                                         insecure_host=insecure_host,
                                         insecure_port=insecure_port)
            db.session.add(service_model)
            db.session.flush()
    """Finish."""
    return service_id
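The progress deadline formula near the top of Example 9, worked through with illustrative numbers:

# Illustrative values only:
#   __num_retry = 5, policy_wait_seconds = 30, replicas_maximum = 4,
#   policy_max_surge = 1, policy_max_unavailable = 0
progress_deadline_seconds = int(5 * 30 * 4 / (1 + 0))  # 600 seconds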
Example #10
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']

    # Optional settings from the cluster spec, with defaults when absent.
    mongodb_spec = cluster_object.get('spec', {}).get('mongodb', {})
    replicas = mongodb_spec.get('replicas', 3)
    mongodb_limit_cpu = mongodb_spec.get('mongodb_limit_cpu', '100m')
    mongodb_limit_memory = mongodb_spec.get('mongodb_limit_memory', '64Mi')
    hard_pod_anti_affinity = mongodb_spec.get('hard_pod_anti_affinity', True)

    # Note: V1beta1StatefulSet maps to the legacy apps/v1beta1 API; newer
    # kubernetes clients expose the apps/v1 equivalent, V1StatefulSet.
    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(
        replicas=replicas,
        service_name=name,
        template=client.V1PodTemplateSpec())

    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))

    statefulset.spec.template.spec = client.V1PodSpec(containers=[])

    pod_affinity_term = client.V1PodAffinityTerm(
        topology_key='kubernetes.io/hostname',
        label_selector=client.V1LabelSelector(match_expressions=[
            client.V1LabelSelectorRequirement(
                key='cluster', operator='In', values=[name])
        ]))

    # Default behaviour: a hard anti-affinity rule, so two members of the
    # same cluster are never scheduled onto the same node.
    pod_anti_affinity = client.V1PodAntiAffinity(
        required_during_scheduling_ignored_during_execution=[
            pod_affinity_term
        ])

    if not hard_pod_anti_affinity:
        # Soft variant: spreading is only a weighted scheduling preference,
        # so pods may still share a node if the cluster is short on nodes.
        pod_anti_affinity = client.V1PodAntiAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                client.V1WeightedPodAffinityTerm(
                    weight=100, pod_affinity_term=pod_affinity_term)
            ])

    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=pod_anti_affinity)

    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
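    # In the container below, Kubernetes expands $(POD_IP) in the mongod
    # command from the POD_IP env var, so mongod binds only to 127.0.0.1
    # and the pod's own IP.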
    mongodb_container = client.V1Container(
        name='mongod',
        env=[
            client.V1EnvVar(
                name='POD_IP',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='status.podIP')))
        ],
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem', '--bind_ip', '127.0.0.1,$(POD_IP)'
        ],
        image='mongo:3.6.4',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
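    # Sidecar that exposes Prometheus metrics on port 9001, talking to the
    # local mongod over TLS with credentials from the monitoring secret.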
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'
        ],  # flake8: noqa
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(name='mongo-ca',
                                secret=client.V1SecretVolumeSource(
                                    secret_name='{}-ca'.format(name),
                                    items=[
                                        client.V1KeyToPath(key='ca.pem',
                                                           path='ca.pem'),
                                        client.V1KeyToPath(key='ca-key.pem',
                                                           path='ca-key.pem')
                                    ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
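    # Runs before mongod: uses the CA material from the mongo-ca secret to
    # issue this member's certificate (member-cert.yml) into the shared
    # mongo-tls emptyDir, which the main containers mount read-only.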
    tls_init_ca_volumemount = client.V1VolumeMount(
        name='mongo-ca', read_only=True, mount_path='/etc/ssl/mongod-ca')
    tls_init_mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=False, mount_path='/etc/ssl/mongod')
    tls_init_container = client.V1Container(
        name="cert-init",
        image="quay.io/kubestack/mongodb-init:latest",
        volume_mounts=[
            tls_init_ca_volumemount, tls_init_mongodb_tls_volumemount
        ],
        env=[
            client.V1EnvVar(
                name='METADATA_NAME',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.name'))),
            client.V1EnvVar(
                name='NAMESPACE',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.namespace')))
        ],
        command=["ansible-playbook", "member-cert.yml"])

    statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
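For context, a minimal sketch of how the returned object might be submitted, assuming a reachable kubeconfig and an older kubernetes client that still ships the apps/v1beta1 models used above; the names and spec values are illustrative:

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod

# Minimal custom-resource dict with the keys get_statefulset_object reads.
cluster_object = {
    'metadata': {'name': 'mongo-demo', 'namespace': 'default'},
    'spec': {'mongodb': {'replicas': 3, 'hard_pod_anti_affinity': True}},
}

statefulset = get_statefulset_object(cluster_object)
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace='default', body=statefulset)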