Example #1
    def get_affinity(self):
        """Determine the affinity term for the build pod.

        There are two affinity strategies; which one is used depends on how
        the BinderHub is configured.

        In the default setup the affinity of each build pod is an "anti-affinity"
        which causes the pods to prefer to schedule on separate nodes.

        In a setup with docker-in-docker enabled, pods for a particular
        repository prefer to schedule on the same node in order to reuse the
        docker layer cache of previous builds.
        """
        resp = self.api.list_namespaced_pod(
            self.namespace,
            label_selector="component=dind,app=binder",
            _request_timeout=KUBE_REQUEST_TIMEOUT,
            _preload_content=False,
        )
        dind_pods = json.loads(resp.read())

        if self.sticky_builds and dind_pods:
            node_names = [
                pod["spec"]["nodeName"] for pod in dind_pods["items"]
            ]
            ranked_nodes = rendezvous_rank(node_names, self.repo_url)
            best_node_name = ranked_nodes[0]

            affinity = client.V1Affinity(node_affinity=client.V1NodeAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    client.V1PreferredSchedulingTerm(
                        weight=100,
                        preference=client.V1NodeSelectorTerm(
                            match_expressions=[
                                client.V1NodeSelectorRequirement(
                                    key="kubernetes.io/hostname",
                                    operator="In",
                                    values=[best_node_name],
                                )
                            ]),
                    )
                ]))

        else:
            affinity = client.V1Affinity(
                pod_anti_affinity=client.V1PodAntiAffinity(
                    preferred_during_scheduling_ignored_during_execution=[
                        client.V1WeightedPodAffinityTerm(
                            weight=100,
                            pod_affinity_term=client.V1PodAffinityTerm(
                                topology_key="kubernetes.io/hostname",
                                label_selector=client.V1LabelSelector(
                                    match_labels=dict(
                                        component=self._component_label)),
                            ),
                        )
                    ]))

        return affinity
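`rendezvous_rank` is not shown in this snippet. Below is a minimal sketch of rendezvous (highest-random-weight) hashing consistent with how it is used here; the exact hash and tie-breaking are assumptions, not BinderHub's actual implementation:

import hashlib

def rendezvous_rank(node_names, key):
    # Highest-random-weight (rendezvous) hashing: score every node by
    # hashing it together with the key and sort best-first, so the same
    # repository consistently prefers the same node while different
    # repositories spread across the pool.
    return sorted(
        node_names,
        key=lambda node: hashlib.sha256(
            "{}-{}".format(node, key).encode()).hexdigest(),
        reverse=True,
    )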
Example #2
    def test_pod_spec(self):
        cluster_spec = ClusterSpec(cluster_spec_json=test_spec)
        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, "other")

        self.assertEqual(
            pod.metadata.labels["elasticdl.org/app-name"], "elasticdl"
        )
        self.assertEqual(pod.metadata.labels["elasticdl.org/site"], "hangzhou")
        self.assertEqual(
            pod.metadata.annotations["tag.elasticdl.org/optimization"],
            "enabled",
        )
        expected_tolerations = [
            client.V1Toleration(
                effect="NoSchedule",
                key="elasticdl.org/logic-pool",
                operator="Equal",
                value="ElasticDL",
            )
        ]
        self.assertEqual(pod.spec.tolerations, expected_tolerations)
        match_expressions = [
            client.V1NodeSelectorRequirement(
                key="elasticdl.org/logic-pool",
                operator="In",
                values=["ElasticDL"],
            )
        ]

        expected_affinity = client.V1Affinity(
            node_affinity=client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=(
                    client.V1NodeSelector(
                        node_selector_terms=[
                            client.V1NodeSelectorTerm(
                                match_expressions=match_expressions
                            )
                        ]
                    )
                )
            )
        )
        self.assertEqual(pod.spec.affinity, expected_affinity)

        expected_env = []
        expected_env.append(client.V1EnvVar(name="LOG_ENABLED", value="true"))
        self.assertEqual(pod.spec.containers[0].env, expected_env)

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.MASTER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Sun")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.WORKER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Earth")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.PS)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Moon")
Example #3
 def k8s_object(self):
     depl = client.AppsV1beta1Deployment(
         metadata=client.V1ObjectMeta(
             name=self.name,
             labels=self.labels
         ),
         spec=client.AppsV1beta1DeploymentSpec(
             strategy=client.AppsV1beta1DeploymentStrategy(
                 type='RollingUpdate',
                 rolling_update=client.AppsV1beta1RollingUpdateDeployment(
                     max_surge=0
                 )
             ),
             template=client.V1PodTemplateSpec(
                 metadata=client.V1ObjectMeta(
                     labels=self.template_labels),
                 spec=client.V1PodSpec(
                     affinity=client.V1Affinity(
                         pod_anti_affinity=client.V1PodAntiAffinity(
                             required_during_scheduling_ignored_during_execution=[
                                 {"topologyKey": e2e_globals.ANTI_AFFINITY_KEY},
                             ]
                         ),
                     ),
                      volumes=[client.V1Volume(
                          name='data',
                          config_map=client.V1ConfigMapVolumeSource(
                              name=self.cfgmap_name)
                      )],
                     containers=[client.V1Container(
                         image=e2e_globals.TEST_DEPLOYMENT_IMAGE,
                         name="testapp",
                         volume_mounts=[client.V1VolumeMount(
                             name='data',
                             mount_path='/usr/share/nginx/html')
                         ],
                         ports=[client.V1ContainerPort(
                             container_port=e2e_globals.TEST_CONTAINER_PORT)],
                         resources=client.V1ResourceRequirements(
                             requests={
                                 'cpu': '1m',
                                 'memory': '1Mi',
                             },
                         ),
                     )])),
             replicas=self.replicas)
     )
     if self.vol_claim_name is not None:
         volume = client.V1Volume(name='test-volume',
                                  persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                                      claim_name=self.vol_claim_name))
         mount = client.V1VolumeMount(
             name='test-volume',
             mount_path='/usr/blank'
         )
         depl.spec.template.spec.containers[0].volume_mounts.append(mount)
         depl.spec.template.spec.volumes.append(volume)
     return depl
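Note that the required anti-affinity term above is passed as a raw dict rather than a typed model; both serialize to the same API payload. The typed equivalent would be roughly the following sketch (the label_selector is an assumption; a required term usually also says which pods to repel):

client.V1PodAntiAffinity(
    required_during_scheduling_ignored_during_execution=[
        client.V1PodAffinityTerm(
            topology_key=e2e_globals.ANTI_AFFINITY_KEY,
            label_selector=client.V1LabelSelector(
                match_labels=self.template_labels),  # assumed selector
        )
    ])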
Example #4
    def deploy(self, image, name, ns, port, replicas=1, svc_type="NodePort", traffic_policy="Local", cluster_ip=None, ipv6=False):
        """
        Creates a deployment and corresponding service with the given
        parameters.
        """
        # Use a pod anti-affinity so that the scheduler prefers deploying the
        # pods on different nodes. This makes our tests more reliable, since
        # some tests expect pods to be scheduled to different nodes.
        selector = {'matchLabels': {'app': name}}
        terms = [client.V1WeightedPodAffinityTerm(
            pod_affinity_term=client.V1PodAffinityTerm(
                label_selector=selector,
                topology_key="kubernetes.io/hostname"),
            weight=100,
            )]
        anti_aff = client.V1PodAntiAffinity(
                preferred_during_scheduling_ignored_during_execution=terms)

        # Run a deployment with <replicas> copies of <image>, with the
        # pods labelled with "app": <name>.
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=name),
            spec=client.V1DeploymentSpec(
                replicas=replicas,
                selector=selector,
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels={"app": name}),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(pod_anti_affinity=anti_aff),
                        containers=[
                          client.V1Container(name=name,
                                             image=image,
                                             ports=[client.V1ContainerPort(container_port=port)]),
                    ]))))

        # Create the deployment.
        api_response = client.AppsV1Api().create_namespaced_deployment(
            body=deployment,
            namespace=ns)
        logger.debug("Deployment created. status='%s'" % str(api_response.status))

        # Create a service called <name> whose endpoints are the pods
        # with "app": <name>; i.e. those just created above.
        self.create_service(name, name, ns, port, svc_type, traffic_policy, ipv6=ipv6)
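A usage sketch, called from within the test class; the image, name, namespace, and port are placeholder values, not from the original tests:

# Two replicas that the scheduler will prefer to spread across nodes,
# exposed through a NodePort service named after the deployment.
self.deploy(image="nginx:1.25", name="nginx-test", ns="default",
            port=80, replicas=2)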
Example #5
def create_affinity(affinity):
    affinities = []
    if affinity is None:
        return None
    elif isinstance(affinity, str):
        affinities = [{'mode': affinity}]
    elif isinstance(affinity, dict):
        affinities = [affinity]
    elif isinstance(affinity, list):
        pass
    else:
        raise ValueError('Illegal affinity definition')

    # fill with defaults
    affinities = [parse_affinity_item(item) for item in affinities]

    # sort into required/preferred, affinity/anti-affinity
    stack_req, stack_pref = [], []
    spread_req, spread_pref = [], []
    for item in affinities:
        term = create_affinity_term(item)
        if item['mode'] == 'stack':
            if item['required']:
                stack_req.append(term)
            else:
                stack_pref.append(term)
        elif item['mode'] == 'spread':
            if item['required']:
                spread_req.append(term)
            else:
                spread_pref.append(term)

    return client.V1Affinity(
        pod_affinity=client.V1PodAffinity(
            required_during_scheduling_ignored_during_execution=stack_req,
            preferred_during_scheduling_ignored_during_execution=stack_pref,
        ) if len(stack_req) + len(stack_pref) > 0 else None,
        pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=spread_req,
            preferred_during_scheduling_ignored_during_execution=spread_pref,
        ) if len(spread_req) + len(spread_pref) > 0 else None,
    )
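Per the branches above, `create_affinity` accepts None, a mode string, a single dict, or a list of dicts. A sketch of the three call shapes, assuming `parse_affinity_item` defaults `required` to False:

create_affinity("spread")                             # preferred pod anti-affinity
create_affinity({"mode": "stack", "required": True})  # required co-location
create_affinity([
    {"mode": "spread", "required": True},             # hard spreading ...
    {"mode": "stack"},                                # ... plus soft stacking
])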
Example #6
def create_deployment_old(config_file):
    """
    Create the IBM Spectrum Scale CSI Operator deployment object in the operator
    namespace, using the deployment_operator_image_for_crd and
    deployment_driver_image_for_crd parameters from the config.json file.

    Args:
        config_file: configuration JSON file

    Returns:
        None

    Raises:
        Raises an exception on kubernetes client api failure and asserts

    """

    deployment_apps_api_instance = client.AppsV1Api()

    deployment_labels = {
        "app.kubernetes.io/instance": "ibm-spectrum-scale-csi-operator",
        "app.kubernetes.io/managed-by": "ibm-spectrum-scale-csi-operator",
        "app.kubernetes.io/name": "ibm-spectrum-scale-csi-operator",
        "product": "ibm-spectrum-scale-csi",
        "release": "ibm-spectrum-scale-csi-operator"
    }

    deployment_annotations = {
        "productID": "ibm-spectrum-scale-csi-operator",
        "productName": "IBM Spectrum Scale CSI Operator",
        "productVersion": "2.0.0"
    }

    deployment_metadata = client.V1ObjectMeta(
        name="ibm-spectrum-scale-csi-operator",
        labels=deployment_labels,
        namespace=namespace_value)

    deployment_selector = client.V1LabelSelector(
        match_labels={
            "app.kubernetes.io/name": "ibm-spectrum-scale-csi-operator"
        })

    podtemplate_metadata = client.V1ObjectMeta(
        labels=deployment_labels, annotations=deployment_annotations)

    pod_affinity = client.V1Affinity(node_affinity=client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
            node_selector_terms=[
                client.V1NodeSelectorTerm(match_expressions=[
                    client.V1NodeSelectorRequirement(
                        key="beta.kubernetes.io/arch", operator="Exists")
                ])
            ])))
    ansible_pod_container = client.V1Container(
        image=config_file["deployment_operator_image_for_crd"],
        command=[
            "/usr/local/bin/ao-logs", "/tmp/ansible-operator/runner", "stdout"
        ],
        liveness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=10,
            period_seconds=30),
        readiness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=3,
            period_seconds=1),
        name="ansible",
        image_pull_policy="IfNotPresent",
        security_context=client.V1SecurityContext(
            capabilities=client.V1Capabilities(drop=["ALL"])),
        volume_mounts=[
            client.V1VolumeMount(mount_path="/tmp/ansible-operator/runner",
                                 name="runner",
                                 read_only=True)
        ],
        env=[
            client.V1EnvVar(
                name="CSI_DRIVER_IMAGE",
                value=config_file["deployment_driver_image_for_crd"])
        ])

    operator_pod_container = client.V1Container(
        image=config_file["deployment_operator_image_for_crd"],
        name="operator",
        image_pull_policy="IfNotPresent",
        liveness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=10,
            period_seconds=30),
        readiness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=3,
            period_seconds=1),
        security_context=client.V1SecurityContext(
            capabilities=client.V1Capabilities(drop=["ALL"])),
        env=[
            client.V1EnvVar(name="WATCH_NAMESPACE",
                            value_from=client.V1EnvVarSource(
                                field_ref=client.V1ObjectFieldSelector(
                                    field_path="metadata.namespace"))),
            client.V1EnvVar(name="POD_NAME",
                            value_from=client.V1EnvVarSource(
                                field_ref=client.V1ObjectFieldSelector(
                                    field_path="metadata.name"))),
            client.V1EnvVar(name="OPERATOR_NAME",
                            value="ibm-spectrum-scale-csi-operator"),
            client.V1EnvVar(
                name="CSI_DRIVER_IMAGE",
                value=config_file["deployment_driver_image_for_crd"])
        ],
        volume_mounts=[
            client.V1VolumeMount(mount_path="/tmp/ansible-operator/runner",
                                 name="runner")
        ])
    pod_spec = client.V1PodSpec(
        affinity=pod_affinity,
        containers=[ansible_pod_container, operator_pod_container],
        service_account_name="ibm-spectrum-scale-csi-operator",
        volumes=[
            client.V1Volume(
                empty_dir=client.V1EmptyDirVolumeSource(medium="Memory"),
                name="runner")
        ])

    podtemplate_spec = client.V1PodTemplateSpec(metadata=podtemplate_metadata,
                                                spec=pod_spec)

    deployment_spec = client.V1DeploymentSpec(replicas=1,
                                              selector=deployment_selector,
                                              template=podtemplate_spec)

    body_dep = client.V1Deployment(kind='Deployment',
                                   api_version='apps/v1',
                                   metadata=deployment_metadata,
                                   spec=deployment_spec)

    try:
        LOGGER.info("creating deployment for operator")
        deployment_apps_api_response = deployment_apps_api_instance.create_namespaced_deployment(
            namespace=namespace_value, body=body_dep)
        LOGGER.debug(str(deployment_apps_api_response))
    except ApiException as e:
        LOGGER.error(
            f"Exception when calling AppsV1Api->create_namespaced_deployment: {e}"
        )
        assert False
Example #7
 def export_deployment(self):
     # Configure the Pod template container
     volume_mounts = []
     containers = []
     volumes = []
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/docker/logs', name='logs'))
     volumes.append(
         client.V1Volume(name='logs',
                         host_path=client.V1HostPathVolumeSource(
                             path='/opt/logs', type='DirectoryOrCreate')))
     if self.mounts:
         for path in self.mounts:
             volume_mounts.append(
                 client.V1VolumeMount(mount_path=path,
                                      name=self.mounts[path]))
             volumes.append(
                 client.V1Volume(name=self.mounts[path],
                                 host_path=client.V1HostPathVolumeSource(
                                     path=path, type='DirectoryOrCreate')))
     liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                     tcp_socket=client.V1TCPSocketAction(
                                         port=int(self.container_port[0])))
     readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                      tcp_socket=client.V1TCPSocketAction(
                                          port=int(self.container_port[0])))
     if self.healthcheck:
         liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                         http_get=client.V1HTTPGetAction(
                                             path=self.healthcheck,
                                             port=int(
                                                 self.container_port[0])))
         readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                          http_get=client.V1HTTPGetAction(
                                              path=self.healthcheck,
                                              port=int(
                                                  self.container_port[0])))
     Env = [
         client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
         client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8'),
         client.V1EnvVar(name='POD_NAME',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='metadata.name'))),
         client.V1EnvVar(name='POD_IP',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='status.podIP'))),
     ]
     container = client.V1Container(
         name=self.dm_name,
         image=self.image,
         ports=[
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ],
         image_pull_policy='Always',
         env=Env,
         resources=client.V1ResourceRequirements(limits=self.re_limits,
                                                 requests=self.re_requests),
         volume_mounts=volume_mounts,
         liveness_probe=liveness_probe,
         readiness_probe=readiness_probe)
     containers.append(container)
     if self.sidecar:
         sidecar_container = client.V1Container(
             name='sidecar-%s' % self.dm_name,
             image=self.sidecar,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts)
         containers.append(sidecar_container)
     # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=containers,
             image_pull_secrets=[secrets],
             volumes=volumes,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='project',
                                     operator='In',
                                     values=['moji'])
                             ]),
                         weight=30),
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='deploy',
                                     operator='In',
                                     values=[self.dm_name])
                             ]),
                         weight=70)
                 ]))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
Example #8
    def submit(self):
        """Submit a image spec to openshift's s2i and wait for completion """
        volume_mounts = [
            client.V1VolumeMount(mount_path="/var/run/docker.sock",
                                 name="docker-socket")
        ]
        docker_socket_path = urlparse(self.docker_host).path
        volumes = [
            client.V1Volume(name="docker-socket",
                            host_path=client.V1HostPathVolumeSource(
                                path=docker_socket_path, type='Socket'))
        ]

        if self.push_secret:
            volume_mounts.append(
                client.V1VolumeMount(mount_path="/root/.docker",
                                     name='docker-push-secret'))
            volumes.append(
                client.V1Volume(name='docker-push-secret',
                                secret=client.V1SecretVolumeSource(
                                    secret_name=self.push_secret)))

        env = []
        if self.git_credentials:
            env.append(
                client.V1EnvVar(name='GIT_CREDENTIAL_ENV',
                                value=self.git_credentials))

        component_label = "binderhub-build"
        self.pod = client.V1Pod(
            metadata=client.V1ObjectMeta(
                name=self.name,
                labels={
                    "name": self.name,
                    "component": component_label,
                },
                annotations={
                    "binder-repo": self.repo_url,
                },
            ),
            spec=client.V1PodSpec(
                containers=[
                    client.V1Container(
                        image=self.build_image,
                        name="builder",
                        args=self.get_cmd(),
                        volume_mounts=volume_mounts,
                        resources=client.V1ResourceRequirements(
                            limits={'memory': self.memory_limit},
                            requests={'memory': self.memory_limit}),
                        env=env)
                ],
                tolerations=[
                    client.V1Toleration(
                        key='hub.jupyter.org/dedicated',
                        operator='Equal',
                        value='user',
                        effect='NoSchedule',
                    ),
                    # GKE currently does not permit creating taints on a node pool
                    # with a `/` in the key field
                    client.V1Toleration(
                        key='hub.jupyter.org_dedicated',
                        operator='Equal',
                        value='user',
                        effect='NoSchedule',
                    ),
                ],
                node_selector=self.node_selector,
                volumes=volumes,
                restart_policy="Never",
                affinity=client.V1Affinity(
                    pod_anti_affinity=client.V1PodAntiAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                weight=100,
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    topology_key="kubernetes.io/hostname",
                                    label_selector=client.V1LabelSelector(
                                        match_labels=dict(
                                            component=component_label))))
                        ]))))

        try:
            ret = self.api.create_namespaced_pod(self.namespace, self.pod)
        except client.rest.ApiException as e:
            if e.status == 409:
                # Someone else created it!
                app_log.info("Build %s already running", self.name)
                pass
            else:
                raise
        else:
            app_log.info("Started build %s", self.name)

        app_log.info("Watching build pod %s", self.name)
        while not self.stop_event.is_set():
            w = watch.Watch()
            try:
                for f in w.stream(
                        self.api.list_namespaced_pod,
                        self.namespace,
                        label_selector="name={}".format(self.name),
                        timeout_seconds=30,
                ):
                    if f['type'] == 'DELETED':
                        self.progress('pod.phasechange', 'Deleted')
                        return
                    self.pod = f['object']
                    if not self.stop_event.is_set():
                        self.progress('pod.phasechange', self.pod.status.phase)
                    if self.pod.status.phase == 'Succeeded':
                        self.cleanup()
                    elif self.pod.status.phase == 'Failed':
                        self.cleanup()
            except Exception as e:
                app_log.exception("Error in watch stream for %s", self.name)
                raise
            finally:
                w.stop()
            if self.stop_event.is_set():
                app_log.info("Stopping watch of %s", self.name)
                return
Example #9
def clean_pod_template(pod_template, match_node_purpose="prefer", pod_type="worker"):
    """ Normalize pod template and check for type errors """
    if isinstance(pod_template, str):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s"
            "If trying to pass a yaml filename then use "
            "KubeCluster.from_yaml"
        )
        raise TypeError(msg % pod_template)

    if isinstance(pod_template, dict):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s"
            "If trying to pass a dictionary specification then use "
            "KubeCluster.from_dict"
        )
        raise TypeError(msg % str(pod_template))

    pod_template = copy.deepcopy(pod_template)

    # Make sure metadata / labels / env objects exist, so they can be modified
    # later without a lot of `is None` checks
    if pod_template.metadata is None:
        pod_template.metadata = client.V1ObjectMeta()
    if pod_template.metadata.labels is None:
        pod_template.metadata.labels = {}

    if pod_template.spec.containers[0].env is None:
        pod_template.spec.containers[0].env = []

    # add default tolerations
    tolerations = [
        client.V1Toleration(
            key="k8s.dask.org/dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
        # GKE currently does not permit creating taints on a node pool
        # with a `/` in the key field
        client.V1Toleration(
            key="k8s.dask.org_dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
    ]

    if pod_template.spec.tolerations is None:
        pod_template.spec.tolerations = tolerations
    else:
        pod_template.spec.tolerations.extend(tolerations)

    # add default node affinity to k8s.dask.org/node-purpose=worker
    if match_node_purpose != "ignore":
        # for readability
        affinity = pod_template.spec.affinity

        if affinity is None:
            affinity = client.V1Affinity()
        if affinity.node_affinity is None:
            affinity.node_affinity = client.V1NodeAffinity()

        # a common object for both a preferred and a required node affinity
        node_selector_term = client.V1NodeSelectorTerm(
            match_expressions=[
                client.V1NodeSelectorRequirement(
                    key="k8s.dask.org/node-purpose", operator="In", values=[pod_type]
                )
            ]
        )

        if match_node_purpose == "require":
            if (
                affinity.node_affinity.required_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.required_during_scheduling_ignored_during_execution = client.V1NodeSelector(
                    node_selector_terms=[]
                )
            affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.append(
                node_selector_term
            )
        elif match_node_purpose == "prefer":
            if (
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution = (
                    []
                )
            preferred_scheduling_terms = [
                client.V1PreferredSchedulingTerm(
                    preference=node_selector_term, weight=100
                )
            ]
            affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.extend(
                preferred_scheduling_terms
            )
        else:
            raise ValueError(
                'Attribute must be one of "ignore", "prefer", or "require".'
            )
        pod_template.spec.affinity = affinity

    return pod_template
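A minimal usage sketch (the image is a placeholder):

from kubernetes import client

pod = client.V1Pod(spec=client.V1PodSpec(containers=[
    client.V1Container(name="dask-worker", image="daskdev/dask:latest")
]))
# Returns a copy of the template with the k8s.dask.org tolerations added
# and a preferred node affinity on k8s.dask.org/node-purpose=worker.
pod = clean_pod_template(pod, match_node_purpose="prefer", pod_type="worker")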
Example #10
 def export_deployment(self):
     # Configure the Pod template container
     volume_mounts = []
     containers = []
     volumes = []
     ports = []
     liveness_probe = None
     readiness_probe = None
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/docker/logs', name='logs'))
     volumes.append(
         client.V1Volume(name='logs',
                         host_path=client.V1HostPathVolumeSource(
                             path='/opt/logs', type='DirectoryOrCreate')))
     if self.mounts:
         for path in self.mounts:
             volume_mounts.append(
                 client.V1VolumeMount(mount_path=path,
                                      name=self.mounts[path]))
             volumes.append(
                 client.V1Volume(name=self.mounts[path],
                                 host_path=client.V1HostPathVolumeSource(
                                     path=path, type='DirectoryOrCreate')))
     if self.container_port:
         ports = [
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ]
         liveness_probe = client.V1Probe(
             initial_delay_seconds=15,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0])))
         readiness_probe = client.V1Probe(
             initial_delay_seconds=15,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0])))
         if self.healthcheck:
             liveness_probe = client.V1Probe(
                 initial_delay_seconds=15,
                 http_get=client.V1HTTPGetAction(
                     path=self.healthcheck,
                     port=int(self.container_port[0])))
             readiness_probe = client.V1Probe(
                 initial_delay_seconds=15,
                 http_get=client.V1HTTPGetAction(
                     path=self.healthcheck,
                     port=int(self.container_port[0])))
     Env = [
         client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
         client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8'),
         client.V1EnvVar(name='POD_NAME',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='metadata.name'))),
         client.V1EnvVar(name='POD_IP',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='status.podIP'))),
     ]
     container = client.V1Container(name=self.dm_name,
                                    image=self.image,
                                    ports=ports,
                                    image_pull_policy='Always',
                                    env=Env,
                                    resources=client.V1ResourceRequirements(
                                        limits=self.re_limits,
                                        requests=self.re_requests),
                                    volume_mounts=volume_mounts)
     if liveness_probe and readiness_probe:
         container = client.V1Container(
             name=self.dm_name,
             image=self.image,
             ports=ports,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts,
             liveness_probe=liveness_probe,
             readiness_probe=readiness_probe)
     containers.append(container)
     if self.sidecar:
         sidecar_container = client.V1Container(
             name='sidecar-%s' % self.dm_name,
             image=self.sidecar,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts)
         containers.append(sidecar_container)
     # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     preference_key = self.dm_name
     project_values = ['xxxx']
     host_aliases = []
     db_docker_hosts = db_op.docker_hosts
     values = db_docker_hosts.query.with_entities(
         db_docker_hosts.ip, db_docker_hosts.hostname).filter(
             and_(db_docker_hosts.deployment == self.dm_name,
                  db_docker_hosts.context == self.context)).all()
     db_op.DB.session.remove()
     if values:
         ips = []
         for value in values:
             try:
                 ip, hostname = value
                 key = "op_docker_hosts_%s" % ip
                 Redis.lpush(key, hostname)
                 ips.append(ip)
             except Exception as e:
                 logging.error(e)
         for ip in set(ips):
             try:
                 key = "op_docker_hosts_%s" % ip
                 if Redis.exists(key):
                     hostnames = Redis.lrange(key, 0, -1)
                     if hostnames:
                         host_aliases.append(
                             client.V1HostAlias(hostnames=hostnames, ip=ip))
                 Redis.delete(key)
             except Exception as e:
                 logging.error(e)
     if self.labels:
         if 'deploy' in self.labels:
             preference_key = self.labels['deploy']
         if 'project' in self.labels:
             project_values = [self.labels['project']]
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=containers,
             image_pull_secrets=[secrets],
             volumes=volumes,
             host_aliases=host_aliases,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key=preference_key,
                                     operator='In',
                                     values=['mark'])
                             ]),
                         weight=100)
                 ],
                 required_during_scheduling_ignored_during_execution=client.
                 V1NodeSelector(node_selector_terms=[
                     client.V1NodeSelectorTerm(match_expressions=[
                         client.V1NodeSelectorRequirement(
                             key='project',
                             operator='In',
                             values=project_values)
                     ])
                 ])))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
Example #11
def update_deployment_v2(deploy_name,
                         namespace,
                         action,
                         image=None,
                         replicas=None,
                         toleration=None,
                         node_affinity=None,
                         pod_anti_affinity=None,
                         pod_affinity=None,
                         labels=None):
    current_app.logger.debug("命名空间:{},deploy_name: {}".format(
        namespace, deploy_name))
    deployment = get_deployment_by_name(namespace, deploy_name)
    # print(deployment)
    if deployment is None:
        # return jsonify({"error": "1003", "msg": "deployment not found"})
        return error_with_status(error="deployment not found", msg="", code=1003)
    if action == "add_pod_anti_affinity":
        if not pod_anti_affinity:
            msg = "{} requires pod_anti_affinity".format(action)
            # return jsonify({"error": msg})
            return error_with_status(error="", msg=msg, code=1003)
        # handle the case where affinity is None
        affinity = deployment.spec.template.spec.affinity
        if not affinity:
            affinity = client.V1Affinity(pod_anti_affinity=pod_anti_affinity)
            deployment.spec.template.spec.affinity = affinity
        else:
            print("pod_anti_affinity already exists, switching to update mode")
            action = "update_affinity"
            deployment.spec.template.spec.affinity.pod_anti_affinity = pod_anti_affinity
    elif action == "delete_pod_anti_affinity":
        print("正在运行{}操作".format(action))
        affinity = deployment.spec.template.spec.affinity
        if not affinity:
            return simple_error_handle("还没设置亲和性")
        pod_anti_affinity = affinity.pod_anti_affinity
        if not pod_anti_affinity:
            return simple_error_handle("还没设置互斥调度")
        deployment.spec.template.spec.affinity.pod_anti_affinity = None
        print("删除互斥调度后")
        print(deployment.spec.template.spec.affinity)
    elif action == "add_toleration":
        print("正在运行{}操作".format(action))
        if not toleration:
            msg = "{}需要提供toleration".format(action)
            return jsonify({"error": msg})
        t = deployment.spec.template.spec.tolerations
        if t == None:
            t = []
        t.append(toleration)
        deployment.spec.template.spec.tolerations = t
    elif action == "add_node_affinity":
        # current_app.logger.debug(node_affinity)
        if not node_affinity:
            # return simple_error_handle("{}需要提供node_affinity".format(action))
            msg = "{}需要提供node_affinity".format(action)
            return simple_error_handle(msg=msg, code=1003)
        # 修复affinity为空的bug
        affinity = deployment.spec.template.spec.affinity
        current_app.logger.debug("添加前affinity:    " +
                                 json.dumps(affinity, cls=MyEncoder))
        current_app.logger.debug("即将添加node_affinity:    " +
                                 json.dumps(node_affinity, cls=MyEncoder))
        if not affinity:
            affinity = client.V1Affinity(node_affinity=node_affinity)
            deployment.spec.template.spec.affinity = affinity
        # 修复affinity为空的bug
        else:
            print("affinity已经存在,使用更新模式")
            action = "update_affinity"
            deployment.spec.template.spec.affinity.node_affinity = node_affinity
    elif action == "delete_node_affinity":
        current_app.logger.debug("正在运行{}操作".format(action))
        affinity = deployment.spec.template.spec.affinity
        if not affinity:
            return simple_error_handle("还没设置亲和性")
        node_affinity = affinity.node_affinity
        if not node_affinity:
            return simple_error_handle("还没设置互斥调度")
        deployment.spec.template.spec.affinity.node_affinity = None
        print("删除节点亲和性后")
        print(deployment.spec.template.spec.affinity)
    elif action == "delete_toleration":
        print("正在运行{}操作".format(action))
        if not toleration:
            msg = "{}需要提供toleration".format(action)
            return jsonify({"error": msg})
        t = deployment.spec.template.spec.tolerations
        # print("deployment {} toleration删除前:{}".format(deploy_name,t),type(t))
        if t == None:
            return jsonify(
                {"error": "deployment {} toleration为空".format(deploy_name)})
        print(type(toleration), toleration)
        try:
            i = t.index(toleration)
        except ValueError as e:
            print(e)
            return jsonify({"error": "没有此toleration"})
        deployment.spec.template.spec.tolerations.pop(i)
        # return jsonify({"info":i})

    elif action == "add_pod_affinity":
        pass
    elif action == "delete_pod_affinity":
        pass
    elif action == "update_replicas":
        deployment.spec.replicas = replicas
    elif action == "update_image":
        if not image:
            msg = "{}需要提供image".format(action)
            return jsonify({"error": msg})
        deployment.spec.template.spec.containers[0].image = image
    elif action == "add_labels":
        pass
    elif action == "delete_labels":
        pass
    else:
        return simple_error_handle("暂时不支持{}操作".format(action))
    try:
        print(action)
        if action == "delete_pod_anti_affinity" or action == "delete_node_affinity" or action == "update_affinity":
            print("正在执行替换")
            ResponseNotReady = client.AppsV1Api(
            ).replace_namespaced_deployment(name=deploy_name,
                                            namespace=namespace,
                                            body=deployment)
        else:
            ResponseNotReady = client.AppsV1Api().patch_namespaced_deployment(
                name=deploy_name, namespace=namespace, body=deployment)
    except ApiException as e:
        print(e)
        body = json.loads(e.body)
        msg = {
            "status": e.status,
            "reason": e.reason,
            "message": body['message']
        }
        return error_with_status(error="创建失败", msg=msg, status=1000)

    return jsonify({"ok": "deployment 执行{}成功".format(action)})
Example #12
def apply_rekcurd_to_kubernetes(project_id: int,
                                application_id: str,
                                service_level: str,
                                version: str,
                                insecure_host: str,
                                insecure_port: int,
                                replicas_default: int,
                                replicas_minimum: int,
                                replicas_maximum: int,
                                autoscale_cpu_threshold: str,
                                policy_max_surge: int,
                                policy_max_unavailable: int,
                                policy_wait_seconds: int,
                                container_image: str,
                                resource_request_cpu: str,
                                resource_request_memory: str,
                                resource_limit_cpu: str,
                                resource_limit_memory: str,
                                commit_message: str,
                                service_model_assignment: int,
                                service_git_url: str = "",
                                service_git_branch: str = "",
                                service_boot_script: str = "",
                                debug_mode: bool = False,
                                service_id: str = None,
                                is_creation_mode: bool = False,
                                display_name: str = None,
                                description: str = None,
                                kubernetes_models=None,
                                **kwargs) -> str:
    """
    kubectl apply
    :param project_id:
    :param application_id:
    :param service_level:
    :param version:
    :param insecure_host:
    :param insecure_port:
    :param replicas_default:
    :param replicas_minimum:
    :param replicas_maximum:
    :param autoscale_cpu_threshold:
    :param policy_max_surge:
    :param policy_max_unavailable:
    :param policy_wait_seconds:
    :param container_image:
    :param resource_request_cpu:
    :param resource_request_memory:
    :param resource_limit_cpu:
    :param resource_limit_memory:
    :param commit_message:
    :param service_model_assignment:
    :param service_git_url:
    :param service_git_branch:
    :param service_boot_script:
    :param debug_mode:
    :param service_id:
    :param is_creation_mode:
    :param display_name:
    :param description:
    :param kubernetes_models:
    :param kwargs:
    :return:
    """
    __num_retry = 5
    progress_deadline_seconds = \
        int(__num_retry*policy_wait_seconds*replicas_maximum/(policy_max_surge+policy_max_unavailable))
    if service_id is None:
        is_creation_mode = True
        service_id = uuid.uuid4().hex
    if kubernetes_models is None:
        kubernetes_models = db.session.query(KubernetesModel).filter(
            KubernetesModel.project_id == project_id).all()
    data_server_model: DataServerModel = db.session.query(
        DataServerModel).filter(
            DataServerModel.project_id == project_id).first_or_404()
    application_model: ApplicationModel = db.session.query(
        ApplicationModel).filter(
            ApplicationModel.application_id == application_id).first_or_404()
    application_name = application_model.application_name
    model_model: ModelModel = db.session.query(ModelModel).filter(
        ModelModel.model_id == service_model_assignment).first_or_404()

    from kubernetes import client
    try:
        git_secret = load_secret(project_id, application_id, service_level,
                                 GIT_SECRET_PREFIX)
    except:
        git_secret = None
    volume_mounts = dict()
    volumes = dict()
    if git_secret:
        connector_name = "sec-git-name"
        secret_name = "sec-{}-{}".format(GIT_SECRET_PREFIX, application_id)
        volume_mounts = {
            'volume_mounts': [
                client.V1VolumeMount(name=connector_name,
                                     mount_path=GIT_SSH_MOUNT_DIR,
                                     read_only=True)
            ]
        }
        volumes = {
            'volumes': [
                client.V1Volume(name=connector_name,
                                secret=client.V1SecretVolumeSource(
                                    secret_name=secret_name,
                                    items=[
                                        client.V1KeyToPath(key=GIT_ID_RSA,
                                                           path=GIT_ID_RSA,
                                                           mode=GIT_SSH_MODE),
                                        client.V1KeyToPath(key=GIT_CONFIG,
                                                           path=GIT_CONFIG,
                                                           mode=GIT_SSH_MODE)
                                    ]))
            ]
        }

    for kubernetes_model in kubernetes_models:
        full_config_path = get_full_config_path(kubernetes_model.config_path)
        from kubernetes import config
        config.load_kube_config(full_config_path)

        pod_env = [
            client.V1EnvVar(name="REKCURD_SERVICE_UPDATE_FLAG",
                            value=commit_message),
            client.V1EnvVar(name="REKCURD_KUBERNETES_MODE", value="True"),
            client.V1EnvVar(name="REKCURD_DEBUG_MODE", value=str(debug_mode)),
            client.V1EnvVar(name="REKCURD_APPLICATION_NAME",
                            value=application_name),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_HOST",
                            value=insecure_host),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_PORT",
                            value=str(insecure_port)),
            client.V1EnvVar(name="REKCURD_SERVICE_ID", value=service_id),
            client.V1EnvVar(name="REKCURD_SERVICE_LEVEL", value=service_level),
            client.V1EnvVar(name="REKCURD_GRPC_PROTO_VERSION", value=version),
            client.V1EnvVar(name="REKCURD_MODEL_MODE",
                            value=data_server_model.data_server_mode.value),
            client.V1EnvVar(name="REKCURD_MODEL_FILE_PATH",
                            value=model_model.filepath),
            client.V1EnvVar(name="REKCURD_CEPH_ACCESS_KEY",
                            value=str(data_server_model.ceph_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_SECRET_KEY",
                            value=str(data_server_model.ceph_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_HOST",
                            value=str(data_server_model.ceph_host or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_PORT",
                            value=str(data_server_model.ceph_port or "1234")),
            client.V1EnvVar(name="REKCURD_CEPH_IS_SECURE",
                            value=str(data_server_model.ceph_is_secure
                                      or "False")),
            client.V1EnvVar(name="REKCURD_CEPH_BUCKET_NAME",
                            value=str(data_server_model.ceph_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_ACCESS_KEY",
                            value=str(data_server_model.aws_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_SECRET_KEY",
                            value=str(data_server_model.aws_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_BUCKET_NAME",
                            value=str(data_server_model.aws_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_ACCESS_KEY",
                            value=str(data_server_model.gcs_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_SECRET_KEY",
                            value=str(data_server_model.gcs_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_BUCKET_NAME",
                            value=str(data_server_model.gcs_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_URL",
                            value=service_git_url),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_BRANCH",
                            value=service_git_branch),
            client.V1EnvVar(name="REKCURD_SERVICE_BOOT_SHELL",
                            value=service_boot_script),
        ]
        """Namespace registration."""
        core_v1_api = client.CoreV1Api()
        try:
            core_v1_api.read_namespace(name=service_level)
        except:
            api.logger.info("\"{}\" namespace created".format(service_level))
            v1_namespace = client.V1Namespace(
                api_version="v1",
                kind="Namespace",
                metadata=client.V1ObjectMeta(name=service_level))
            core_v1_api.create_namespace(body=v1_namespace)
        """Create/patch Deployment."""
        v1_deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name="deploy-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1DeploymentSpec(
                min_ready_seconds=policy_wait_seconds,
                progress_deadline_seconds=progress_deadline_seconds,
                replicas=replicas_default,
                revision_history_limit=3,
                selector=client.V1LabelSelector(
                    match_labels={"sel": service_id}),
                strategy=client.V1DeploymentStrategy(
                    type="RollingUpdate",
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=policy_max_surge,
                        max_unavailable=policy_max_unavailable)),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={
                            "rekcurd-worker": "True",
                            "id": application_id,
                            "name": application_name,
                            "sel": service_id
                        }),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(
                            pod_anti_affinity=client.V1PodAntiAffinity(
                                preferred_during_scheduling_ignored_during_execution=[
                                    client.V1WeightedPodAffinityTerm(
                                        pod_affinity_term=client.V1PodAffinityTerm(
                                            label_selector=client.V1LabelSelector(
                                                match_expressions=[
                                                    client.V1LabelSelectorRequirement(
                                                        key="id",
                                                        operator="In",
                                                        values=[service_id])
                                                ]),
                                            topology_key="kubernetes.io/hostname"),
                                        weight=100)
                                ])),
                        containers=[
                            client.V1Container(
                                env=pod_env,
                                image=container_image,
                                image_pull_policy="Always",
                                name=service_id,
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=insecure_port)
                                ],
                                resources=client.V1ResourceRequirements(
                                    limits={
                                        "cpu": str(resource_limit_cpu),
                                        "memory": resource_limit_memory
                                    },
                                    requests={
                                        "cpu": str(resource_request_cpu),
                                        "memory": resource_request_memory
                                    }),
                                security_context=client.V1SecurityContext(
                                    privileged=True),
                                **volume_mounts)
                        ],
                        node_selector={"host": service_level},
                        **volumes))))
        apps_v1_api = client.AppsV1Api()
        if is_creation_mode:
            api.logger.info("Deployment created.")
            apps_v1_api.create_namespaced_deployment(body=v1_deployment,
                                                     namespace=service_level)
        else:
            api.logger.info("Deployment patched.")
            apps_v1_api.patch_namespaced_deployment(
                body=v1_deployment,
                name="deploy-{0}".format(service_id),
                namespace=service_level)
        """Create/patch Service."""
        v1_service = client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(name="svc-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1ServiceSpec(
                ports=[
                    client.V1ServicePort(name="grpc-backend",
                                         port=insecure_port,
                                         protocol="TCP",
                                         target_port=insecure_port)
                ],
                selector={"sel": service_id}))
        core_v1_api = client.CoreV1Api()
        if is_creation_mode:
            api.logger.info("Service created.")
            core_v1_api.create_namespaced_service(namespace=service_level,
                                                  body=v1_service)
        else:
            api.logger.info("Service patched.")
            core_v1_api.patch_namespaced_service(
                namespace=service_level,
                name="svc-{0}".format(service_id),
                body=v1_service)
        """Create/patch Autoscaler."""
        v1_horizontal_pod_autoscaler = client.V1HorizontalPodAutoscaler(
            api_version="autoscaling/v1",
            kind="HorizontalPodAutoscaler",
            metadata=client.V1ObjectMeta(name="hpa-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1HorizontalPodAutoscalerSpec(
                max_replicas=replicas_maximum,
                min_replicas=replicas_minimum,
                scale_target_ref=client.V1CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name="deploy-{0}".format(service_id)),
                target_cpu_utilization_percentage=autoscale_cpu_threshold))
        autoscaling_v1_api = client.AutoscalingV1Api()
        if is_creation_mode:
            api.logger.info("Autoscaler created.")
            autoscaling_v1_api.create_namespaced_horizontal_pod_autoscaler(
                namespace=service_level, body=v1_horizontal_pod_autoscaler)
        else:
            api.logger.info("Autoscaler patched.")
            autoscaling_v1_api.patch_namespaced_horizontal_pod_autoscaler(
                namespace=service_level,
                name="hpa-{0}".format(service_id),
                body=v1_horizontal_pod_autoscaler)
        """Create Istio ingress if this is the first application."""
        custom_object_api = client.CustomObjectsApi()
        try:
            custom_object_api.get_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                name="ing-vs-{0}".format(application_id),
            )
        except client.rest.ApiException:
            ingress_virtual_service_body = {
                "apiVersion": "networking.istio.io/v1alpha3",
                "kind": "VirtualService",
                "metadata": {
                    "labels": {
                        "rekcurd-worker": "True",
                        "id": application_id,
                        "name": application_name
                    },
                    "name": "ing-vs-{0}".format(application_id),
                    "namespace": service_level
                },
                "spec": {
                    "hosts": ["*"],
                    "gateways": ["rekcurd-ingress-gateway"],
                    "http": [{
                        "match": [{
                            "headers": {
                                "x-rekcurd-application-name": {
                                    "exact": application_name
                                },
                                "x-rekcurd-sevice-level": {
                                    "exact": service_level
                                },
                                "x-rekcurd-grpc-version": {
                                    "exact": version
                                },
                            }
                        }],
                        "route": [{
                            "destination": {
                                "port": {
                                    "number": insecure_port
                                },
                                "host": "svc-{0}".format(service_id)
                            },
                            "weight": 100
                        }],
                        "retries": {
                            "attempts": 25,
                            "perTryTimeout": "1s"
                        }
                    }]
                }
            }
            api.logger.info("Istio created.")
            custom_object_api.create_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                body=ingress_virtual_service_body)
        """Add service model."""
        if is_creation_mode:
            if display_name is None:
                display_name = "{0}-{1}".format(service_level, service_id)
            service_model = ServiceModel(service_id=service_id,
                                         application_id=application_id,
                                         display_name=display_name,
                                         description=description,
                                         service_level=service_level,
                                         version=version,
                                         model_id=service_model_assignment,
                                         insecure_host=insecure_host,
                                         insecure_port=insecure_port)
            db.session.add(service_model)
            db.session.flush()
    """Finish."""
    return service_id
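
As a quick cross-check of the objects created above, each create/patch call has a matching read method on the same API class. A minimal sketch (not part of the original code), assuming an already-configured kubernetes client and the same service_level / service_id values:

from kubernetes import client

def check_service_objects(service_level, service_id):
    # Each reader mirrors one of the create/patch calls made above and
    # raises client.rest.ApiException if the object is missing.
    client.AppsV1Api().read_namespaced_deployment(
        name="deploy-{0}".format(service_id), namespace=service_level)
    client.CoreV1Api().read_namespaced_service(
        name="svc-{0}".format(service_id), namespace=service_level)
    client.AutoscalingV1Api().read_namespaced_horizontal_pod_autoscaler(
        name="hpa-{0}".format(service_id), namespace=service_level)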
Example #13
0
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']

    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3

    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'

    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'

    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(replicas=replicas,
                                                     service_name=name)

    statefulset.spec.template = client.V1PodTemplateSpec()
    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))

    statefulset.spec.template.spec = client.V1PodSpec()
    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=[
                client.V1PodAffinityTerm(
                    topology_key='kubernetes.io/hostname',
                    label_selector=client.V1LabelSelector(match_expressions=[
                        client.V1LabelSelectorRequirement(
                            key='cluster', operator='In', values=[name])
                    ]))
            ]))
    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
    mongodb_container = client.V1Container(
        name='mongod',
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem'
        ],
        image='mongo:3.4.1',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'
        ],  # flake8: noqa
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(name='mongo-ca',
                                secret=client.V1SecretVolumeSource(
                                    secret_name='{}-ca'.format(name),
                                    items=[
                                        client.V1KeyToPath(key='ca.pem',
                                                           path='ca.pem'),
                                        client.V1KeyToPath(key='ca-key.pem',
                                                           path='ca-key.pem')
                                    ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    # For now use annotation format for init_container to support K8s >= 1.5
    statefulset.spec.template.metadata.annotations = {
        'pod.beta.kubernetes.io/init-containers':
        '[{"name": "cert-init","image": "quay.io/kubestack/mongodb-init:latest","volumeMounts": [{"readOnly": true,"mountPath": "/etc/ssl/mongod-ca","name": "mongo-ca"}, {"mountPath": "/etc/ssl/mongod","name": "mongo-tls"}],"env": [{"name": "METADATA_NAME","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.name"}}}, {"name": "NAMESPACE","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.namespace"}}}],"command": ["ansible-playbook","member-cert.yml"],"imagePullPolicy": "Always"}]'
    }  # flake8: noqa

    # tls_init_ca_volumemount = client.V1VolumeMount(
    #     name='mongo-ca',
    #     read_only=True,
    #     mount_path='/etc/ssl/mongod-ca')
    # tls_init_container = client.V1Container(
    #     name="cert-init",
    #     image="quay.io/kubestack/mongodb-init:latest",
    #     volume_mounts=[tls_init_ca_volumemount, mongodb_tls_volumemount],
    #     env=[
    #         client.V1EnvVar(
    #             name='METADATA_NAME',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.name'))),
    #         client.V1EnvVar(
    #             name='NAMESPACE',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.namespace')))],
    #     command=["ansible-playbook", "member-cert.yml"])
    #
    # statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
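
A minimal usage sketch for the function above (not from the source): build the object from a cluster definition and submit it through the apps/v1beta1 API, which matches the V1beta1StatefulSet model used here and only exists in correspondingly old client releases. The cluster_object below is an illustrative stand-in.

from kubernetes import client, config

config.load_kube_config()
# Illustrative cluster definition; only the keys read above are required,
# the rest fall back to the KeyError defaults.
cluster_object = {
    "metadata": {"name": "mongo", "namespace": "default"},
    "spec": {"mongodb": {"replicas": 3}},
}
statefulset = get_statefulset_object(cluster_object)
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace=statefulset.metadata.namespace, body=statefulset)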
Example #14
0
def submit_job(args, command=None):
    container_image = args.container
    container_name = args.name

    body = client.V1Job(api_version="batch/v1", kind="Job", metadata=client.V1ObjectMeta(name=container_name))
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()

    labels = {
        'hugin-job': "1",
        'hugin-job-name': f'{container_name}'
    }
    template.template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels)
    )

    tolerations = []
    env = []
    if args.environment:
        for env_spec in args.environment:
            env_name, env_value = env_spec.split("=", 1)
            env.append(client.V1EnvVar(name=env_name, value=env_value))

    container_args = dict(
        name=f"container-{container_name}",
        image=container_image,
        env=env,
    )

    if args.gpu:
        tolerations.append(client.V1Toleration(
            key='nvidia.com/gpu', operator='Exists', effect='NoSchedule'))
        container_args['resources'] = client.V1ResourceRequirements(
            limits={"nvidia.com/gpu": 1})
    if command or args.command:
        container_args['command'] = command if command else args.command

    container = client.V1Container(**container_args)
    pull_secrets = []
    if args.pull_secret is not None:
        pull_secrets.append(client.V1LocalObjectReference(name=args.pull_secret))
    pod_args = dict(containers=[container],
                    restart_policy='Never',
                    image_pull_secrets=pull_secrets)


    if tolerations:
        pod_args['tolerations'] = tolerations

    if args.node_selector is not None:
        parts = args.node_selector.split("=", 1)
        if len(parts) == 2:
            affinity = client.V1Affinity(
                node_affinity=client.V1NodeAffinity(
                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                        node_selector_terms=[client.V1NodeSelectorTerm(
                            match_expressions=[client.V1NodeSelectorRequirement(
                                key=parts[0], operator='In', values=[parts[1]])]
                        )]
                    )
                )
            )
            pod_args['affinity'] = affinity

    template.template.spec = client.V1PodSpec(**pod_args)
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=1800, template=template.template)
    try:
        # batch_v1 is assumed to be a module-level client.BatchV1Api() instance.
        batch_v1.create_namespaced_job("default", body, pretty=True)
    except client.exceptions.ApiException as e:
        logging.critical(f"Failed to start job: {e.reason}")
Example #15
0
         "chart": "mlbench-2.0.0",
         "component": "worker",
         "release": os.environ.get("MLBENCH_KUBE_RELEASENAME"),
         "heritage": "Helm",
         "set": "",
     }),
 spec=client.V1PodSpec(
     service_account_name="mlbench-worker-sa",
     affinity=client.V1Affinity(
         pod_anti_affinity=client.V1PodAntiAffinity(
             required_during_scheduling_ignored_during_execution=[
                 client.V1PodAffinityTerm(
                     label_selector=client.V1LabelSelector(
                         match_expressions=[
                             client.V1LabelSelectorRequirement(
                                 key="component",
                                 operator="In",
                                 values=["worker"],
                             )
                         ]),
                     topology_key="kubernetes.io/hostname",
                 )
             ])),
     containers=[
         client.V1Container(
             name="",
             image="",
             image_pull_policy="Always",
             stdin=True,
             tty=True,
             ports=[
Example #16
0
    def spawn(self, taskdef: TaskDefinition) -> KubernetesTask:
        try:
            self.emit_sync('prepare', taskdef=taskdef)

            volumes, mounts = create_volumes(taskdef.volumes)

            # container definition
            container = client.V1Container(
                name=taskdef.id,
                image=taskdef.image,
                env=self.create_env(taskdef),
                ports=self.create_ports(taskdef),
                image_pull_policy='Always',  # taskdef field??
                resources=client.V1ResourceRequirements(
                    requests={
                        'cpu': str(taskdef.cpu or '0'),
                        'memory': str(taskdef.memory or '0'),
                    },
                    limits={
                        'cpu': str(taskdef.cpu_limit or '0'),
                        'memory': str(taskdef.memory_limit or '0'),
                    },
                ),
                volume_mounts=mounts,
            )

            labels = {
                LABEL_TASK_ID: taskdef.id,
                LABEL_PARENT_ID: taskdef.parent,
                **taskdef.meta,
            }

            affinity = None

            if (taskdef.affinity is not None) and (taskdef.affinity != {}):
                affinity_label = {}
                if taskdef.affinity.get("label"):
                    label = taskdef.affinity["label"]
                    affinity_label[label["key"]] = label["value"]
                else:
                    affinity_label["cowait_default_affinity_key"] = \
                        "cowait_default_affinity_value"

                if taskdef.affinity["type"] == 'spread':
                    aff_def = client.V1PodAntiAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.V1LabelSelector(
                                        match_expressions=[
                                            client.V1LabelSelectorRequirement(
                                                key=list(affinity_label.keys())[0],
                                                operator="In",
                                                values=[list(affinity_label.values())[0]],
                                            )
                                        ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                elif taskdef.affinity["type"] == 'group':
                    aff_def = client.V1PodAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.V1LabelSelector(
                                        match_expressions=[
                                            client.V1LabelSelectorRequirement(
                                                key=list(affinity_label.keys())[0],
                                                operator="In",
                                                values=[list(affinity_label.values())[0]],
                                            )
                                        ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                else:
                    aff_def = None

                # V1PodAntiAffinity spreads tasks apart; V1PodAffinity groups
                # them, so each must land in its matching V1Affinity field.
                if isinstance(aff_def, client.V1PodAntiAffinity):
                    affinity = client.V1Affinity(pod_anti_affinity=aff_def)
                elif aff_def is not None:
                    affinity = client.V1Affinity(pod_affinity=aff_def)
                labels[list(affinity_label.keys())[0]] = list(
                    affinity_label.values())[0]

            pod = self.core.create_namespaced_pod(
                namespace=self.namespace,
                body=client.V1Pod(
                    metadata=client.V1ObjectMeta(
                        name=taskdef.id,
                        namespace=self.namespace,
                        labels=labels,
                    ),
                    spec=client.V1PodSpec(
                        hostname=taskdef.id,
                        restart_policy='Never',
                        image_pull_secrets=self.get_pull_secrets(),
                        volumes=volumes,
                        affinity=affinity,
                        containers=[container],
                        service_account_name=self.service_account,
                    ),
                ),
            )

            # wrap & return task
            # print('~~ created kubernetes pod', pod.metadata.name)
            task = KubernetesTask(self, taskdef, pod)
            self.emit_sync('spawn', task=task)
            return task

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
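
The affinity branches above accept a plain dict on taskdef.affinity. A sketch of the two supported shapes, inferred from the checks on affinity["type"] and affinity.get("label"); the key/value pair is illustrative:

# "spread": anti-affinity pushes tasks carrying the label apart.
spread_affinity = {
    "type": "spread",
    "label": {"key": "team", "value": "ml"},
}

# "group": plain affinity pulls labelled tasks onto the same node;
# omitting "label" falls back to cowait_default_affinity_key.
group_affinity = {"type": "group"}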
Example #17
0
    def k8s_deployment_generator(k8s_config: K8sConfiguration):
        # add containers
        containers = []
        k8s_containers = []
        # add actuator container
        k8s_container = client.V1Container(
            name="nodemanager-actuator",
            image=k8s_config.actuator_image,
            ports=[
                client.V1ContainerPort(container_port=k8s_config.actuator_port)
            ],
            volume_mounts=[
                client.V1VolumeMount(name="docker-sock", mount_path="/var/run")
            ],
            image_pull_policy=k8s_config.k8s_image_pull_policy)
        k8s_containers.append(k8s_container)

        # add CPU containers
        base_port = 8501
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "nodemanager-rest-cpu-" + str(i + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image,
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model=model.name,
                          version=model.version,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.CPU,
                          quota=None))
            base_port += 1

        # add GPU containers
        for gpu in range(k8s_config.available_gpus):
            container_name = "nodemanager-rest-gpu-" + str(gpu + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image + "-gpu",
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ],
                env=[
                    client.V1EnvVar(name="NVIDIA_VISIBLE_DEVICES",
                                    value=str(gpu + 1))
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model="all",
                          version=1,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.GPU,
                          quota=None))
            base_port += 1

        # add volumes
        volumes = [
            client.V1Volume(
                name="docker-sock",
                host_path=client.V1HostPathVolumeSource(path="/var/run")),
            client.V1Volume(name="shared-models",
                            empty_dir=client.V1EmptyDirVolumeSource())
        ]

        # set pod anti-affinity so nodemanager replicas spread across nodes
        affinity = client.V1Affinity(pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=[
                client.V1PodAffinityTerm(
                    label_selector=client.V1LabelSelector(
                        match_labels={"run": "nodemanager"}),
                    topology_key="kubernetes.io/hostname")
            ]))

        # init containers
        init_containers = []
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "tfs-init-" + str(i + 1)
            init_containers.append(
                client.V1Container(
                    name=container_name,
                    image=k8s_config.tfs_init_image,
                    args=[
                        "-f", "/home/models/", "-d",
                        "/home/models/" + model.name, "-c",
                        k8s_config.tfs_config_endpoint, "-m",
                        model.tfs_model_url
                    ],
                    image_pull_policy=k8s_config.k8s_image_pull_policy,
                    volume_mounts=[
                        client.V1VolumeMount(
                            name="shared-models",
                            mount_path=k8s_config.tfs_models_path)
                    ]))

        # add pod spec
        pod_spec = client.V1PodSpec(containers=k8s_containers,
                                    volumes=volumes,
                                    affinity=affinity,
                                    init_containers=init_containers,
                                    host_network=k8s_config.k8s_host_network,
                                    dns_policy="Default")
        # add pod template spec
        pod_template_spec = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"run": "nodemanager"}),
            spec=pod_spec)
        # add deployment spec
        deployment_spec = client.V1DeploymentSpec(
            selector=client.V1LabelSelector(
                match_labels={"run": "nodemanager"}),
            template=pod_template_spec,
            replicas=k8s_config.initial_replicas)
        # build deployment
        deployment = client.V1Deployment(api_version="apps/v1",
                                         kind="Deployment",
                                         metadata=client.V1ObjectMeta(
                                             name="nodemanager-deploy",
                                             labels={"run": "nodemanager"}),
                                         spec=deployment_spec)

        return containers, deployment
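
A hedged usage sketch for the generator above: the returned Deployment still has to be submitted, e.g. through AppsV1Api. The "default" namespace and the plain function call are assumptions; the original keeps submission outside this function, and the generator is declared without self.

from kubernetes import client, config

config.load_kube_config()
containers, deployment = k8s_deployment_generator(k8s_config)
client.AppsV1Api().create_namespaced_deployment(
    namespace="default", body=deployment)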
Example #18
0
def create_run_pod(k8s_settings, run_context):
    run_id = run_context.id
    run_name = run_context.run.to_json()["name"]

    labels = {
        "run-name": run_name,
        "run": run_id,
    }
    env = get_run_pod_env_vars(run_context)
    node_topology_key = "kubernetes.io/hostname"
    # NOTE(taylor): preference to run on nodes with other runs
    pod_affinities = [
        k8s_client.V1WeightedPodAffinityTerm(
            weight=50,
            pod_affinity_term=k8s_client.V1PodAffinityTerm(
                label_selector=k8s_client.V1LabelSelector(
                    match_labels={"type": "run"}),
                topology_key=node_topology_key,
            ),
        ),
    ]
    volumes = []
    volume_mounts = []
    experiment_id = run_context.experiment
    if experiment_id:
        labels.update({"experiment": experiment_id})
        # NOTE(taylor): highest preference to run on nodes with runs in the same experiment
        pod_affinities.append(
            k8s_client.V1WeightedPodAffinityTerm(
                weight=100,
                pod_affinity_term=k8s_client.V1PodAffinityTerm(
                    label_selector=k8s_client.V1LabelSelector(
                        match_labels={
                            "type": "run",
                            "experiment": experiment_id,
                        }),
                    topology_key=node_topology_key,
                ),
            ))

    unacceptable_node_group_types = ["system"]
    requests = k8s_settings.resources.get("requests") or {}
    limits = k8s_settings.resources.get("limits") or {}
    # NOTE(taylor): Preventing GPU-less jobs from running on GPU nodes forces the cluster autoscaler to scale up
    # CPU nodes. This prevents a situation where the GPU nodes are not scaled down because they are occupied by
    # CPU workloads. The cluster autoscaler does not know that it should create CPU nodes when the GPUs are unused.
    # TODO(taylor): This could cause unexpected behavior if the cluster has no CPU nodes. Running CPU jobs on GPU
    # nodes could also be an opportunity for more efficient resource utilization, but is avoided for now because the
    # workloads cannot be migrated onto CPU nodes by the cluster autoscaler as mentioned above.
    # NOTE(taylor): Applying a NoSchedule taint to GPU nodes is another way to achieve this behavior, but does not work as
    # well out of the box with clusters that orchestrate doesn't provision. Applying a PreferNoSchedule
    # taint to GPU nodes does not resolve the workload migration issue when there are no CPU nodes.
    if all(
            float(group.get("nvidia.com/gpu", 0)) == 0
            for group in (requests, limits)):
        unacceptable_node_group_types.append("gpu")

    node_affinity = k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(node_selector_terms=[
            k8s_client.V1NodeSelectorTerm(match_expressions=[
                k8s_client.V1NodeSelectorRequirement(
                    key="orchestrate.sigopt.com/node-group-type",
                    operator="NotIn",
                    values=unacceptable_node_group_types,
                )
            ])
        ]))
    pod_affinity = k8s_client.V1PodAffinity(
        preferred_during_scheduling_ignored_during_execution=pod_affinities)

    pod = k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            owner_references=k8s_settings.owner_references,
            labels={
                "type": "run",
                **labels,
            },
            name=run_name,
        ),
        spec=k8s_client.V1PodSpec(
            affinity=k8s_client.V1Affinity(
                node_affinity=node_affinity,
                pod_affinity=pod_affinity,
            ),
            containers=[
                k8s_client.V1Container(
                    name="model-runner",
                    image=k8s_settings.image,
                    resources=k8s_client.V1ResourceRequirements(
                        **k8s_settings.resources),
                    image_pull_policy="Always",
                    command=[],
                    args=k8s_settings.args,
                    env=env,
                    volume_mounts=volume_mounts,
                    tty=True,
                ),
            ],
            volumes=volumes,
            restart_policy="Never",
        ),
    )
    k8s_settings.api.create_namespaced_pod(k8s_settings.namespace, pod)
    return pod
Example #19
0
File: base.py Project: AlonMaor14/mlrun
 def _generate_affinity(self):
     return k8s_client.V1Affinity(
         node_affinity=k8s_client.V1NodeAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PreferredSchedulingTerm(
                     weight=1,
                     preference=k8s_client.V1NodeSelectorTerm(
                         match_expressions=[
                             k8s_client.V1NodeSelectorRequirement(
                                 key="some_node_label",
                                 operator="In",
                                 values=[
                                     "possible-label-value-1",
                                     "possible-label-value-2",
                                 ],
                             )
                         ]),
                 )
             ],
             required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(node_selector_terms=[
                 k8s_client.V1NodeSelectorTerm(match_expressions=[
                     k8s_client.V1NodeSelectorRequirement(
                         key="some_node_label",
                         operator="In",
                         values=[
                             "required-label-value-1",
                             "required-label-value-2",
                         ],
                     )
                 ]),
             ]),
         ),
         pod_affinity=k8s_client.V1PodAffinity(
             required_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PodAffinityTerm(
                     label_selector=k8s_client.V1LabelSelector(
                         match_labels={
                             "some-pod-label-key": "some-pod-label-value"
                         }),
                     namespaces=["namespace-a", "namespace-b"],
                     topology_key="key-1",
                 )
             ]),
         pod_anti_affinity=k8s_client.V1PodAntiAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1WeightedPodAffinityTerm(
                     weight=1,
                     pod_affinity_term=k8s_client.V1PodAffinityTerm(
                         label_selector=k8s_client.V1LabelSelector(
                             match_expressions=[
                                 k8s_client.V1LabelSelectorRequirement(
                                     key="some_pod_label",
                                     operator="NotIn",
                                     values=[
                                         "forbidden-label-value-1",
                                         "forbidden-label-value-2",
                                     ],
                                 )
                             ]),
                         namespaces=["namespace-c"],
                         topology_key="key-2",
                     ),
                 )
             ]),
     )
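
To inspect what an affinity object like the one above turns into on the wire, the client's own serializer can be applied. A small self-contained sketch (not from the source):

from kubernetes import client as k8s_client

aff = k8s_client.V1Affinity(
    node_affinity=k8s_client.V1NodeAffinity(
        preferred_during_scheduling_ignored_during_execution=[
            k8s_client.V1PreferredSchedulingTerm(
                weight=1,
                preference=k8s_client.V1NodeSelectorTerm(match_expressions=[
                    k8s_client.V1NodeSelectorRequirement(
                        key="some_node_label", operator="In", values=["v1"])
                ]))
        ]))
# sanitize_for_serialization renders the snake_case model as the camelCase
# JSON structure the API server receives.
print(k8s_client.ApiClient().sanitize_for_serialization(aff))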
Example #20
0
    def createStatefulSet(cls, cluster_object: V1MongoClusterConfiguration) -> client.V1beta1StatefulSet:
        """
        Creates the stateful set configuration for the given cluster.
        :param cluster_object: The cluster object from the YAML file.
        :return: The stateful set object.
        """

        # Parse cluster data object.
        name = cluster_object.metadata.name
        namespace = cluster_object.metadata.namespace
        replicas = cluster_object.spec.mongodb.replicas
        storage_mount_path = cluster_object.spec.mongodb.host_path or cls.DEFAULT_STORAGE_MOUNT_PATH
        host_path = cluster_object.spec.mongodb.host_path
        cpu_limit = cluster_object.spec.mongodb.cpu_limit or cls.DEFAULT_CPU_LIMIT
        memory_limit = cluster_object.spec.mongodb.memory_limit or cls.DEFAULT_MEMORY_LIMIT
        run_as_user = cluster_object.spec.mongodb.run_as_user or cls.DEFAULT_RUN_AS_USER
        service_account = cluster_object.spec.mongodb.service_account or cls.DEFAULT_SERVICE_ACCOUNT
        wired_tiger_cache_size = cluster_object.spec.mongodb.wired_tiger_cache_size or cls.DEFAULT_CACHE_SIZE
        secret_name = cls.ADMIN_SECRET_NAME_FORMAT.format(name)

        # create container
        mongo_container = client.V1Container(
            name=name,
            env=[client.V1EnvVar(
                name="POD_IP",
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version="v1",
                        field_path="status.podIP"
                    )
                )
            ),
            client.V1EnvVar(
                name="MONGODB_PASSWORD",
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        key="database-password",
                        name=secret_name
                    )
                )
            ),
            client.V1EnvVar(
                name="MONGODB_USER",
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        key="database-user",
                        name=secret_name
                    )
                )
            ),
            client.V1EnvVar(
                name="MONGODB_DATABASE",
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        key="database-name",
                        name=secret_name
                    )
                )
            ),
            client.V1EnvVar(
                name="MONGODB_ADMIN_PASSWORD",
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        key="database-admin-password",
                        name=secret_name
                    )
                )
            ),
            client.V1EnvVar(
              name="WIREDTIGER_CACHE_SIZE",
              value=wired_tiger_cache_size
            ),
            client.V1EnvVar(
                name="MONGODB_REPLICA_NAME",
                value=name
            ),
            client.V1EnvVar(
                name="MONGODB_SERVICE_NAME",
                value="svc-" + name + "-internal"
            ),
            client.V1EnvVar(
                name="MONGODB_KEYFILE_VALUE",
                value="supersecretkeyfile123"
            )],
            liveness_probe=client.V1Probe(failure_threshold=3,
                                          initial_delay_seconds=30,
                                          period_seconds=30,
                                          success_threshold=1,
                                          tcp_socket=client.V1TCPSocketAction(port=cls.MONGO_PORT),
                                          timeout_seconds=1
            ),
            command=cls.MONGO_COMMAND.split(),
            image=cls.MONGO_IMAGE,
            image_pull_policy="Always",
            ports=[client.V1ContainerPort(
                name="mongodb",
                container_port=cls.MONGO_PORT,
                protocol="TCP"
            )],
            readiness_probe=client.V1Probe(_exec=client.V1ExecAction(command=["/bin/sh", "-i", "-c", "mongo 127.0.0.1:27017/$MONGODB_DATABASE -u $MONGODB_USER -p $MONGODB_PASSWORD --eval=\"quit()\""]),
                                           failure_threshold=3,
                                           initial_delay_seconds=10,
                                           period_seconds=10,
                                           success_threshold=1,
                                           timeout_seconds=1
                                           ),
            security_context=client.V1SecurityContext(
                run_as_user=int(run_as_user),
                se_linux_options=client.V1SELinuxOptions(
                    level="s0",
                    type="spc_t"
                )
            ),
            termination_message_path="/dev/termination-log",
            volume_mounts=[client.V1VolumeMount(
                name="mongo-data",
                read_only=False,
                mount_path=storage_mount_path
            )],
            resources=client.V1ResourceRequirements(
                limits={"cpu": cpu_limit, "memory": memory_limit},
                requests={"cpu": cpu_limit, "memory": memory_limit}
            )
        )

        # Create affinity rules.
        affinity = client.V1Affinity(
            pod_anti_affinity=client.V1PodAntiAffinity(
                required_during_scheduling_ignored_during_execution=[
                    client.V1PodAffinityTerm(label_selector=client.V1LabelSelector(
                        match_expressions=[client.V1LabelSelectorRequirement(
                            key="app",
                            operator="In",
                            values=[name]
                        )]
                    ),
                     topology_key="kubernetes.io/hostname")
                ]
            )
        )

        volumes = [client.V1Volume(
            name="mongo-data",
            host_path=client.V1HostPathVolumeSource(path=host_path)
        )]

        # Create stateful set.
        return client.V1beta1StatefulSet(
            metadata=client.V1ObjectMeta(
                annotations={"service.alpha.kubernetes.io/tolerate-unready-endpoints": "true"},
                name=name,
                namespace=namespace,
                labels=cls.createDefaultLabels(name)),
            spec=client.V1beta1StatefulSetSpec(
                replicas=replicas,
                service_name="svc-" + name + "-internal",
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels=cls.createDefaultLabels(name)),
                    spec=client.V1PodSpec(affinity=affinity,
                                          containers=[mongo_container],
                                          node_selector={"compute": "mongodb"},
                                          service_account=service_account,
                                          # restart_policy="Never",
                                          volumes=volumes
                    )
                ),
            ),
        )
Example #21
0
 def export_deployment(self):
     # Configure the Pod template container
     volume_mounts = []
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/opt/logs', name='logs'))
     if self.dm_name == 'launch':
         volume_mounts.append(
             client.V1VolumeMount(mount_path='/opt/%s/conf' % self.dm_name,
                                  name=self.dm_name))
     container = client.V1Container(
         name=self.dm_name,
         image=self.image,
         ports=[
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ],
         image_pull_policy='Always',
         env=[
             client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
             client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8')
         ],
         resources=client.V1ResourceRequirements(limits=self.re_limits,
                                                 requests=self.re_requests),
         volume_mounts=volume_mounts,
         liveness_probe=client.V1Probe(
             initial_delay_seconds=30,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0]))),
         readiness_probe=client.V1Probe(
             initial_delay_seconds=30,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0]))))
     # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     volumes = []
     volume = client.V1Volume(
         name='logs',
         host_path=client.V1HostPathVolumeSource(path='/opt/logs'))
     volumes.append(volume)
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=[container],
             image_pull_secrets=[secrets],
             volumes=volumes,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='project',
                                     operator='In',
                                     values=['moji'])
                             ]),
                         weight=30),
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='deploy',
                                     operator='In',
                                     values=[self.dm_name])
                             ]),
                         weight=70)
                 ]))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
Example #22
0
def add(ip, game_id, params):
    game = get_game_by_id(game_id)
    game.validate_params(params)
    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [
        client.V1EnvVar(name="IP_ALLOC", value=ip_ext),
        client.V1EnvVar(name="IP_CREATOR", value=ip),
    ]
    containers = game.make_deployment(params)
    generic_ports = []
    # TODO(bluecmd): Hack to work around that not all
    # ports are routed to the VIP by default. This allows
    # outgoing connections from inside the pod on the VIP.
    for p in range(50000, 50016):
        generic_ports.append(client.V1ServicePort(
            name="internal-tcp-" + str(p), port=p, target_port=p, protocol="TCP"))
        generic_ports.append(client.V1ServicePort(
            name="internal-udp-" + str(p), port=p, target_port=p, protocol="UDP"))
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(
                limits={
                    "cpu": "4",
                    "memory": "32G"
                },
                requests={
                    "cpu": "2",
                    "memory": "16G"
                }
            )
    deployment=client.V1Deployment(
            spec=client.V1DeploymentSpec(
                replicas=1,
                strategy=client.AppsV1beta1DeploymentStrategy(
                    rolling_update=client.AppsV1beta1RollingUpdateDeployment(
                        max_surge=0,
                        max_unavailable=1
                    )
                ),
                selector=client.V1LabelSelector(
                    match_labels=labels,
                ),
                template=client.V1PodTemplateSpec(
                    spec=client.V1PodSpec(
                        containers=containers,
                        termination_grace_period_seconds=0,
                        # TODO(bluecmd): Hack to work around that not all
                        # ports are routed to the VIP by default. This allows
                        # outgoing connections from inside the pod on the VIP.
                        security_context=client.V1PodSecurityContext(
                            sysctls=[client.V1Sysctl(
                                name='net.ipv4.ip_local_port_range',
                                value='50000 50015')]),
                        affinity=client.V1Affinity(
                            node_affinity=client.V1NodeAffinity(
                                required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                                    node_selector_terms=[
                                        client.V1NodeSelectorTerm(
                                            match_expressions=[
                                                client.V1NodeSelectorRequirement(
                                                    key="kubernetes.io/role",
                                                    operator="NotIn",
                                                    values=["shared"]
                                                )
                                            ]
                                        )
                                    ]
                                )
                            )
                        )
                    )
                )
            )
    )
    service = client.V1Service(
        spec=client.V1ServiceSpec(
            type="ClusterIP",
            selector=labels,
            ports=game.make_service(params) + generic_ports,
            # "external_i_ps" is the python client's attribute name for the
            # Kubernetes "externalIPs" field.
            external_i_ps=[ip_ext],
        )
    )
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata
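    # The annotation below asks kube-router for direct server return, so
    # replies from the pod bypass the service proxy (an assumption based on
    # kube-router's documented "service.dsr" behavior).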
    service.metadata.annotations = {"kube-router.io/service.dsr": "tunnel"}

    client.AppsV1Api().create_namespaced_deployment(
        namespace=NAMESPACE,
        body=deployment,
    )

    service_resp = client.CoreV1Api().create_namespaced_service(
        namespace=NAMESPACE,
        body=service,
    )

    return {"uid": uid, "ip": ip}
Example #23
def add_servers():
    # count_servers() is assumed to return a truthy value once the caller
    # has reached the server limit mentioned below.
    if count_servers():
        return "You can't have more than 2 servers", 403
    game_id = request.json['game_id']
    params = request.json['parms']
    u_ip = request.remote_addr

    game = get_game_by_id(game_id)
    try:
        game.validate_params(params, game)
    except Exception as e:
        return str(e), 404

    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": u_ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [
        client.V1EnvVar(name="IP_ALLOC", value=ip_ext),
        client.V1EnvVar(name="IP_CREATOR", value=u_ip)
    ]
    containers = game.make_deployment(params)
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(
                limits={"cpu": "2", "memory": "1G"},
                requests={"cpu": "1", "memory": "1G"},
            )
    # As above, use the V1 strategy models rather than the removed
    # AppsV1beta1 classes.
    deployment = client.V1Deployment(spec=client.V1DeploymentSpec(
        replicas=1,
        strategy=client.V1DeploymentStrategy(
            rolling_update=client.V1RollingUpdateDeployment(
                max_surge=0, max_unavailable=1)),
        selector=client.V1LabelSelector(match_labels=labels),
        template=client.V1PodTemplateSpec(spec=client.V1PodSpec(
            containers=containers,
            termination_grace_period_seconds=0,
            affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                    node_selector_terms=[
                        client.V1NodeSelectorTerm(match_expressions=[
                            client.V1NodeSelectorRequirement(
                                key="kubernetes.io/role",
                                operator="NotIn",
                                values=["shared"])
                        ])
                    ])))))))
    service = client.V1Service(spec=client.V1ServiceSpec(
        type="ClusterIP",
        selector=labels,
        ports=game.make_service(params),
    ))
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata

    client.AppsV1Api().create_namespaced_deployment(
        namespace="gaas",
        body=deployment,
    )

    service_resp = client.CoreV1Api().create_namespaced_service(
        namespace="gaas",
        body=service,
    )
    return {"uid": uid, "ip": u_ip}
Example #24
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']

    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3

    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'

    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'

    try:
        hard_pod_anti_affinity = \
            cluster_object['spec']['mongodb']['hard_pod_anti_affinity']
    except KeyError:
        hard_pod_anti_affinity = True

    # Note: this example targets the apps/v1beta1 StatefulSet API, so it
    # needs an older kubernetes python client that still ships the
    # V1beta1 models.
    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(
        replicas=replicas,
        service_name=name,
        template=client.V1PodTemplateSpec())

    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))

    statefulset.spec.template.spec = client.V1PodSpec(containers=[])

    pod_affinity_term = client.V1PodAffinityTerm(
        topology_key='kubernetes.io/hostname',
        label_selector=client.V1LabelSelector(match_expressions=[
            client.V1LabelSelectorRequirement(
                key='cluster', operator='In', values=[name])
        ]))

    # Hard anti-affinity forbids scheduling two replicas on the same node;
    # soft anti-affinity merely prefers to spread them.
    if hard_pod_anti_affinity:
        pod_anti_affinity = client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=[
                pod_affinity_term
            ])
    else:
        pod_anti_affinity = client.V1PodAntiAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                client.V1WeightedPodAffinityTerm(
                    weight=100, pod_affinity_term=pod_affinity_term)
            ])

    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=pod_anti_affinity)

    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
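    # Kubernetes expands "$(POD_IP)" in the container command below from the
    # POD_IP environment variable defined on the same container, so mongod
    # binds to the pod's own IP in addition to localhost.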
    mongodb_container = client.V1Container(
        name='mongod',
        env=[
            client.V1EnvVar(
                name='POD_IP',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='status.podIP')))
        ],
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem', '--bind_ip', '127.0.0.1,$(POD_IP)'
        ],
        image='mongo:3.6.4',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'
        ],  # flake8: noqa
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(name='mongo-ca',
                                secret=client.V1SecretVolumeSource(
                                    secret_name='{}-ca'.format(name),
                                    items=[
                                        client.V1KeyToPath(key='ca.pem',
                                                           path='ca.pem'),
                                        client.V1KeyToPath(key='ca-key.pem',
                                                           path='ca-key.pem')
                                    ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    tls_init_ca_volumemount = client.V1VolumeMount(
        name='mongo-ca', read_only=True, mount_path='/etc/ssl/mongod-ca')
    tls_init_mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=False, mount_path='/etc/ssl/mongod')
    tls_init_container = client.V1Container(
        name="cert-init",
        image="quay.io/kubestack/mongodb-init:latest",
        volume_mounts=[
            tls_init_ca_volumemount, tls_init_mongodb_tls_volumemount
        ],
        env=[
            client.V1EnvVar(
                name='METADATA_NAME',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.name'))),
            client.V1EnvVar(
                name='NAMESPACE',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.namespace')))
        ],
        command=["ansible-playbook", "member-cert.yml"])

    statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
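
A minimal usage sketch for the function above; the cluster_object shape follows the fields the function reads, and the AppsV1beta1Api call is an assumption tied to the older apps/v1beta1 client this example targets:

from kubernetes import client, config

config.load_kube_config()

# Hypothetical input; only the keys get_statefulset_object actually reads.
cluster_object = {
    'metadata': {'name': 'example-cluster', 'namespace': 'default'},
    'spec': {'mongodb': {'replicas': 3, 'hard_pod_anti_affinity': True}},
}

statefulset = get_statefulset_object(cluster_object)

# create_namespaced_stateful_set is the corresponding AppsV1beta1Api call
# in client versions that still ship the apps/v1beta1 models.
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace=cluster_object['metadata']['namespace'],
    body=statefulset,
)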