Example #1
from kubernetes import client

def create_affinity(affinity):
    """Normalize an affinity spec (None, mode string, dict, or list of
    dicts) into a client.V1Affinity, using the module's
    parse_affinity_item() and create_affinity_term() helpers."""
    affinities = []
    if affinity is None:
        return None
    elif isinstance(affinity, str):
        # a bare string is shorthand for a single mode ('stack' or 'spread')
        affinities = [{'mode': affinity}]
    elif isinstance(affinity, dict):
        affinities = [affinity]
    elif isinstance(affinity, list):
        affinities = affinity
    else:
        raise ValueError('Illegal affinity definition')

    # fill with defaults
    affinities = [parse_affinity_item(item) for item in affinities]

    # sort into required/preferred, affinity/anti-affinity
    stack_req, stack_pref = [], []
    spread_req, spread_pref = [], []
    for item in affinities:
        term = create_affinity_term(item)
        if item['mode'] == 'stack':
            if item['required']:
                stack_req.append(term)
            else:
                stack_pref.append(term)
        elif item['mode'] == 'spread':
            if item['required']:
                spread_req.append(term)
            else:
                spread_pref.append(term)

    return client.V1Affinity(
        pod_affinity=client.V1PodAffinity(
            required_during_scheduling_ignored_during_execution=stack_req,
            preferred_during_scheduling_ignored_during_execution=stack_pref,
        ) if len(stack_req) + len(stack_pref) > 0 else None,
        pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=spread_req,
            preferred_during_scheduling_ignored_during_execution=spread_pref,
        ) if len(spread_req) + len(spread_pref) > 0 else None,
    )
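
A quick sketch of the accepted call shapes (values are hypothetical; assumes the module's parse_affinity_item/create_affinity_term helpers are in scope):

create_affinity('stack')                         # shorthand: one mode string
create_affinity({'mode': 'spread', 'required': True})
create_affinity([                                # full form: list of items
    {'mode': 'stack'},
    {'mode': 'spread', 'required': True},
])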
Example #2
def _generate_affinity(self):
    return k8s_client.V1Affinity(
        node_affinity=k8s_client.V1NodeAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                k8s_client.V1PreferredSchedulingTerm(
                    weight=1,
                    preference=k8s_client.V1NodeSelectorTerm(
                        match_expressions=[
                            k8s_client.V1NodeSelectorRequirement(
                                key="some_node_label",
                                operator="In",
                                values=[
                                    "possible-label-value-1",
                                    "possible-label-value-2",
                                ],
                            )
                        ]),
                )
            ],
            required_during_scheduling_ignored_during_execution=(
                k8s_client.V1NodeSelector(node_selector_terms=[
                    k8s_client.V1NodeSelectorTerm(match_expressions=[
                        k8s_client.V1NodeSelectorRequirement(
                            key="some_node_label",
                            operator="In",
                            values=[
                                "required-label-value-1",
                                "required-label-value-2",
                            ],
                        )
                    ]),
                ])),
        ),
        pod_affinity=k8s_client.V1PodAffinity(
            required_during_scheduling_ignored_during_execution=[
                k8s_client.V1PodAffinityTerm(
                    label_selector=k8s_client.V1LabelSelector(
                        match_labels={
                            "some-pod-label-key": "some-pod-label-value"
                        }),
                    namespaces=["namespace-a", "namespace-b"],
                    topology_key="key-1",
                )
            ]),
        pod_anti_affinity=k8s_client.V1PodAntiAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                k8s_client.V1WeightedPodAffinityTerm(
                    weight=1,
                    pod_affinity_term=k8s_client.V1PodAffinityTerm(
                        label_selector=k8s_client.V1LabelSelector(
                            match_expressions=[
                                k8s_client.V1LabelSelectorRequirement(
                                    key="some_pod_label",
                                    operator="NotIn",
                                    values=[
                                        "forbidden-label-value-1",
                                        "forbidden-label-value-2",
                                    ],
                                )
                            ]),
                        namespaces=["namespace-c"],
                        topology_key="key-2",
                    ),
                )
            ]),
    )
Example #3
    def k8s_deployment_generator(k8s_config: K8sConfiguration):
        # add containers
        containers = []
        k8s_containers = []
        # add actuator container
        k8s_container = client.V1Container(
            name="nodemanager-actuator",
            image=k8s_config.actuator_image,
            ports=[
                client.V1ContainerPort(container_port=k8s_config.actuator_port)
            ],
            volume_mounts=[
                client.V1VolumeMount(name="docker-sock", mount_path="/var/run")
            ],
            image_pull_policy=k8s_config.k8s_image_pull_policy)
        k8s_containers.append(k8s_container)

        # add CPU containers
        base_port = 8501
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "nodemanager-rest-cpu-" + str(i + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image,
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model=model.name,
                          version=model.version,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.CPU,
                          quota=None))
            base_port += 1

        # add GPU containers
        for gpu in range(k8s_config.available_gpus):
            container_name = "nodemanager-rest-gpu-" + str(gpu + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image + "-gpu",
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ],
                env=[
                    client.V1EnvVar(name="NVIDIA_VISIBLE_DEVICES",
                                    value=str(gpu + 1))
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model="all",
                          version=1,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.GPU,
                          quota=None))
            base_port += 1

        # add volumes
        volumes = [
            client.V1Volume(
                name="docker-sock",
                host_path=client.V1HostPathVolumeSource(path="/var/run")),
            client.V1Volume(name="shared-models",
                            empty_dir=client.V1EmptyDirVolumeSource())
        ]

        # set pod anti-affinity so replicas spread across hosts
        affinity = client.V1Affinity(
            pod_anti_affinity=client.V1PodAntiAffinity(
                required_during_scheduling_ignored_during_execution=[
                    client.V1PodAffinityTerm(
                        topology_key="kubernetes.io/hostname")
                ]))

        # init containers
        init_containers = []
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "tfs-init-" + str(i + 1)
            init_containers.append(
                client.V1Container(
                    name=container_name,
                    image=k8s_config.tfs_init_image,
                    args=[
                        "-f", "/home/models/", "-d",
                        "/home/models/" + model.name, "-c",
                        k8s_config.tfs_config_endpoint, "-m",
                        model.tfs_model_url
                    ],
                    image_pull_policy=k8s_config.k8s_image_pull_policy,
                    volume_mounts=[
                        client.V1VolumeMount(
                            name="shared-models",
                            mount_path=k8s_config.tfs_models_path)
                    ]))

        # add pod spec
        pod_spec = client.V1PodSpec(containers=k8s_containers,
                                    volumes=volumes,
                                    affinity=affinity,
                                    init_containers=init_containers,
                                    host_network=k8s_config.k8s_host_network,
                                    dns_policy="Default")
        # add pod template spec
        pod_template_spec = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"run": "nodemanager"}),
            spec=pod_spec)
        # add deployment spec
        deployment_spec = client.V1DeploymentSpec(
            selector=client.V1LabelSelector(
                match_labels={"run": "nodemanager"}),
            template=pod_template_spec,
            replicas=k8s_config.initial_replicas)
        # build deployment
        deployment = client.V1Deployment(api_version="apps/v1",
                                         kind="Deployment",
                                         metadata=client.V1ObjectMeta(
                                             name="nodemanager-deploy",
                                             labels={"run": "nodemanager"}),
                                         spec=deployment_spec)

        return containers, deployment
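
The generator only builds the objects; applying them is left to the caller. A minimal submission sketch, assuming kubeconfig (or in-cluster) credentials and a "default" namespace:

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
containers, deployment = k8s_deployment_generator(k8s_config)
client.AppsV1Api().create_namespaced_deployment(namespace="default",
                                                body=deployment)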
Example #4
    def spawn(self, taskdef: TaskDefinition) -> KubernetesTask:
        try:
            self.emit_sync('prepare', taskdef=taskdef)

            volumes, mounts = create_volumes(taskdef.volumes)

            # container definition
            container = client.V1Container(
                name=taskdef.id,
                image=taskdef.image,
                env=self.create_env(taskdef),
                ports=self.create_ports(taskdef),
                image_pull_policy='Always',  # taskdef field??
                resources=client.V1ResourceRequirements(
                    requests={
                        'cpu': str(taskdef.cpu or '0'),
                        'memory': str(taskdef.memory or '0'),
                    },
                    limits={
                        'cpu': str(taskdef.cpu_limit or '0'),
                        'memory': str(taskdef.memory_limit or '0'),
                    },
                ),
                volume_mounts=mounts,
            )

            labels = {
                LABEL_TASK_ID: taskdef.id,
                LABEL_PARENT_ID: taskdef.parent,
                **taskdef.meta,
            }

            affinity = None

            if taskdef.affinity:
                affinity_label = {}
                if taskdef.affinity.get("label"):
                    label = taskdef.affinity["label"]
                    affinity_label[label["key"]] = label["value"]
                else:
                    affinity_label["cowait_default_affinity_key"] = \
                        "cowait_default_affinity_value"

                if taskdef.affinity["type"] == 'spread':
                    aff_def = client.V1PodAntiAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.
                                    V1LabelSelector(match_expressions=[
                                        client.V1LabelSelectorRequirement(
                                            key=list(affinity_label.keys())[0],
                                            operator="In",
                                            values=[
                                                list(affinity_label.values())
                                                [0]
                                            ],
                                        )
                                    ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                elif taskdef.affinity["type"] == 'group':
                    aff_def = client.V1PodAffinity(
                        preferred_during_scheduling_ignored_during_execution=[
                            client.V1WeightedPodAffinityTerm(
                                pod_affinity_term=client.V1PodAffinityTerm(
                                    label_selector=client.
                                    V1LabelSelector(match_expressions=[
                                        client.V1LabelSelectorRequirement(
                                            key=list(affinity_label.keys())[0],
                                            operator="In",
                                            values=[
                                                list(affinity_label.values())
                                                [0]
                                            ],
                                        )
                                    ]),
                                    topology_key="kubernetes.io/hostname",
                                ),
                                weight=50)
                        ])

                else:
                    aff_def = None

                # 'spread' spreads pods apart (anti-affinity); 'group'
                # co-locates them, so it must attach as pod_affinity
                if aff_def is None:
                    affinity = None
                elif taskdef.affinity["type"] == 'spread':
                    affinity = client.V1Affinity(pod_anti_affinity=aff_def)
                else:
                    affinity = client.V1Affinity(pod_affinity=aff_def)
                labels[list(affinity_label.keys())[0]] = list(
                    affinity_label.values())[0]

            pod = self.core.create_namespaced_pod(
                namespace=self.namespace,
                body=client.V1Pod(
                    metadata=client.V1ObjectMeta(
                        name=taskdef.id,
                        namespace=self.namespace,
                        labels=labels,
                    ),
                    spec=client.V1PodSpec(
                        hostname=taskdef.id,
                        restart_policy='Never',
                        image_pull_secrets=self.get_pull_secrets(),
                        volumes=volumes,
                        affinity=affinity,
                        containers=[container],
                        service_account_name=self.service_account,
                    ),
                ),
            )

            # wrap & return task
            # print('~~ created kubernetes pod', pod.metadata.name)
            task = KubernetesTask(self, taskdef, pod)
            self.emit_sync('spawn', task=task)
            return task

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
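
The branches above imply a small schema for taskdef.affinity. A sketch of the expected shape (label key/value are hypothetical; omitting "label" falls back to the cowait_default_affinity_* pair):

taskdef.affinity = {
    "type": "spread",   # 'spread' -> pod anti-affinity, 'group' -> pod affinity
    "label": {          # optional
        "key": "my-affinity-group",
        "value": "workers",
    },
}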
Example #5
from kubernetes import client as k8s_client

def create_run_pod(k8s_settings, run_context):
    run_id = run_context.id
    run_name = run_context.run.to_json()["name"]

    labels = {
        "run-name": run_name,
        "run": run_id,
    }
    env = get_run_pod_env_vars(run_context)
    node_topology_key = "kubernetes.io/hostname"
    # NOTE(taylor): preference to run on nodes with other runs
    pod_affinities = [
        k8s_client.V1WeightedPodAffinityTerm(
            weight=50,
            pod_affinity_term=k8s_client.V1PodAffinityTerm(
                label_selector=k8s_client.V1LabelSelector(
                    match_labels={"type": "run"}),
                topology_key=node_topology_key,
            ),
        ),
    ]
    volumes = []
    volume_mounts = []
    experiment_id = run_context.experiment
    if experiment_id:
        labels.update({"experiment": experiment_id})
        # NOTE(taylor): highest preference to run on nodes with runs in the same experiment
        pod_affinities.append(
            k8s_client.V1WeightedPodAffinityTerm(
                weight=100,
                pod_affinity_term=k8s_client.V1PodAffinityTerm(
                    label_selector=k8s_client.V1LabelSelector(
                        match_labels={
                            "type": "run",
                            "experiment": experiment_id,
                        }),
                    topology_key=node_topology_key,
                ),
            ))

    unacceptable_node_group_types = ["system"]
    requests = k8s_settings.resources.get("requests") or {}
    limits = k8s_settings.resources.get("limits") or {}
    # NOTE(taylor): Preventing GPU-less jobs from running on GPU nodes forces the cluster autoscaler to scale up
    # CPU nodes. This prevents a situation where the GPU nodes are not scaled down because they are occupied by
    # CPU workloads. The cluster autoscaler does not know that it should create CPU nodes when the GPUs are unused.
    # TODO(taylor): This could cause unexpected behavior if the cluster has no CPU nodes. Running CPU jobs on GPU
    # nodes could also be an opportunity for more efficient resource utilization, but is avoided for now because the
    # workloads cannot be migrated onto CPU nodes by the cluster autoscaler as mentioned above.
    # NOTE(taylor): Applying a NoSchedule taint to GPU nodes is another way to achieve this behavior, but does not work as
    # well out of the box with clusters that orchestrate doesn't provision. Applying a PreferNoSchedule
    # taint to GPU nodes does not resolve the workload migration issue when there are no CPU nodes.
    if all(
            float(group.get("nvidia.com/gpu", 0)) == 0
            for group in (requests, limits)):
        unacceptable_node_group_types.append("gpu")

    node_affinity = k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=(
            k8s_client.V1NodeSelector(node_selector_terms=[
                k8s_client.V1NodeSelectorTerm(match_expressions=[
                    k8s_client.V1NodeSelectorRequirement(
                        key="orchestrate.sigopt.com/node-group-type",
                        operator="NotIn",
                        values=unacceptable_node_group_types,
                    )
                ])
            ])))
    pod_affinity = k8s_client.V1PodAffinity(
        preferred_during_scheduling_ignored_during_execution=pod_affinities)

    pod = k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            owner_references=k8s_settings.owner_references,
            labels={
                "type": "run",
                **labels,
            },
            name=run_name,
        ),
        spec=k8s_client.V1PodSpec(
            affinity=k8s_client.V1Affinity(
                node_affinity=node_affinity,
                pod_affinity=pod_affinity,
            ),
            containers=[
                k8s_client.V1Container(
                    name="model-runner",
                    image=k8s_settings.image,
                    resources=k8s_client.V1ResourceRequirements(
                        **k8s_settings.resources),
                    image_pull_policy="Always",
                    command=[],
                    args=k8s_settings.args,
                    env=env,
                    volume_mounts=volume_mounts,
                    tty=True,
                ),
            ],
            volumes=volumes,
            restart_policy="Never",
        ),
    )
    k8s_settings.api.create_namespaced_pod(k8s_settings.namespace, pod)
    return pod
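
The GPU exclusion hinges on "nvidia.com/gpu" being absent from both requests and limits. A worked example of that check with hypothetical resource dicts:

requests = {"cpu": "1"}
limits = {"memory": "2Gi"}
# no "nvidia.com/gpu" key, so both get() calls return 0 and "gpu" is
# appended to unacceptable_node_group_types alongside "system"
assert all(float(g.get("nvidia.com/gpu", 0)) == 0 for g in (requests, limits))

requests = {"nvidia.com/gpu": "1"}
# a nonzero GPU request fails the check, so GPU node groups stay eligible
assert not all(float(g.get("nvidia.com/gpu", 0)) == 0 for g in (requests, limits))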