Example 1
    def test_pod_spec(self):
        cluster_spec = ClusterSpec(cluster_spec_json=test_spec)
        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, "other")

        self.assertEqual(
            pod.metadata.labels["elasticdl.org/app-name"], "elasticdl"
        )
        self.assertEqual(pod.metadata.labels["elasticdl.org/site"], "hangzhou")
        self.assertEqual(
            pod.metadata.annotations["tag.elasticdl.org/optimization"],
            "enabled",
        )
        expected_tolerations = [
            client.V1Toleration(
                effect="NoSchedule",
                key="elasticdl.org/logic-pool",
                operator="Equal",
                value="ElasticDL",
            )
        ]
        self.assertEqual(pod.spec.tolerations, expected_tolerations)
        match_expressions = [
            client.V1NodeSelectorRequirement(
                key="elasticdl.org/logic-pool",
                operator="In",
                values=["ElasticDL"],
            )
        ]

        expected_affinity = client.V1Affinity(
            node_affinity=client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=(
                    client.V1NodeSelector(
                        node_selector_terms=[
                            client.V1NodeSelectorTerm(
                                match_expressions=match_expressions
                            )
                        ]
                    )
                )
            )
        )
        self.assertEqual(pod.spec.affinity, expected_affinity)

        expected_env = []
        expected_env.append(client.V1EnvVar(name="LOG_ENABLED", value="true"))
        self.assertEqual(pod.spec.containers[0].env, expected_env)

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.MASTER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Sun")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.WORKER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Earth")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.PS)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Moon")
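The create_test_pod helper is not shown above; here is a minimal sketch of what it plausibly does, assuming it only needs to build a bare pod for patch_pod to mutate (the helper body, container name, and image are illustrative):

def create_test_pod(name):
    # Hypothetical test helper: a bare V1Pod with one container, so that
    # patch_pod has metadata and a container list to patch.
    return client.V1Pod(
        api_version="v1",
        kind="Pod",
        metadata=client.V1ObjectMeta(name=name, labels={}, annotations={}),
        spec=client.V1PodSpec(
            containers=[client.V1Container(name="main", image="python:3.9", env=[])]
        ),
    )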
Example 2
def pv_create(core_v1_api, pv_name):
    body = client.V1PersistentVolume(
        api_version="v1",
        kind="PersistentVolume",
        metadata=client.V1ObjectMeta(name=pv_name, labels={"key": "localpvs"}),
        spec=client.V1PersistentVolumeSpec(
            capacity={"storage": "0.5Gi"},
            volume_mode="Filesystem",
            access_modes=["ReadWriteOnce"],
            persistent_volume_reclaim_policy="Recycle",
            local=client.V1LocalVolumeSource(
                path="/home/damu/Documents/kubernet/project/CDN_project"
                     f"/volumes/{pv_name}"
            ),
            node_affinity=client.V1VolumeNodeAffinity(
                required=client.V1NodeSelector(node_selector_terms=[
                    client.V1NodeSelectorTerm(match_expressions=[
                        client.V1NodeSelectorRequirement(
                            key="kubernetes.io/hostname",
                            operator="In",
                            values=["minikube"])
                    ])
                ]))))
    core_v1_api.create_persistent_volume(body=body)
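A minimal usage sketch for pv_create, assuming a local kubeconfig is available; the PV name is illustrative:

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a cluster
pv_create(client.CoreV1Api(), "cdn-pv-0")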
Example 3
def template(context):
    labels = {"app": context["name"]}

    template_spec = client.V1PodSpec(containers=[
        client.V1Container(name=context["name"], image=context["image"])
    ])

    if "nodeSelector" in context:
        template_spec.node_selector = client.V1NodeSelector(
            node_selector_terms=context["nodeSelector"])

    return client.V1Deployment(
        api_version="extensions/v1beta1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name=context["name"]),
        spec=client.V1DeploymentSpec(
            replicas=context["replicas"],
            selector=client.V1LabelSelector(match_labels=labels),
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels=labels),
                spec=template_spec),
        ),
    )
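A hypothetical context for template(); the nodeSelector key is optional and, with the fix above, holds a plain {label: value} dict. Submitting the result assumes a configured client:

context = {
    "name": "demo-app",
    "image": "nginx:1.21",
    "replicas": 2,
    "nodeSelector": {"disktype": "ssd"},  # illustrative node label
}
deployment = template(context)
client.AppsV1Api().create_namespaced_deployment(namespace="default", body=deployment)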
Example 4
def clean_pod_template(pod_template, match_node_purpose="prefer", pod_type="worker"):
    """ Normalize pod template and check for type errors """
    if isinstance(pod_template, str):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s. "
            "If trying to pass a yaml filename then use "
            "KubeCluster.from_yaml"
        )
        raise TypeError(msg % pod_template)

    if isinstance(pod_template, dict):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s. "
            "If trying to pass a dictionary specification then use "
            "KubeCluster.from_dict"
        )
        raise TypeError(msg % str(pod_template))

    pod_template = copy.deepcopy(pod_template)

    # Make sure metadata / labels / env objects exist, so they can be modified
    # later without a lot of `is None` checks
    if pod_template.metadata is None:
        pod_template.metadata = client.V1ObjectMeta()
    if pod_template.metadata.labels is None:
        pod_template.metadata.labels = {}

    if pod_template.spec.containers[0].env is None:
        pod_template.spec.containers[0].env = []

    # add default tolerations
    tolerations = [
        client.V1Toleration(
            key="k8s.dask.org/dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
        # GKE currently does not permit creating taints on a node pool
        # with a `/` in the key field
        client.V1Toleration(
            key="k8s.dask.org_dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
    ]

    if pod_template.spec.tolerations is None:
        pod_template.spec.tolerations = tolerations
    else:
        pod_template.spec.tolerations.extend(tolerations)

    # add default node affinity to k8s.dask.org/node-purpose=worker
    if match_node_purpose != "ignore":
        # for readability
        affinity = pod_template.spec.affinity

        if affinity is None:
            affinity = client.V1Affinity()
        if affinity.node_affinity is None:
            affinity.node_affinity = client.V1NodeAffinity()

        # a common object for both a preferred and a required node affinity
        node_selector_term = client.V1NodeSelectorTerm(
            match_expressions=[
                client.V1NodeSelectorRequirement(
                    key="k8s.dask.org/node-purpose", operator="In", values=[pod_type]
                )
            ]
        )

        if match_node_purpose == "require":
            if (
                affinity.node_affinity.required_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.required_during_scheduling_ignored_during_execution = client.V1NodeSelector(
                    node_selector_terms=[]
                )
            affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.append(
                node_selector_term
            )
        elif match_node_purpose == "prefer":
            if (
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution = (
                    []
                )
            preferred_scheduling_terms = [
                client.V1PreferredSchedulingTerm(
                    preference=node_selector_term, weight=100
                )
            ]
            affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.extend(
                preferred_scheduling_terms
            )
        else:
            raise ValueError(
                'Attribute must be one of "ignore", "prefer", or "require".'
            )
        pod_template.spec.affinity = affinity

    return pod_template
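A short sketch of clean_pod_template in use, starting from a bare V1Pod (container name and image are illustrative):

pod = client.V1Pod(
    spec=client.V1PodSpec(
        containers=[client.V1Container(name="dask-worker", image="daskdev/dask")]
    )
)
pod = clean_pod_template(pod, match_node_purpose="require")
# The helper has now filled in metadata, the two default tolerations, and a
# required node affinity on k8s.dask.org/node-purpose.
assert pod.spec.tolerations[0].key == "k8s.dask.org/dedicated"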
Example 5
    def _generate_affinity(self):
        return k8s_client.V1Affinity(
            node_affinity=k8s_client.V1NodeAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    k8s_client.V1PreferredSchedulingTerm(
                        weight=1,
                        preference=k8s_client.V1NodeSelectorTerm(
                            match_expressions=[
                                k8s_client.V1NodeSelectorRequirement(
                                    key="some_node_label",
                                    operator="In",
                                    values=[
                                        "possible-label-value-1",
                                        "possible-label-value-2",
                                    ],
                                )
                            ]
                        ),
                    )
                ],
                required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(
                    node_selector_terms=[
                        k8s_client.V1NodeSelectorTerm(
                            match_expressions=[
                                k8s_client.V1NodeSelectorRequirement(
                                    key="some_node_label",
                                    operator="In",
                                    values=[
                                        "required-label-value-1",
                                        "required-label-value-2",
                                    ],
                                )
                            ]
                        ),
                    ]
                ),
            ),
            pod_affinity=k8s_client.V1PodAffinity(
                required_during_scheduling_ignored_during_execution=[
                    k8s_client.V1PodAffinityTerm(
                        label_selector=k8s_client.V1LabelSelector(
                            match_labels={"some-pod-label-key": "some-pod-label-value"}
                        ),
                        namespaces=["namespace-a", "namespace-b"],
                        topology_key="key-1",
                    )
                ]
            ),
            pod_anti_affinity=k8s_client.V1PodAntiAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    k8s_client.V1WeightedPodAffinityTerm(
                        weight=1,
                        pod_affinity_term=k8s_client.V1PodAffinityTerm(
                            label_selector=k8s_client.V1LabelSelector(
                                match_expressions=[
                                    k8s_client.V1LabelSelectorRequirement(
                                        key="some_pod_label",
                                        operator="NotIn",
                                        values=[
                                            "forbidden-label-value-1",
                                            "forbidden-label-value-2",
                                        ],
                                    )
                                ]
                            ),
                            namespaces=["namespace-c"],
                            topology_key="key-2",
                        ),
                    )
                ]
            ),
        )
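To inspect the JSON the API server would receive from this object, it can be run through the client's serializer; a sketch, assuming k8s_client is kubernetes.client and SomeOperator is the (hypothetical) class owning _generate_affinity:

import json

affinity = SomeOperator()._generate_affinity()  # SomeOperator is hypothetical
as_json = k8s_client.ApiClient().sanitize_for_serialization(affinity)
print(json.dumps(as_json, indent=2))  # keys come out camelCased, e.g. matchExpressions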
Example 6
def submit_job(args, command=None):
    container_image = args.container
    container_name = args.name

    body = client.V1Job(api_version="batch/v1", kind="Job", metadata=client.V1ObjectMeta(name=container_name))
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()

    labels = {
        'hugin-job': "1",
        'hugin-job-name': f'{container_name}'
    }
    template.template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels)
    )

    tolerations = []
    env = []
    if args.environment:
        for env_spec in args.environment:
            env_name, env_value = env_spec.split("=", 1)
            env.append(client.V1EnvVar(name=env_name, value=env_value))

    container_args = dict(
        name=f"container-{container_name}",
        image=container_image,
        env=env,
    )

    if args.gpu:
        tolerations.append(client.V1Toleration(
            key='nvidia.com/gpu', operator='Exists', effect='NoSchedule'))
        container_args['resources'] = client.V1ResourceRequirements(
            limits={"nvidia.com/gpu": 1})
    if command or args.command:
        container_args['command'] = command if command else args.command

    container = client.V1Container(**container_args)
    pull_secrets = []
    if args.pull_secret is not None:
        pull_secrets.append(client.V1LocalObjectReference(name=args.pull_secret))
    pod_args = dict(containers=[container],
                    restart_policy='Never',
                    image_pull_secrets=pull_secrets)

    if tolerations:
        pod_args['tolerations'] = tolerations

    if args.node_selector is not None:
        parts = args.node_selector.split("=", 1)
        if len(parts) == 2:
            affinity = client.V1Affinity(
                node_affinity=client.V1NodeAffinity(
                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                        node_selector_terms=[client.V1NodeSelectorTerm(
                            match_expressions=[client.V1NodeSelectorRequirement(
                                key=parts[0], operator='In', values=[parts[1]])]
                        )]
                    )
                )
            )
            pod_args['affinity'] = affinity

    template.template.spec = client.V1PodSpec(**pod_args)
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=1800, template=template.template)
    try:
        batch_v1.create_namespaced_job("default", body, pretty=True)
    except client.exceptions.ApiException as e:
        logging.critical(f"Failed to start job: {e.reason}")
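A hypothetical invocation of submit_job; the args object mirrors what an argparse parser for this CLI would produce, and batch_v1 is assumed to be a module-level client.BatchV1Api():

from types import SimpleNamespace

args = SimpleNamespace(
    container="python:3.9",
    name="demo-job",
    environment=["LOG_LEVEL=debug"],
    gpu=False,
    pull_secret=None,
    node_selector="kubernetes.io/hostname=minikube",
    command=None,
)
submit_job(args, command=["python", "-c", "print('hello')"])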
Example 7
def add(ip, game_id, params):
    game = get_game_by_id(game_id)
    game.validate_params(params)
    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [
        client.V1EnvVar(name="IP_ALLOC", value=ip_ext),
        client.V1EnvVar(name="IP_CREATOR", value=ip),
    ]
    containers = game.make_deployment(params)
    generic_ports = []
    # TODO(bluecmd): Hack to work around that not all
    # ports are routed to the VIP by default. This allows
    # outgoing connections from inside the pod on the VIP.
    for p in range(50000, 50016):
        generic_ports.append(client.V1ServicePort(
            name="internal-tcp-" + str(p), port=p, target_port=p, protocol="TCP"))
        generic_ports.append(client.V1ServicePort(
            name="internal-udp-" + str(p), port=p, target_port=p, protocol="UDP"))
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(
                limits={
                    "cpu": "4",
                    "memory": "32G"
                },
                requests={
                    "cpu": "2",
                    "memory": "16G"
                }
            )
    deployment = client.V1Deployment(
            spec=client.V1DeploymentSpec(
                replicas=1,
                # V1Deployment is the apps/v1 model, so the matching apps/v1
                # strategy classes are used here instead of the old
                # AppsV1beta1 ones.
                strategy=client.V1DeploymentStrategy(
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=0,
                        max_unavailable=1
                    )
                ),
                selector=client.V1LabelSelector(
                    match_labels=labels,
                ),
                template=client.V1PodTemplateSpec(
                    spec=client.V1PodSpec(
                        containers=containers,
                        termination_grace_period_seconds=0,
                        # TODO(bluecmd): Hack to work around that not all
                        # ports are routed to the VIP by default. This allows
                        # outgoing connections from inside the pod on the VIP.
                        security_context=client.V1PodSecurityContext(
                            sysctls=[client.V1Sysctl(
                                name='net.ipv4.ip_local_port_range',
                                value='50000 50015')]),
                        affinity=client.V1Affinity(
                            node_affinity=client.V1NodeAffinity(
                                required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                                    node_selector_terms=[
                                        client.V1NodeSelectorTerm(
                                            match_expressions=[
                                                client.V1NodeSelectorRequirement(
                                                    key="kubernetes.io/role",
                                                    operator="NotIn",
                                                    values=["shared"]
                                                )
                                            ]
                                        )
                                    ]
                                )
                            )
                        )
                    )
                )
            )
    )
    service = client.V1Service(
        spec=client.V1ServiceSpec(
            type="ClusterIP",
            selector=labels,
            ports=game.make_service(params) + generic_ports,
            external_i_ps=[ip_ext],
        )
    )
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata
    service.metadata.annotations = {"kube-router.io/service.dsr": "tunnel"}

    client.AppsV1Api().create_namespaced_deployment(
        namespace=NAMESPACE,
        body=deployment,
    )

    service_resp = client.CoreV1Api().create_namespaced_service(
        namespace=NAMESPACE,
        body=service,
    )

    return {"uid": uid, "ip": ip}
Example 8
    def from_runs(cls, id: str, runs: List[Run]):
        k8s_name = 'tensorboard-' + id
        run_names_hash = K8STensorboardInstance.generate_run_names_hash(runs)

        volume_mounts = []

        for run in runs:
            mount = k8s.V1VolumeMount(
                name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                mount_path=os.path.join(
                    cls.TENSORBOARD_CONTAINER_MOUNT_PATH_PREFIX, run.owner,
                    run.name),
                sub_path=os.path.join(run.owner, run.name))
            volume_mounts.append(mount)

        deployment_labels = {
            'name': k8s_name,
            'type': 'nauta-tensorboard',
            'nauta_app_name': 'tensorboard',
            'id': id,
            'runs-hash': run_names_hash
        }

        tensorboard_command = [
            "tensorboard", "--logdir",
            cls.TENSORBOARD_CONTAINER_MOUNT_PATH_PREFIX, "--port", "6006",
            "--host", "127.0.0.1"
        ]

        nauta_config = NautaPlatformConfig.incluster_init()

        tensorboard_image = nauta_config.get_tensorboard_image()
        tensorboard_proxy_image = nauta_config.get_activity_proxy_image()

        deployment = k8s.V1Deployment(
            api_version='apps/v1',
            kind='Deployment',
            metadata=k8s.V1ObjectMeta(name=k8s_name, labels=deployment_labels),
            spec=k8s.V1DeploymentSpec(
                replicas=1,
                selector=k8s.V1LabelSelector(match_labels=deployment_labels),
                template=k8s.V1PodTemplateSpec(
                    metadata=k8s.V1ObjectMeta(labels=deployment_labels),
                    spec=k8s.V1PodSpec(
                        tolerations=[
                            k8s.V1Toleration(key='master',
                                             operator='Exists',
                                             effect='NoSchedule')
                        ],
                        affinity=k8s.V1Affinity(
                            node_affinity=k8s.V1NodeAffinity(
                                required_during_scheduling_ignored_during_execution=(
                                    k8s.V1NodeSelector(node_selector_terms=[
                                        k8s.V1NodeSelectorTerm(match_expressions=[
                                            k8s.V1NodeSelectorRequirement(
                                                key="master",
                                                operator="In",
                                                values=["True"])
                                        ])
                                    ])))),
                        containers=[
                            k8s.V1Container(name='app',
                                            image=tensorboard_image,
                                            command=tensorboard_command,
                                            volume_mounts=volume_mounts),
                            k8s.V1Container(
                                name='proxy',
                                image=tensorboard_proxy_image,
                                ports=[k8s.V1ContainerPort(container_port=80)],
                                readiness_probe=k8s.V1Probe(
                                    period_seconds=5,
                                    http_get=k8s.V1HTTPGetAction(
                                        path='/healthz', port=80)))
                        ],
                        volumes=[
                            k8s.V1Volume(
                                name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                                persistent_volume_claim=(
                                    k8s.V1PersistentVolumeClaimVolumeSource(
                                        claim_name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                                        read_only=True)))
                        ]))))

        service = k8s.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8s.V1ObjectMeta(name=k8s_name,
                                      labels={
                                          'name': k8s_name,
                                          'type': 'nauta-tensorboard',
                                          'nauta_app_name': 'tensorboard',
                                          'id': id
                                      }),
            spec=k8s.V1ServiceSpec(
                type='ClusterIP',
                ports=[k8s.V1ServicePort(name='web', port=80, target_port=80)],
                selector={
                    'name': k8s_name,
                    'type': 'nauta-tensorboard',
                    'nauta_app_name': 'tensorboard',
                    'id': id
                }))

        ingress = k8s.V1beta1Ingress(
            api_version='extensions/v1beta1',
            kind='Ingress',
            metadata=k8s.V1ObjectMeta(
                name=k8s_name,
                labels={
                    'name': k8s_name,
                    'type': 'nauta-tensorboard',
                    'nauta_app_name': 'tensorboard',
                    'id': id
                },
                annotations={
                    'nauta.ingress.kubernetes.io/rewrite-target': '/',
                    'kubernetes.io/ingress.class': 'nauta-ingress'
                }),
            spec=k8s.V1beta1IngressSpec(rules=[
                k8s.V1beta1IngressRule(
                    host='localhost',
                    http=k8s.V1beta1HTTPIngressRuleValue(paths=[
                        k8s.V1beta1HTTPIngressPath(
                            path='/tb/' + id + "/",
                            backend=k8s.V1beta1IngressBackend(
                                service_name=k8s_name, service_port=80))
                    ]))
            ]))

        return cls(deployment=deployment, service=service, ingress=ingress)
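A sketch of submitting the three generated objects, assuming the namespace and the run list come from the surrounding Nauta code (both are illustrative here):

runs = []  # in practice, a List[Run] built by the caller
instance = K8STensorboardInstance.from_runs(id="abc123", runs=runs)
k8s.AppsV1Api().create_namespaced_deployment(namespace="nauta", body=instance.deployment)
k8s.CoreV1Api().create_namespaced_service(namespace="nauta", body=instance.service)
k8s.ExtensionsV1beta1Api().create_namespaced_ingress(namespace="nauta", body=instance.ingress)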