Example #1
 def _get_pod_template(self, template):
     if template:
         metadata = self._get_metadata(template.get('metadata'))
         spec = self._get_pod_spec(spec=template.get('spec', {}))
         return client.V1PodTemplateSpec(metadata=metadata, spec=spec)
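A hedged usage sketch for the helper above: builder and the manifest file name are hypothetical, and the method is assumed to live on a class that also defines _get_metadata and _get_pod_spec.

import yaml

with open("deployment.yaml") as f:  # hypothetical manifest file
    manifest = yaml.safe_load(f)
# convert the dict-form pod template into a typed V1PodTemplateSpec
pod_template = builder._get_pod_template(manifest["spec"]["template"])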
Example #2
def create_cron_job(name,
                    configmap_name,
                    init_container_name,
                    init_container_image,
                    init_container_command,
                    container_name,
                    container_image,
                    container_command,
                    schedule,
                    namespace="default",
                    env_vars=None):
    try:
        # The body is the CronJob object itself
        body = client.V1beta1CronJob(api_version="batch/v1beta1",
                                     kind="CronJob")
        # The object needs metadata; every job must have a unique name!
        body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
        # Add an (initially empty) status
        body.status = client.V1beta1CronJobStatus()

        template = client.V1PodTemplateSpec()

        # Pass arguments to the container via environment variables:
        env_list = []
        for env_name, env_value in (env_vars or {}).items():
            env_list.append(client.V1EnvVar(name=env_name, value=env_value))

        container = client.V1Container(command=container_command,
                                       env=env_list,
                                       image=container_image,
                                       image_pull_policy="IfNotPresent",
                                       name=container_name)

        # NOTE: mount_path is assumed to be defined at module level
        volume_mount = client.V1VolumeMount(name="share-volume",
                                            mount_path=mount_path)
        container.volume_mounts = [volume_mount]
        container.args = [mount_path + '']

        init_container = client.V1Container(command=init_container_command,
                                            image=init_container_image,
                                            image_pull_policy="IfNotPresent",
                                            name=init_container_name)

        # NOTE: init_mount_path is also assumed to be module-level
        init_volume_mount = client.V1VolumeMount(name="config-volume",
                                                 mount_path=init_mount_path)
        init_container.volume_mounts = [volume_mount, init_volume_mount]

        share_volume = client.V1Volume(name="share-volume", empty_dir={})

        config_map = client.V1ConfigMapVolumeSource(name=configmap_name)
        config_map_volume = client.V1Volume(name="config-volume",
                                            config_map=config_map)

        template.spec = client.V1PodSpec(
            active_deadline_seconds=600,
            containers=[container],
            restart_policy='OnFailure',
            volumes=[config_map_volume, share_volume],
            init_containers=[init_container])

        job_template = client.V1beta1JobTemplateSpec()
        job_template.spec = client.V1JobSpec(template=template)

        body.spec = client.V1beta1CronJobSpec(starting_deadline_seconds=600,
                                              job_template=job_template,
                                              schedule=schedule)

        # Make an asynchronous HTTP request
        # (batch_v1_beta1_api is assumed to be a module-level BatchV1beta1Api instance)
        thread = batch_v1_beta1_api.create_namespaced_cron_job(namespace,
                                                               body,
                                                               async_req=True,
                                                               pretty=True)
        result = thread.get()

        return True, result

    except Exception as ex:
        print(ex)
        return False, ""
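A minimal calling sketch, assuming the module-level batch_v1_beta1_api, mount_path and init_mount_path that create_cron_job relies on. Note that batch/v1beta1 CronJobs were removed in Kubernetes 1.25; on newer clusters the same pattern uses client.BatchV1Api() with V1CronJob and V1CronJobSpec.

from kubernetes import client, config

config.load_kube_config()
batch_v1_beta1_api = client.BatchV1beta1Api()  # module-level, as the function expects
mount_path = "/data/share"                     # hypothetical mount points
init_mount_path = "/etc/config"

ok, result = create_cron_job(name="nightly-report",
                             configmap_name="nightly-config",
                             init_container_name="init",
                             init_container_image="busybox:1.36",
                             init_container_command=["sh", "-c", "cp /etc/config/* /data/share/"],
                             container_name="main",
                             container_image="busybox:1.36",
                             container_command=["sh", "-c", "ls /data/share"],
                             schedule="0 2 * * *")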
Example #3
    def build(self):
        logging.info("Building image using cluster builder.")
        install_reqs_before_copy = self.preprocessor.is_update_file_present()
        if self.dockerfile_path:
            dockerfile_path = self.dockerfile_path
        else:
            dockerfile_path = dockerfile.write_dockerfile(
                path_prefix=self.preprocessor.path_prefix,
                base_image=self.base_image,
                install_reqs_files=self.preprocessor.input_files,
                install_reqs_before_copy=install_reqs_before_copy)

        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_path, context_hash = self.preprocessor.context_tar_gz()
        if self.version:
            self.image_tag = self.full_image_name(self.version)
        else:
            self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)
        labels = {'extender-builder': 'kaniko'}
        labels['extender-build-id'] = str(uuid.uuid1())
        pod_spec = self.context_source.generate_pod_spec(
            self.image_tag, self.push)
        for fn in self.pod_spec_mutators:
            fn(self.manager, pod_spec, self.namespace)

        pod_spec_template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                generate_name="extender-builder-",
                labels=labels,
                namespace=self.namespace,
                annotations={"sidecar.istio.io/inject": "false"},
            ),
            spec=pod_spec)
        job_spec = client.V1JobSpec(
            template=pod_spec_template,
            parallelism=1,
            completions=1,
            backoff_limit=0,
        )
        build_job = client.V1Job(api_version="batch/v1",
                                 kind="Job",
                                 metadata=client.V1ObjectMeta(
                                     generate_name="extender-builder-",
                                     labels=labels,
                                 ),
                                 spec=job_spec)
        created_job = client. \
            BatchV1Api(). \
            create_namespaced_job(self.namespace, build_job)

        self.manager.log(name=created_job.metadata.name,
                         namespace=created_job.metadata.namespace,
                         selectors=labels,
                         container="kaniko")

        # Invoke upstream clean ups
        self.context_source.cleanup()
        # Cleanup build_job if requested by user
        # Otherwise build_job will be cleaned up by Kubernetes GC
        if self.cleanup:
            logging.warning("Cleaning up job {}...".format(
                created_job.metadata.name))
            client. \
                BatchV1Api(). \
                delete_namespaced_job(
                    created_job.metadata.name,
                    created_job.metadata.namespace,
                    body=client.V1DeleteOptions(propagation_policy='Foreground')
                )
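If the caller needs to block until the Kaniko build finishes, a polling sketch along these lines could follow (error handling elided; the job name and namespace come from created_job as above):

import time

batch_api = client.BatchV1Api()
while True:
    job = batch_api.read_namespaced_job(created_job.metadata.name,
                                        created_job.metadata.namespace)
    if job.status.succeeded or job.status.failed:
        break
    time.sleep(5)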
Example #4
    def create_deployment_object(self):
        filebeat_container = client.V1Container(
            name='filebeat',
            image='harbor.uletm.com/public/filebeat:6.3.2',
            image_pull_policy='Always',
            volume_mounts=[{
                "name": "filebeat-config",
                "mountPath": "/usr/share/filebeat/filebeat.yml",
                "subPath": "filebeat.yml"
            }, {
                "name": "app-logs",
                "mountPath": "/data/logs/tomcat"
            }],
            resources={
                "limits": {
                    "cpu": "100m",
                    "memory": "200Mi"
                },
                "requests": {
                    "cpu": "100m",
                    "memory": "100Mi"
                }
            })
        app_container = client.V1Container(
            env=self.env,
            name=self.container_name,
            image=self.image,
            image_pull_policy='Always',
            volume_mounts=[{
                "name": "app-logs",
                "mountPath": "/data/logs/tomcat"
            }],
            liveness_probe={
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 2,
                "successThreshold": 1,
                "tcpSocket": {
                    "port": self.container_port
                },
                "timeoutSeconds": 2
            },
            ports=[
                client.V1ContainerPort(container_port=self.container_port,
                                       name='8080tcp00',
                                       protocol='TCP')
            ],
            readiness_probe={
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 2,
                "successThreshold": 2,
                "tcpSocket": {
                    "port": self.container_port
                },
                "timeoutSeconds": 2
            },
            resources={
                "limits": self.limits,
                "requests": self.limits
            },
            security_context={
                "allowPrivilegeEscalation": False,
                "capabilities": {},
                "privileged": False,
                "procMount": "Default",
                "readOnlyRootFilesystem": False,
                "runAsNonRoot": False
            },
            stdin=True,
            termination_message_path='/dev/termination-log',
            termination_message_policy='File',
            tty=True)

        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels=self.pod_labels),
            spec=client.V1PodSpec(
                affinity={
                    "nodeAffinity": {
                        "requiredDuringSchedulingIgnoredDuringExecution": {
                            "nodeSelectorTerms": [{
                                "matchExpressions": [{
                                    "key":
                                    "node-type",
                                    "operator":
                                    "In",
                                    "values": [self.server_type]
                                }]
                            }]
                        }
                    }
                },
                containers=[app_container, filebeat_container],
                dns_policy="ClusterFirst",
                image_pull_secrets=self.image_pull_secrets,
                priority=0,
                restart_policy="Always",
                scheduler_name="default-scheduler",
                security_context={},
                service_account="default",
                service_account_name="default",
                termination_grace_period_seconds=30,
                tolerations=[{
                    "effect": "NoExecute",
                    "key": "node.kubernetes.io/not-ready",
                    "operator": "Exists",
                    "tolerationSeconds": 300
                }, {
                    "effect": "NoExecute",
                    "key": "node.kubernetes.io/unreachable",
                    "operator": "Exists",
                    "tolerationSeconds": 300
                }],
                volumes=self.volumes))

        spec = client.V1DeploymentSpec(
            progress_deadline_seconds=600,
            replicas=self.pod_num,
            revision_history_limit=10,
            selector={
                "matchLabels": {
                    "workload.user.cattle.io/workloadselector":
                    self.workload_selector
                }
            },
            strategy={
                "rollingUpdate": {
                    "maxSurge": 1,
                    "maxUnavailable": 0
                },
                "type": "RollingUpdate"
            },
            template=template)

        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(
                namespace=self.namespace,
                name=self.deployment_name,
                generation=10,
                labels={
                    "cattle.io/creator":
                    "norman",
                    "workload.user.cattle.io/workloadselector":
                    self.workload_selector
                }),
            spec=spec)

        return deployment
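A hedged sketch of how the returned deployment is typically submitted from another method of the same class, assuming kube config has already been loaded:

apps_v1 = client.AppsV1Api()
deployment = self.create_deployment_object()
apps_v1.create_namespaced_deployment(namespace=self.namespace, body=deployment)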
Example #5
def test_large_k8s_cluster_cronjob_prop():
    """
    Creates 5000 jobs and 5000 pods. Each job has an owner
    reference of a cronjob. Asserts that pods acquire the
    cronjob property from the jobs owner reference.
    """
    job_count = 5000
    pods_per_job = 1
    with fake_k8s_api_server(print_logs=False) as [
            fake_k8s_client, k8s_envvars
    ]:
        v1_client = client.CoreV1Api(fake_k8s_client)
        batchv1_client = client.BatchV1Api(fake_k8s_client)
        jobs = {}
        for i in range(0, job_count):
            cj_name = f"cj-{i}"
            cj_uid = f"cjuid{i}"
            job_name = cj_name + "-job"
            job_uid = cj_uid + "-job"
            jobs[job_uid] = {
                "cj_name": cj_name,
                "cj_uid": cj_uid,
                "job_name": job_name,
                "job_uid": job_uid,
                "pod_uids": [],
                "pod_names": [],
            }

            batchv1_client.create_namespaced_job(
                body={
                    "apiVersion":
                    "batch/v1",
                    "kind":
                    "Job",
                    "metadata": {
                        "name":
                        job_name,
                        "uid":
                        job_uid,
                        "namespace":
                        "default",
                        "ownerReferences": [{
                            "kind": "CronJob",
                            "name": cj_name,
                            "uid": cj_uid
                        }],
                    },
                    "spec":
                    client.V1JobSpec(completions=1,
                                     parallelism=1,
                                     template=client.V1PodTemplateSpec()),
                    "status": {},
                },
                namespace="default",
            )

            for j in range(0, pods_per_job):
                pod_name = f"pod-{job_name}-{j}"
                pod_uid = f"abcdef{i}-{j}"
                jobs[job_uid]["pod_uids"].append(pod_uid)
                jobs[job_uid]["pod_names"].append(pod_name)
                v1_client.create_namespaced_pod(
                    body={
                        "apiVersion": "v1",
                        "kind": "Pod",
                        "metadata": {
                            "name":
                            pod_name,
                            "uid":
                            pod_uid,
                            "namespace":
                            "default",
                            "labels": {
                                "app": "my-app"
                            },
                            "ownerReferences": [{
                                "kind": "Job",
                                "name": job_name,
                                "uid": job_uid
                            }],
                        },
                        "spec": {},
                    },
                    namespace="default",
                )
        with Agent.run(
                dedent(f"""
          writer:
            maxRequests: 100
            propertiesMaxRequests: 100
            propertiesHistorySize: 10000
          monitors:
           - type: kubernetes-cluster
             alwaysClusterReporter: true
             intervalSeconds: 10
             kubernetesAPI:
                skipVerify: true
                authType: none
        """),
                profiling=True,
                debug=False,
                extra_env=k8s_envvars,
        ) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"kubernetes_pod_name": "pod-cj-0-job-0"}))
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"kubernetes_pod_name": "pod-cj-4999-job-0"}))

            ## get heap usage with 5k pods
            heap_profile_baseline = agent.pprof_client.get_heap_profile()

            def has_all_cronjob_props():
                for _, job in jobs.items():
                    for pod_uid in job["pod_uids"]:
                        if not has_dim_prop(
                                agent.fake_services,
                                dim_name="kubernetes_pod_uid",
                                dim_value=pod_uid,
                                prop_name="cronJob",
                                prop_value=job["cj_name"],
                        ):
                            return False
                        if not has_dim_prop(
                                agent.fake_services,
                                dim_name="kubernetes_pod_uid",
                                dim_value=pod_uid,
                                prop_name="cronJob_uid",
                                prop_value=job["cj_uid"],
                        ):
                            return False
                # only return True once every job has been checked
                return True

            def has_all_job_props():
                for _, job in jobs.items():
                    for pod_uid in job["pod_uids"]:
                        if not has_dim_prop(
                                agent.fake_services,
                                dim_name="kubernetes_pod_uid",
                                dim_value=pod_uid,
                                prop_name="job",
                                prop_value=job["job_name"],
                        ):
                            return False
                        if not has_dim_prop(
                                agent.fake_services,
                                dim_name="kubernetes_pod_uid",
                                dim_value=pod_uid,
                                prop_name="job_uid",
                                prop_value=job["job_uid"],
                        ):
                            return False
                # only return True once every job has been checked
                return True

            assert wait_for(has_all_job_props,
                            interval_seconds=2,
                            timeout_seconds=60)
            assert wait_for(has_all_cronjob_props,
                            interval_seconds=2,
                            timeout_seconds=60)

            for _, job in jobs.items():
                batchv1_client.delete_namespaced_job(name=job["job_name"],
                                                     namespace="default",
                                                     body={})
                for pod_name in job["pod_names"]:
                    v1_client.delete_namespaced_pod(name=pod_name,
                                                    namespace="default",
                                                    body={})

            agent.pprof_client.assert_goroutine_count_under(200)
            agent.pprof_client.assert_heap_alloc_under(
                heap_profile_baseline.total * 1.2)
Example #6
    def _create_deployment_object(self):
        """Creates the deployment object for the grader service using environment variables

        Returns:
          V1Deployment: a valid kubernetes deployment object
        """
        # Configure the pod template container
        # Volumes to mount as subPaths of PV
        sub_path_grader_home = str(self.course_dir.parent).strip('/')
        sub_path_exchange = str(
            self.exchange_dir.relative_to(EXCHANGE_MNT_ROOT))
        # define the container to launch
        container = client.V1Container(
            name='grader-notebook',
            image=GRADER_IMAGE_NAME,
            command=[
                'start-notebook.sh', f'--group=formgrade-{self.course_id}'
            ],
            ports=[client.V1ContainerPort(container_port=8888)],
            working_dir=f'/home/{self.grader_name}',
            resources=client.V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "200Mi"},
                limits={"cpu": "500m", "memory": "1G"}),
            security_context=client.V1SecurityContext(
                allow_privilege_escalation=False),
            env=[
                client.V1EnvVar(name='JUPYTERHUB_SERVICE_NAME',
                                value=self.course_id),
                client.V1EnvVar(name='JUPYTERHUB_API_TOKEN',
                                value=self.grader_token),
                # we're using the K8s Service name 'hub' (defined in the jhub helm chart)
                # to connect from our grader-notebooks
                client.V1EnvVar(name='JUPYTERHUB_API_URL',
                                value='http://hub:8081/hub/api'),
                client.V1EnvVar(name='JUPYTERHUB_BASE_URL', value='/'),
                client.V1EnvVar(name='JUPYTERHUB_SERVICE_PREFIX',
                                value=f'/services/{self.course_id}/'),
                client.V1EnvVar(name='JUPYTERHUB_CLIENT_ID',
                                value=f'service-{self.course_id}'),
                client.V1EnvVar(name='JUPYTERHUB_USER',
                                value=self.grader_name),
                client.V1EnvVar(name='NB_UID', value=str(NB_UID)),
                client.V1EnvVar(name='NB_GID', value=str(NB_GID)),
                client.V1EnvVar(name='NB_USER', value=self.grader_name),
            ],
            volume_mounts=[
                client.V1VolumeMount(mount_path=f'/home/{self.grader_name}',
                                     name=GRADER_PVC,
                                     sub_path=sub_path_grader_home),
                client.V1VolumeMount(mount_path='/srv/nbgrader/exchange',
                                     name=GRADER_EXCHANGE_SHARED_PVC,
                                     sub_path=sub_path_exchange),
            ],
        )
        # Create and configure a spec section
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={
                'component': self.grader_name,
                'app': 'illumidesk'
            }),
            spec=client.V1PodSpec(
                containers=[container],
                security_context=client.V1PodSecurityContext(run_as_user=0),
                volumes=[
                    client.V1Volume(
                        name=GRADER_PVC,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=GRADER_PVC),
                    ),
                    client.V1Volume(
                        name=GRADER_EXCHANGE_SHARED_PVC,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=GRADER_EXCHANGE_SHARED_PVC),
                    ),
                ],
            ),
        )
        # Create the specification of deployment
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector={'matchLabels': {
                'component': self.grader_name
            }})
        # Instantiate the deployment object
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=self.grader_name),
            spec=spec)

        return deployment
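A create-or-update sketch for the deployment built above, with a hypothetical namespace; an ApiException with status 409 means the deployment already exists, so it is patched instead:

from kubernetes.client.rest import ApiException

apps_v1 = client.AppsV1Api()
deployment = self._create_deployment_object()
try:
    apps_v1.create_namespaced_deployment(namespace="default", body=deployment)
except ApiException as e:
    if e.status == 409:
        apps_v1.patch_namespaced_deployment(name=self.grader_name,
                                            namespace="default",
                                            body=deployment)
    else:
        raise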
Example #7
    def __init__(self,
                 name,
                 master,
                 containers,
                 labels=None,
                 replicas=1,
                 api_version='extensions/v1beta1',
                 cluster_name=None,
                 namespace='default',
                 generate_name=None,
                 min_ready_seconds=0,
                 progress_deadline_seconds=600,
                 deployment_strategy_type='RollingUpdate',
                 dns_policy='ClusterFirstWithHostNet',
                 deployment_strategy_rolling_update=None,
                 selectors=None,
                 deployment_object=None,
                 volumes=None):
        """
        Create a new deployment object in the specified cluster with specified label.

        @param name,,str name of the deployment.
        @param master,, KubernetesMaster the master object that has all the clients and the config to connect to.
        @param containers,, list(V1Container) this can be produced using the self.define_container method on master.
        @param labels,,list({string:string}) labels to apply to the pod
        @param replicas,, number of replicas to maintain until the end of lifetime of deployment.
        @param cluster_name,,str cluster to create the pod on
        @param namespace,, str namespace to relate the pod to
        @param dns_policy,,str set DNS policy for containers within the pod. One of 'ClusterFirstWithHostNet', 'ClusterFirst' or 'Default'. Defaults to "ClusterFirst". To have DNS options set along with hostNetwork, you have to specify DNS policy explicitly to 'ClusterFirstWithHostNet'.
        @param selectors,,{string:string}  is a selector which must be true for the deployment to fit on a node or cluster.
        @param generate_name,, str the generated name for deployment.
        @param progress_deadline_seconds,, int The maximum time in seconds for a deployment to make progress before it is considered to be failed. The deployment controller will continue to process failed deployments and a condition with a ProgressDeadlineExceeded reason will be surfaced in the deployment status. Note that progress will not be estimated during the time a deployment is paused. Defaults to 600s.
        @param deployment_strategy_type,, str Type of deployment. Can be "Recreate" or "RollingUpdate". Default is RollingUpdate.
        @param deployment_strategy_rolling_update,, {maxSurge:int, maxUnavailable: int}
        @param min_ready_seconds,, int Minimum number of seconds for which a newly created pod should be ready without any of its container crashing, for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready)
        @param volumes,, list(V1Volume) can be created from the define_?_volume methods
        """
        JSBASE.__init__(self)
        self.object = deployment_object
        if not deployment_object:
            kind = 'Deployment'
            labels = dict(labels or {})  # copy so a shared default or caller dict isn't mutated
            labels.update({'app': name})
            # Create and configure pod spec section
            pod_spec = client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels=labels),
                spec=client.V1PodSpec(containers=containers,
                                      dns_policy=dns_policy,
                                      volumes=volumes))

            # create deployment_strategy
            deployment_strategy = client.AppsV1beta1DeploymentStrategy(
                rolling_update=deployment_strategy_rolling_update,
                type=deployment_strategy_type)

            # Create the specification of deployment
            selector = None
            if selectors:
                selector = client.V1LabelSelector([], selectors)

            deployment_spec = client.ExtensionsV1beta1DeploymentSpec(
                replicas=replicas,
                template=pod_spec,
                progress_deadline_seconds=progress_deadline_seconds,
                min_ready_seconds=min_ready_seconds,
                strategy=deployment_strategy,
                selector=selector)
            # Instantiate the deployment object
            self.object = client.ExtensionsV1beta1Deployment(
                api_version=api_version,
                kind=kind,
                spec=deployment_spec,
                metadata=client.V1ObjectMeta(name=name,
                                             cluster_name=cluster_name,
                                             namespace=namespace,
                                             generate_name=generate_name))
        self.master = master
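Note that extensions/v1beta1 Deployments (and the AppsV1beta1/ExtensionsV1beta1 model classes) were removed in Kubernetes 1.16 and dropped from recent Python clients. A rough apps/v1 equivalent of the construction above, under the same local names:

deployment_spec = client.V1DeploymentSpec(
    replicas=replicas,
    template=pod_spec,
    progress_deadline_seconds=progress_deadline_seconds,
    min_ready_seconds=min_ready_seconds,
    strategy=client.V1DeploymentStrategy(
        rolling_update=deployment_strategy_rolling_update,
        type=deployment_strategy_type),
    selector=client.V1LabelSelector(match_labels=labels))  # required in apps/v1
deployment = client.V1Deployment(
    api_version='apps/v1',
    kind='Deployment',
    spec=deployment_spec,
    metadata=client.V1ObjectMeta(name=name, namespace=namespace))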
Example #8
def serving(args, version):
    model_name_version = args.model_name + "_" + version
    config.load_incluster_config()

    k8s_apps_v1 = client.AppsV1Api()
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={
            "app": "torchserve",
            "app.kubernetes.io/version": version
        }),
        spec=client.V1PodSpec(
            volumes=[
                client.V1Volume(
                    name="persistent-storage",
                    persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                        claim_name="serving-model-pvc"))
            ],
            containers=[
                client.V1Container(
                    name="torchserve",
                    image="pytorch/torchserve:0.3.0-cpu",
                    args=[
                        "torchserve", "--start", "--model-store",
                        "/home/model-server/shared/model-store/",
                        "--ts-config",
                        "/home/model-server/shared/config/config.properties"
                    ],
                    image_pull_policy="Always",
                    ports=[
                        client.V1ContainerPort(name="ts",
                                               container_port=args.pred_port),
                        client.V1ContainerPort(
                            name="ts-management",
                            container_port=args.manage_port),
                        client.V1ContainerPort(name="ts-metrics",
                                               container_port=args.metric_port)
                    ],
                    volume_mounts=[
                        client.V1VolumeMount(
                            name="persistent-storage",
                            mount_path="/home/model-server/shared/")
                    ],
                    resources=client.V1ResourceRequirements(
                        limits={
                            "cpu": 1,
                            "memory": "4Gi",
                            "nvidia.com/gpu": 0
                        },
                        requests={
                            "cpu": 1,
                            "memory": "1Gi",
                        }))
            ]))
    deployment = client.V1Deployment(
        api_version="apps/v1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name="torchserve",
                                     labels={
                                         "app": "torchserve",
                                         "app.kubernetes.io/version": version
                                     }),
        spec=client.V1DeploymentSpec(
            replicas=2,
            selector=client.V1LabelSelector(
                match_labels={"app": "torchserve"}),
            strategy=client.V1DeploymentStrategy(
                type="RollingUpdate",
                rolling_update=client.V1RollingUpdateDeployment(
                    max_surge=1,
                    max_unavailable=1,
                )),
            template=template))

    k8s_core_v1 = client.CoreV1Api()
    service = client.V1Service(
        api_version="v1",
        kind="Service",
        metadata=client.V1ObjectMeta(name="torchserve",
                                     labels={"app": "torchserve"}),
        spec=client.V1ServiceSpec(
            type="LoadBalancer",
            selector={"app": "torchserve"},
            ports=[
                client.V1ServicePort(name="preds",
                                     port=args.pred_port,
                                     target_port="ts"),
                client.V1ServicePort(name="mdl",
                                     port=args.manage_port,
                                     target_port="ts-management"),
                client.V1ServicePort(name="metrics",
                                     port=args.metric_port,
                                     target_port="ts-metrics")
            ]))

    try:
        k8s_apps_v1.create_namespaced_deployment(body=deployment,
                                                 namespace=args.namespace)
        print("[+] Deployment created")
    except client.rest.ApiException:
        k8s_apps_v1.replace_namespaced_deployment(name="torchserve",
                                                  namespace=args.namespace,
                                                  body=deployment)
        print("[+] Deployment replaced")

    try:
        k8s_core_v1.create_namespaced_service(body=service,
                                              namespace=args.namespace)
        print("[+] Service created")
    except client.rest.ApiException:
        print("[+] Service already created")

    send_manage("Serving the Model!!!",
                "Served Model using torchserve in k8s!!!")
Example #9
def create_job(batch_api_instance, core_api_instance, id, USER, PY_FILE, PWD,
               settings):
    """
    Input: Needs an instance of the BatchV1Api and the CoreV1Api
           id           - id of task
           USER         - User/Owner of the task (so the executable files can be reached)
           PY_FILE, PWD - the ENV variables to be set for Jupyter to work as intended
           settings = (CPU_SHARE, MEM_SHARE, _) - how much CPU and RAM to use at most
    -----
    Create a Job from a Notebook-Container and add it to the cluster.
    The job is similarly structured like this YAML:
    ----------
    labels: id:<id>
    containers:
      - name: notebook-site
      - image: notebookserver:1.0
      - env:
          - name: PY_FILE
            value: <PY_FILE>
          - name: JUPYTER_PWD
            value: <PWD>
      - resources:
          limits:
            cpu: <CPU_SHARE>
            memory: <MEM_SHARE>
      - volumeMounts:
        - mountPath: "/data"
          subPath: "data"
          name: vol
        - mountPath: "/scripts"
          subPath: "internal/<USER>/<id>"
          name: vol
    volumes:
      - name: vol
      - persistentVolumeClaim:
        - claimName: <VOLUME_NAME>
    ----------
    """
    JOB_NAME = "notebook-%02d" % id
    VOLUME_NAME = "hostclaim"
    CPU_SHARE, MEM_SHARE, _ = settings

    # The place to mount the datasets
    data_mount = client.V1VolumeMount(mount_path="/data",
                                      sub_path="data",
                                      name="vol")
    # The place to mount the scripts
    script_mount = client.V1VolumeMount(mount_path="/scripts",
                                        sub_path="internal/%s/%d" % (USER, id),
                                        name="vol")
    # volume for datasets/scripts
    volume = client.V1Volume(
        name="vol",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name=VOLUME_NAME))
    # env-Variables
    file_env = client.V1EnvVar(name='PY_FILE', value=PY_FILE)
    pwd_env = client.V1EnvVar(name='JUPYTER_PWD', value=PWD)
    # Resources
    resources = client.V1ResourceRequirements(
        limits={"cpu": CPU_SHARE, "memory": MEM_SHARE},
        requests={"cpu": "0", "memory": "0"})
    # Container
    container = client.V1Container(name="notebook-site",
                                   image="notebookserver:1.0",
                                   env=[file_env, pwd_env],
                                   resources=resources,
                                   volume_mounts=[data_mount, script_mount])
    # Pod-Spec
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={"id": str(id)}),
        spec=client.V1PodSpec(restart_policy="Never",
                              volumes=[volume],
                              containers=[container]))
    # Job-Spec
    spec = client.V1JobSpec(template=template, backoff_limit=4)
    # Job-Object
    job = client.V1Job(api_version="batch/v1",
                       kind="Job",
                       metadata=client.V1ObjectMeta(name=JOB_NAME),
                       spec=spec)

    # Add Job to Cluster
    try:
        api_response = batch_api_instance.create_namespaced_job(
            body=job, namespace="default")
        logging.info("Job created. status='%s'" % str(api_response.status))
    except ApiException as e:
        logging.warning(
            "Exception when calling BatchV1Api->create_namespaced_job: %s\n" %
            e)

    # Create the service so the notebook becomes accessible
    create_service(core_api_instance, id)
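A hedged invocation sketch; the id, user and resource values are hypothetical, and create_service is assumed to be defined elsewhere in the module:

from kubernetes import client, config

config.load_kube_config()
create_job(batch_api_instance=client.BatchV1Api(),
           core_api_instance=client.CoreV1Api(),
           id=7,
           USER="alice",
           PY_FILE="train.py",
           PWD="notebook-password",
           settings=("2", "4Gi", None))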
Example #10
def submit_job(args, command=None):
    container_image = args.container
    container_name = args.name

    body = client.V1Job(api_version="batch/v1", kind="Job", metadata=client.V1ObjectMeta(name=container_name))
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()

    labels = {
        'hugin-job': "1",
        'hugin-job-name': f'{container_name}'
    }
    template.template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels)
    )

    tolerations = []
    env = []
    if args.environment:
        for env_spec in args.environment:
            env_name, env_value = env_spec.split("=", 1)
            env.append(client.V1EnvVar(name=env_name, value=env_value))

    container_args = dict(
        name=f"container-{container_name}",
        image=container_image,
        env=env,
    )

    if args.gpu:
        tolerations.append(client.V1Toleration(
            key='nvidia.com/gpu', operator='Exists', effect='NoSchedule'))
        container_args['resources'] = client.V1ResourceRequirements(limits={"nvidia.com/gpu": 1})
    if command or args.command:
        container_args['command'] = command if command else args.command

    container = client.V1Container(**container_args)
    pull_secrets = []
    if args.pull_secret is not None:
        pull_secrets.append(client.V1LocalObjectReference(name=args.pull_secret))
    pod_args = dict(containers=[container],
                    restart_policy='Never',
                    image_pull_secrets=pull_secrets)


    if tolerations:
        pod_args['tolerations'] = tolerations

    if args.node_selector is not None:
        parts = args.node_selector.split("=", 1)
        if len(parts) == 2:
            affinity = client.V1Affinity(
                node_affinity=client.V1NodeAffinity(
                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                        node_selector_terms=[client.V1NodeSelectorTerm(
                            match_expressions=[client.V1NodeSelectorRequirement(
                                key=parts[0], operator='In', values=[parts[1]])]
                        )]
                    )
                )
            )
            pod_args['affinity'] = affinity

    template.template.spec = client.V1PodSpec(**pod_args)
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=1800, template=template.template)
    try:
        api_response = batch_v1.create_namespaced_job("default", body, pretty=True)
        #print (api_response)
    except client.exceptions.ApiException as e:
        logging.critical(f"Failed to start job: {e.reason}")
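submit_job relies on a module-level batch_v1 client; a minimal setup sketch:

from kubernetes import client, config

config.load_kube_config()
batch_v1 = client.BatchV1Api()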
Example #11
    def _create_deployment_object(self):
        """Creates the deployment object for the grader service using environment variables

        Returns:
          V1Deployment: a valid kubernetes deployment object
        """
        # Configure the pod template container
        # Volumes to mount as subPaths of PV
        sub_path_grader_home = str(self.course_dir.parent).strip("/")
        sub_path_exchange = str(self.exchange_dir.relative_to(EXCHANGE_MNT_ROOT))
        # define the container to launch
        container = client.V1Container(
            name="grader-notebook",
            image=GRADER_IMAGE_NAME,
            image_pull_policy=GRADER_IMAGE_PULL_POLICY,
            command=["start-notebook.sh", f"--group=formgrade-{self.course_id}"],
            ports=[client.V1ContainerPort(container_port=8888)],
            working_dir=f"/home/{self.grader_name}",
            resources=client.V1ResourceRequirements(
                requests={
                    "cpu": GRADER_REQUESTS_CPU,
                    "memory": GRADER_REQUESTS_MEM,
                },
                limits={
                    "cpu": GRADER_LIMITS_CPU,
                    "memory": GRADER_LIMITS_MEM,
                },
            ),
            security_context=client.V1SecurityContext(allow_privilege_escalation=False),
            env=[
                client.V1EnvVar(name="JUPYTERHUB_SERVICE_NAME", value=self.course_id),
                client.V1EnvVar(
                    name="JUPYTERHUB_SERVICE_URL",
                    value=f"http://{self.course_id}.{NAMESPACE}.svc.cluster.local:8888",
                ),
                client.V1EnvVar(name="JUPYTERHUB_API_TOKEN", value=self.grader_token),
                # we're using the K8s Service name 'hub' (defined in the jhub helm chart)
                # to connect from our grader-notebooks
                client.V1EnvVar(name="JUPYTERHUB_API_URL", value=JUPYTERHUB_API_URL),
                client.V1EnvVar(name="JUPYTERHUB_BASE_URL", value=JUPYTERHUB_BASE_URL),
                client.V1EnvVar(
                    name="JUPYTERHUB_SERVICE_PREFIX",
                    value=f"/services/{self.course_id}/",
                ),
                client.V1EnvVar(
                    name="JUPYTERHUB_CLIENT_ID", value=f"service-{self.course_id}"
                ),
                client.V1EnvVar(name="JUPYTERHUB_USER", value=self.grader_name),
                client.V1EnvVar(name="NAMESPACE", value=str(NAMESPACE)),
                client.V1EnvVar(name="NB_UID", value=str(NB_UID)),
                client.V1EnvVar(name="NB_GID", value=str(NB_GID)),
                client.V1EnvVar(name="NB_USER", value=self.grader_name),
            ],
            volume_mounts=[
                client.V1VolumeMount(
                    mount_path=f"/home/{self.grader_name}",
                    name=GRADER_PVC,
                    sub_path=sub_path_grader_home,
                ),
                client.V1VolumeMount(
                    mount_path="/srv/nbgrader/exchange",
                    name=GRADER_EXCHANGE_SHARED_PVC,
                    sub_path=sub_path_exchange,
                ),
            ],
        )
        # Create and configure a spec section
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                labels={"component": self.grader_name, "app": "illumidesk"}
            ),
            spec=client.V1PodSpec(
                containers=[container],
                security_context=client.V1PodSecurityContext(run_as_user=0),
                volumes=[
                    client.V1Volume(
                        name=GRADER_PVC,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=GRADER_PVC
                        ),
                    ),
                    client.V1Volume(
                        name=GRADER_EXCHANGE_SHARED_PVC,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=GRADER_EXCHANGE_SHARED_PVC
                        ),
                    ),
                ],
            ),
        )
        # Create the specification of deployment
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector={"matchLabels": {"component": self.grader_name}},
        )
        # Instantiate the deployment object
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=self.grader_name),
            spec=spec,
        )

        return deployment
Example #12
def configure_workflow_job(
    namespace: str,
    project_name: str,
    project_repo_url: str,
    project_repo_branch: str = "master",
    retries: int = 2,
    image: str = BODYWORK_DOCKER_IMAGE,
) -> k8s.V1Job:
    """Configure a Bodywork workflow execution job.

    :param namespace: The namespace to deploy the job to.
    :param project_name: The name of the Bodywork project that the stage
        belongs to.
    :param project_repo_url: The URL for the Bodywork project Git
        repository.
    :param project_repo_branch: The Bodywork project Git repository
        branch to use, defaults to 'master'.
    :param retries: Number of times to retry running the stage to
        completion (if necessary), defaults to 2.
    :param image: Docker image to use for running the stage within,
        defaults to BODYWORK_DOCKER_IMAGE.
    :return: A configured k8s job object.
    """
    vcs_env_vars = [
        k8s.V1EnvVar(
            name=SSH_PRIVATE_KEY_ENV_VAR,
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key=SSH_PRIVATE_KEY_ENV_VAR,
                    name=SSH_SECRET_NAME,
                    optional=True)),
        )
    ]
    container = k8s.V1Container(
        name="bodywork",
        image=image,
        image_pull_policy="Always",
        env=vcs_env_vars,
        command=["bodywork", "workflow"],
        args=[
            f"--namespace={namespace}", project_repo_url, project_repo_branch
        ],
    )
    pod_spec = k8s.V1PodSpec(
        service_account_name=BODYWORK_WORKFLOW_SERVICE_ACCOUNT,
        containers=[container],
        restart_policy="Never",
    )
    pod_template_spec = k8s.V1PodTemplateSpec(spec=pod_spec)
    job_spec = k8s.V1JobSpec(
        template=pod_template_spec,
        completions=1,
        backoff_limit=retries,
        ttl_seconds_after_finished=BODYWORK_WORKFLOW_JOB_TIME_TO_LIVE,
    )
    job = k8s.V1Job(
        metadata=k8s.V1ObjectMeta(
            name=make_valid_k8s_name(project_name),
            namespace=namespace,
            labels={"app": "bodywork"},
        ),
        spec=job_spec,
    )
    return job
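A hedged submission sketch for the configured job; the namespace and repository values are hypothetical:

job = configure_workflow_job(namespace="bodywork-dev",
                             project_name="my-project",
                             project_repo_url="git@github.com:org/my-project.git")
k8s.BatchV1Api().create_namespaced_job(namespace="bodywork-dev", body=job)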
Example #13
def create_job_object(data):
    meta = client.V1ObjectMeta(name=data["name"], namespace=data["namespace"])

    labels = None
    if "labels" in data:
        labels_array = data["labels"].split(',')
        labels = dict(s.split('=') for s in labels_array)
        meta.labels = labels

    annotations = None
    if "annotations" in data:
        annotations_array = data["annotations"].split(',')
        annotations = dict(s.split('=') for s in annotations_array)
        meta.annotations = annotations

    envs = []
    if "environments" in data:
        envs_array = data["environments"].splitlines()
        tmp_envs = dict(s.split('=', 1) for s in envs_array)
        for key in tmp_envs:
            envs.append(client.V1EnvVar(name=key, value=tmp_envs[key]))

    if "environments_secrets" in data:
        envs_array = data["environments_secrets"].splitlines()
        tmp_envs = dict(s.split('=', 1) for s in envs_array)

        for key in tmp_envs:

            if (":" in tmp_envs[key]):
                # passing secret env
                value = tmp_envs[key]
                secrets = value.split(':')
                secrect_key = secrets[1]
                secrect_name = secrets[0]

                envs.append(
                    client.V1EnvVar(
                        name=key,
                        value="",
                        value_from=client.V1EnvVarSource(
                            secret_key_ref=client.V1SecretKeySelector(
                                key=secrect_key, name=secrect_name))))

    container = client.V1Container(name=data["container_name"],
                                   image=data["container_image"],
                                   image_pull_policy=data["image_pull_policy"])

    if "container_command" in data:
        container.command = data["container_command"].split(' ')

    if "container_args" in data:
        args_array = data["container_args"].splitlines()
        container.args = args_array

    if "resources_requests" in data:
        resources_array = data["resources_requests"].split(",")
        tmp = dict(s.split('=', 1) for s in resources_array)
        container.resources = client.V1ResourceRequirements(requests=tmp)

    if "volume_mounts" in data:
        volumes_array = data["volume_mounts"].splitlines()
        tmp = dict(s.split('=', 1) for s in volumes_array)

        mounts = []
        for key in tmp:
            mounts.append(client.V1VolumeMount(name=key, mount_path=tmp[key]))

        container.volume_mounts = mounts

    container.env = envs

    template_spec = client.V1PodSpec(containers=[container],
                                     restart_policy=data["job_restart_policy"])

    if "volumes" in data:
        volumes_data = yaml.full_load(data["volumes"])
        volumes = []

        if isinstance(volumes_data, list):
            for volume_data in volumes_data:
                volume = common.create_volume(volume_data)

                if volume:
                    volumes.append(volume)
        else:
            volume = common.create_volume(volumes_data)

            if volume:
                volumes.append(volume)

        template_spec.volumes = volumes

    if "image_pull_secrets" in data:
        images_array = data["image_pull_secrets"].split(",")
        images = []
        for image in images_array:
            images.append(client.V1LocalObjectReference(name=image))

        template_spec.image_pull_secrets = images

    if "tolerations" in data:
        tolerations_data = yaml.full_load(data["tolerations"])
        tolerations = []
        for toleration_data in tolerations_data:
            toleration = common.create_toleration(toleration_data)

            if toleration:
                tolerations.append(toleration)

        template_spec.tolerations = tolerations

    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(
            name=data["name"],
            labels=labels,
            annotations=annotations,
        ),
        spec=template_spec)

    spec = client.V1JobSpec(template=template)

    if "completions" in data:
        spec.completions = int(data["completions"])
    if "selectors" in data:
        selectors_array = data["selectors"].split(',')
        selectors = dict(s.split('=') for s in selectors_array)
        spec.selector = selectors
    if "node_selector" in data:
        node_selectors_array = data["node_selector"].split(',')
        node_selectors = dict(s.split('=') for s in node_selectors_array)
        spec.nodeSelector = node_selectors
    if "parallelism" in data:
        spec.parallelism = int(data["parallelism"])
    if "active_deadline_seconds" in data:
        spec.active_deadline_seconds = int(data["active_deadline_seconds"])
    if "backoff_limit" in data:
        spec.backoff_limit = int(data["backoff_limit"])

    job = client.V1Job(api_version=data["api_version"],
                       kind='Job',
                       metadata=meta,
                       spec=spec)

    return job
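A usage sketch showing the minimal keys create_job_object reads unconditionally; everything else in data is optional:

data = {
    "name": "demo-job",
    "namespace": "default",
    "container_name": "main",
    "container_image": "busybox:1.36",
    "image_pull_policy": "IfNotPresent",
    "job_restart_policy": "Never",
    "api_version": "batch/v1",
}
job = create_job_object(data)
client.BatchV1Api().create_namespaced_job(namespace=data["namespace"], body=job)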
Example #14
def template(context):
    labels = {"app": context["name"]}

    # Create volume mount lists and populate them if they are declared in the command
    pod_spec_volume_mounts = []
    pod_spec_volumes = []

    if "volumeName" in context:
        # Create volume mounts
        if "subPath" in context:
            pod_spec_volume_mounts = [
                client.V1VolumeMount(name=context["volumeName"],
                                     mount_path=context["mountPath"],
                                     sub_path=context["subPath"])
            ]
        else:
            pod_spec_volume_mounts = [
                client.V1VolumeMount(name=context["volumeName"],
                                     mount_path=context["mountPath"])
            ]

        # Create volumes
        pod_spec_volumes = [
            client.V1Volume(name=context["volumeName"],
                            nfs=client.V1NFSVolumeSource(
                                path=context["nfsPath"],
                                server=context["nfsServer"]))
        ]

    pod_init_containers = []
    if "vpn_init" in context:
        for key, val in dict(
                zip(context["vpn_init"].split(),
                    context["vpn_cmds"].split(';'))).items():
            pod_init_containers.append(
                client.V1Container(name=context["name"] + "-" + key,
                                   image=context["image"],
                                   command=val.split(),
                                   volume_mounts=pod_spec_volume_mounts))

    # Create Environment variable list and populate if it is declared in the command
    env_list = []
    if "env" in context:
        envs = dict(zip(context["env"].split(), context["vals"].split()))

        for key, val in envs.items():
            env_var = client.V1EnvVar(name=key, value=val)
            env_list.append(env_var)

    args = []
    if "attack_args" in context:
        # use the split list directly; append() would nest a list inside args
        args = context["attack_args"].split()

    # Define the template specification
    template_spec = client.V1PodSpec(
        containers=[
            client.V1Container(
                name=context["name"],
                image=context["image"],
                env=env_list,
                args=args,
                security_context=client.V1SecurityContext(privileged=True),
                volume_mounts=pod_spec_volume_mounts)
        ],
        init_containers=pod_init_containers,
        volumes=pod_spec_volumes)

    # Create dictionary for network attachment definition
    # This is required in a dictionary format for the template.metadata.annotations field
    net_dict = {}
    net_dict[context["netkey"]] = ', '.join(context["netval"].split())

    # Return deployment specification and tie together all the above components
    return client.V1Deployment(
        api_version="extensions/v1beta1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name=context["name"]),
        spec=client.V1DeploymentSpec(
            replicas=int(context["replicas"]),
            selector=client.V1LabelSelector(match_labels=labels),
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels=labels,
                                             annotations=net_dict),
                spec=template_spec),
        ),
    )
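A hedged context sketch for template(); the network-attachment key follows the Multus annotation convention and all values are hypothetical. Note the api_version would need to change to apps/v1 on clusters newer than 1.16.

context = {
    "name": "demo",
    "image": "busybox:1.36",
    "replicas": "2",
    "netkey": "k8s.v1.cni.cncf.io/networks",  # hypothetical annotation key
    "netval": "net-a net-b",
}
deployment = template(context)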
Example #15
    def deployment_object(self, instance_uuid, cnf_yaml, service_uuid, vim_uuid):
        """
        CNF modeling method. This build a deployment object in kubernetes
        instance_uuid: k8s deployment name
        cnf_yaml: CNF Descriptor in yaml format
        """
        t0 = time.time()
        LOG.debug("CNFD: {}".format(cnf_yaml))
        container_list = []
        pod_volume_list = []
        deployment_k8s = None
        privileged = False
        node_selector = {}
        host_network = False
        if "cloudnative_deployment_units" in cnf_yaml:
            cdu = cnf_yaml.get('cloudnative_deployment_units')
            for cdu_obj in cdu:
                env_vars = env_from = cpu = memory = huge_pages = gpu = sr_iov = resources = volume_mounts = None
                port_list = []
                environment = []
                capabilities_list = []
                cdu_id = cdu_obj.get('id')
                image = cdu_obj.get('image')
                cdu_conex = cdu_obj.get('connection_points')
                container_name = cdu_id
                config_map_id = cdu_id
                if cdu_obj.get('parameters'):
                    env_vars = cdu_obj['parameters'].get('env')
                    volume_mounts = cdu_obj['parameters'].get('volume_mounts')
                    capabilities_list = cdu_obj['parameters'].get('capabilities')
                    if cdu_obj['parameters'].get('privileged'):
                        privileged = cdu_obj['parameters'].get('privileged')
                    if cdu_obj['parameters'].get('host_network'):
                        host_network = cdu_obj['parameters']['host_network']
                    if not isinstance(capabilities_list, list):
                        capabilities_list = []
                    if cdu_obj['parameters'].get('node_selector'):
                        node_selector = cdu_obj['parameters']['node_selector']
                if cdu_obj.get('resource_requirements'):
                    gpu = cdu_obj['resource_requirements'].get('gpu')
                    cpu = cdu_obj['resource_requirements'].get('cpu')
                    memory = cdu_obj['resource_requirements'].get('memory')
                    sr_iov = cdu_obj['resource_requirements'].get('sr-iov')
                    huge_pages = cdu_obj['resource_requirements'].get('huge-pages')
                if cdu_conex:
                    for po in cdu_conex:
                        port = po.get('port')
                        port_name = po.get('id')
                        protocol = "TCP"
                        if po.get("protocol"):
                            protocol = po["protocol"]
                        port_list.append(client.V1ContainerPort(container_port = port, name = port_name, protocol=protocol))

                limits = {}
                requests = {}
                if gpu:
                    LOG.debug("Features requested: {}".format(gpu))
                    # gpu_type can be amd or nvidia
                    for gpu_type, amount in gpu.items():
                        limits["{}.com/gpu".format(gpu_type)] = amount
                if cpu:
                    # TODO
                    pass
                if memory:
                    # TODO
                    pass               
                if sr_iov:
                    # TODO
                    pass                  
                if huge_pages:
                    # TODO
                    pass  
                
                resources = client.V1ResourceRequirements(limits=limits, requests=requests)             

                # Environment variables from the descriptor (with a default if none are given)
                if not env_vars:
                    env_vars = {"sonata": "rules"}
                LOG.debug("Configmap: {}".format(config_map_id))
                KubernetesWrapperEngine.create_configmap(self, config_map_id, instance_uuid, env_vars, service_uuid,
                                                         vim_uuid, namespace="default")
                env_from = client.V1EnvFromSource(config_map_ref = client.V1ConfigMapEnvSource(name = config_map_id, 
                                                  optional = False))

                # Default static environment variables
                environment.append(client.V1EnvVar(name="instance_uuid", value=instance_uuid))
                environment.append(client.V1EnvVar(name="service_uuid", value=service_uuid))
                environment.append(client.V1EnvVar(name="container_name", value=container_name))
                environment.append(client.V1EnvVar(name="vendor", value=KubernetesWrapperEngine.normalize(self, cnf_yaml.get('vendor'))))
                environment.append(client.V1EnvVar(name="name", value=KubernetesWrapperEngine.normalize(self, cnf_yaml.get('name'))))
                environment.append(client.V1EnvVar(name="version", value=KubernetesWrapperEngine.normalize(self, cnf_yaml.get('version'))))

                image_pull_policy = KubernetesWrapperEngine.check_connection(self)
                
                # Volume mounts
                container_volume_mount_list = []
                if volume_mounts:
                    LOG.debug("volume mounts: {}".format(volume_mounts))
                    # Create the specification of volumes
                    for volume_mounts_item in volume_mounts:
                        if volume_mounts_item.get('id') and volume_mounts_item.get('location'):
                            if volume_mounts_item.get('persistent'):
                                volumes = client.V1Volume(name=volume_mounts_item['id'], 
                                                          host_path=client.V1HostPathVolumeSource(path='/mnt/data', type='DirectoryOrCreate' ))
                            else:
                                volumes = client.V1Volume(name=volume_mounts_item['id'], 
                                                          empty_dir=client.V1EmptyDirVolumeSource(medium='' ))
                            if volumes not in pod_volume_list:
                                pod_volume_list.append(volumes)
                            container_volume_mount = client.V1VolumeMount(name=volume_mounts_item['id'], mount_path=volume_mounts_item['location'] )
                            container_volume_mount_list.append(container_volume_mount)

                LOG.debug("Security capabilities: {}, privileged: {} applied to {}".format(capabilities_list, privileged, container_name))
                sec_context = client.V1SecurityContext(privileged=privileged, capabilities=client.V1Capabilities(add=capabilities_list))

                # Configure the Pod template container
                container = client.V1Container(
                    env = environment,
                    name = container_name,
                    resources = resources,
                    image = image,
                    image_pull_policy = image_pull_policy,
                    ports = port_list,
                    env_from = [env_from],
                    volume_mounts = container_volume_mount_list,
                    security_context=sec_context)
                container_list.append(container)
        else:
            return deployment_k8s

        # Create and configure a spec section
        deployment_label =  ("{}-{}-{}-{}".format(cnf_yaml.get("vendor"), cnf_yaml.get("name"), cnf_yaml.get("version"),
                             instance_uuid.split("-")[0])).replace(".", "-")
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={'deployment': deployment_label,
                                                 'instance_uuid': cnf_yaml['instance_uuid'],
                                                 'service_uuid': service_uuid,
                                                 'sp': "sonata",
                                                 'descriptor_uuid': cnf_yaml['uuid']} 
                                                 ),
            spec=client.V1PodSpec(containers=container_list, volumes=pod_volume_list, node_selector=node_selector, 
                                  host_network=host_network))

        selector=client.V1LabelSelector(match_labels={'deployment': deployment_label,
                                                 'instance_uuid': cnf_yaml['instance_uuid'],
                                                 'service_uuid': service_uuid,
                                                 'sp': "sonata",
                                                 'descriptor_uuid': cnf_yaml['uuid']} 
                                                 )

        # Create the specification of deployment
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=selector)
        # Instantiate the deployment object
        deployment_k8s = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=deployment_label,
                                         labels={'deployment': deployment_label,
                                                 'instance_uuid': cnf_yaml['instance_uuid'],
                                                 'service_uuid': service_uuid,
                                                 'sp': "sonata",
                                                 'descriptor_uuid': cnf_yaml['uuid']} 
                                                 ),
                                         spec=spec)
        LOG.info("Deployment object: {}".format(deployment_k8s))
        LOG.info("CreatingDeploymentObject-time: {} ms".format(int((time.time() - t0)* 1000)))
        return deployment_k8s
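
Submitting the returned object is then a single call; a sketch, assuming `engine` is a `KubernetesWrapperEngine` instance and the usual UUID arguments are in scope:

from kubernetes import client, config

config.load_incluster_config()  # assumption: the wrapper runs inside the cluster
deployment = engine.deployment_object(instance_uuid, cnf_yaml, service_uuid, vim_uuid)
if deployment is not None:  # None means the CNFD had no cloudnative_deployment_units
    client.AppsV1Api().create_namespaced_deployment(namespace="default", body=deployment)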
Example #16
    def construct_job(self, run):
        check.inst_param(run, 'run', PipelineRun)

        dagster_labels = {
            'app.kubernetes.io/name': 'dagster',
            'app.kubernetes.io/instance': 'dagster',
            'app.kubernetes.io/version': dagster_version,
        }

        job_container = client.V1Container(
            name='dagster-job-%s' % run.run_id,
            image=self.job_image,
            command=['dagster-graphql'],
            args=[
                '-p',
                'startPipelineExecutionForCreatedRun',
                '-v',
                json.dumps({'runId': run.run_id}),
            ],
            image_pull_policy=self.image_pull_policy,
            env=[
                client.V1EnvVar(
                    name='DAGSTER_PG_PASSWORD',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name=self.postgres_password_secret,
                            key='postgresql-password')),
                ),
            ],
            env_from=self.env_from_sources,
            volume_mounts=[
                client.V1VolumeMount(
                    name='dagster-instance',
                    mount_path='{dagster_home}/dagster.yaml'.format(
                        dagster_home=self.dagster_home),
                    sub_path='dagster.yaml',
                )
            ],
        )

        config_map_volume = client.V1Volume(
            name='dagster-instance',
            config_map=client.V1ConfigMapVolumeSource(
                name=self.instance_config_map),
        )

        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                name='dagster-job-pod-%s' % run.run_id,
                labels=dagster_labels,
            ),
            spec=client.V1PodSpec(
                image_pull_secrets=self.image_pull_secrets,
                service_account_name=self.service_account_name,
                restart_policy='Never',
                containers=[job_container],
                volumes=[config_map_volume],
            ),
        )

        job = client.V1Job(
            api_version='batch/v1',
            kind='Job',
            metadata=client.V1ObjectMeta(name='dagster-job-%s' % run.run_id,
                                         labels=dagster_labels),
            spec=client.V1JobSpec(
                template=template,
                backoff_limit=BACKOFF_LIMIT,
                ttl_seconds_after_finished=TTL_SECONDS_AFTER_FINISHED,
            ),
        )
        return job
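
A sketch of launching the returned Job, with `launcher` and `run` standing in for an instance of the class above and a `PipelineRun`:

from kubernetes import client, config

config.load_kube_config()
job = launcher.construct_job(run)
client.BatchV1Api().create_namespaced_job(namespace="default", body=job)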
Example #17
def create_job_object(kJob, kImage, kVname, kVvalue, kimagepullpolicy,
                      kimagepullsecret, krestartpolicy, kbackofflimit,
                      khostpath, kvolname, kvolpath, kpvolclaim, kcommands,
                      kargs):
    # This creates a Job object dynamically but supports only limited parameters.
    # If you need any characteristics not supported here, use a YAML manifest.
    #

    # Configure environment variables
    env_list = []
    # zip pairs names with values positionally; list.index() breaks on duplicate names
    for key, value in zip(kVname, kVvalue):
        v1_envvar = client.V1EnvVar(name=key, value=value)
        env_list.append(v1_envvar)

    # Configure Volume Devices and Mounts
    volnames_list = []
    if kvolname != 'none':
        volname = client.V1VolumeMount(name=kvolname, mount_path=kvolpath)
        volnames_list.append(volname)

    # Configure Volumes list
    vol_list = []
    if kvolname != 'none':
        if kpvolclaim != 'none':
            vol = client.V1Volume(
                name=kvolname,
                persistent_volume_claim=client.
                V1PersistentVolumeClaimVolumeSource(claim_name=kpvolclaim))
        else:
            vol = client.V1Volume(name=kvolname,
                                  host_path=client.V1HostPathVolumeSource(
                                      path=khostpath, type='Directory'))
        vol_list.append(vol)

    # Configure Pod template container
    container = client.V1Container(
        name="ctmjob",
        image=kImage,
        image_pull_policy=kimagepullpolicy,
        env=env_list,
        command=kcommands if len(kcommands) > 0 else None,
        args=kargs if len(kargs) > 0 else None,
        volume_mounts=volnames_list)

    # Configure Image Pull Secret(s)
    imagesecrets = []
    isecret = client.V1LocalObjectReference(name=kimagepullsecret)
    imagesecrets.append(isecret)

    # Create and configure a spec section
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(name=kJob),
        spec=client.V1PodSpec(containers=[container],
                              image_pull_secrets=imagesecrets,
                              restart_policy=krestartpolicy,
                              volumes=vol_list))

    # Create the specification of deployment
    spec = client.V1JobSpec(template=template, backoff_limit=kbackofflimit)

    # Instantiate the job object
    job = client.V1Job(api_version="batch/v1",
                       kind="Job",
                       metadata=client.V1ObjectMeta(name=kJob),
                       spec=spec)

    return job
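
A hypothetical invocation; note that `kvolname='none'` / `kpvolclaim='none'` are the function's sentinels for skipping volumes, so the values below deliberately exercise the host-path branch:

from kubernetes import client

job = create_job_object(
    kJob="ctmjob-demo", kImage="busybox:1.36",
    kVname=["GREETING"], kVvalue=["hello"],
    kimagepullpolicy="IfNotPresent", kimagepullsecret="regcred",
    krestartpolicy="Never", kbackofflimit=2,
    khostpath="/opt/data", kvolname="data", kvolpath="/data",
    kpvolclaim="none",
    kcommands=["sh", "-c"], kargs=["echo $GREETING"],
)
client.BatchV1Api().create_namespaced_job("default", job)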
Example #18
    def _build_definition(machine, config_map):
        volume_mounts = []
        if config_map:
            # Define volume mounts for hostlab if a ConfigMap is defined.
            volume_mounts.append(
                client.V1VolumeMount(name="hostlab",
                                     mount_path="/tmp/kathara"))

        if Setting.get_instance().host_shared:
            volume_mounts.append(
                client.V1VolumeMount(name="shared", mount_path="/shared"))

        # Machine must be executed in privileged mode to run sysctls.
        security_context = client.V1SecurityContext(privileged=True)

        ports_info = machine.get_ports()
        container_ports = None
        if ports_info:
            container_ports = []
            for (host_port, protocol), guest_port in ports_info.items():
                container_ports.append(
                    client.V1ContainerPort(name=str(uuid.uuid4()).replace(
                        '-', '')[0:15],
                                           container_port=guest_port,
                                           host_port=host_port,
                                           protocol=protocol.upper()))

        resources = None
        memory = machine.get_mem()
        cpus = machine.get_cpu(multiplier=1000)
        if memory or cpus:
            limits = dict()
            if memory:
                limits["memory"] = memory.upper()
            if cpus:
                limits["cpu"] = "%dm" % cpus

            resources = client.V1ResourceRequirements(limits=limits)

        # postStart lifecycle hook is launched asynchronously by k8s master when the main container is Ready
        # On Ready state, the pod has volumes and network interfaces up, so this hook is used
        # to execute custom commands coming from .startup file and "exec" option
        # Build the final startup commands string
        sysctl_commands = "; ".join([
            "sysctl -w -q %s=%d" % item
            for item in machine.meta["sysctls"].items()
        ])
        startup_commands_string = "; ".join(STARTUP_COMMANDS) \
            .format(machine_name=machine.name,
                    sysctl_commands=sysctl_commands,
                    machine_commands="; ".join(machine.startup_commands)
                    )

        post_start = client.V1Handler(_exec=client.V1ExecAction(command=[
            Setting.get_instance().device_shell, "-c", startup_commands_string
        ]))
        lifecycle = client.V1Lifecycle(post_start=post_start)

        env = [
            client.V1EnvVar(
                "_MEGALOS_SHELL", machine.meta["shell"] if "shell"
                in machine.meta else Setting.get_instance().device_shell)
        ]

        container_definition = client.V1Container(
            name=machine.meta['real_name'],
            image=machine.get_image(),
            lifecycle=lifecycle,
            stdin=True,
            tty=True,
            image_pull_policy=Setting.get_instance().image_pull_policy,
            ports=container_ports,
            resources=resources,
            volume_mounts=volume_mounts,
            security_context=security_context,
            env=env)

        pod_annotations = {}
        network_interfaces = []
        for (idx, machine_link) in machine.interfaces.items():
            network_interfaces.append({
                "name":
                machine_link.api_object["metadata"]["name"],
                "namespace":
                machine.lab.folder_hash,
                "interface":
                "net%d" % idx
            })
        pod_annotations["k8s.v1.cni.cncf.io/networks"] = json.dumps(
            network_interfaces)

        # Create labels (so Deployment can match them)
        pod_labels = {"name": machine.name, "app": "kathara"}

        pod_metadata = client.V1ObjectMeta(deletion_grace_period_seconds=0,
                                           annotations=pod_annotations,
                                           labels=pod_labels)

        # Add fake DNS just to override k8s one
        dns_config = client.V1PodDNSConfig(nameservers=["127.0.0.1"])

        volumes = []
        if config_map:
            # Hostlab is the lab base64 encoded .tar.gz of the machine files, deployed as a ConfigMap in the cluster
            # The base64 file is mounted into /tmp and it's extracted by the postStart hook
            volumes.append(
                client.V1Volume(name="hostlab",
                                config_map=client.V1ConfigMapVolumeSource(
                                    name=config_map.metadata.name)))

        # Container /shared mounts in /home/shared folder
        if Setting.get_instance().host_shared:
            volumes.append(
                client.V1Volume(name="shared",
                                host_path=client.V1HostPathVolumeSource(
                                    path='/home/shared',
                                    type='DirectoryOrCreate')))

        pod_spec = client.V1PodSpec(containers=[container_definition],
                                    hostname=machine.meta['real_name'],
                                    dns_policy="None",
                                    dns_config=dns_config,
                                    volumes=volumes)

        pod_template = client.V1PodTemplateSpec(metadata=pod_metadata,
                                                spec=pod_spec)
        selector_rules = client.V1LabelSelector(match_labels=pod_labels)
        deployment_spec = client.V1DeploymentSpec(replicas=1,
                                                  template=pod_template,
                                                  selector=selector_rules)
        deployment_metadata = client.V1ObjectMeta(
            name=machine.meta['real_name'], labels=pod_labels)

        return client.V1Deployment(api_version="apps/v1",
                                   kind="Deployment",
                                   metadata=deployment_metadata,
                                   spec=deployment_spec)
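
Creating the device is then one API call; a sketch in which the namespace choice (the lab's folder hash, as used for the network annotations above) is an assumption:

from kubernetes import client

deployment = _build_definition(machine, config_map)  # 'machine'/'config_map' as above
client.AppsV1Api().create_namespaced_deployment(
    namespace=machine.lab.folder_hash, body=deployment)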
Example #19
    def __init__(self,
                 name=None,
                 selector=None,
                 labels=None,
                 image_metadata=None,
                 namespace='default',
                 create_in_cluster=False,
                 from_template=None):
        """
        Utility functions for kubernetes deployments.

        :param name: str, name of the deployment
        :param selector: Label selector for pods. Existing ReplicaSets whose pods are selected by
         this will be the ones affected by this deployment. It must match the pod template's labels
        :param labels: dict, dict of labels
        :param image_metadata: ImageMetadata
        :param namespace: str, name of the namespace
        :param create_in_cluster: bool, if True deployment is created in Kubernetes cluster
        :param from_template: str, deployment template, example:
               - https://kubernetes.io/docs/concepts/workloads/controllers/deployment/

        """

        self.namespace = namespace

        if (from_template is not None) and (name is not None or selector
                                            is not None or labels is not None
                                            or image_metadata is not None):
            raise ConuException(
                'from_template cannot be passed to constructor at the same time with'
                ' name, selector, labels or image_metadata')
        elif from_template is not None:
            self.body = yaml.safe_load(from_template)

            self.name = self.body['metadata']['name']

        elif (name is not None and selector is not None and labels is not None
              and image_metadata is not None):
            self.name = name
            self.pod = Pod.create(image_metadata)

            self.spec = client.V1DeploymentSpec(
                selector=client.V1LabelSelector(match_labels=selector),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels=selector),
                    spec=self.pod.spec))

            self.metadata = client.V1ObjectMeta(name=self.name,
                                                namespace=self.namespace,
                                                labels=labels)

            self.body = client.V1Deployment(spec=self.spec,
                                            metadata=self.metadata)
        else:
            raise ConuException(
                'to create deployment you need to specify template or'
                ' properties: name, selector, labels, image_metadata')

        self.api = get_apps_api()

        if create_in_cluster:
            self.create_in_cluster()
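
A sketch of the template-based construction path, assuming the surrounding class is named `Deployment` (only its `__init__` is shown above):

template = """
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hello
spec:
  replicas: 1
  selector:
    matchLabels: {app: hello}
  template:
    metadata:
      labels: {app: hello}
    spec:
      containers:
      - name: hello
        image: nginx:1.25
"""
d = Deployment(from_template=template, namespace="default")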
Example #20
 def export_deployment(self):
     # Configure the Pod template container
     volume_mounts = []
     containers = []
     volumes = []
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/docker/logs', name='logs'))
     volumes.append(
         client.V1Volume(name='logs',
                         host_path=client.V1HostPathVolumeSource(
                             path='/opt/logs', type='DirectoryOrCreate')))
     if self.mounts:
         for path in self.mounts:
             volume_mounts.append(
                 client.V1VolumeMount(mount_path=path,
                                      name=self.mounts[path]))
             volumes.append(
                 client.V1Volume(name=self.mounts[path],
                                 host_path=client.V1HostPathVolumeSource(
                                     path=path, type='DirectoryOrCreate')))
     liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                     tcp_socket=client.V1TCPSocketAction(
                                         port=int(self.container_port[0])))
     readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                      tcp_socket=client.V1TCPSocketAction(
                                          port=int(self.container_port[0])))
     if self.healthcheck:
         liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                         http_get=client.V1HTTPGetAction(
                                             path=self.healthcheck,
                                             port=int(
                                                 self.container_port[0])))
         readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                          http_get=client.V1HTTPGetAction(
                                              path=self.healthcheck,
                                              port=int(
                                                  self.container_port[0])))
     Env = [
         client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
         client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8'),
         client.V1EnvVar(name='POD_NAME',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='metadata.name'))),
         client.V1EnvVar(name='POD_IP',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='status.podIP'))),
     ]
     container = client.V1Container(
         name=self.dm_name,
         image=self.image,
         ports=[
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ],
         image_pull_policy='Always',
         env=Env,
         resources=client.V1ResourceRequirements(limits=self.re_limits,
                                                 requests=self.re_requests),
         volume_mounts=volume_mounts,
         liveness_probe=liveness_probe,
         readiness_probe=readiness_probe)
     containers.append(container)
     if self.sidecar:
         sidecar_container = client.V1Container(
             name='sidecar-%s' % self.dm_name,
             image=self.sidecar,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts)
         containers.append(sidecar_container)
     # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=containers,
             image_pull_secrets=[secrets],
             volumes=volumes,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='project',
                                     operator='In',
                                     values=['moji'])
                             ]),
                         weight=30),
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='deploy',
                                     operator='In',
                                     values=[self.dm_name])
                             ]),
                         weight=70)
                 ]))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
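
As in Example #14, the object targets the legacy `extensions/v1beta1` group, so posting it requires a correspondingly old client and cluster; a sketch with `exporter` standing in for an instance of the class above:

from kubernetes import client, config

config.load_kube_config()
deployment = exporter.export_deployment()
client.ExtensionsV1beta1Api().create_namespaced_deployment(
    namespace="default", body=deployment)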
Example #21
    def create(self):
        """
        Creates the services, the scheduler deployment and the workers deployment.
        WARNING: does not currently check if the cluster already exists...
        """
        # API handlers
        v1 = kube_client.CoreV1Api()
        apps_v1 = kube_client.AppsV1Api()

        # Services to be accessed outside of the cluster
        service_scheduler = kube_client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=kube_client.V1ObjectMeta(
                name=self.name_scheduler_service),
            spec=kube_client.V1ServiceSpec(
                type="NodePort",
                ports=[kube_client.V1ServicePort(port=8786)],
                selector={
                    "app": "dask-scheduler",
                    "user": self.cluster_id
                }))
        service_scheduler_dashboard = kube_client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=kube_client.V1ObjectMeta(
                name=self.name_scheduler_dashboard_service),
            spec=kube_client.V1ServiceSpec(
                type="NodePort",
                ports=[kube_client.V1ServicePort(port=8787)],
                selector={
                    "app": "dask-scheduler",
                    "user": self.cluster_id
                }))
        # Start the services
        service_scheduler_created = v1.create_namespaced_service(
            self.namespace, service_scheduler, pretty=True)

        service_scheduler_dashboard_created = v1.create_namespaced_service(
            self.namespace, service_scheduler_dashboard, pretty=True)
        dask_scheduler_ip_port_internal = f"{service_scheduler_created.spec.cluster_ip}:{service_scheduler_created.spec.ports[0].port}"

        dask_scheduler_external_port = service_scheduler_created.spec.ports[
            0].node_port
        dask_scheduler_dashboard_external_port = service_scheduler_dashboard_created.spec.ports[
            0].node_port

        # Deployments #
        scheduler_pod_spec = yaml.safe_load(self.scheduler_pod_spec)
        worker_pod_spec = yaml.safe_load(self.worker_pod_spec)

        # scheduler deployment
        labels = {"app": "dask-scheduler", "user": self.cluster_id}
        template = kube_client.V1PodTemplateSpec(
            metadata=kube_client.V1ObjectMeta(labels=labels),
            spec=kube_client.V1PodSpec(**scheduler_pod_spec))

        scheduler_deployment = kube_client.V1Deployment(
            api_version="apps/v1",
            metadata=kube_client.V1ObjectMeta(
                name=self.name_scheduler_deployment),
            spec=kube_client.V1DeploymentSpec(replicas=1,
                                              selector={"matchLabels": labels},
                                              template=template))

        # worker deployment
        labels = {"app": "dask-workers", "user": self.cluster_id}
        worker_pod_spec['containers'][0]['env'].append(
            kube_client.V1EnvVar(name="DASK_SCHEDULER_ADDRESS",
                                 value=dask_scheduler_ip_port_internal))
        template = kube_client.V1PodTemplateSpec(
            metadata=kube_client.V1ObjectMeta(labels=labels),
            spec=kube_client.V1PodSpec(**worker_pod_spec))

        worker_deployment = kube_client.V1Deployment(
            api_version="apps/v1",
            metadata=kube_client.V1ObjectMeta(
                name=self.name_workers_deployment),
            spec=kube_client.V1DeploymentSpec(replicas=0,
                                              selector={"matchLabels": labels},
                                              template=template))

        # Starts the deployments
        apps_v1.create_namespaced_deployment(self.namespace,
                                             scheduler_deployment,
                                             pretty=True)
        apps_v1.create_namespaced_deployment(self.namespace,
                                             worker_deployment,
                                             pretty=True)

        # Get the host IP of the scheduler
        v1 = kube_client.CoreV1Api()
        if self.minikube_ip:
            dask_scheduler_external_ip = self.minikube_ip
        else:
            while True:
                dask_scheduler_external_ip = v1.list_namespaced_pod(
                    self.namespace,
                    label_selector=f"user={self.cluster_id},app=dask-scheduler"
                ).items[0].status.host_ip
                if dask_scheduler_external_ip is not None:
                    break
                sleep(2)

        self._initialized = True
        self._scheduler = f"tcp://{dask_scheduler_external_ip}:{dask_scheduler_external_port}"
        self._dashboard = f"http://{dask_scheduler_external_ip}:{dask_scheduler_dashboard_external_port}"

        print(f"Scheduler: {self._scheduler}")
        print(f"Dashboard: {self._dashboard}")
Example #22
    def deploy_k8s_nfs(self) -> bool:
        """
        Deploy the NFS server in the Kubernetes orchestrator.
        """
        from kubernetes import client as k8sclient

        name = "nfs-server-{}".format(uuid.uuid4())
        container = k8sclient.V1Container(
            name=name,
            image="k8s.gcr.io/volume-nfs:0.8",
            ports=[
                k8sclient.V1ContainerPort(name="nfs",
                                          container_port=2049,
                                          protocol="TCP"),
                k8sclient.V1ContainerPort(name="rpcbind", container_port=111),
                k8sclient.V1ContainerPort(name="mountd", container_port=20048),
            ],
            volume_mounts=[
                k8sclient.V1VolumeMount(name='nfs-host-path',
                                        mount_path='/exports')
            ],
            security_context=k8sclient.V1SecurityContext(privileged=True))
        template = k8sclient.V1PodTemplateSpec(
            metadata=k8sclient.V1ObjectMeta(labels={'app': name}),
            spec=k8sclient.V1PodSpec(
                containers=[container],
                volumes=[
                    k8sclient.V1Volume(
                        name="nfs-host-path",
                        host_path=k8sclient.V1HostPathVolumeSource(
                            path='/tmp/nfsexports-{}'.format(uuid.uuid4())))
                ]))
        deployment_spec = k8sclient.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=k8sclient.V1LabelSelector(match_labels={'app': name}))

        deployment = k8sclient.V1Deployment(api_version='apps/v1',
                                            kind='Deployment',
                                            metadata=k8sclient.V1ObjectMeta(
                                                name=name,
                                                labels={'app': name}),
                                            spec=deployment_spec)

        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
        try:
            k8s_apps_v1_api_client.create_namespaced_deployment(
                self.params.namespace, deployment)
            self.params.name = name
        except k8sclient.rest.ApiException as e:
            screen.print("Got exception: %s\n while creating nfs-server", e)
            return False

        k8s_core_v1_api_client = k8sclient.CoreV1Api()

        svc_name = "nfs-service-{}".format(uuid.uuid4())
        service = k8sclient.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8sclient.V1ObjectMeta(name=svc_name),
            spec=k8sclient.V1ServiceSpec(selector={'app': self.params.name},
                                         ports=[
                                             k8sclient.V1ServicePort(
                                                 protocol='TCP',
                                                 port=2049,
                                                 target_port=2049)
                                         ]))

        try:
            svc_response = k8s_core_v1_api_client.create_namespaced_service(
                self.params.namespace, service)
            self.params.svc_name = svc_name
            self.params.server = svc_response.spec.cluster_ip
        except k8sclient.rest.ApiException as e:
            screen.print(
                "Got exception: %s\n while creating a service for nfs-server",
                e)
            return False

        return True
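
Client pods can then mount the exported share through the service IP captured in `self.params.server`; a sketch with an illustrative address:

from kubernetes import client as k8sclient

nfs_server_ip = "10.96.12.34"  # in practice: the cluster IP stored in params.server
volume = k8sclient.V1Volume(
    name="nfs-share",
    nfs=k8sclient.V1NFSVolumeSource(server=nfs_server_ip, path="/"))
mount = k8sclient.V1VolumeMount(name="nfs-share", mount_path="/mnt/checkpoints")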
Example #23
    def create_cronjob_object(self):
        container = client.V1Container(
            name=self.container_name,
            args=self.args,
            image=self.image,
            image_pull_policy='IfNotPresent',
            resources={"limits": {
                "cpu": "1",
                "memory": "512Mi"
            }},
            termination_message_policy='File',
            termination_message_path='/dev/termination-log',
            security_context={
                "allowPrivilegeEscalation": False,
                "capabilities": {},
                "privileged": False,
                "readOnlyRootFilesystem": False,
                "runAsNonRoot": False
            })

        job_template = client.V1beta1JobTemplateSpec(spec=client.V1JobSpec(
            backoff_limit=1,
            completions=1,
            parallelism=1,
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(annotations=self.annotations),
                spec=client.V1PodSpec(affinity={
                    "nodeAffinity": {
                        "requiredDuringSchedulingIgnoredDuringExecution": {
                            "nodeSelectorTerms": [{
                                "matchExpressions": [{
                                    "key":
                                    "node-type",
                                    "operator":
                                    "In",
                                    "values": [self.server_type]
                                }]
                            }]
                        }
                    }
                },
                                      containers=[container],
                                      dns_policy='ClusterFirst',
                                      image_pull_secrets=[{
                                          'name':
                                          self.image_pull_secrets
                                      }],
                                      restart_policy='Never',
                                      scheduler_name='default-scheduler',
                                      security_context={},
                                      termination_grace_period_seconds=30)),
        ))

        spec = client.V1beta1CronJobSpec(concurrency_policy=self.concurrency,
                                         failed_jobs_history_limit=3,
                                         job_template=job_template,
                                         starting_deadline_seconds=300,
                                         schedule=self.schedule,
                                         successful_jobs_history_limit=3,
                                         suspend=False)

        cronjob = client.V1beta1CronJob(
            api_version='batch/v1beta1',
            kind='CronJob',
            metadata=client.V1ObjectMeta(
                labels={'cattle.io/creator': 'norman'},
                name=self.cronjob_name,
                namespace=self.namespace),
            spec=spec)

        return cronjob
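
A sketch of submitting the returned object, with `builder` standing in for an instance of the class above; `BatchV1beta1Api` only exists in client versions that still ship the batch/v1beta1 group:

from kubernetes import client, config

config.load_kube_config()
cronjob = builder.create_cronjob_object()
client.BatchV1beta1Api().create_namespaced_cron_job(
    namespace=cronjob.metadata.namespace, body=cronjob)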
Example #24
    def deploy_worker(self):
        """
        Deploys the rollout worker(s) in Kubernetes.
        """

        worker_params = self.params.run_type_params.get(
            str(RunType.ROLLOUT_WORKER), None)
        if not worker_params:
            return False

        worker_params.command += [
            '--memory_backend_params',
            json.dumps(self.params.memory_backend_parameters.__dict__)
        ]
        worker_params.command += [
            '--data_store_params',
            json.dumps(self.params.data_store_params.__dict__)
        ]
        worker_params.command += [
            '--num_workers', '{}'.format(worker_params.num_replicas)
        ]

        name = "{}-{}".format(worker_params.run_type, uuid.uuid4())

        if self.params.data_store_params.store_type == "nfs":
            container = k8sclient.V1Container(
                name=name,
                image=worker_params.image,
                command=worker_params.command,
                args=worker_params.arguments,
                image_pull_policy='Always',
                volume_mounts=[
                    k8sclient.V1VolumeMount(
                        name='nfs-pvc',
                        mount_path=worker_params.checkpoint_dir)
                ],
                stdin=True,
                tty=True)
            template = k8sclient.V1PodTemplateSpec(
                metadata=k8sclient.V1ObjectMeta(labels={'app': name}),
                spec=k8sclient.V1PodSpec(
                    containers=[container],
                    volumes=[
                        k8sclient.V1Volume(
                            name="nfs-pvc",
                            persistent_volume_claim=self.nfs_pvc)
                    ],
                    restart_policy='Never'),
            )
        else:
            container = k8sclient.V1Container(
                name=name,
                image=worker_params.image,
                command=worker_params.command,
                args=worker_params.arguments,
                image_pull_policy='Always',
                env=[
                    k8sclient.V1EnvVar("ACCESS_KEY_ID", self.s3_access_key),
                    k8sclient.V1EnvVar("SECRET_ACCESS_KEY", self.s3_secret_key)
                ],
                stdin=True,
                tty=True)
            template = k8sclient.V1PodTemplateSpec(
                metadata=k8sclient.V1ObjectMeta(labels={'app': name}),
                spec=k8sclient.V1PodSpec(containers=[container],
                                         restart_policy='Never'))

        job_spec = k8sclient.V1JobSpec(completions=worker_params.num_replicas,
                                       parallelism=worker_params.num_replicas,
                                       template=template)

        job = k8sclient.V1Job(api_version="batch/v1",
                              kind="Job",
                              metadata=k8sclient.V1ObjectMeta(name=name),
                              spec=job_spec)

        api_client = k8sclient.BatchV1Api()
        try:
            api_client.create_namespaced_job(self.params.namespace, job)
            worker_params.orchestration_params['job_name'] = name
            return True
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating Job", e)
            return False
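
Once the Job is accepted, completion can be polled through its status subresource; a sketch using the `job_name` recorded above:

from kubernetes import client as k8sclient

def rollout_workers_finished(worker_params, namespace):
    # True once the Job reports any succeeded or failed pods.
    name = worker_params.orchestration_params['job_name']
    status = k8sclient.BatchV1Api().read_namespaced_job_status(name, namespace).status
    return bool(status.succeeded or status.failed)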
Example #25
def create_job(name,
               configmap_name,
               container_name,
               container_image,
               container_command,
               namespace="default",
               env_vars={}):
    """
    Create a k8s Job object
    Args:
        name:
        configmap_name:
        container_name:
        container_image:
        container_command: list, the command to execute, e.g. ['python','/home/test.py']
        namespace:
        env_vars: environment variables

    Returns:

    """
    try:
        # Body is the Job object body
        body = client.V1Job(api_version="batch/v1", kind="Job")
        # The object needs Metadata; every Job must have a unique name!
        body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
        # Add Status
        body.status = client.V1JobStatus()

        # Begin the Template...
        template = client.V1PodTemplateSpec()

        # Pass arguments through Env:
        env_list = []
        for env_name, env_value in env_vars.items():
            env_list.append(client.V1EnvVar(name=env_name, value=env_value))

        container = client.V1Container(command=container_command,
                                       env=env_list,
                                       image=container_image,
                                       image_pull_policy="IfNotPresent",
                                       name=container_name)

        volume_mount = client.V1VolumeMount(name="config-volume",
                                            mount_path=mount_path)
        container.volume_mounts = [volume_mount]

        config_map = client.V1ConfigMapVolumeSource(name=configmap_name)

        volumes = client.V1Volume(name="config-volume", config_map=config_map)

        template.spec = client.V1PodSpec(containers=[container],
                                         restart_policy='OnFailure',
                                         volumes=[volumes])

        # Finally, create the V1JobSpec
        body.spec = client.V1JobSpec(ttl_seconds_after_finished=600,
                                     template=template)

        response = batch_v1_api.create_namespaced_job(namespace,
                                                      body,
                                                      pretty=True)

        return True, response
    except Exception as ex:
        print(ex)
        return False, "k8s Job object creation failed!"
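
A hypothetical call; note the function leans on module-level `mount_path` and `batch_v1_api` globals, and the named ConfigMap must already exist for the volume mount to resolve:

ok, response = create_job(
    name="demo-job",
    configmap_name="job-config",
    container_name="runner",
    container_image="python:3.9-slim",
    container_command=["python", "/home/test.py"],
    env_vars={"MODE": "test"},
)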
Example #26
def create_job_object(name,
                      container_image,
                      command,
                      args=None,
                      namespace="default",
                      container_name="jobcontainer",
                      env_vars=None,
                      restart_policy='Never',
                      ttl_finished=180,
                      secret_names=None,
                      backoff_limit=0,
                      volume_mappings=None):

    if settings.TASK_DELETE_SUCCESSFUL_PODS or settings.TASK_DELETE_FAILED_PODS:
        cleanup_pods(delete_succeeded=settings.TASK_DELETE_SUCCESSFUL_PODS,
                     delete_failed=settings.TASK_DELETE_FAILED_PODS,
                     namespace=namespace)

    if env_vars is None:
        env_vars = {}
    if secret_names is None:
        secret_names = []
    if args is None:
        args = []
    if volume_mappings is None:
        volume_mappings = []

    body = client.V1Job(api_version="batch/v1", kind="Job")
    # metadata and status are required
    body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
    body.status = client.V1JobStatus()

    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()

    api_client = client.BatchV1Api()

    # Set env variables
    env_list = []
    for env_name, env_value in env_vars.items():
        env_list.append(client.V1EnvVar(name=env_name, value=env_value))

    env_from = []
    for secret_name in secret_names:
        env_from.append(
            client.V1EnvFromSource(secret_ref=client.V1SecretEnvSource(
                name=secret_name)))

    volumes = []
    volume_mounts = []
    for i, volume_mapping in enumerate(volume_mappings):
        volume = client.V1Volume(name=f'volume-{i}',
                                 host_path=client.V1HostPathVolumeSource(
                                     path=volume_mapping['host_path']))
        volumes.append(volume)
        volume_mounts.append(
            client.V1VolumeMount(name=f'volume-{i}',
                                 mount_path=volume_mapping['mount_path']))

    # set container options
    container = client.V1Container(
        name=container_name,
        image=container_image,
        env=env_list,
        command=command,
        args=args,
        env_from=env_from,
        volume_mounts=volume_mounts,
        image_pull_policy=settings.TASK_IMAGE_PULL_POLICY)

    # set pod options
    template.template.spec = client.V1PodSpec(
        containers=[container],
        restart_policy=restart_policy,
        volumes=volumes,
        service_account_name='collabovid-sa')

    body.spec = client.V1JobSpec(ttl_seconds_after_finished=ttl_finished,
                                 template=template.template,
                                 backoff_limit=backoff_limit)

    return body
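
A hypothetical call against the builder above (it also presupposes the Django `settings` module and the `collabovid-sa` service account referenced in the code):

from kubernetes import client

body = create_job_object(
    name="scrape-task",
    container_image="registry.example.com/scraper:latest",  # illustrative image
    command=["python", "manage.py"],
    args=["scrape"],
    secret_names=["scraper-secrets"],
    volume_mappings=[{"host_path": "/srv/data", "mount_path": "/data"}],
)
client.BatchV1Api().create_namespaced_job("default", body)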
def run_cases(splunk, test_id, test):
    cases_collection = get_performance_test_cases_collection(splunk)
    cases = cases_collection.query(
        query=json.dumps({
            "test_id": test_id,
        }),
        sort="index:1",
    )
    for case in cases:
        case_id = case["_key"]
        status = case["status"]
        if status == CASE_FINISHED:
            continue
        if status == CASE_WAITING:
            result = splunk.post(
                "saas/stacks", **{
                    "deployment_type": case["deployment_type"],
                    "indexer_count": case["indexer_count"],
                    "search_head_count": case["search_head_count"],
                    "cpu_per_instance": case["cpu_per_instance"],
                    "etc_storage_in_gb": case["etc_storage_in_gb"],
                    "other_var_storage_in_gb": case["other_var_storage_in_gb"],
                    "indexer_var_storage_in_gb":
                    case["indexer_var_storage_in_gb"],
                    "memory_per_instance": case["memory_per_instance"],
                    "title":
                    "Performance Test %s and Case %s" % (test_id, case_id),
                    "cluster": test["cluster"],
                })
            response = json.loads(result.body.read())["entry"][0]["content"]
            stack_id = response["stack_id"]
            logging.info("created stack %s for test case %s" %
                         (stack_id, case_id))
            case.update({
                "status": CASE_STARTING,
                "stack_id": stack_id,
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation(
                "waiting for stack %s in test case %s starting up ..." %
                (stack_id, case_id))
        elif status == CASE_STARTING:
            stack_id = case["stack_id"]
            stack = splunk.get("saas/stack/%s" % stack_id)
            stack_status = json.loads(
                stack.body.read())["entry"][0]["content"]["status"]
            if stack_status == stacks.CREATING:
                raise errors.RetryOperation()
            if stack_status != stacks.CREATED:
                raise Exception("unexpected stack status: %s" % stack_status)
            logging.info("successfully created stack %s for case %s" %
                         (stack_id, case_id))
            stack_config = stacks.get_stack_config(splunk, stack_id)
            kube_client = clusters.create_client(splunk,
                                                 stack_config["cluster"])
            cluster_config = clusters.get_cluster(splunk, test["cluster"])
            node_selector_labels = cluster_config["node_selector"].split(",")
            node_selector_for_generators = {}
            for label in node_selector_labels:
                if label:
                    kv = label.split("=")
                    if len(kv) != 2:
                        raise errors.ApplicationError(
                            "invalid node selector format (%s)" %
                            cluster_config["node_selector"])
                    node_selector_for_generators[kv[0]] = kv[1]
            apps_api = kubernetes.AppsV1Api(kube_client)
            core_api = kubernetes.CoreV1Api(kube_client)
            if stack_config["deployment_type"] == "standalone":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])
            elif stack_config["deployment_type"] == "distributed":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.indexer_role,
                    stack_config["namespace"])
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            data_volume_in_gb_per_day = int(case["data_volume_in_gb_per_day"])
            logging.debug("data_volume_in_gb_per_day=%s" %
                          (data_volume_in_gb_per_day))
            data_volume_in_gb_per_second = data_volume_in_gb_per_day / 24 / 60 / 60
            logging.debug("data_volume_in_gb_per_second=%s" %
                          (data_volume_in_gb_per_second))
            data_volume_in_kb_per_second = data_volume_in_gb_per_second * 1024 * 1024
            logging.debug("data_volume_in_kb_per_second=%s" %
                          (data_volume_in_kb_per_second))
            max_kb_per_second_per_data_generator = 100
            logging.debug("max_kb_per_second_per_data_generator=%s" %
                          (max_kb_per_second_per_data_generator))
            number_of_data_generators = max(
                int(data_volume_in_kb_per_second /
                    max_kb_per_second_per_data_generator) + 1, 1)
            logging.debug("number_of_data_generators=%s" %
                          (number_of_data_generators))
            data_volume_in_kb_per_second_per_data_generator = data_volume_in_kb_per_second / \
                number_of_data_generators
            logging.debug(
                "data_volume_in_kb_per_second_per_data_generator=%s" %
                (data_volume_in_kb_per_second_per_data_generator))
            deployment_name = "datagen-%s" % (stack_id)
            try:
                apps_api.read_namespaced_deployment(
                    deployment_name, namespace=stack_config["namespace"])
                data_gen_deployment_already_exists = True
            except kubernetes.rest.ApiException as e:
                if e.status != 404:
                    raise
                data_gen_deployment_already_exists = False
            if not data_gen_deployment_already_exists:
                apps_api.create_namespaced_deployment(
                    namespace=stack_config["namespace"],
                    body=kubernetes.V1Deployment(
                        metadata=kubernetes.V1ObjectMeta(
                            name=deployment_name,
                            namespace=stack_config["namespace"],
                            labels={
                                "app": "datagen",
                                "test": test_id,
                                "case": case_id,
                            },
                        ),
                        spec=kubernetes.V1DeploymentSpec(
                            replicas=number_of_data_generators,
                            selector=kubernetes.V1LabelSelector(
                                match_labels={
                                    "name": "datagen-%s" % (stack_id),
                                }),
                            template=kubernetes.V1PodTemplateSpec(
                                metadata=kubernetes.V1ObjectMeta(labels={
                                    "name":
                                    "datagen-%s" % (stack_id),
                                    "app":
                                    "datagen",
                                    "test":
                                    test_id,
                                    "case":
                                    case_id,
                                    "stack":
                                    stack_id,
                                }, ),
                                spec=kubernetes.V1PodSpec(
                                    containers=[
                                        kubernetes.V1Container(
                                            name="datagen",
                                            image=
                                            "blackhypothesis/splunkeventgenerator:latest",
                                            resources=kubernetes.
                                            V1ResourceRequirements(
                                                requests={
                                                    "memory": "10Mi",
                                                    "cpu": "500m",
                                                },
                                                limits={
                                                    "memory": "50Mi",
                                                    "cpu": "1",
                                                },
                                            ),
                                            env=[
                                                kubernetes.V1EnvVar(
                                                    name="DSTHOST",
                                                    value=";".join(
                                                        "%s:9996" % host
                                                        for host in indexer_hosts),
                                                ),
                                                kubernetes.V1EnvVar(
                                                    name="KB_S",
                                                    value="%s" % data_volume_in_kb_per_second_per_data_generator,
                                                ),
                                            ],
                                        ),
                                    ],
                                    node_selector=node_selector_for_generators,
                                ),
                            ),
                        ),
                    ),
                )
                logging.info("created %s data generators for case %s" %
                             (number_of_data_generators, case_id))
            if stack_config["deployment_type"] == "standalone":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])[0]
            elif stack_config["deployment_type"] == "distributed":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.search_head_role,
                    stack_config["namespace"])[0]
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            searches_per_day = int(case["searches_per_day"])
            logging.debug("searches_per_day=%s" % (searches_per_day))
            searches_per_second = searches_per_day / 24 / 60 / 60
            logging.debug("searches_per_second=%s" % (searches_per_second))
            max_searches_per_second_per_generator = 5
            logging.debug("max_searches_per_second_per_generator=%s" %
                          (max_searches_per_second_per_generator))
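            # size the generator pool so no generator exceeds the per-generator cap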
            number_of_search_generators = max(
                int(searches_per_second /
                    max_searches_per_second_per_generator) + 1, 1)
            logging.debug("number_of_search_generators=%s" %
                          (number_of_search_generators))
            searches_per_second_per_generator = searches_per_second / \
                number_of_search_generators
            logging.debug("searches_per_second_per_generator=%s" %
                          (searches_per_second_per_generator))
            search_template = case["search_template"]
            if searches_per_day > 0 and search_template:
                deployment_name = "searchgen-%s" % (stack_id)
                try:
                    apps_api.read_namespaced_deployment(
                        deployment_name, namespace=stack_config["namespace"])
                    search_gen_deployment_already_exists = True
                except kubernetes.rest.ApiException as e:
                    if e.status != 404:
                        raise
                    search_gen_deployment_already_exists = False
                if not search_gen_deployment_already_exists:
                    admin_password = instances.get_admin_password(
                        core_api, stack_id, stack_config,
                        services.search_head_role)
                    apps_api.create_namespaced_deployment(
                        namespace=stack_config["namespace"],
                        body=kubernetes.V1Deployment(
                            metadata=kubernetes.V1ObjectMeta(
                                name=deployment_name,
                                namespace=stack_config["namespace"],
                                labels={
                                    "app": "searchgen",
                                    "test": test_id,
                                    "case": case_id,
                                },
                            ),
                            spec=kubernetes.V1DeploymentSpec(
                                replicas=number_of_search_generators,
                                selector=kubernetes.V1LabelSelector(
                                    match_labels={
                                        "name": "searchgen-%s" % (stack_id),
                                    }),
                                template=kubernetes.V1PodTemplateSpec(
                                    metadata=kubernetes.V1ObjectMeta(labels={
                                        "name": "searchgen-%s" % stack_id,
                                        "app": "searchgen",
                                        "test": test_id,
                                        "case": case_id,
                                        "stack": stack_id,
                                    }),
                                    spec=kubernetes.V1PodSpec(
                                        containers=[
                                            kubernetes.V1Container(
                                                name="searchgen",
                                                image="hovu96/splunk-searchgen:latest",
                                                resources=kubernetes.V1ResourceRequirements(
                                                    requests={
                                                        "memory": "10Mi",
                                                        "cpu": "500m",
                                                    },
                                                    limits={
                                                        "memory": "50Mi",
                                                        "cpu": "1",
                                                    },
                                                ),
                                                env=[
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPL",
                                                        value=search_template,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_HOST",
                                                        value=search_head_host,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_USER",
                                                        value="admin",
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_PASSWORD",
                                                        value=admin_password,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPS",
                                                        value="%s" % searches_per_second_per_generator,
                                                    ),
                                                ],
                                            ),
                                        ],
                                        node_selector=node_selector_for_generators,
                                    ),
                                ),
                            ),
                        ),
                    )
                    logging.info("created %s search generators for case %s" %
                                 (number_of_search_generators, case_id))
            else:
                logging.info("no search generators started")
            case.update({
                "status": CASE_RUNNING,
                "time_started_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("running test case %s ..." % case_id)
        elif status == CASE_RUNNING:
            time_started_running = case["time_started_running"]
            time_now = time.time()
            seconds_running_so_far = time_now - time_started_running
            target_run_duration = test["run_duration"]
            logging.debug(
                "time_started_running=%s time_now=%s seconds_running_so_far=%s"
                % (time_started_running, time_now, seconds_running_so_far))
            if seconds_running_so_far < (target_run_duration * 60):
                logging.debug("still waiting")
                raise errors.RetryOperation()
            logging.info("time elapsed for case %s" % (case_id))
            case.update({
                "status": CASE_STOPPING,
                "time_finished_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("stopping test case %s" % case_id)
        elif status == CASE_STOPPING:
            stop_case(splunk, test_id, case_id, case)
            case.update({
                "status": CASE_FINISHED,
            })
            cases_collection.update(case_id, json.dumps(case))
            logging.info("finished test case %s" % case_id)
        else:
            logging.error("run_cases: unexpected status for test case %s: %s" %
                          (case_id, status))
            raise errors.RetryOperation()
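A note on the pattern above: the try/read/except-404 sequence appears twice
(for the data and search generator deployments). A minimal sketch of the same
create-if-absent idiom factored into a helper, assuming the example's
"kubernetes" alias for kubernetes.client (the helper name is hypothetical):

def ensure_deployment(apps_api, namespace, body):
    # Returns True if the deployment was created, False if it already existed.
    try:
        apps_api.read_namespaced_deployment(body.metadata.name,
                                            namespace=namespace)
        return False
    except kubernetes.rest.ApiException as e:
        if e.status != 404:
            raise
    apps_api.create_namespaced_deployment(namespace=namespace, body=body)
    return True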
Example #28
    def create(self):
        # Check that job attributes are sensible.

        # CPU value should be greater than 0
        if not (isinstance(self._kwargs["cpu"],
                           (int, unicode, basestring, float))
                and float(self._kwargs["cpu"]) > 0):
            raise KubernetesJobException(
                "Invalid CPU value ({}); it should be greater than 0".format(
                    self._kwargs["cpu"]))

        # Memory value should be greater than 0
        if not (isinstance(self._kwargs["memory"], (int, unicode, basestring))
                and int(self._kwargs["memory"]) > 0):
            raise KubernetesJobException(
                "Invalid memory value ({}); it should be greater than 0".
                format(self._kwargs["memory"]))

        # Disk value should be greater than 0
        if not (isinstance(self._kwargs["disk"], (int, unicode, basestring))
                and int(self._kwargs["disk"]) > 0):
            raise KubernetesJobException(
                "Invalid disk value ({}); it should be greater than 0".format(
                    self._kwargs["disk"]))

        # TODO(s) (savin)
        # 1. Add support for GPUs.

        # A discerning eye would notice and question the choice of using the
        # V1Job construct over the V1Pod construct given that we don't rely much
        # on any of the V1Job semantics. The major reasons at the moment are -
        #     1. It makes the Kubernetes UIs (Octant, Lens) a bit more easy on
        #        the eyes, although even that can be questioned.
        #     2. AWS Step Functions, at the moment (Aug '21), only supports
        #        executing Jobs and not Pods as part of its publicly declared
        #        API. When we ship the AWS Step Functions integration with EKS,
        #        it will hopefully lessen our workload.
        #
        # Note: This implementation ensures that there is only one unique Pod
        # (unique UID) per Metaflow task attempt.
        client = self._client_wrapper.get()
        self._job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(
                # Annotations are for humans
                annotations=self._kwargs.get("annotations", {}),
                # While labels are for Kubernetes
                labels=self._kwargs.get("labels", {}),
                name=self._kwargs["name"],  # Unique within the namespace
                namespace=self._kwargs["namespace"],  # Defaults to `default`
            ),
            spec=client.V1JobSpec(
                # Retries are handled by Metaflow when it is responsible for
                # executing the flow. The responsibility is moved to Kubernetes
                # when AWS Step Functions / Argo are responsible for the
                # execution.
                backoff_limit=self._kwargs.get("retries", 0),
                completions=1,  # A single non-indexed pod job
                # TODO (savin): Implement a job clean-up option in the
                # kubernetes CLI.
                # Remove the job after a week. TODO (savin): Make this
                # configurable.
                ttl_seconds_after_finished=7 * 24 * 60 * 60,
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        annotations=self._kwargs.get("annotations", {}),
                        labels=self._kwargs.get("labels", {}),
                        name=self._kwargs["name"],
                        namespace=self._kwargs["namespace"],
                    ),
                    spec=client.V1PodSpec(
                        # Timeout is set on the pod and not the job (important!)
                        active_deadline_seconds=self._kwargs["timeout_in_seconds"],
                        # TODO (savin): Enable affinities for GPU scheduling.
                        #               This requires some thought around the
                        #               UX since specifying affinities can get
                        #               complicated quickly. We may well decide
                        #               to move it out of scope for the initial
                        #               roll out.
                        # affinity=?,
                        containers=[
                            client.V1Container(
                                command=self._kwargs["command"],
                                env=[
                                    client.V1EnvVar(name=k, value=str(v))
                                    for k, v in self._kwargs.get(
                                        "environment_variables", {}).items()
                                ]
                                # And some downward API magic. Add (key, value)
                                # pairs below to make pod metadata available
                                # within Kubernetes container.
                                #
                                # TODO: Figure out a way to make job
                                # metadata visible within the container
                                + [
                                    client.V1EnvVar(
                                        name=k,
                                        value_from=client.V1EnvVarSource(
                                            field_ref=client.V1ObjectFieldSelector(
                                                field_path=str(v))),
                                    ) for k, v in {
                                        "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
                                        "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
                                        "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
                                    }.items()
                                ],
                                env_from=[
                                    client.V1EnvFromSource(
                                        secret_ref=client.V1SecretEnvSource(
                                            name=str(k)))
                                    for k in self._kwargs.get("secrets", [])
                                ],
                                image=self._kwargs["image"],
                                name=self._kwargs["name"],
                                resources=client.V1ResourceRequirements(
                                    requests={
                                        "cpu": str(self._kwargs["cpu"]),
                                        "memory": "%sM" % str(self._kwargs["memory"]),
                                        "ephemeral-storage": "%sM" % str(self._kwargs["disk"]),
                                    }),
                            )
                        ],
                        node_selector={
                            # TODO: What should be the format of node selector -
                            #       key:value or key=value?
                            str(k.split("=", 1)[0]): str(k.split("=", 1)[1])
                            for k in self._kwargs.get("node_selector", [])
                        },
                        # TODO (savin): At some point in the very near future,
                        #               support docker access secrets.
                        # image_pull_secrets=?,
                        #
                        # TODO (savin): We should, someday, get into the pod
                        #               priority business
                        # preemption_policy=?,
                        #
                        # A Container in a Pod may fail for a number of
                        # reasons, such as because the process in it exited
                        # with a non-zero exit code, or the Container was
                        # killed due to OOM etc. If this happens, fail the pod
                        # and let Metaflow handle the retries.
                        restart_policy="Never",
                        service_account_name=self._kwargs["service_account"],
                        # Terminate the container immediately on SIGTERM
                        termination_grace_period_seconds=0,
                        # TODO (savin): Enable tolerations for GPU scheduling.
                        #               This requires some thought around the
                        #               UX since specifying tolerations can get
                        #               complicated quickly.
                        # tolerations=?,
                        #
                        # TODO (savin): At some point in the very near future,
                        #               support custom volumes (PVCs/EVCs).
                        # volumes=?,
                        #
                        # TODO (savin): Set termination_message_policy
                    ),
                ),
            ),
        )
        return self
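The downward-API env block in the middle of this example is reusable on its
own: it exposes pod metadata as environment variables inside the container. A
minimal standalone sketch (the variable and env names here are illustrative):

from kubernetes import client

downward_env = [
    client.V1EnvVar(
        name=env_name,
        value_from=client.V1EnvVarSource(
            field_ref=client.V1ObjectFieldSelector(field_path=field_path)),
    )
    for env_name, field_path in {
        "POD_NAMESPACE": "metadata.namespace",
        "POD_NAME": "metadata.name",
        "POD_UID": "metadata.uid",
    }.items()
]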
Example #29
    def deploy_kubernetes(self):

        if 'namespace' not in self.params.orchestrator_params:
            self.params.orchestrator_params['namespace'] = "default"

        container = client.V1Container(
            name=self.redis_server_name,
            image='redis:4-alpine',
        )
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                labels={'app': self.redis_server_name}),
            spec=client.V1PodSpec(containers=[container]))
        deployment_spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=client.V1LabelSelector(
                match_labels={'app': self.redis_server_name}))

        deployment = client.V1Deployment(
            api_version='apps/v1',
            kind='Deployment',
            metadata=client.V1ObjectMeta(
                name=self.redis_server_name,
                labels={'app': self.redis_server_name}),
            spec=deployment_spec)

        api_client = client.AppsV1Api()
        try:
            api_client.create_namespaced_deployment(
                self.params.orchestrator_params['namespace'], deployment)
        except client.rest.ApiException as e:
            print("Got exception: %s while creating redis-server" % e)
            return False

        core_v1_api = client.CoreV1Api()

        service = client.V1Service(
            api_version='v1',
            kind='Service',
            metadata=client.V1ObjectMeta(name=self.redis_service_name),
            spec=client.V1ServiceSpec(selector={'app': self.redis_server_name},
                                      ports=[
                                          client.V1ServicePort(
                                              protocol='TCP',
                                              port=6379,
                                              target_port=6379)
                                      ]))

        try:
            core_v1_api.create_namespaced_service(
                self.params.orchestrator_params['namespace'], service)
            self.params.redis_address = '{}.{}.svc'.format(
                self.redis_service_name,
                self.params.orchestrator_params['namespace'])
            self.params.redis_port = 6379
            return True
        except client.rest.ApiException as e:
            print("Got exception: %s while creating a service for redis-server"
                  % e)
            return False
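Once deploy_kubernetes() returns True, other pods in the cluster can reach the
server through the "<service>.<namespace>.svc" DNS name recorded in
params.redis_address. A hypothetical usage sketch with the redis-py package
(not part of the example):

import redis  # pip install redis

def check_redis(params):
    # params.redis_address / params.redis_port as set by deploy_kubernetes()
    r = redis.Redis(host=params.redis_address, port=params.redis_port)
    return r.ping()  # True if the service is reachable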
Example #30
def create_deployment(apps_v1_api, username, token, gpu):
    name = 'jlab-{}'.format(username)
    try:
        init_container = client.V1Container(
            name='{}-init'.format(name),
            image="ubuntu:18.04",
            image_pull_policy="IfNotPresent",
            command=["/bin/sh"],
            args=["-c", "chown 1001:1001 /persistent_volume"],
            volume_mounts=[
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/persistent_volume",
                    sub_path='{}/jupyter'.format(username)
                )
            ]
        )
        if gpu:
            limits = {'nvidia.com/gpu': 1}
        else:
            limits = None
        container = client.V1Container(
            name=name,
            image=envvars.DOCKER_IMAGE_JLAB_SERVER,
            resources=client.V1ResourceRequirements(
                limits=limits
            ),
            image_pull_policy="Always",
            ports=[client.V1ContainerPort(container_port=8888)],
            env=[
                client.V1EnvVar(
                    name='DES_USER',
                    value=username
                ),
                client.V1EnvVar(
                    name='PIP_TARGET',
                    value='/home/jovyan/work/.pip'
                ),
                client.V1EnvVar(
                    name='PYTHONPATH',
                    value='/home/jovyan/work/.pip'
                )
            ],
            volume_mounts=[
                client.V1VolumeMount(
                    name='jupyter-config',
                    mount_path="/home/jovyan/.jupyter/"
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/jobs/cutout",
                    sub_path='{}/cutout'.format(username)
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/jobs/query",
                    sub_path='{}/query'.format(username)
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/work",
                    sub_path='{}/jupyter'.format(username)
                )
            ]
        )
        volume_config = client.V1Volume(
            name='jupyter-config',
            config_map=client.V1ConfigMapVolumeSource(
                name=name,
                items=[client.V1KeyToPath(
                    key=name,
                    path="jupyter_notebook_config.py"
                )]
            )
        )
        volume_persistent = client.V1Volume(
            name='persistent-volume',
            persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                claim_name=envvars.PVC_NAME_BASE
            )
        )
        # Template
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"app": name}),
            spec=client.V1PodSpec(
                image_pull_secrets=[
                    client.V1LocalObjectReference(name='registry-auth')
                ],
                init_containers=[init_container],
                containers=[container],
                volumes=[volume_config, volume_persistent],
                node_selector={'gpu': '{}'.format(gpu).lower()}
            )
        )
        # Spec
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=client.V1LabelSelector(
                match_labels=dict({'app': name})
            )
        )
        # Deployment
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=name),
            spec=spec)
        # Creation of the Deployment in specified namespace
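        # ("namespace" is assumed to be defined elsewhere, e.g. at module level)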
        api_response = apps_v1_api.create_namespaced_deployment(
            namespace=namespace, body=deployment
        )
        # logger.info('Deployment created:\n{}'.format(api_response))
    except ApiException as e:
        error_msg = str(e).strip()
        logger.error(error_msg)
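The deployment above mounts a ConfigMap named after the deployment, written
out as jupyter_notebook_config.py via the V1KeyToPath item. A minimal sketch
of creating that ConfigMap beforehand (function name and config text are
illustrative):

def create_jupyter_configmap(core_v1_api, namespace, name, config_text):
    body = client.V1ConfigMap(
        metadata=client.V1ObjectMeta(name=name),
        data={name: config_text},  # key must match the V1KeyToPath key above
    )
    core_v1_api.create_namespaced_config_map(namespace=namespace, body=body)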