예제 #1
0
def create_cluster(**kwargs):
    """
    Build a KubeCluster whose worker pod template comes from the task inputs.

    Worker count, cores, memory and image are read from ``task['inputs']``,
    falling back to defaults when a key is absent. ``task`` is resolved from
    the enclosing scope; ``kwargs`` is accepted but not read here.

    Returns the ``KubeCluster`` created from the pod template.
    """
    inputs = task['inputs']
    n_workers = inputs.get('workers', 0)
    cores = inputs.get('worker_cores', 2)
    mem = inputs.get('worker_memory', 2)
    worker_image = inputs.get('worker_image', 'daskdev/dask:latest')

    # One mapping is used for both the limits and the requests.
    quota = {'cpu': str(cores), 'memory': str(mem)}

    # Command line for the worker process, assembled flag by flag.
    worker_args = ['dask-worker']
    worker_args += ['--nthreads', str(cpu_to_threads(cores))]
    worker_args += ['--no-bokeh']
    worker_args += ['--memory-limit', f'{mem}B']
    worker_args += ['--death-timeout', '60']

    worker_container = client.V1Container(
        name='dask',
        image=worker_image,
        args=worker_args,
        resources=client.V1ResourceRequirements(limits=quota, requests=quota),
    )

    # Labels tie each worker pod back to the task it belongs to.
    pod_meta = client.V1ObjectMeta(labels={
        'cowait/task': 'worker-' + task.get('id'),
        'cowait/parent': task.get('id'),
    })

    secret_names = inputs.get('pull_secrets', ['docker'])
    pull_secrets = []
    for secret_name in secret_names:
        pull_secrets.append(client.V1LocalObjectReference(name=secret_name))

    template = client.V1Pod(
        metadata=pod_meta,
        spec=client.V1PodSpec(
            restart_policy='Never',
            image_pull_secrets=pull_secrets,
            containers=[worker_container],
        ),
    )

    return KubeCluster(pod_template=template, n_workers=n_workers)
예제 #2
0
def deploy_function(function: DaskCluster, secrets=None):
    """Start a Dask cluster on Kubernetes for ``function``.

    Builds a worker pod template from ``function.spec``, updates the dask
    configuration (scheduler service template and cluster name), creates a
    ``KubeCluster`` in remote deploy mode and records the scheduler address,
    cluster name and (for NodePort services) node ports on
    ``function.status``.

    :param function: the DaskCluster runtime object; its ``spec`` and
                     ``metadata`` drive the deployment and its ``status`` is
                     updated in place (``spec.remote`` is set to True).
    :param secrets:  not used by this function.
    :returns: the created ``KubeCluster``.
    :raises ImportError: if dask / dask_kubernetes / kubernetes_asyncio are
                         not installed (an install hint is printed first).
    """
    # Function-scope imports: a missing package is reported with an install
    # hint before the ImportError is propagated.
    try:
        from dask_kubernetes import KubeCluster, make_pod_spec
        from dask.distributed import Client, default_client
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # print() does not apply %-formatting, so the error is formatted
        # into the message explicitly.
        print('missing dask or dask_kubernetes, please run '
              '"pip install dask distributed dask_kubernetes", {}'.format(e))
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or 'daskdev/dask:latest'
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function)
    args = ['dask-worker', "--nthreads", str(spec.nthreads)]
    if spec.args:
        args += spec.args

    container = client.V1Container(name='base',
                                   image=image,
                                   env=env,
                                   args=args,
                                   image_pull_policy=spec.image_pull_policy,
                                   volume_mounts=spec.volume_mounts,
                                   resources=spec.resources)

    pod_spec = client.V1PodSpec(containers=[container],
                                restart_policy='Never',
                                volumes=spec.volumes,
                                service_account=spec.service_account)
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)]

    # NOTE: annotations are not propagated to the pod metadata
    # (previously sketched as annotations=meta.annotation).
    pod = client.V1Pod(metadata=client.V1ObjectMeta(namespace=namespace,
                                                    labels=pod_labels),
                       spec=pod_spec)

    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            # An explicit node port forces a NodePort service; it is applied
            # to the second port entry (reported below as the dashboard port).
            spec.service_type = 'NodePort'
            svc_temp['spec']['ports'][1]['nodePort'] = spec.node_port
        update_in(svc_temp, 'spec.type', spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set({"kubernetes.scheduler-service-template": svc_temp,
                     'kubernetes.name': 'mlrun-' + norm_name + '-{uuid}'})

    cluster = KubeCluster(
        pod, deploy_mode='remote',
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout)

    logger.info('cluster {} started at {}'.format(
        cluster.name, cluster.scheduler_address
    ))

    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == 'NodePort':
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {'scheduler': ports[0].node_port,
                                      'dashboard': ports[1].node_port}

    # A fixed replica count takes precedence over adaptive scaling.
    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas,
                      maximum=spec.max_replicas)

    return cluster
예제 #3
0
def deploy_function(function: DaskCluster, secrets=None):
    """Start a Dask cluster on Kubernetes for ``function``.

    Builds a worker pod template from ``function.spec`` (passing the memory
    limit from ``spec.resources`` to the worker when one is set), updates the
    dask configuration (scheduler service template and cluster name), creates
    a ``KubeCluster`` in remote deploy mode and records the scheduler
    address, cluster name and (for NodePort services) node ports on
    ``function.status``.

    :param function: the DaskCluster runtime object; its ``spec`` and
                     ``metadata`` drive the deployment and its ``status`` is
                     updated in place (``spec.remote`` is set to True).
    :param secrets:  not used by this function.
    :returns: the created ``KubeCluster``.
    :raises ImportError: if dask / dask_kubernetes / kubernetes_asyncio are
                         not installed (an install hint is printed first).
    """
    # Function-scope imports: a missing package is reported with an install
    # hint before the ImportError is propagated.
    try:
        from dask_kubernetes import KubeCluster, make_pod_spec  # noqa: F401
        from dask.distributed import Client, default_client  # noqa: F401
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # print() does not apply %-formatting, so the error is formatted
        # into the message explicitly.
        print(
            "missing dask or dask_kubernetes, please run "
            '"pip install dask distributed dask_kubernetes", {}'.format(e)
        )
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or "daskdev/dask:latest"
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function, scrape_metrics=False)
    args = ["dask-worker", "--nthreads", str(spec.nthreads)]
    # Forward the container memory limit to the worker, when one is set.
    memory_limit = spec.resources.get("limits", {}).get("memory")
    if memory_limit:
        args.extend(["--memory-limit", str(memory_limit)])
    if spec.args:
        args.extend(spec.args)

    container = client.V1Container(
        name="base",
        image=image,
        env=env,
        args=args,
        image_pull_policy=spec.image_pull_policy,
        volume_mounts=spec.volume_mounts,
        resources=spec.resources,
    )

    pod_spec = client.V1PodSpec(
        containers=[container],
        restart_policy="Never",
        volumes=spec.volumes,
        service_account=spec.service_account,
    )
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)
        ]

    # NOTE: annotations are not propagated to the pod metadata
    # (previously sketched as annotations=meta.annotation).
    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(namespace=namespace, labels=pod_labels),
        spec=pod_spec,
    )

    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            # An explicit node port forces a NodePort service; it is applied
            # to the second port entry (reported below as the dashboard port).
            spec.service_type = "NodePort"
            svc_temp["spec"]["ports"][1]["nodePort"] = spec.node_port
        update_in(svc_temp, "spec.type", spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set(
        {
            "kubernetes.scheduler-service-template": svc_temp,
            "kubernetes.name": "mlrun-" + norm_name + "-{uuid}",
        }
    )

    cluster = KubeCluster(
        pod,
        deploy_mode="remote",
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout,
    )

    logger.info(
        "cluster {} started at {}".format(cluster.name, cluster.scheduler_address)
    )

    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == "NodePort":
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {
            "scheduler": ports[0].node_port,
            "dashboard": ports[1].node_port,
        }

    # A fixed replica count takes precedence over adaptive scaling.
    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas, maximum=spec.max_replicas)

    return cluster
예제 #4
0
    async def run_inner(self, run_info):
        """Schedule a runner pod and its proxy service for one run.

        Nothing is executed in this process: a pod is created through the
        Kubernetes API, and that pod runs the experiment and updates the
        database directly. A service with the same name is then created for
        proxy connections to the pod.
        """
        run_id = run_info['id']
        del run_info

        k8s_config.load_incluster_config()

        name = self._pod_name(run_id)

        # Pod spec and namespace are read from the configmap volume
        spec_path = os.path.join(self.config_dir, 'runner.pod_spec')
        with open(spec_path) as spec_file:
            pod_spec = yaml.safe_load(spec_file)
        namespace_path = os.path.join(self.config_dir, 'runner.namespace')
        with open(namespace_path) as namespace_file:
            namespace = namespace_file.read().strip()

        run_label = str(run_id)

        # Only the 'runner' container is adjusted: the run id is appended to
        # its arguments and its image may be overridden from the environment
        for container in pod_spec['containers']:
            if container['name'] != 'runner':
                continue
            container['args'] += [run_label]

            # The override is mostly used by Tilt
            image_override = os.environ.get('OVERRIDE_RUNNER_IMAGE')
            if image_override:
                container['image'] = image_override

        # Identifies the run's pod; also used as the service selector
        labels = {'app': 'run', 'run': run_label}

        async with k8s_client.ApiClient() as api:
            v1 = k8s_client.CoreV1Api(api)

            # The pod that runs the experiment
            pod_meta = k8s_client.V1ObjectMeta(name=name, labels=dict(labels))
            pod = k8s_client.V1Pod(
                api_version='v1',
                kind='Pod',
                metadata=pod_meta,
                spec=pod_spec,
            )
            await v1.create_namespaced_pod(namespace=namespace, body=pod)
            logger.info("Pod created: %s", name)
            PROM_RUNS.inc()

            # The service used for proxy connections to that pod
            svc_meta = k8s_client.V1ObjectMeta(name=name, labels=dict(labels))
            svc_port = k8s_client.V1ServicePort(protocol='TCP', port=5597)
            svc = k8s_client.V1Service(
                api_version='v1',
                kind='Service',
                metadata=svc_meta,
                spec=k8s_client.V1ServiceSpec(
                    selector=dict(labels),
                    ports=[svc_port],
                ),
            )
            await v1.create_namespaced_service(namespace=namespace, body=svc)
            logger.info("Service created: %s", name)
예제 #5
0
def make_pod_spec(
    image,
    labels=None,
    threads_per_worker=1,
    env=None,
    extra_container_config=None,
    extra_pod_config=None,
    memory_limit=None,
    memory_request=None,
    cpu_limit=None,
    cpu_request=None,
):
    """
    Create generic pod template from input parameters

    Parameters
    ----------
    image
        Container image for the single ``dask-worker`` container.
    labels
        Mapping stored as the pod's metadata labels. Defaults to a new
        empty dict on every call.
    threads_per_worker
        Value passed to the worker's ``--nthreads`` flag.
    env
        Mapping of environment variable names to values for the container.
        Defaults to no variables.
    extra_container_config
        Mapping of attribute names to values applied to the container via
        ``_set_k8s_attribute``. Defaults to nothing.
    extra_pod_config
        Mapping of attribute names to values applied to the pod spec via
        ``_set_k8s_attribute``. Defaults to nothing.
    memory_limit
        When set, passed to the worker's ``--memory-limit`` flag and used
        as the container memory limit.
    memory_request, cpu_limit, cpu_request
        When set, stored in the container's resource requests / limits.

    Returns
    -------
    The ``client.V1Pod`` template.

    Examples
    --------
    >>> make_pod_spec(image='daskdev/dask:latest', memory_limit='4G', memory_request='4G')
    """
    # None defaults replace the former mutable ``{}`` defaults: the labels
    # mapping is stored in the returned pod, so a shared default dict would
    # leak mutations from one call's pod into every later call.
    labels = {} if labels is None else labels
    env = {} if env is None else env
    extra_container_config = (
        {} if extra_container_config is None else extra_container_config
    )
    extra_pod_config = {} if extra_pod_config is None else extra_pod_config

    args = [
        "dask-worker",
        "$(DASK_SCHEDULER_ADDRESS)",
        "--nthreads",
        str(threads_per_worker),
        "--death-timeout",
        "60",
    ]
    if memory_limit:
        args.extend(["--memory-limit", str(memory_limit)])
    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(
            restart_policy="Never",
            containers=[
                client.V1Container(
                    name="dask-worker",
                    image=image,
                    args=args,
                    env=[
                        client.V1EnvVar(name=k, value=v)
                        for k, v in env.items()
                    ],
                )
            ],
        ),
    )

    # Only the resource settings that were provided are recorded.
    resources = client.V1ResourceRequirements(limits={}, requests={})

    if cpu_request:
        resources.requests["cpu"] = cpu_request
    if memory_request:
        resources.requests["memory"] = memory_request

    if cpu_limit:
        resources.limits["cpu"] = cpu_limit
    if memory_limit:
        resources.limits["memory"] = memory_limit

    pod.spec.containers[0].resources = resources

    # Extra settings are applied last, after the generated values are set.
    for key, value in extra_container_config.items():
        _set_k8s_attribute(pod.spec.containers[0], key, value)

    for key, value in extra_pod_config.items():
        _set_k8s_attribute(pod.spec, key, value)
    return pod