예제 #1
0
def create_cluster(**kwargs):
    """Build a ``KubeCluster`` whose worker pod template comes from ``task``.

    All settings are read from ``task['inputs']``; ``task`` is not a parameter
    of this function and is resolved from the enclosing scope. ``kwargs`` is
    accepted but not used.

    Inputs read (with their defaults):
        workers (0), worker_cores (2), worker_memory (2),
        worker_image ('daskdev/dask:latest'), pull_secrets (['docker']).

    Returns:
        A ``KubeCluster`` created with the pod template and ``n_workers``.
    """
    inputs = task['inputs']
    n_workers = inputs.get('workers', 0)
    cores = inputs.get('worker_cores', 2)
    mem = inputs.get('worker_memory', 2)
    worker_image = inputs.get('worker_image', 'daskdev/dask:latest')

    # The same mapping is used for both limits and requests below.
    quota = {'cpu': str(cores), 'memory': str(mem)}

    # NOTE(review): the memory value is passed with a 'B' suffix, so
    # worker_memory is presumably expressed in bytes -- confirm with callers.
    worker_args = [
        'dask-worker',
        '--nthreads', str(cpu_to_threads(cores)),
        '--no-bokeh',
        '--memory-limit', f'{mem}B',
        '--death-timeout', '60',
    ]
    worker_container = client.V1Container(
        name='dask',
        image=worker_image,
        args=worker_args,
        resources=client.V1ResourceRequirements(limits=quota, requests=quota),
    )

    # Labels tie each worker pod to the task that spawned it.
    task_id = task.get('id')
    pod_meta = client.V1ObjectMeta(labels={
        'cowait/task': 'worker-' + task_id,
        'cowait/parent': task_id,
    })

    secret_refs = [
        client.V1LocalObjectReference(name=secret_name)
        for secret_name in inputs.get('pull_secrets', ['docker'])
    ]
    pod_spec = client.V1PodSpec(
        restart_policy='Never',
        image_pull_secrets=secret_refs,
        containers=[worker_container],
    )

    worker_pod = client.V1Pod(metadata=pod_meta, spec=pod_spec)
    return KubeCluster(pod_template=worker_pod, n_workers=n_workers)
예제 #2
0
def deploy_function(function: DaskCluster, secrets=None):
    """Start a Dask cluster on Kubernetes for ``function`` and record its details.

    Builds a worker pod from ``function.spec`` / ``function.metadata``, updates
    the dask config (scheduler service template and cluster name), creates a
    ``KubeCluster`` in ``remote`` deploy mode, then scales or adapts it.

    Side effects visible in this function:
      * ``function.spec.remote`` is set to ``True``.
      * ``function.spec.env`` is appended to in place when ``extra_pip`` is set.
      * ``function.spec.service_type`` is forced to ``'NodePort'`` when
        ``node_port`` is set.
      * ``function.status`` receives ``scheduler_address``, ``cluster_name``
        and, for ``NodePort`` services, ``node_ports``.
      * the global dask config is modified via ``dask.config.set``.

    :param function: runtime object exposing ``spec``, ``metadata``, ``status``
        and ``full_image_path()``.
    :param secrets: not used by this function.
    :returns: the created ``KubeCluster``.
    :raises ImportError: if dask, dask_kubernetes or kubernetes_asyncio cannot
        be imported (a hint is printed before re-raising).
    """
    try:
        # Some names are imported only to verify the packages are installed.
        from dask_kubernetes import KubeCluster, make_pod_spec  # noqa: F401
        from dask.distributed import Client, default_client  # noqa: F401
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # print() does not interpolate %-style placeholders, so the error is
        # formatted into the message explicitly.
        print('missing dask or dask_kubernetes, please run '
              '"pip install dask distributed dask_kubernetes", {}'.format(e))
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or 'daskdev/dask:latest'
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        # NOTE(review): this appends to spec.env in place, so repeated deploys
        # of the same function object accumulate entries -- confirm intended.
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function)
    args = ['dask-worker', "--nthreads", str(spec.nthreads)]
    if spec.args:
        args += spec.args

    container = client.V1Container(name='base',
                                   image=image,
                                   env=env,
                                   args=args,
                                   image_pull_policy=spec.image_pull_policy,
                                   volume_mounts=spec.volume_mounts,
                                   resources=spec.resources)

    pod_spec = client.V1PodSpec(containers=[container],
                                restart_policy='Never',
                                volumes=spec.volumes,
                                service_account=spec.service_account)
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)]

    # Pod annotations are intentionally not set here (the original code had
    # `annotations=meta.annotation` disabled).
    pod = client.V1Pod(metadata=client.V1ObjectMeta(namespace=namespace,
                                                    labels=pod_labels),
                       spec=pod_spec)

    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            spec.service_type = 'NodePort'
            # The node port goes on the second port entry of the template,
            # the one reported as 'dashboard' in status.node_ports below.
            svc_temp['spec']['ports'][1]['nodePort'] = spec.node_port
        # presumably sets svc_temp['spec']['type']; verify against update_in
        update_in(svc_temp, 'spec.type', spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set({"kubernetes.scheduler-service-template": svc_temp,
                     'kubernetes.name': 'mlrun-' + norm_name + '-{uuid}'})

    cluster = KubeCluster(
        pod, deploy_mode='remote',
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout)

    logger.info('cluster {} started at {}'.format(
        cluster.name, cluster.scheduler_address
    ))

    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == 'NodePort':
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {'scheduler': ports[0].node_port,
                                      'dashboard': ports[1].node_port}

    # A fixed replica count wins; otherwise scale adaptively between bounds.
    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas,
                      maximum=spec.max_replicas)

    return cluster
예제 #3
0
def deploy_function(function: DaskCluster, secrets=None):
    """Start a Dask cluster on Kubernetes for ``function`` and record its details.

    Builds a worker pod from ``function.spec`` / ``function.metadata`` (passing
    the memory limit from ``spec.resources`` to ``dask-worker`` when present),
    updates the dask config (scheduler service template and cluster name),
    creates a ``KubeCluster`` in ``remote`` deploy mode, then scales or adapts
    it.

    Side effects visible in this function:
      * ``function.spec.remote`` is set to ``True``.
      * ``function.spec.env`` is appended to in place when ``extra_pip`` is set.
      * ``function.spec.service_type`` is forced to ``"NodePort"`` when
        ``node_port`` is set.
      * ``function.status`` receives ``scheduler_address``, ``cluster_name``
        and, for ``NodePort`` services, ``node_ports``.
      * the global dask config is modified via ``dask.config.set``.

    :param function: runtime object exposing ``spec``, ``metadata``, ``status``
        and ``full_image_path()``.
    :param secrets: not used by this function.
    :returns: the created ``KubeCluster``.
    :raises ImportError: if dask, dask_kubernetes or kubernetes_asyncio cannot
        be imported (a hint is printed before re-raising).
    """

    # TODO: why is this here :|
    try:
        from dask_kubernetes import KubeCluster, make_pod_spec  # noqa: F401
        from dask.distributed import Client, default_client  # noqa: F401
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # print() does not interpolate %-style placeholders, so the error is
        # formatted into the message explicitly.
        print(
            "missing dask or dask_kubernetes, please run "
            '"pip install dask distributed dask_kubernetes", {}'.format(e)
        )
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or "daskdev/dask:latest"
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        # NOTE(review): this appends to spec.env in place, so repeated deploys
        # of the same function object accumulate entries -- confirm intended.
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function, scrape_metrics=False)
    args = ["dask-worker", "--nthreads", str(spec.nthreads)]
    # Tolerate an unset resources mapping or a null "limits" entry instead of
    # failing with AttributeError; in both cases no memory limit is passed.
    limits = (spec.resources or {}).get("limits") or {}
    memory_limit = limits.get("memory")
    if memory_limit:
        args.extend(["--memory-limit", str(memory_limit)])
    if spec.args:
        args.extend(spec.args)

    container = client.V1Container(
        name="base",
        image=image,
        env=env,
        args=args,
        image_pull_policy=spec.image_pull_policy,
        volume_mounts=spec.volume_mounts,
        resources=spec.resources,
    )

    pod_spec = client.V1PodSpec(
        containers=[container],
        restart_policy="Never",
        volumes=spec.volumes,
        service_account=spec.service_account,
    )
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)
        ]

    # Pod annotations are intentionally not set here (the original code had
    # `annotations=meta.annotation` disabled).
    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(namespace=namespace, labels=pod_labels),
        spec=pod_spec,
    )

    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            spec.service_type = "NodePort"
            # The node port goes on the second port entry of the template,
            # the one reported as "dashboard" in status.node_ports below.
            svc_temp["spec"]["ports"][1]["nodePort"] = spec.node_port
        # presumably sets svc_temp["spec"]["type"]; verify against update_in
        update_in(svc_temp, "spec.type", spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set(
        {
            "kubernetes.scheduler-service-template": svc_temp,
            "kubernetes.name": "mlrun-" + norm_name + "-{uuid}",
        }
    )

    cluster = KubeCluster(
        pod,
        deploy_mode="remote",
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout,
    )

    logger.info(
        "cluster {} started at {}".format(cluster.name, cluster.scheduler_address)
    )

    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == "NodePort":
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {
            "scheduler": ports[0].node_port,
            "dashboard": ports[1].node_port,
        }

    # A fixed replica count wins; otherwise scale adaptively between bounds.
    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas, maximum=spec.max_replicas)

    return cluster