def create_cluster(**kwargs):
    """Start a dask ``KubeCluster`` whose workers are described by ``task``.

    Worker count, CPU, memory and image are read from ``task['inputs']``;
    ``kwargs`` is accepted but not consulted.

    NOTE(review): depends on module-level ``task``, ``client`` (kubernetes),
    ``cpu_to_threads`` and ``KubeCluster`` being in scope — confirm at the
    file level.
    """
    inputs = task['inputs']
    n_workers = inputs.get('workers', 0)
    core_count = inputs.get('worker_cores', 2)
    mem = inputs.get('worker_memory', 2)
    worker_image = inputs.get('worker_image', 'daskdev/dask:latest')

    # the same mapping is used for both requests and limits
    resource_spec = {
        'cpu': str(core_count),
        'memory': str(mem),
    }

    worker_args = [
        'dask-worker',
        '--nthreads', str(cpu_to_threads(core_count)),
        '--no-bokeh',
        '--memory-limit', f'{mem}B',
        '--death-timeout', '60',
    ]

    dask_container = client.V1Container(
        name='dask',
        image=worker_image,
        args=worker_args,
        resources=client.V1ResourceRequirements(
            limits=resource_spec,
            requests=resource_spec,
        ),
    )

    # one pull-secret reference per configured secret name
    pull_secrets = []
    for secret in inputs.get('pull_secrets', ['docker']):
        pull_secrets.append(client.V1LocalObjectReference(name=secret))

    worker_labels = {
        'cowait/task': 'worker-' + task.get('id'),
        'cowait/parent': task.get('id'),
    }
    pod_template = client.V1Pod(
        metadata=client.V1ObjectMeta(labels=worker_labels),
        spec=client.V1PodSpec(
            restart_policy='Never',
            image_pull_secrets=pull_secrets,
            containers=[dask_container],
        ),
    )

    return KubeCluster(
        pod_template=pod_template,
        n_workers=n_workers,
    )
def deploy_function(function: DaskCluster, secrets=None):
    """Deploy a remote dask scheduler/worker cluster for *function*.

    Builds a worker pod from ``function.spec``, starts a ``KubeCluster`` in
    remote deploy mode, records the scheduler address (and node ports when
    using a NodePort service) on ``function.status``, then scales the
    cluster to ``spec.replicas`` or enables adaptive scaling.

    :param function: DaskCluster runtime object carrying spec and metadata.
    :param secrets:  unused; kept for interface compatibility.
    :returns: the started ``KubeCluster`` instance.
    :raises ImportError: when dask / dask_kubernetes are not installed.
    """
    try:
        from dask_kubernetes import KubeCluster, make_pod_spec  # noqa: F401
        from dask.distributed import Client, default_client  # noqa: F401
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # bug fix: print() does not apply %-style lazy formatting the way
        # logging does, so the original emitted a literal "%s"; format the
        # exception into the message explicitly and re-raise bare to keep
        # the original traceback.
        print('missing dask or dask_kubernetes, please run '
              '"pip install dask distributed dask_kubernetes", {}'.format(e))
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or 'daskdev/dask:latest'
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        # NOTE(review): appends onto spec.env in place — verify callers do
        # not reuse the spec across deployments
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function)
    args = ['dask-worker', "--nthreads", str(spec.nthreads)]
    if spec.args:
        args += spec.args

    container = client.V1Container(name='base',
                                   image=image,
                                   env=env,
                                   args=args,
                                   image_pull_policy=spec.image_pull_policy,
                                   volume_mounts=spec.volume_mounts,
                                   resources=spec.resources)

    pod_spec = client.V1PodSpec(containers=[container],
                                restart_policy='Never',
                                volumes=spec.volumes,
                                service_account=spec.service_account)
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)]

    pod = client.V1Pod(metadata=client.V1ObjectMeta(namespace=namespace,
                                                    labels=pod_labels),
                                                    #annotations=meta.annotation),
                       spec=pod_spec)

    # patch the scheduler service template before the cluster is created
    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            spec.service_type = 'NodePort'
            svc_temp['spec']['ports'][1]['nodePort'] = spec.node_port
        update_in(svc_temp, 'spec.type', spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set({"kubernetes.scheduler-service-template": svc_temp,
                     'kubernetes.name': 'mlrun-' + norm_name + '-{uuid}'})

    cluster = KubeCluster(
        pod,
        deploy_mode='remote',
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout)

    logger.info('cluster {} started at {}'.format(
        cluster.name, cluster.scheduler_address
    ))
    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == 'NodePort':
        # ports[0] is the scheduler port, ports[1] the dashboard port, per
        # the dask-kubernetes scheduler service template patched above
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {'scheduler': ports[0].node_port,
                                      'dashboard': ports[1].node_port}

    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas, maximum=spec.max_replicas)
    return cluster
def deploy_function(function: DaskCluster, secrets=None):
    # TODO: why is this here :|
    """Deploy a remote dask scheduler/worker cluster for *function*.

    Builds a worker pod from ``function.spec`` (forwarding any memory limit
    to ``dask-worker --memory-limit``), starts a ``KubeCluster`` in remote
    deploy mode, records the scheduler address (and node ports when using a
    NodePort service) on ``function.status``, then scales the cluster to
    ``spec.replicas`` or enables adaptive scaling.

    :param function: DaskCluster runtime object carrying spec and metadata.
    :param secrets:  unused; kept for interface compatibility.
    :returns: the started ``KubeCluster`` instance.
    :raises ImportError: when dask / dask_kubernetes are not installed.
    """
    try:
        from dask_kubernetes import KubeCluster, make_pod_spec  # noqa: F401
        from dask.distributed import Client, default_client  # noqa: F401
        from kubernetes_asyncio import client
        import dask
    except ImportError as e:
        # bug fix: print() does not apply %-style lazy formatting the way
        # logging does, so the original emitted a literal "%s"; format the
        # exception into the message explicitly and re-raise bare to keep
        # the original traceback.
        print(
            "missing dask or dask_kubernetes, please run "
            '"pip install dask distributed dask_kubernetes", {}'.format(e)
        )
        raise

    spec = function.spec
    meta = function.metadata
    spec.remote = True

    image = function.full_image_path() or "daskdev/dask:latest"
    env = spec.env
    namespace = meta.namespace or config.namespace
    if spec.extra_pip:
        # NOTE(review): appends onto spec.env in place — verify callers do
        # not reuse the spec across deployments
        env.append(spec.extra_pip)

    pod_labels = get_resource_labels(function, scrape_metrics=False)
    args = ["dask-worker", "--nthreads", str(spec.nthreads)]
    # mirror the container memory limit in the dask worker itself
    memory_limit = spec.resources.get("limits", {}).get("memory")
    if memory_limit:
        args.extend(["--memory-limit", str(memory_limit)])
    if spec.args:
        args.extend(spec.args)

    container = client.V1Container(
        name="base",
        image=image,
        env=env,
        args=args,
        image_pull_policy=spec.image_pull_policy,
        volume_mounts=spec.volume_mounts,
        resources=spec.resources,
    )

    pod_spec = client.V1PodSpec(
        containers=[container],
        restart_policy="Never",
        volumes=spec.volumes,
        service_account=spec.service_account,
    )
    if spec.image_pull_secret:
        pod_spec.image_pull_secrets = [
            client.V1LocalObjectReference(name=spec.image_pull_secret)
        ]

    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(namespace=namespace, labels=pod_labels),
        # annotations=meta.annotation),
        spec=pod_spec,
    )

    # patch the scheduler service template before the cluster is created
    svc_temp = dask.config.get("kubernetes.scheduler-service-template")
    if spec.service_type or spec.node_port:
        if spec.node_port:
            spec.service_type = "NodePort"
            svc_temp["spec"]["ports"][1]["nodePort"] = spec.node_port
        update_in(svc_temp, "spec.type", spec.service_type)

    norm_name = normalize_name(meta.name)
    dask.config.set(
        {
            "kubernetes.scheduler-service-template": svc_temp,
            "kubernetes.name": "mlrun-" + norm_name + "-{uuid}",
        }
    )

    cluster = KubeCluster(
        pod,
        deploy_mode="remote",
        namespace=namespace,
        scheduler_timeout=spec.scheduler_timeout,
    )

    logger.info(
        "cluster {} started at {}".format(cluster.name, cluster.scheduler_address)
    )
    function.status.scheduler_address = cluster.scheduler_address
    function.status.cluster_name = cluster.name
    if spec.service_type == "NodePort":
        # ports[0] is the scheduler port, ports[1] the dashboard port, per
        # the dask-kubernetes scheduler service template patched above
        ports = cluster.scheduler.service.spec.ports
        function.status.node_ports = {
            "scheduler": ports[0].node_port,
            "dashboard": ports[1].node_port,
        }

    if spec.replicas:
        cluster.scale(spec.replicas)
    else:
        cluster.adapt(minimum=spec.min_replicas, maximum=spec.max_replicas)
    return cluster