def test_pod_from_minimal_dict(image_name, loop, ns):
    spec = {
        'spec': {
            'containers': [{
                'args': [
                    'dask-worker',
                    '$(DASK_SCHEDULER_ADDRESS)',
                    '--nthreads', '1',
                    '--death-timeout', '60'
                ],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'worker'
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.adapt()
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    from prefect.engine import get_default_flow_runner_class
    from prefect.engine.executors import DaskExecutor
    from dask_kubernetes import KubeCluster

    with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
        worker_pod = yaml.safe_load(pod_file)
        worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(worker_pod)
        cluster.adapt(minimum=1, maximum=1)

        # Load serialized flow from file and run it with a DaskExecutor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            executor = DaskExecutor(address=cluster.scheduler_address)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
            worker_pod = yaml.safe_load(pod_file)
            worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

            cluster = KubeCluster.from_dict(
                worker_pod, namespace=prefect.context.get("namespace")
            )
            cluster.adapt(minimum=1, maximum=1)

            # Load serialized flow from file and run it with a DaskExecutor
            with open(
                prefect.context.get(
                    "flow_file_path", "/root/.prefect/flow_env.prefect"
                ),
                "rb",
            ) as f:
                flow = cloudpickle.load(f)

                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)

        sys.exit(0)  # attempt to force resource cleanup
    except Exception as exc:
        self.logger.error("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def test_pod_from_minimal_dict(image_name, loop, ns):
    spec = {
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads", "1",
                    "--death-timeout", "60",
                ],
                "command": None,
                "image": image_name,
                "imagePullPolicy": "IfNotPresent",
                "name": "worker",
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.adapt()
        with Client(cluster, loop=loop) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
def test_pod_from_dict(image_name, loop, ns):
    spec = {
        'metadata': {},
        'restartPolicy': 'Never',
        'spec': {
            'containers': [{
                'args': ['dask-worker', '$(DASK_SCHEDULER_ADDRESS)',
                         '--nthreads', '1',
                         '--death-timeout', '60'],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'dask-worker',
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.scale(2)
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11

            while len(cluster.scheduler.workers) < 2:
                sleep(0.1)

            # Ensure that inter-worker communication works well
            futures = client.map(lambda x: x + 1, range(10))
            total = client.submit(sum, futures)
            assert total.result() == sum(map(lambda x: x + 1, range(10)))
            assert all(client.has_what().values())
async def test_pod_from_minimal_dict(image_name, ns, auth):
    spec = {
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads", "1",
                        "--death-timeout", "60",
                    ],
                    "command": None,
                    "image": image_name,
                    "imagePullPolicy": "IfNotPresent",
                    "name": "worker",
                }
            ]
        }
    }

    async with KubeCluster.from_dict(
        spec, namespace=ns, auth=auth, **cluster_kwargs
    ) as cluster:
        cluster.adapt()
        async with Client(cluster, asynchronous=True) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = await future
            assert result == 11
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(
                path.join(path.dirname(__file__), "worker_pod.yaml")
            ) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        # Load serialized flow from file and run it with a DaskExecutor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            # populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in flow.storage.secrets:
                secrets[secret] = prefect.tasks.secrets.PrefectSecret(
                    name=secret
                ).run()

            with prefect.context(secrets=secrets):
                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception(
            "Unexpected error raised during flow run: {}".format(exc)
        )
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def make_kube(pod_spec, **kws):
    """Create a dask_kubernetes.KubeCluster.

    pod_spec is either the name of a YAML file containing the worker pod
    specification or a dict containing the specification directly.

    kws is passed to KubeCluster.from_yaml or .from_dict.
    """
    from dask_kubernetes import KubeCluster

    if isinstance(pod_spec, str):
        return KubeCluster.from_yaml(pod_spec, **kws)
    else:
        return KubeCluster.from_dict(pod_spec, **kws)
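# A minimal usage sketch for make_kube, assuming it is called with either a
# parsed pod spec dict or a path to a YAML file. The spec below is a
# hypothetical example, not part of the original source.
spec = {
    "spec": {
        "containers": [{
            "args": ["dask-worker", "$(DASK_SCHEDULER_ADDRESS)",
                     "--nthreads", "1", "--death-timeout", "60"],
            "image": "daskdev/dask:latest",
            "name": "worker",
        }]
    }
}
cluster = make_kube(spec, namespace="default")  # from a dict
# cluster = make_kube("worker_pod.yaml", namespace="default")  # or from a YAML file path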
async def test_pod_from_dict(image_name, ns, auth):
    spec = {
        "metadata": {},
        "restartPolicy": "Never",
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads", "1",
                    "--death-timeout", "60",
                ],
                "command": None,
                "image": image_name,
                "imagePullPolicy": "IfNotPresent",
                "name": "dask-worker",
            }]
        },
    }

    async with KubeCluster.from_dict(
        spec, namespace=ns, port=32000, auth=auth, **cluster_kwargs
    ) as cluster:
        cluster.scale(2)
        await cluster
        assert "32000" in cluster.scheduler_address
        async with Client(cluster, asynchronous=True) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = await future
            assert result == 11

            while len(cluster.scheduler_info["workers"]) < 2:
                await asyncio.sleep(0.1)

            # Ensure that inter-worker communication works well
            futures = client.map(lambda x: x + 1, range(10))
            total = client.submit(sum, futures)
            assert (await total) == sum(map(lambda x: x + 1, range(10)))
            assert all((await client.has_what()).values())
def run(self, flow: "Flow") -> None:
    """
    Run the flow using a temporary dask-kubernetes cluster.

    Args:
        - flow (Flow): the flow to run.
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(
                path.join(path.dirname(__file__), "worker_pod.yaml")
            ) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        executor = DaskExecutor(address=cluster.scheduler_address)
        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception(
            "Unexpected error raised during flow run: {}".format(exc)
        )
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def test_pod_from_dict(image_name, loop, ns):
    spec = {
        "metadata": {},
        "restartPolicy": "Never",
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads", "1",
                    "--death-timeout", "60",
                ],
                "command": None,
                "image": image_name,
                "imagePullPolicy": "IfNotPresent",
                "name": "dask-worker",
            }]
        },
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.scale(2)
        with Client(cluster, loop=loop) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11

            while len(cluster.scheduler_info["workers"]) < 2:
                sleep(0.1)

            # Ensure that inter-worker communication works well
            futures = client.map(lambda x: x + 1, range(10))
            total = client.submit(sum, futures)
            assert total.result() == sum(map(lambda x: x + 1, range(10)))
            assert all(client.has_what().values())
def run(
    self, environment_file_path: str = "/root/.prefect/flow_env.prefect"
) -> "prefect.engine.state.State":
    """
    Runs the `Flow` represented by this environment.

    This creates a dask scheduler with the ability to scale from a single
    worker to the provided `max_workers`. The .prefect flow that was stored
    in this image is deserialized and has its `run` method called with the
    `DaskExecutor` pointing to the dask scheduler present on this pod.

    Args:
        - environment_file_path (str, optional): File path to the Prefect
            environment file; this is generally a serialized LocalEnvironment

    Returns:
        - prefect.engine.state.State: the state of the flow run
    """
    from prefect.engine.executors import DaskExecutor
    from dask_kubernetes import KubeCluster

    with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
        worker_pod = yaml.safe_load(pod_file)
        worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(worker_pod)
        cluster.adapt(minimum=1, maximum=self.max_workers)

        schema = prefect.serialization.environment.EnvironmentSchema()
        with open(environment_file_path, "r") as f:
            environment = schema.load(json.load(f))

        return environment.run(
            runner_kwargs={
                "executor": DaskExecutor(address=cluster.scheduler_address)
            }
        )
def run_dask_function(config):
    """Start a Dask Cluster using dask-kubernetes and run a function.

    Talks to kubernetes to create `n` new pods, each running a dask worker,
    which together form a dask cluster. A function specified in `config` is
    then imported and run with the given arguments, and the tasks it creates
    are executed on the dask cluster for distributed computation.

    The config dict must contain the following sections:
        * run
        * dask_cluster
        * output

    Args:
        config (dict):
            Config dictionary.
    """
    output_conf = config.get('output')
    if output_conf:
        path = output_conf.get('path')
        if not path:
            raise ValueError(
                'An output path must be provided when providing `output`.')

    cluster_spec = _generate_cluster_spec(config, kubernetes=False)
    cluster = KubeCluster.from_dict(cluster_spec)

    workers = config['dask_cluster'].get('workers')

    if not workers:
        cluster.adapt()
    elif isinstance(workers, int):
        cluster.scale(workers)
    else:
        cluster.adapt(**workers)

    client = Client(cluster)
    client.get_versions(check=True)

    try:
        run = _import_function(config['run'])
        kwargs = config['run']['args']
        results = run(**kwargs)
    finally:
        client.close()
        cluster.close()

    if output_conf:
        bucket = output_conf.get('bucket')
        try:
            if bucket:
                aws_key = output_conf.get('key')
                aws_secret = output_conf.get('secret_key')
                _upload_to_s3(bucket, path, results, aws_key, aws_secret)
            else:
                os.makedirs(os.path.dirname(path), exist_ok=True)
                results.to_csv(path)
        except Exception:
            print('Error storing results. Falling back to console dump.')
            print(_df_to_csv_str(results))
    else:
        return results
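# A hypothetical config for run_dask_function, sketched from the docstring
# and the keys the function actually reads (config['run']['args'],
# config['dask_cluster']['workers'], and the 'output' section). The exact
# contents of the 'run' section consumed by _import_function are not shown
# in the source, so the 'function' key below is an assumption.
config = {
    'run': {
        'function': 'mypackage.pipeline.run_benchmark',  # assumed key and format
        'args': {'iterations': 10},                      # passed as run(**kwargs)
    },
    'dask_cluster': {
        # int -> cluster.scale(n); dict -> cluster.adapt(**workers);
        # absent -> cluster.adapt() with defaults
        'workers': 4,
    },
    'output': {
        'path': 'results/output.csv',
        # optional S3 upload keys: 'bucket', 'key', 'secret_key'
    },
}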
def run_flow(self) -> None:
    """
    Run the flow using a Dask executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(
                path.join(path.dirname(__file__), "worker_pod.yaml")
            ) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        flow_run_id = prefect.context.get("flow_run_id")

        if not flow_run_id:
            raise ValueError("No flow run ID found in context.")

        query = {
            "query": {
                with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                    "flow": {
                        "name": True,
                        "storage": True,
                    },
                }
            }
        }

        # Prefect GraphQL client (not a dask.distributed Client)
        client = Client()
        result = client.graphql(query)
        flow_run = result.data.flow_run[0]

        flow_data = flow_run.flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets):
            flow = storage.get_flow(storage.flows[flow_data.name])
            executor = DaskExecutor(address=cluster.scheduler_address)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception(
            "Unexpected error raised during flow run: {}".format(exc)
        )
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def get_cluster(
        name=None,
        extra_pip_packages=None,
        extra_conda_packages=None,
        memory_gb=None,
        nthreads=None,
        cpus=None,
        cred_name=None,
        cred_path=None,
        env_items=None,
        scaling_factor=1,
        dask_config_dict={},
        template_path='~/worker-template.yml',
        **kwargs):
    """
    Start dask.kubernetes cluster and dask.distributed client

    All arguments are optional. If not provided, arguments will default to
    values provided in ``template_path``.

    Parameters
    ----------
    name : str, optional
        Name of worker image to use. If None, default to worker specified in
        ``template_path``.
    extra_pip_packages : str, optional
        Extra pip packages to install on worker. Packages are installed
        using ``pip install extra_pip_packages``.
    extra_conda_packages : str, optional
        Extra conda packages to install on worker. Default channel is
        ``conda-forge``. Packages are installed using
        ``conda install -y -c conda-forge ${EXTRA_CONDA_PACKAGES}``.
    memory_gb : float, optional
        Memory to assign per 'group of workers', where a group consists of
        nthreads independent workers.
    nthreads : int, optional
        Number of independent threads per group of workers. Not sure if this
        should ever be set to something other than 1.
    cpus : float, optional
        Number of virtual CPUs to assign per 'group of workers'
    cred_name : str, optional
        Name of Google Cloud credentials file to use, equivalent to providing
        ``cred_path='/opt/gcsfuse_tokens/{}.json'.format(cred_name)``
    cred_path : str, optional
        Path to Google Cloud credentials file to use.
    env_items : list of dict, optional
        A list of env variable 'name'-'value' pairs to append to the env
        variables included in ``template_path``, e.g.

        .. code-block:: python

            [{
                'name': 'GOOGLE_APPLICATION_CREDENTIALS',
                'value': '/opt/gcsfuse_tokens/rhg-data.json'}]

    scaling_factor : float, optional
        scale the worker memory & CPU size using a constant multiplier of the
        specified worker. No constraints in terms of performance or cluster
        size are enforced - if you request too little the dask worker will
        not perform; if you request too much you may see an
        ``InsufficientMemory`` or ``InsufficientCPU`` error on the google
        cloud Kubernetes console. Recommended scaling factors given our
        default ``~/worker-template.yml`` specs are [0.5, 1, 2, 4].
    dask_config_dict : dict, optional
        Dask config parameters to modify from their defaults. A '.' is used
        to access progressive levels of the yaml structure. For instance, the
        dict could look like ``{'distributed.worker.profile.interval': '100ms'}``
    template_path : str, optional
        Path to worker template file. Default ``~/worker-template.yml``.

    Returns
    -------
    client : object
        :py:class:`dask.distributed.Client` connected to cluster
    cluster : object
        Pre-configured :py:class:`dask_kubernetes.KubeCluster`

    See Also
    --------
    :py:func:`get_micro_cluster` : A cluster with one-CPU workers
    :py:func:`get_standard_cluster` : The default cluster specification
    :py:func:`get_big_cluster` : A cluster with workers twice the size of the default
    :py:func:`get_giant_cluster` : A cluster with workers four times the size of the default
    """
    # update dask settings
    dask.config.set(dask_config_dict)

    template_path = os.path.expanduser(template_path)

    with open(template_path, 'r') as f:
        template = yml.load(f, Loader=yml.SafeLoader)

    container = template['spec']['containers'][0]

    # replace the default image with the new one
    if name is not None:
        container['image'] = name

    if extra_pip_packages is not None:
        container['env'].append({
            'name': 'EXTRA_PIP_PACKAGES',
            'value': extra_pip_packages})

    if extra_conda_packages is not None:
        container['env'].append({
            'name': 'EXTRA_CONDA_PACKAGES',
            'value': extra_conda_packages})

    if cred_path is not None:
        # can remove this first env var once the worker docker image is
        # updated to point to 'GOOGLE_APPLICATION_CREDENTIALS'
        container['env'].append({
            'name': 'GCLOUD_DEFAULT_TOKEN_FILE',
            'value': cred_path})
        container['env'].append({
            'name': 'GOOGLE_APPLICATION_CREDENTIALS',
            'value': cred_path})
    elif cred_name is not None:
        # can remove this first env var once the worker docker image is
        # updated to point to 'GOOGLE_APPLICATION_CREDENTIALS'
        container['env'].append({
            'name': 'GCLOUD_DEFAULT_TOKEN_FILE',
            'value': '/opt/gcsfuse_tokens/{}.json'.format(cred_name)})
        container['env'].append({
            'name': 'GOOGLE_APPLICATION_CREDENTIALS',
            'value': '/opt/gcsfuse_tokens/{}.json'.format(cred_name)})

    if env_items is not None:
        container['env'] = container['env'] + env_items

    # adjust worker creation args
    args = container['args']

    # set nthreads if provided
    nthreads_ix = args.index('--nthreads') + 1
    if nthreads is not None:
        args[nthreads_ix] = str(nthreads)

    # then in resources
    resources = container['resources']
    limits = resources['limits']
    requests = resources['requests']

    msg = '{} limits and requests do not match'

    if memory_gb is None:
        memory_gb = float(limits['memory'].strip('G'))
        mem_request = float(requests['memory'].strip('G'))
        assert memory_gb == mem_request, msg.format('memory')

    if cpus is None:
        cpus = float(limits['cpu'])
        cpu_request = float(requests['cpu'])
        assert cpus == cpu_request, msg.format('cpu')

    format_request = lambda x: '{:04.2f}'.format(np.floor(x * 100) / 100)

    # set memory-limit if provided
    mem_ix = args.index('--memory-limit') + 1
    args[mem_ix] = format_request(float(memory_gb) * scaling_factor) + 'GB'

    limits['memory'] = format_request(float(memory_gb) * scaling_factor) + 'G'
    requests['memory'] = format_request(float(memory_gb) * scaling_factor) + 'G'

    limits['cpu'] = format_request(float(cpus) * scaling_factor)
    requests['cpu'] = format_request(float(cpus) * scaling_factor)

    # start cluster and client and return
    cluster = KubeCluster.from_dict(template)
    client = dd.Client(cluster)

    return client, cluster
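# A usage sketch for get_cluster under the default ~/worker-template.yml;
# the package name and credential name below are illustrative assumptions.
client, cluster = get_cluster(
    extra_pip_packages='xarray',
    cred_name='rhg-data',
    scaling_factor=2,
)
cluster.scale(8)  # request 8 workers
# ... run distributed work through `client` ...
client.close()
cluster.close()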
def _get_cluster_dask_kubernetes(
    name=None,
    tag=None,
    extra_pip_packages=None,
    extra_conda_packages=None,
    memory_gb=None,
    nthreads=None,
    cpus=None,
    cred_name=None,
    cred_path=None,
    env_items=None,
    scaling_factor=1,
    dask_config_dict={},
    deploy_mode="local",
    idle_timeout=None,
    template_path="~/worker-template.yml",
    extra_worker_labels=None,
    extra_pod_tolerations=None,
    keep_default_tolerations=True,
    **kwargs,
):
    """
    **DEPRECATED (12/15/2020)**: we no longer maintain clusters using
    dask-kubernetes schedulers; only dask-gateway is now supported.

    Start dask.kubernetes cluster and dask.distributed client

    All arguments are optional. If not provided, arguments will default to
    values provided in ``template_path``.

    Parameters
    ----------
    name : str, optional
        Name of worker image to use (e.g. ``rhodium/worker:latest``). If
        ``None`` (default), default to worker specified in ``template_path``.
    tag : str, optional
        Tag of the worker image to use. Cannot be used in combination with
        ``name``, which should include a tag. If provided, overrides the tag
        of the image specified in ``template_path``. If ``None`` (default),
        the full image specified in ``name`` or ``template_path`` is used.
    extra_pip_packages : str, optional
        Extra pip packages to install on worker. Packages are installed
        using ``pip install extra_pip_packages``.
    extra_conda_packages : str, optional
        Extra conda packages to install on worker. Default channel is
        ``conda-forge``. Packages are installed using
        ``conda install -y -c conda-forge ${EXTRA_CONDA_PACKAGES}``.
    memory_gb : float, optional
        Memory to assign per 'group of workers', where a group consists of
        nthreads independent workers.
    nthreads : int, optional
        Number of independent threads per group of workers. Not sure if this
        should ever be set to something other than 1.
    cpus : float, optional
        Number of virtual CPUs to assign per 'group of workers'
    cred_name : str, optional
        Name of Google Cloud credentials file to use, equivalent to providing
        ``cred_path='/opt/gcsfuse_tokens/{}.json'.format(cred_name)``
    cred_path : str, optional
        Path to Google Cloud credentials file to use.
    env_items : dict, optional
        A dictionary of env variable 'name'-'value' pairs to append to the
        env variables included in ``template_path``, e.g.

        .. code-block:: python

            {
                'GOOGLE_APPLICATION_CREDENTIALS':
                    '/opt/gcsfuse_tokens/rhg-data.json',
            }

    scaling_factor : float, optional
        scale the worker memory & CPU size using a constant multiplier of the
        specified worker. No constraints in terms of performance or cluster
        size are enforced - if you request too little the dask worker will
        not perform; if you request too much you may see an
        ``InsufficientMemory`` or ``InsufficientCPU`` error on the google
        cloud Kubernetes console. Recommended scaling factors given our
        default ``~/worker-template.yml`` specs are [0.5, 1, 2, 4].
    dask_config_dict : dict, optional
        Dask config parameters to modify from their defaults. A '.' is used
        to access progressive levels of the yaml structure. For instance, the
        dict could look like ``{'distributed.worker.profile.interval': '100ms'}``
    deploy_mode : str, optional
        Where to deploy the scheduler (on the same pod or a different pod)
    idle_timeout : str, optional
        Number of seconds without active communication with the client before
        the remote scheduler shuts down (ignored if ``deploy_mode=='local'``).
        Default is to not shut down for this reason.
    template_path : str, optional
        Path to worker template file. Default ``~/worker-template.yml``.
    extra_worker_labels : dict, optional
        Dictionary of kubernetes labels to apply to pods. None (default)
        results in no additional labels besides those in the template, as
        well as ``jupyter_user``, which is inferred from the
        ``JUPYTERHUB_USER``, or, if not set, the server's hostname.
    extra_pod_tolerations : list of dict, optional
        List of pod toleration dictionaries. For example, to match a node
        pool NoSchedule toleration, you might provide:

        .. code-block:: python

            extra_pod_tolerations=[
                {
                    "effect": "NoSchedule",
                    "key": "k8s.dask.org_dedicated",
                    "operator": "Equal",
                    "value": "worker-highcpu"
                },
                {
                    "effect": "NoSchedule",
                    "key": "k8s.dask.org/dedicated",
                    "operator": "Equal",
                    "value": "worker-highcpu"
                }
            ]

    keep_default_tolerations : bool, optional
        Whether to append (default) or replace the default tolerations.
        Ignored if ``extra_pod_tolerations`` is ``None`` or has length 0.

    Returns
    -------
    client : object
        :py:class:`dask.distributed.Client` connected to cluster
    cluster : object
        Pre-configured :py:class:`dask_kubernetes.KubeCluster`

    See Also
    --------
    :py:func:`get_micro_cluster` : A cluster with one-CPU workers
    :py:func:`get_standard_cluster` : The default cluster specification
    :py:func:`get_big_cluster` : A cluster with workers twice the size of the default
    :py:func:`get_giant_cluster` : A cluster with workers four times the size of the default
    """
    if (name is not None) and (tag is not None):
        raise ValueError("provide either `name` or `tag`, not both")

    # update dask settings
    dask.config.set(dask_config_dict)

    template_path = os.path.expanduser(template_path)

    with open(template_path, "r") as f:
        template = yml.load(f, Loader=yml.SafeLoader)

    # update labels with default and user-provided labels
    if ("metadata" not in template) or (template.get("metadata", {}) is None):
        template["metadata"] = {}

    if ("labels" not in template["metadata"]) or (
        template["metadata"]["labels"] is None
    ):
        template["metadata"]["labels"] = {}

    labels = template["metadata"]["labels"]

    if extra_worker_labels is not None:
        labels.update(extra_worker_labels)

    labels.update(
        {"jupyter_user": os.environ.get("JUPYTERHUB_USER", socket.gethostname())}
    )

    template["metadata"]["labels"] = labels

    if "tolerations" not in template["spec"]:
        template["spec"]["tolerations"] = []

    if (extra_pod_tolerations is not None) and (len(extra_pod_tolerations) > 0):
        if keep_default_tolerations:
            template["spec"]["tolerations"].extend(extra_pod_tolerations)
        else:
            template["spec"]["tolerations"] = extra_pod_tolerations

    container = template["spec"]["containers"][0]

    # replace the default image with the new one
    if name is not None:
        container["image"] = name

    if tag is not None:
        img, _ = container["image"].split(":")
        container["image"] = ":".join([img, tag])

    if extra_pip_packages is not None:
        container["env"].append(
            {"name": "EXTRA_PIP_PACKAGES", "value": extra_pip_packages}
        )

    if extra_conda_packages is not None:
        container["env"].append(
            {"name": "EXTRA_CONDA_PACKAGES", "value": extra_conda_packages}
        )

    if cred_path is not None:
        container["env"].append(
            {"name": "GOOGLE_APPLICATION_CREDENTIALS", "value": cred_path}
        )
    elif cred_name is not None:
        container["env"].append(
            {
                "name": "GOOGLE_APPLICATION_CREDENTIALS",
                "value": "/opt/gcsfuse_tokens/{}.json".format(cred_name),
            }
        )

    if env_items is not None:
        if isinstance(env_items, dict):
            for k, v in env_items.items():
                container["env"].append({"name": k, "value": v})
        # allow deprecated passing of list of name/value pairs
        elif isinstance(env_items, Sequence):
            warnings.warn(
                "Passing a list of name/value pairs is deprecated. "
                "Please pass a dictionary instead."
            )
            container["env"] = container["env"] + env_items
        else:
            raise ValueError("Expected `env_items` of type dict or sequence.")

    # adjust worker creation args
    args = container["args"]

    # set nthreads if provided
    nthreads_ix = args.index("--nthreads") + 1
    if nthreads is not None:
        args[nthreads_ix] = str(nthreads)
    nthreads = int(args[nthreads_ix])

    # then in resources
    resources = container["resources"]
    limits = resources["limits"]
    requests = resources["requests"]

    msg = "{} limits and requests do not match"

    if memory_gb is None:
        memory_gb = float(limits["memory"].strip("G"))
        mem_request = float(requests["memory"].strip("G"))
        assert memory_gb == mem_request, msg.format("memory")

    if cpus is None:
        cpus = float(limits["cpu"])
        cpu_request = float(requests["cpu"])
        assert cpus == cpu_request, msg.format("cpu")

    # now properly set the threads accessible by multi-threaded libraries
    # so that there's no competition between dask threads and the threads of
    # these libraries
    cpus_rounded = np.round(cpus)
    lib_threads = int(cpus_rounded / nthreads)
    for lib in ["OMP_NUM_THREADS", "MKL_NUM_THREADS", "OPENBLAS_NUM_THREADS"]:
        # kubernetes env values must be strings
        container["env"].append({"name": lib, "value": str(lib_threads)})

    def format_request(x):
        return "{:04.2f}".format(np.floor(x * 100) / 100)

    # set memory-limit if provided
    mem_ix = args.index("--memory-limit") + 1
    args[mem_ix] = format_request(float(memory_gb) * scaling_factor) + "GB"

    limits["memory"] = format_request(float(memory_gb) * scaling_factor) + "G"
    requests["memory"] = format_request(float(memory_gb) * scaling_factor) + "G"

    limits["cpu"] = format_request(float(cpus) * scaling_factor)
    requests["cpu"] = format_request(float(cpus) * scaling_factor)

    # start cluster and client and return
    # need more time to connect to remote scheduler
    if deploy_mode == "remote":
        dask.config.set({"kubernetes.idle-timeout": idle_timeout})

    cluster = KubeCluster.from_dict(
        template, deploy_mode=deploy_mode, idle_timeout=None
    )
    client = dd.Client(cluster)

    return client, cluster