Esempio n. 1
0
async def dask_spec_local_cluster(
    monkeypatch: MonkeyPatch,
    cluster_id_resource_name: str,
) -> AsyncIterable[SpecCluster]:
    """Yield an asynchronous ``SpecCluster`` with a fixed set of four workers.

    The workers ("cpu-worker", "gpu-worker", "mpi-worker" and
    "gpu-mpi-worker") each advertise their own CPU/GPU/MPI/RAM resources plus
    one unit of the resource named ``cluster_id_resource_name``.

    While the cluster is running, the scheduler host and port are published
    through the ``DASK_SCHEDULER_HOST`` and ``DASK_SCHEDULER_PORT``
    environment variables (set via ``monkeypatch``).
    """
    # (worker name, number of threads, resources before adding the cluster id)
    worker_layout = (
        ("cpu-worker", 2, {"CPU": 2, "RAM": 48e9}),
        ("gpu-worker", 1, {"CPU": 1, "GPU": 1, "RAM": 48e9}),
        ("mpi-worker", 1, {"CPU": 8, "MPI": 1, "RAM": 768e9}),
        ("gpu-mpi-worker", 1, {"GPU": 1, "MPI": 1, "RAM": 768e9}),
    )
    # spelling every worker out lets us create a precisely known cluster
    worker_specs = {
        worker_name: {
            "cls": Worker,
            "options": {
                "nthreads": thread_count,
                "resources": {**base_resources, cluster_id_resource_name: 1},
            },
        }
        for worker_name, thread_count, base_resources in worker_layout
    }
    scheduler_spec = {
        "cls": Scheduler,
        "options": {"dashboard_address": ":8787"},
    }

    local_cluster = SpecCluster(
        workers=worker_specs,
        scheduler=scheduler_spec,
        asynchronous=True,
        name="pytest_cluster",
    )
    async with local_cluster as cluster:
        address = URL(cluster.scheduler_address)
        exported_env = (
            # fall back to a placeholder when the address carries no host
            ("DASK_SCHEDULER_HOST", address.host or "invalid"),
            ("DASK_SCHEDULER_PORT", f"{address.port}"),
        )
        for env_name, env_value in exported_env:
            monkeypatch.setenv(env_name, env_value)
        yield cluster
Esempio n. 2
0
async def test_changed_scheduler_raises_exception(
    dask_spec_local_cluster: SpecCluster,
    dask_client: DaskClient,
    user_id: UserID,
    project_id: ProjectID,
    cluster_id: ClusterID,
    cpu_image: ImageParams,
    mocked_node_ports: None,
    mocked_user_completed_cb: mock.AsyncMock,
):
    """Sending tasks after the scheduler was swapped must raise.

    The running cluster is closed and a new scheduler-only ``SpecCluster`` is
    started on the same port. Submitting computation tasks through the
    existing client is then expected to raise
    ``ComputationalSchedulerChangedError`` without ever invoking the user
    completion callback.
    """
    # remember where the first scheduler lived, then shut it down
    original_address = URL(dask_spec_local_cluster.scheduler_address)
    await dask_spec_local_cluster.close()  # type: ignore

    # bring up a different scheduler listening on the very same port
    replacement_scheduler = {
        "cls": Scheduler,
        "options": {
            "dashboard_address": ":8787",
            "port": original_address.port,
        },
    }
    replacement_cluster = SpecCluster(
        scheduler=replacement_scheduler,
        asynchronous=True,
        name="pytest_cluster",
    )
    async with replacement_cluster as cluster:
        assert URL(cluster.scheduler_address) == original_address

        # give the client a moment to reconnect on its own
        await asyncio.sleep(2)

        submission = {
            "user_id": user_id,
            "project_id": project_id,
            "cluster_id": cluster_id,
            "tasks": cpu_image.fake_tasks,
            "callback": mocked_user_completed_cb,
            "remote_fct": None,
        }
        with pytest.raises(ComputationalSchedulerChangedError):
            await dask_client.send_computation_tasks(**submission)
    mocked_user_completed_cb.assert_not_called()
Esempio n. 3
0
def SSHCluster(
    hosts: list[str] | None = None,
    connect_options: dict | list[dict] = {},
    worker_options: dict = {},
    scheduler_options: dict = {},
    worker_module: str = "deprecated",
    worker_class: str = "distributed.Nanny",
    remote_python: str | list[str] | None = None,
    **kwargs,
):
    """Deploy a Dask cluster using SSH

    The SSHCluster function deploys a Dask Scheduler and Workers for you on a
    set of machine addresses that you provide.  The first address will be used
    for the scheduler while the rest will be used for the workers (feel free to
    repeat the first hostname if you want to have the scheduler and worker
    co-habitate one machine.)

    You may configure the scheduler and workers by passing
    ``scheduler_options`` and ``worker_options`` dictionary keywords.  See the
    ``dask.distributed.Scheduler`` and ``dask.distributed.Worker`` classes for
    details on the available options, but the defaults should work in most
    situations.

    You may configure your use of SSH itself using the ``connect_options``
    keyword, which passes values to the ``asyncssh.connect`` function.  For
    more information on these see the documentation for the ``asyncssh``
    library https://asyncssh.readthedocs.io .

    Parameters
    ----------
    hosts : list[str]
        List of hostnames or addresses on which to launch our cluster.
        The first will be used for the scheduler and the rest for workers.
    connect_options : dict or list of dict, optional
        Keywords to pass through to :func:`asyncssh.connect`.
        This could include things such as ``port``, ``username``, ``password``
        or ``known_hosts``. See docs for :func:`asyncssh.connect` and
        :class:`asyncssh.SSHClientConnectionOptions` for full information.
        If a list it must have the same length as ``hosts``.
    worker_options : dict, optional
        Keywords to pass on to workers.
    scheduler_options : dict, optional
        Keywords to pass on to scheduler.
    worker_class: str
        The python class to use to create the worker(s).
    remote_python : str or list of str, optional
        Path to Python on remote nodes.

    Examples
    --------
    Create a cluster with one worker:

    >>> from dask.distributed import Client, SSHCluster
    >>> cluster = SSHCluster(["localhost", "localhost"])
    >>> client = Client(cluster)

    Create a cluster with three workers, each with two threads
    and host the dashdoard on port 8797:

    >>> from dask.distributed import Client, SSHCluster
    >>> cluster = SSHCluster(
    ...     ["localhost", "localhost", "localhost", "localhost"],
    ...     connect_options={"known_hosts": None},
    ...     worker_options={"nthreads": 2},
    ...     scheduler_options={"port": 0, "dashboard_address": ":8797"}
    ... )
    >>> client = Client(cluster)

    Create a cluster with two workers on each host:

    >>> from dask.distributed import Client, SSHCluster
    >>> cluster = SSHCluster(
    ...     ["localhost", "localhost", "localhost", "localhost"],
    ...     connect_options={"known_hosts": None},
    ...     worker_options={"nthreads": 2, "n_workers": 2},
    ...     scheduler_options={"port": 0, "dashboard_address": ":8797"}
    ... )
    >>> client = Client(cluster)

    An example using a different worker class, in particular the
    ``CUDAWorker`` from the ``dask-cuda`` project:

    >>> from dask.distributed import Client, SSHCluster
    >>> cluster = SSHCluster(
    ...     ["localhost", "hostwithgpus", "anothergpuhost"],
    ...     connect_options={"known_hosts": None},
    ...     scheduler_options={"port": 0, "dashboard_address": ":8797"},
    ...     worker_class="dask_cuda.CUDAWorker")
    >>> client = Client(cluster)

    See Also
    --------
    dask.distributed.Scheduler
    dask.distributed.Worker
    asyncssh.connect
    """
    if worker_module != "deprecated":
        raise ValueError(
            "worker_module has been deprecated in favor of worker_class. "
            "Please specify a Python class rather than a CLI module.")

    if set(kwargs) & old_cluster_kwargs:
        from distributed.deploy.old_ssh import SSHCluster as OldSSHCluster

        warnings.warn("Note that the SSHCluster API has been replaced.  "
                      "We're routing you to the older implementation.  "
                      "This will be removed in the future")
        kwargs.setdefault("worker_addrs", hosts)
        return OldSSHCluster(**kwargs)

    if not hosts:
        raise ValueError(
            f"`hosts` must be a non empty list, value {repr(hosts)!r} found.")
    if isinstance(connect_options,
                  list) and len(connect_options) != len(hosts):
        raise RuntimeError(
            "When specifying a list of connect_options you must provide a "
            "dictionary for each address.")

    if isinstance(remote_python, list) and len(remote_python) != len(hosts):
        raise RuntimeError(
            "When specifying a list of remote_python you must provide a "
            "path for each address.")

    scheduler = {
        "cls": Scheduler,
        "options": {
            "address":
            hosts[0],
            "connect_options":
            connect_options
            if isinstance(connect_options, dict) else connect_options[0],
            "kwargs":
            scheduler_options,
            "remote_python":
            remote_python[0]
            if isinstance(remote_python, list) else remote_python,
        },
    }
    workers = {
        i: {
            "cls": Worker,
            "options": {
                "address":
                host,
                "connect_options":
                connect_options if isinstance(connect_options, dict) else
                connect_options[i + 1],
                "kwargs":
                worker_options,
                "worker_class":
                worker_class,
                "remote_python":
                remote_python[i + 1]
                if isinstance(remote_python, list) else remote_python,
            },
        }
        for i, host in enumerate(hosts[1:])
    }
    return SpecCluster(workers, scheduler, name="SSHCluster", **kwargs)
Esempio n. 4
0
def SSHDockerCluster(hosts: List[str] = ['localhost', 'localhost'],
                     image: str = 'daskdev/dask:latest',
                     docker_login: bool = False,
                     connect_options: dict = {},
                     worker_module: str = "distributed.cli.dask_worker",
                     **kwargs):
    """ Deploy a Dask cluster using SSH, with a Docker image given per node

    Note that the helper binary, ``dask-ssh-docker``, is installed along with
    this package, and obviates the need to call this function directly.  Plus,
    it allows for user code which executes locally by default, but can leverage
    a cluster.

    The SSHDockerCluster function deploys a Dask Scheduler and Workers for you
    on a set of machine addresses that you provide.  The first address will be
    used for the scheduler while the rest will be used for the workers (feel
    free to repeat the first hostname if you want to have the scheduler and
    worker co-habitate one machine.)

    You may configure the scheduler and workers by passing space-delimited
    arguments for ``dask-scheduler`` and ``dask-worker`` after the address
    in the ``hosts`` list.

    You may configure your use of SSH itself using the ``connect_options``
    keyword, which passes values to the ``asyncssh.connect`` function.  For
    more information on these see the documentation for the ``asyncssh``
    library https://asyncssh.readthedocs.io .

    Parameters
    ----------
    hosts: List[str]
        List of hostnames or addresses on which to launch our cluster
        The first will be used for the scheduler and the rest for workers

        Note that these are formatted as 'localhost [--scheduler-opt value]...'
        and 'localhost [--worker-opt value]' - that is, the address followed
        by a space-delimited list of arguments to send to dask-scheduler
        and dask-worker, respectively.
    image:
        Name of the image handed to the scheduler and to every worker spec.
    docker_login:
        Some remote repositories, such as when image is specified as
        ``'docker.repository.com:port/imagename:tag'``, require authentication.
        If the ``docker_login`` flag is set, then the current user's
        ~/.docker/config.json will be parsed for the given repository and
        transferred to the remote machine so that it might pull down the
        image.

        This is disabled by default, because transferring auth without opting
        in to that behavior would be concerning.
    connect_options:
        Keywords to pass through to asyncssh.connect
        known_hosts: List[str] or None
            The list of keys which will be used to validate the server host
            key presented during the SSH handshake.  If this is not specified,
            the keys will be looked up in the file .ssh/known_hosts.  If this
            is explicitly set to None, server host key validation will be disabled.
    worker_module:
        Python module to call to start the worker
    **kwargs:
        Extra keywords forwarded to ``SpecCluster``.

    Returns
    -------
    SpecCluster
        The cluster built from the scheduler and worker specs.

    Raises
    ------
    TypeError
        If ``hosts`` is not a list or ``image`` is not a str.
    ValueError
        If ``hosts`` is empty.

    Examples
    --------
    >>> from dask.distributed import Client
    >>> cluster = SSHDockerCluster(
    ...     ["localhost", "localhost", "localhost", "localhost"],
    ...     connect_options={"known_hosts": None},
    ... )
    >>> client = Client(cluster)

    An example using a different worker module, in particular the
    ``dask-cuda-worker`` command from the ``dask-cuda`` project.

    >>> from dask.distributed import Client
    >>> cluster = SSHDockerCluster(
    ...     ["localhost", "hostwithgpus", "anothergpuhost"],
    ...     connect_options={"known_hosts": None},
    ...     worker_module='dask_cuda.dask_cuda_worker')
    >>> client = Client(cluster)

    Scheduler and worker options are not keyword arguments of this function;
    per the ``hosts`` description above they are appended to the host entry.

    See Also
    --------
    dask.distributed.Scheduler
    dask.distributed.Worker
    asyncssh.connect
    """

    # Explicit checks rather than ``assert`` so that validation is not
    # stripped when Python runs with ``-O``.
    if not isinstance(hosts, list):
        raise TypeError(
            "`hosts` must be a list of str, value {!r} found.".format(hosts))
    if not isinstance(image, str):
        raise TypeError(
            "`image` must be a str, value {!r} found.".format(image))
    if not hosts:
        # hosts[0] is needed for the scheduler; fail with a clear message
        # instead of a bare IndexError.
        raise ValueError(
            "`hosts` must be a non empty list, value {!r} found.".format(hosts))

    # Work on a per-call copy: the ``{}`` default is a single object shared by
    # every call, and it must not be the object stored in the node specs.
    connect_options = dict(connect_options)

    scheduler = {
        "cls": Scheduler,
        "options": {
            "address": hosts[0],
            "connect_options": connect_options,
            "image": image,
            "docker_login": docker_login,
            "kwargs": {},
        },
    }
    # Every host after the first one becomes a worker, keyed by its position.
    workers = {
        i: {
            "cls": Worker,
            "options": {
                "address": host,
                "connect_options": connect_options,
                "image": image,
                "docker_login": docker_login,
                "worker_module": worker_module,
                "kwargs": {},
            },
        }
        for i, host in enumerate(hosts[1:])
    }
    return SpecCluster(workers, scheduler, name="SSHCluster", **kwargs)
Esempio n. 5
0
async def dask_spec_local_cluster(
    monkeypatch: MonkeyPatch,
) -> AsyncIterable[SpecCluster]:
    """Yield an asynchronous ``SpecCluster`` made of four hand-picked workers.

    Each worker ("cpu-worker", "gpu-worker", "mpi-worker", "gpu-mpi-worker")
    advertises its own mix of CPU/GPU/MPI/RAM resources.  While the cluster is
    up, the scheduler host and port are exposed through the
    ``DASK_SCHEDULER_HOST`` and ``DASK_SCHEDULER_PORT`` environment variables
    (set via ``monkeypatch``).
    """

    def _worker_spec(nthreads: int, **resources: float) -> dict:
        # one worker entry of the cluster specification
        return {
            "cls": Worker,
            "options": {"nthreads": nthreads, "resources": resources},
        }

    # listing the workers explicitly gives a precisely known cluster
    worker_specs = {
        "cpu-worker": _worker_spec(2, CPU=2, RAM=48e9),
        "gpu-worker": _worker_spec(1, CPU=1, GPU=1, RAM=48e9),
        "mpi-worker": _worker_spec(1, CPU=8, MPI=1, RAM=768e9),
        "gpu-mpi-worker": _worker_spec(1, GPU=1, MPI=1, RAM=768e9),
    }
    scheduler_spec = {
        "cls": Scheduler,
        "options": {"dashboard_address": ":31545"},
    }

    local_cluster = SpecCluster(
        workers=worker_specs,
        scheduler=scheduler_spec,
        asynchronous=True,
        name="pytest_cluster",
    )
    async with local_cluster as cluster:
        address = URL(cluster.scheduler_address)
        # fall back to a placeholder when the address carries no host
        host_value = address.host or "invalid"
        port_value = f"{address.port}"
        monkeypatch.setenv("DASK_SCHEDULER_HOST", host_value)
        monkeypatch.setenv("DASK_SCHEDULER_PORT", port_value)
        yield cluster
    # force yielding to the event loop so that it properly closes the cluster
    await asyncio.sleep(0)