def dask_gateway_object():
    """Build a Dask Gateway client for use from outside the cluster.

    A fresh JupyterHub API token is placed in the environment so the
    ``jupyterhub`` authentication scheme can pick it up, then a Gateway
    client is returned, pointed at the QHub HTTPS and TCP-proxy endpoints.
    """
    # The 'jupyterhub' auth backend reads its token from this env var.
    os.environ["JUPYTERHUB_API_TOKEN"] = get_jupyterhub_token()
    gateway_address = f"https://{constants.QHUB_HOSTNAME}/{constants.GATEWAY_ENDPOINT}"
    scheduler_proxy = f"tcp://{constants.QHUB_HOSTNAME}:8786"
    return dask_gateway.Gateway(
        address=gateway_address,
        auth="jupyterhub",
        proxy_address=scheduler_proxy,
    )
async def gateway():
    """Yield an asynchronous Gateway client against the test deployment.

    Any clusters already running are stopped before the fixture yields,
    and clusters left behind by the test are stopped again afterwards, so
    every test starts and ends with a clean gateway.
    """
    addr = os.environ.get("TEST_DASK_GATEWAY_KUBE_ADDRESS", "http://localhost:8000")
    auth = dask_gateway.BasicAuth(username="******")
    async with dask_gateway.Gateway(address=addr, asynchronous=True, auth=auth) as gw:
        # Start from a clean slate.
        for cluster in await gw.list_clusters():
            await gw.stop_cluster(cluster.name)
        yield gw
        # Tear down whatever the test created.
        for cluster in await gw.list_clusters():
            await gw.stop_cluster(cluster.name)
async def _connect_with_gateway_and_create_cluster(
    endpoint: AnyUrl, auth_params: ClusterAuthentication
) -> DaskSubSystem:
    """Connect to the dask-gateway at *endpoint* and return a ready DaskSubSystem.

    Re-uses the single already-running cluster when one exists, otherwise
    creates a new one (with ``shutdown_on_close=False`` so the cluster
    outlives this client).

    :raises ConfigurationError: on invalid configuration (TypeError from the gateway)
    :raises DaskClientRequestError: on request errors (404/422 surface as ValueError)
    :raises DaskClusterError: on cluster-level conflicts (409)
    :raises DaskGatewayServerError: on gateway server errors (500)
    """
    try:
        gateway_auth = await get_gateway_auth_from_params(auth_params)
        gateway = dask_gateway.Gateway(
            address=f"{endpoint}", auth=gateway_auth, asynchronous=True
        )
        # if there is already a cluster that means we can re-connect to it,
        # and IT SHALL BE the first in the list
        cluster_reports_list = await gateway.list_clusters()
        cluster = None
        if cluster_reports_list:
            assert (
                len(cluster_reports_list) == 1
            ), "More than 1 cluster at this location, that is unexpected!!"  # nosec
            cluster = await gateway.connect(
                cluster_reports_list[0].name, shutdown_on_close=False
            )
        else:
            cluster = await gateway.new_cluster(shutdown_on_close=False)
        assert cluster  # nosec
        logger.info("Cluster dashboard available: %s", cluster.dashboard_link)
        # NOTE: we scale to 1 worker as they are global
        await cluster.adapt(active=True)
        client = await cluster.get_client()  # type: ignore
        assert client  # nosec
        return DaskSubSystem(
            client=client,
            scheduler_id=client.scheduler_info()["id"],
            gateway=gateway,
            gateway_cluster=cluster,
        )
    except (TypeError) as exc:
        raise ConfigurationError(
            f"Cluster has invalid configuration: {endpoint=}, {auth_params=}"
        ) from exc
    except (ValueError) as exc:
        # this is when a 404=NotFound,422=MalformedData comes up
        raise DaskClientRequestError(endpoint=endpoint, error=exc) from exc
    except (dask_gateway.GatewayClusterError) as exc:
        # this is when a 409=Conflict/Cannot complete request comes up
        raise DaskClusterError(endpoint=endpoint, error=exc) from exc
    except (dask_gateway.GatewayServerError) as exc:
        # this is when a 500 comes up
        raise DaskGatewayServerError(endpoint=endpoint, error=exc) from exc
async def test_gateway_endpoint(
    endpoint: AnyUrl, authentication: ClusterAuthentication
) -> None:
    """Try to connect to a gateway endpoint, raising on failure.

    :raises ConfigurationError: contains details on why the connection failed
    """
    try:
        gateway_auth = await get_gateway_auth_from_params(authentication)
        async with dask_gateway.Gateway(
            address=f"{endpoint}", auth=gateway_auth, asynchronous=True
        ) as gateway:
            # Creating the Gateway object does not contact the server yet.
            # The dask-gateway ping helpers use a very long timeout, so we
            # probe the REST API ourselves with a short one instead.
            transport = httpx.AsyncHTTPTransport(retries=2)
            async with httpx.AsyncClient(transport=transport) as client:
                # A cheap call the API shall answer quickly.
                response = await client.get(
                    f"{endpoint}/api/version", timeout=_PING_TIMEOUT_S
                )
                response.raise_for_status()
            # Now check the gateway itself responds in a sensible way.
            await gateway.list_clusters()
            logger.debug("Pinging %s, succeeded", f"{endpoint=}")
    except (
        dask_gateway.GatewayServerError,
        ClientConnectionError,
        ClientResponseError,
        httpx.HTTPError,
    ) as exc:
        logger.debug("Pinging %s, failed: %s", f"{endpoint=}", f"{exc=!r}")
        raise ConfigurationError(
            f"Could not connect to cluster in {endpoint}: error: {exc}"
        ) from exc
def _get_cluster_dask_gateway(**kwargs):
    """
    Start dask.kubernetes cluster and dask.distributed client

    All arguments are optional. If not provided, defaults will be used. To
    view defaults, instantiate a :class:`dask_gateway.Gateway` object and call
    `gateway.cluster_options()`.

    Parameters
    ----------
    name : str, optional
        Name of worker image to use (e.g. ``rhodium/worker:latest``). If
        ``None`` (default), default to worker specified in ``template_path``.
    tag : str, optional
        Tag of the worker image to use. Cannot be used in combination with
        ``name``, which should include a tag. If provided, overrides the tag
        of the image specified in ``template_path``. If ``None`` (default),
        the full image specified in ``name`` or ``template_path`` is used.
    extra_pip_packages : str, optional
        Extra pip packages to install on worker. Packages are installed
        using ``pip install extra_pip_packages``.
    profile : One of ["micro", "standard", "big", "giant"]
        Determines size of worker. CPUs assigned are slightly under 1, 2, 4,
        and 8, respectively. Memory assigned is slightly over 6, 12, 24, and
        48 GB, respectively.
    cpus : float, optional
        Set the CPUs requested for your workers as defined by ``profile``.
        Will raise error if >7.25, because our 8-CPU nodes need ~.75 vCPU for
        kubernetes pods. (NOTE 12/15/20: This is currently set to 1 by
        default to allow for mapping big workflows across inputs, see
        https://github.com/dask/dask-gateway/issues/364).
    cred_name : str, optional
        Name of Google Cloud credentials file to use, equivalent to providing
        ``cred_path='/opt/gcsfuse_tokens/{}.json'.format(cred_name)``. May
        not use if ``cred_path`` is specified.
    cred_path : str, optional
        Path to Google Cloud credentials file to use. May not use if
        ``cred_name`` is specified.
    env_items : dict, optional
        A dictionary of env variable 'name'-'value' pairs to append to the
        env variables included in ``template_path``, e.g.

        .. code-block:: python

            {
                'MY_ENV_VAR': 'some string',
            }

    extra_worker_labels : dict, optional
        Dictionary of kubernetes labels to apply to pods. None (default)
        results in no additional labels besides those in the template, as
        well as ``jupyter_user``, which is inferred from the
        ``JUPYTERHUB_USER``, or, if not set, the server's hostname.
    extra_pod_tolerations : list of dict, optional
        List of pod toleration dictionaries. For example, to match a node
        pool NoSchedule toleration, you might provide:

        .. code-block:: python

            extra_pod_tolerations=[
                {
                    "effect": "NoSchedule",
                    "key": "k8s.dask.org_dedicated",
                    "operator": "Equal",
                    "value": "worker-highcpu"
                },
                {
                    "effect": "NoSchedule",
                    "key": "k8s.dask.org/dedicated",
                    "operator": "Equal",
                    "value": "worker-highcpu"
                }
            ]

    keep_default_tolerations : bool, optional
        Whether to append (default) or replace the default tolerations.
        Ignored if ``extra_pod_tolerations`` is ``None`` or has length 0.

    Returns
    -------
    client : object
        :py:class:`dask.distributed.Client` connected to cluster
    cluster : object
        Pre-configured :py:class:`dask_gateway.GatewayCluster`

    See Also
    --------
    :py:func:`get_micro_cluster` :
        A cluster with one-CPU workers
    :py:func:`get_standard_cluster` :
        The default cluster specification
    :py:func:`get_big_cluster` :
        A cluster with workers twice the size of the default
    :py:func:`get_giant_cluster` :
        A cluster with workers four times the size of the default
    """
    gateway = dask_gateway.Gateway()
    default_options = gateway.cluster_options()

    new_kwargs = kwargs.copy()

    if new_kwargs.get("cpus", 0) > 7.25:
        raise ValueError("Must specify ``cpus`` <= 7.25")

    # Translate our user-facing kwargs into the option names that
    # dask-gateway understands, validating everything else on the way.
    for k, v in kwargs.items():
        if k == "name":
            new_kwargs["worker_image"] = kwargs["name"]
            del new_kwargs["name"]
        elif k == "cred_path":
            # ``cred_name`` wins when both are given.
            if "cred_name" not in kwargs:
                new_kwargs["cred_name"] = Path(v).stem
            del new_kwargs["cred_path"]
        elif k == "extra_pod_tolerations":
            # ``keep_default_tolerations=False`` means replace, anything
            # else (including absent) means append to the defaults.
            if not kwargs.get("keep_default_tolerations", True):
                base_tols = {}
            else:
                base_tols = default_options.worker_tolerations
            new_kwargs.pop("keep_default_tolerations", None)
            new_kwargs["worker_tolerations"] = {
                **base_tols,
                **{
                    f"user_{key}": val
                    for key, val in enumerate(
                        new_kwargs.pop("extra_pod_tolerations")
                    )
                },
            }
        elif k == "keep_default_tolerations":
            # Consumed by the "extra_pod_tolerations" branch above. Handled
            # explicitly here so dict ordering (or passing it on its own)
            # does not trip the unknown-kwarg check below; without
            # extra_pod_tolerations it is simply dropped.
            new_kwargs.pop("keep_default_tolerations", None)
        elif k not in list(default_options.keys()) + ["tag"]:
            raise KeyError(f"{k} not allowed as a kwarg when using dask-gateway")

    if "worker_image" in new_kwargs and "tag" in new_kwargs:
        raise ValueError("provide either `name` or `tag`, not both")

    if "tag" in new_kwargs:
        # Replace only the tag portion of the default image. rsplit keeps
        # registry ports (e.g. ``registry:5000/img:tag``) intact; assumes the
        # default image reference always carries a tag.
        img, _ = default_options.worker_image.rsplit(":", 1)
        new_kwargs["worker_image"] = ":".join((img, new_kwargs["tag"]))
        del new_kwargs["tag"]

    cluster = gateway.new_cluster(**new_kwargs)
    client = cluster.get_client()
    return client, cluster