Example #1
0
async def test_failing_worker_start(tmpdir, fail_stage):
    """Check that a worker failing during start is stopped with its last state.

    ``fail_stage`` is assigned onto the gateway's cluster manager before a
    cluster is created. After scaling to one worker the test polls until the
    manager records a ``stop_worker_state`` and compares it with the value
    expected for that stage.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=FailWorkerStartClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc:
        manager = gateway_proc.cluster_manager
        manager.fail_stage = fail_stage

        async with Gateway(
            address=gateway_proc.public_url,
            asynchronous=True,
        ) as gateway:
            cluster = await gateway.new_cluster()
            await cluster.scale(1)

            # Poll in 0.1s steps until a stop_worker_state is recorded; the
            # else branch only runs when the ~5s budget is used up.
            remaining = 5
            while remaining > 0:
                if manager.stop_worker_state is not None:
                    break
                await asyncio.sleep(0.1)
                remaining -= 0.1
            else:
                assert False, "Operation timed out"

            # Stop worker called with last reported state
            if fail_stage == 0:
                expected = {}
            else:
                expected = {"i": fail_stage - 1}
            assert manager.stop_worker_state == expected

            # Stop the cluster
            await cluster.shutdown()
Example #2
0
async def test_slow_worker_connect(tmpdir):
    """Check that a worker which is slow to connect gets stopped.

    The cluster manager is given ``worker_connect_timeout = 0.1`` and
    ``pause_time = 0``. After scaling to one worker the test polls until
    ``stop_worker_state`` is recorded and expects it to equal ``{"i": 2}``.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=SlowWorkerStartClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc:
        manager = gateway_proc.cluster_manager
        manager.worker_connect_timeout = 0.1
        manager.pause_time = 0

        async with Gateway(
            address=gateway_proc.public_url,
            asynchronous=True,
        ) as gateway:
            cluster = await gateway.new_cluster()
            await cluster.scale(1)

            # Poll in 0.1s steps until a stop_worker_state is recorded; the
            # else branch only runs when the ~5s budget is used up.
            remaining = 5
            while remaining > 0:
                if manager.stop_worker_state is not None:
                    break
                await asyncio.sleep(0.1)
                remaining -= 0.1
            else:
                assert False, "Operation timed out"

            # Stop worker called with last reported state
            assert manager.stop_worker_state == {"i": 2}

            # Stop the cluster
            await cluster.shutdown()
Example #3
0
async def test_slow_cluster_connect(tmpdir):
    """Check cleanup when connecting to a submitted cluster fails.

    The manager is configured with ``cluster_start_timeout = 0.1`` and
    ``pause_time = 0``. Connecting to the submitted cluster must raise
    ``GatewayClusterError`` naming the cluster, and the manager's
    ``stop_cluster_state`` must equal its ``state_3``.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = SlowStartClusterManager
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.SlowStartClusterManager.cluster_start_timeout = 0.1
    cfg.SlowStartClusterManager.pause_time = 0

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Submission fails due to connect timeout
        cluster_id = await gateway.submit()
        with pytest.raises(GatewayClusterError) as excinfo:
            async with gateway.connect(cluster_id):
                pass
        assert cluster_id in str(excinfo.value)

        cluster_obj = gateway_proc.db.cluster_from_name(cluster_id)

        # Stop cluster called with last reported state
        expected = cluster_obj.manager.state_3
        assert cluster_obj.manager.stop_cluster_state == expected
Example #4
0
async def test_kerberos_auth(tmpdir):
    """Check listing clusters through a gateway using the Kerberos authenticator.

    After ``kdestroy()`` a ``list_clusters`` call must raise; after
    ``kinit()`` the same call must succeed. ``kdestroy()`` is called once
    more at the end of the test.
    """
    cfg = Config()
    cfg.DaskGateway.public_url = "http://master.example.com:0"
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.DaskGateway.authenticator_class = (
        "dask_gateway_server.auth.KerberosAuthenticator"
    )
    cfg.KerberosAuthenticator.keytab = KEYTAB_PATH

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
        auth="kerberos",
    ) as gateway:
        # Request is expected to fail after kdestroy()
        kdestroy()
        with pytest.raises(Exception):
            await gateway.list_clusters()

        # Request is expected to succeed after kinit()
        kinit()
        await gateway.list_clusters()

        kdestroy()
Example #5
0
async def test_cluster_fails_during_start(tmpdir, fail_stage):
    """Check cleanup when a cluster fails at ``fail_stage`` while starting.

    Connecting to the submitted cluster must raise ``GatewayClusterError``
    naming the cluster, and the manager's ``stop_cluster_state`` must match
    the value expected for that stage.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = ClusterFailsDuringStart
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.ClusterFailsDuringStart.fail_stage = fail_stage

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Submission fails due to error during start
        cluster_id = await gateway.submit()
        with pytest.raises(GatewayClusterError) as excinfo:
            async with gateway.connect(cluster_id):
                pass
        assert cluster_id in str(excinfo.value)

        cluster_obj = gateway_proc.db.cluster_from_name(cluster_id)

        # Stop cluster called with last reported state
        if fail_stage == 0:
            expected = {}
        else:
            expected = {"i": fail_stage - 1}
        assert cluster_obj.manager.stop_cluster_state == expected
Example #6
0
async def test_cluster_fails_between_start_and_connect(tmpdir):
    """Check the error reported when a submitted cluster fails before connect.

    The manager is configured with ``cluster_status_period = 0.1``. Connecting
    (bounded to 5 seconds) must raise ``GatewayClusterError`` that names the
    cluster and contains "failed to start"; the manager's status must then be
    "stopped".
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = ClusterFailsBetweenStartAndConnect
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.ClusterFailsBetweenStartAndConnect.cluster_status_period = 0.1

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Submit cluster
        cluster_id = await gateway.submit()

        cluster_obj = gateway_proc.db.cluster_from_name(cluster_id)

        # Connect and wait for start failure
        with pytest.raises(GatewayClusterError) as excinfo:
            await asyncio.wait_for(gateway.connect(cluster_id), 5)
        assert cluster_id in str(excinfo.value)
        assert "failed to start" in str(excinfo.value)

        assert cluster_obj.manager.status == "stopped"
Example #7
0
async def test_cluster_fails_after_connect(tmpdir):
    """Check that a cluster failing after connect triggers ``stop_cluster``.

    The manager is configured with ``cluster_status_period = 0.25``. While
    connected, the test awaits the manager's ``failed`` and then
    ``stop_cluster_called`` awaitables, each bounded to 3 seconds.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = ClusterFailsAfterConnect
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.ClusterFailsAfterConnect.cluster_status_period = 0.25

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Cluster starts successfully
        cluster_id = await gateway.submit()

        cluster_obj = gateway_proc.db.cluster_from_name(cluster_id)

        async with gateway.connect(cluster_id):
            # Wait for cluster to fail while running
            await asyncio.wait_for(cluster_obj.manager.failed, 3)

            # Stop cluster called to cleanup after failure
            await asyncio.wait_for(
                cluster_obj.manager.stop_cluster_called, 3
            )
Example #8
0
async def test_gateway_stop_clusters_on_shutdown(tmpdir):
    """Check that clusters are stopped once the gateway context has exited.

    Two clusters are created and the second is shut down explicitly. After
    both context managers exit, each cluster record must have no scheduler
    and a status of at least ``ClusterStatus.STOPPED``.
    """
    async with temp_gateway(
        cluster_manager_class=InProcessClusterManager,
        temp_dir=str(tmpdir.join("dask-gateway")),
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        running = await gateway.new_cluster()
        closed = await gateway.new_cluster()
        await closed.shutdown()

        running_record = gateway_proc.db.cluster_from_name(running.name)
        closed_record = gateway_proc.db.cluster_from_name(closed.name)

        # There is an active cluster
        assert running_record.manager.scheduler is not None

    # Active clusters are stopped on shutdown
    for record in (running_record, closed_record):
        assert record.manager.scheduler is None
        assert record.status >= ClusterStatus.STOPPED
Example #9
0
async def test_slow_worker_connect(tmpdir):
    """Check that a worker which is slow to start gets stopped.

    The manager is configured with ``worker_start_timeout = 0.1`` and
    ``pause_time = 0``. After scaling to one worker the test polls the
    cluster's manager until ``stop_worker_state`` is recorded and expects it
    to equal ``{"i": 2}``.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = SlowWorkerStartClusterManager
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.SlowWorkerStartClusterManager.worker_start_timeout = 0.1
    cfg.SlowWorkerStartClusterManager.pause_time = 0

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        cluster = await gateway.new_cluster()
        await cluster.scale(1)
        record = gateway_proc.db.cluster_from_name(cluster.name)

        # Poll in 0.1s steps until a stop_worker_state is recorded; the else
        # branch only runs when the ~5s budget is used up.
        remaining = 5
        while remaining > 0:
            if record.manager.stop_worker_state is not None:
                break
            await asyncio.sleep(0.1)
            remaining -= 0.1
        else:
            assert False, "Operation timed out"

        # Stop worker called with last reported state
        assert record.manager.stop_worker_state == {"i": 2}

        # Stop the cluster
        await cluster.shutdown()
Example #10
0
async def test_worker_fails_between_start_and_connect(tmpdir):
    """Check that the manager reports "stopped" after a worker fails early.

    The manager is configured with ``worker_status_period = 0.1``. After
    scaling to one worker the test polls the cluster manager's ``status``
    until it equals "stopped", then shuts the cluster down.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = WorkerFailsBetweenStartAndConnect
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.WorkerFailsBetweenStartAndConnect.worker_status_period = 0.1

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        cluster = await gateway.new_cluster()
        await cluster.scale(1)
        record = gateway_proc.db.cluster_from_name(cluster.name)

        # Wait for worker failure and stop_worker called: poll in 0.1s steps;
        # the else branch only runs when the ~5s budget is used up.
        remaining = 5
        while remaining > 0:
            if record.manager.status == "stopped":
                break
            await asyncio.sleep(0.1)
            remaining -= 0.1
        else:
            assert False, "Operation timed out"

        # Stop the cluster
        await cluster.shutdown()
Example #11
0
async def test_worker_fails_during_start(tmpdir, fail_stage):
    """Check cleanup when a worker fails at ``fail_stage`` while starting.

    After scaling to one worker the test polls the cluster's manager until
    ``stop_worker_state`` is recorded and compares it with the value expected
    for that stage.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = WorkerFailsDuringStart
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.WorkerFailsDuringStart.fail_stage = fail_stage

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        cluster = await gateway.new_cluster()
        await cluster.scale(1)
        record = gateway_proc.db.cluster_from_name(cluster.name)

        # Poll in 0.1s steps until a stop_worker_state is recorded; the else
        # branch only runs when the ~5s budget is used up.
        remaining = 5
        while remaining > 0:
            if record.manager.stop_worker_state is not None:
                break
            await asyncio.sleep(0.1)
            remaining -= 0.1
        else:
            assert False, "Operation timed out"

        # Stop worker called with last reported state
        if fail_stage == 0:
            expected = {}
        else:
            expected = {"i": fail_stage - 1}
        assert record.manager.stop_worker_state == expected

        # Stop the cluster
        await cluster.shutdown()
Example #12
0
async def test_worker_fails_after_connect(tmpdir):
    """Check that closing a connected worker leads to ``stop_worker`` cleanup.

    The test awaits the manager's ``worker_connected`` awaitable, closes the
    first worker held by the manager, then awaits ``stop_worker_called``
    (each wait bounded to 30 seconds) before shutting the cluster down.
    """
    async with temp_gateway(
        cluster_manager_class=WorkerFailsAfterConnect,
        temp_dir=str(tmpdir.join("dask-gateway")),
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        cluster = await gateway.new_cluster()
        await cluster.scale(1)
        record = gateway_proc.db.cluster_from_name(cluster.name)

        # Wait for worker to connect
        await asyncio.wait_for(record.manager.worker_connected, 30)

        # Close the worker
        first_worker = list(record.manager.workers.values())[0]
        await first_worker.close(1)

        # Stop worker called to cleanup after failure
        await asyncio.wait_for(record.manager.stop_worker_called, 30)

        await cluster.shutdown()
Example #13
0
async def test_user_limits(tmpdir):
    """Check the configured per-user limits on clusters and cores.

    The configuration sets ``max_clusters = 1`` and ``max_cores = 3`` with a
    1-core scheduler and 2-core workers. A second cluster must be rejected
    with a "user limit" ``ValueError``, scaling to 2 must emit a
    ``GatewayWarning`` matching "user cores limit", and a new cluster can be
    created after the first is shut down.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = InProcessClusterManager
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.UserLimits.max_clusters = 1
    cfg.UserLimits.max_cores = 3
    cfg.InProcessClusterManager.scheduler_cores = 1
    cfg.InProcessClusterManager.worker_cores = 2

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Start a cluster
        cluster = await gateway.new_cluster()

        # Only one cluster allowed
        with pytest.raises(ValueError) as excinfo:
            await gateway.new_cluster()
        assert "user limit" in str(excinfo.value)

        # Scaling > 1 triggers a warning, only scales to 1
        with pytest.warns(GatewayWarning, match="user cores limit"):
            await cluster.scale(2)

        # Shutdown the cluster
        await cluster.shutdown()

        # Can create a new cluster after resources returned
        cluster = await gateway.new_cluster()
        await cluster.shutdown()
Example #14
0
 def gateway_client(self, **kwargs):
     """Build a ``Gateway`` from this object's addresses.

     ``address``, ``proxy_address`` and ``asynchronous=True`` are supplied by
     default; any keyword argument passed in overrides the default of the
     same name or is forwarded as an extra argument.
     """
     options = {
         "address": self.address,
         "proxy_address": self.proxy_address,
         "asynchronous": True,
         **kwargs,
     }
     return Gateway(**options)
Example #15
0
async def test_basic_auth(tmpdir):
    """Check that ``list_clusters`` succeeds with ``auth="basic"``."""
    async with temp_gateway(temp_dir=str(tmpdir)) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
        auth="basic",
    ) as gateway:
        await gateway.list_clusters()
Example #16
0
async def test_basic_auth(tmpdir):
    """Check that ``list_clusters`` succeeds with ``auth="basic"``."""
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(temp_dir=gateway_dir) as gateway_proc, Gateway(
        address=gateway_proc.public_url,
        asynchronous=True,
        auth="basic",
    ) as gateway:
        await gateway.list_clusters()
def create_app(test_config=None):
    """Create the Flask app and a Dask Gateway cluster scaled to 3 workers.

    The gateway connection and the cluster are created once, when this
    factory runs, and are shared by the route handlers through closure.

    Parameters
    ----------
    test_config :
        Accepted but not used anywhere in this function.

    Returns
    -------
    The configured Flask application, with a ``/`` status route and a
    ``/submit_job/<job_id>`` route.
    """
    app = Flask(__name__)

    gateway = Gateway(
        address="http://traefik-dask-gateway.jhub/services/dask-gateway/",
        public_address="https://sg.zonca.dev/services/dask-gateway/",
        auth="jupyterhub",
    )
    cluster_opts = gateway.cluster_options()
    cluster_opts["image"] = "zonca/dask-gateway-zarr:latest"
    cluster = gateway.new_cluster(cluster_opts)
    cluster.scale(3)

    @app.route("/")
    def hello():
        return "Gateway up and running!\n"

    @app.route("/submit_job/<job_id>")
    def submit_job(job_id):
        import s3fs

        # NOTE(review): a client is requested on every call and is neither
        # used directly nor closed in this handler - confirm that is intended.
        client = cluster.get_client()

        s3_kwargs = {
            "endpoint_url": "https://js2.jetstream-cloud.org:8001/",
            "region_name": "RegionOne",
        }
        filesystem = s3fs.S3FileSystem(use_ssl=True, client_kwargs=s3_kwargs)
        target = s3fs.S3Map(root=f"gateway-results/{job_id}", s3=filesystem)

        # Empty 1000x1000 float32 array in 100x100 chunks, stored under the
        # job's prefix, then filled with random values of matching layout.
        out = zarr.empty(
            shape=(1000, 1000),
            chunks=(100, 100),
            dtype="f4",
            store=target,
            compression=None,
        )
        samples = da.random.random(size=out.shape, chunks=out.chunks)
        samples = samples.astype(out.dtype)
        samples.store(out, lock=False)

        return f"Submitted job {job_id}\n"

    return app
Example #18
0
async def test_jupyterhub_auth(tmpdir, monkeypatch):
    """Check JupyterHub token authentication against a mock hub.

    A mock hub is configured with a ``dask-gateway`` service and the gateway
    is configured with the JupyterHub authenticator. A request with a random
    API token must fail; after switching the auth object to the token issued
    for user ``alice`` the same request must succeed.

    ``monkeypatch`` is accepted but not used in this function.
    """
    from jupyterhub.tests.utils import add_user

    gw_address = "http://127.0.0.1:%d" % random_port()
    service_token = uuid.uuid4().hex
    hub_url = "http://127.0.0.1:%i/@/space%%20word/" % random_port()

    gateway_service = {
        "name": "dask-gateway",
        "url": gw_address,
        "api_token": service_token,
    }
    hub_cfg = Config()
    hub_cfg.JupyterHub.services = [gateway_service]
    hub_cfg.JupyterHub.bind_url = hub_url

    class MockHub(hub_mocking.MockHub):
        def init_logging(self):
            pass

    hub = MockHub(config=hub_cfg)

    # Configure gateway
    gw_cfg = Config()
    gw_cfg.DaskGateway.public_url = gw_address + "/services/dask-gateway/"
    gw_cfg.DaskGateway.temp_dir = str(tmpdir)
    gw_cfg.DaskGateway.authenticator_class = (
        "dask_gateway_server.auth.JupyterHubAuthenticator"
    )
    gw_cfg.JupyterHubAuthenticator.jupyterhub_api_token = service_token
    gw_cfg.JupyterHubAuthenticator.jupyterhub_api_url = hub_url + "api/"

    async with temp_gateway(config=gw_cfg) as gateway_proc, temp_hub(hub):
        # Create a new jupyterhub user alice, and get the api token
        alice = add_user(hub.db, name="alice")
        alice_token = alice.new_api_token()
        hub.db.commit()

        # Configure auth with incorrect api token
        hub_auth = JupyterHubAuth(api_token=uuid.uuid4().hex)

        async with Gateway(
            address=gateway_proc.public_urls.connect_url,
            proxy_address=gateway_proc.gateway_urls.connect_url,
            asynchronous=True,
            auth=hub_auth,
        ) as gateway:
            # Auth fails with bad token
            with pytest.raises(Exception):
                await gateway.list_clusters()

            # Auth works with correct token
            hub_auth.api_token = alice_token
            await gateway.list_clusters()
def init_cluster(n_workers=10):
    """
    Initialize a dask cluster through the Dask Gateway.

    Parameters
    ----------
    n_workers : int
        Number of workers in the cluster. Default: 10.

    Returns
    -------
    cluster, client
        The newly created gateway cluster and the client obtained from
        ``cluster.get_client()``.

    Raises
    ------
    KeyError
        If the ``JUPYTER_IMAGE_SPEC`` environment variable is not set.
    """
    gateway = Gateway(
        "http://web-public-l2lhub-prod-dask-gateway",
        proxy_address="tls://scheduler-public-l2lhub-prod-dask-gateway:8786",
    )

    # The cluster image is taken from the JUPYTER_IMAGE_SPEC variable.
    cluster = gateway.new_cluster(image=os.environ["JUPYTER_IMAGE_SPEC"])
    cluster.scale(n_workers)
    client = cluster.get_client()
    return cluster, client
Example #20
0
async def test_adaptive_scaling(tmpdir):
    """Check switching a cluster in and out of adaptive scaling.

    The manager is configured with ``adaptive_period = 0.25``. The test
    verifies the ``adaptive`` flag of the cluster report after ``adapt()``,
    after a task has run, after waiting for zero workers, after an explicit
    ``scale(1)``, and after ``adapt(active=False)``.
    """
    # XXX: we should be able to use `InProcessClusterManager` here, but due to
    # https://github.com/dask/distributed/issues/3251 this results in periodic
    # failures.
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = LocalTestingClusterManager
    cfg.DaskGateway.temp_dir = str(tmpdir)
    cfg.LocalTestingClusterManager.adaptive_period = 0.25

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Start a cluster
        cluster = await gateway.new_cluster()

        async def is_adaptive():
            # Read the flag from a freshly fetched cluster report.
            return (await gateway.get_cluster(cluster.name)).adaptive

        # Not in adaptive mode
        assert not await is_adaptive()

        # Turn on adaptive scaling
        await cluster.adapt()

        # Now in adaptive mode
        assert await is_adaptive()

        # Worker is automatically requested
        with cluster.get_client(set_as_default=False) as client:
            result = await client.submit(lambda x: x + 1, 1)
            assert result == 2

        # Scales down automatically
        await wait_for_workers(cluster, exact=0)

        # Still in adaptive mode
        assert await is_adaptive()

        # Turn off adaptive scaling implicitly
        await cluster.scale(1)
        assert not await is_adaptive()

        # Turn off adaptive scaling explicitly
        await cluster.adapt()
        assert await is_adaptive()
        await cluster.adapt(active=False)
        assert not await is_adaptive()

        # Shutdown the cluster
        await cluster.shutdown()
Example #21
0
async def test_local_dask_gateway_server(
        loop: AbstractEventLoop, local_dask_gateway_server: DaskGatewayServer):
    """Exercise a local gateway server: scale up, run a job, scale back down.

    Connects with basic auth as ``pytest_user``, prints the gateway's
    options, versions and cluster list, creates a cluster, scales it to
    ``NUM_WORKERS`` workers, submits one task, and scales back to zero.
    Worker counts are checked with retries for up to 30 seconds.
    """

    async def _wait_for_worker_count(cluster, expected: int) -> None:
        # Re-check every 0.24s for up to 30s; with reraise=True the last
        # failure itself is raised once the delay is exhausted.
        async for attempt in AsyncRetrying(reraise=True,
                                           wait=wait_fixed(0.24),
                                           stop=stop_after_delay(30)):
            with attempt:
                # Default to an empty list: a missing "workers" key then
                # fails the count assertion instead of raising TypeError
                # from len(0).
                workers = cluster.scheduler_info.get("workers", [])
                print(f"cluster {cluster=} has now {len(workers)}")
                assert len(workers) == expected

    async with Gateway(
            local_dask_gateway_server.address,
            local_dask_gateway_server.proxy_address,
            asynchronous=True,
            auth=auth.BasicAuth("pytest_user",
                                local_dask_gateway_server.password),
    ) as gateway:
        print(f"--> {gateway=} created")
        cluster_options = await gateway.cluster_options()
        gateway_versions = await gateway.get_versions()
        clusters_list = await gateway.list_clusters()
        print(f"--> {gateway_versions=}, {cluster_options=}, {clusters_list=}")
        for option in cluster_options.items():
            print(f"--> {option=}")

        async with gateway.new_cluster() as cluster:
            assert cluster
            print(
                f"--> created new cluster {cluster=}, {cluster.scheduler_info=}"
            )
            NUM_WORKERS = 10
            await cluster.scale(NUM_WORKERS)
            print(f"--> scaling cluster {cluster=} to {NUM_WORKERS} workers")
            # Compare against NUM_WORKERS so the expectation tracks the
            # requested scale.
            await _wait_for_worker_count(cluster, NUM_WORKERS)

            async with cluster.get_client() as client:
                print(f"--> created new client {client=}, submitting a job")
                res = await client.submit(lambda x: x + 1, 1)  # type: ignore
                assert res == 2

            print(f"--> scaling cluster {cluster=} back to 0")
            await cluster.scale(0)

            await _wait_for_worker_count(cluster, 0)
Example #22
0
async def test_cluster_manager_options(tmpdir):
    """Check how declared cluster options reach the cluster manager.

    Two options are declared: an integer ``option_one`` (default 1, range
    1-4, targeting ``option_one_b``) and a select ``option_two`` mapping
    "small" to 1.5 and "large" to 15. Clusters are created with defaults,
    with keyword overrides, and with an options object; invalid inputs must
    raise ``TypeError`` or ``ValueError``.
    """
    declared_options = options.Options(
        options.Integer(
            "option_one", default=1, min=1, max=4, target="option_one_b"
        ),
        options.Select("option_two", options=[("small", 1.5), ("large", 15)]),
    )

    async with temp_gateway(
        cluster_manager_class=ClusterOptionsManager,
        cluster_manager_options=declared_options,
        temp_dir=str(tmpdir.join("dask-gateway")),
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Create with no parameters
        cluster = await gateway.new_cluster()
        record = gateway_proc.db.cluster_from_name(cluster.name)
        assert record.manager.option_one_b == 1
        assert record.manager.option_two == 1.5
        assert record.options == {"option_one": 1, "option_two": "small"}
        await cluster.shutdown()

        # Create with parameters
        cluster = await gateway.new_cluster(option_two="large")
        record = gateway_proc.db.cluster_from_name(cluster.name)
        assert record.manager.option_one_b == 1
        assert record.manager.option_two == 15
        assert record.options == {"option_one": 1, "option_two": "large"}
        await cluster.shutdown()

        # With options object
        cluster_opts = await gateway.cluster_options()
        cluster_opts.option_one = 2
        cluster = await gateway.new_cluster(cluster_opts, option_two="large")
        record = gateway_proc.db.cluster_from_name(cluster.name)
        assert record.manager.option_one_b == 2
        assert record.manager.option_two == 15
        assert record.options == {"option_one": 2, "option_two": "large"}
        await cluster.shutdown()

        # Bad parameters
        with pytest.raises(TypeError):
            await gateway.new_cluster(cluster_options=10)

        with pytest.raises(ValueError) as excinfo:
            await gateway.new_cluster(option_two="medium")
        assert "option_two" in str(excinfo.value)
async def dask_gateway(
    local_dask_gateway_server: DaskGatewayServer, ) -> Gateway:
    """Open a ``Gateway`` client to the local server, print diagnostics, return it.

    Connects with basic auth as ``pytest_user`` using the server's password,
    then prints the gateway, its versions, its cluster options and the
    current cluster list.

    NOTE(review): the ``return`` sits inside the ``async with`` block, so the
    context manager is exited before the caller receives ``gateway``. If
    leaving the context closes the client, callers get an object that is
    already closed - confirm, and consider yielding instead if this is
    meant to be used as a fixture.
    """
    async with Gateway(
            local_dask_gateway_server.address,
            local_dask_gateway_server.proxy_address,
            asynchronous=True,
            auth=auth.BasicAuth("pytest_user",
                                local_dask_gateway_server.password),
    ) as gateway:
        print(f"--> {gateway=} created")
        # Fetch server-side details purely for the diagnostic output below.
        cluster_options = await gateway.cluster_options()
        gateway_versions = await gateway.get_versions()
        clusters_list = await gateway.list_clusters()
        print(f"--> {gateway_versions=}, {cluster_options=}, {clusters_list=}")
        for option in cluster_options.items():
            print(f"--> {option=}")
        # Returning here leaves the `async with` block (see NOTE above).
        return gateway
Example #24
0
async def test_basic_auth_password(tmpdir):
    """Check that a request fails until the auth object's password is set.

    The gateway uses ``DummyAuthenticator`` with a configured password. A
    request made with a default ``BasicAuth()`` must raise; after assigning
    the password on that same object the request must succeed.
    """
    cfg = Config()
    cfg.DaskGateway.temp_dir = str(tmpdir.join("dask-gateway"))
    cfg.DaskGateway.authenticator_class = (
        "dask_gateway_server.auth.DummyAuthenticator"
    )
    cfg.DummyAuthenticator.password = "******"

    async with temp_gateway(config=cfg) as gateway_proc:
        credentials = BasicAuth()

        async with Gateway(
            address=gateway_proc.public_url,
            asynchronous=True,
            auth=credentials,
        ) as gateway:
            # Request made before the password is assigned must fail
            with pytest.raises(Exception):
                await gateway.list_clusters()

            # The same auth object is updated in place, then reused
            credentials.password = "******"
            await gateway.list_clusters()
Example #25
0
async def test_scaling(tmpdir):
    """Check scaling a cluster up to 5 workers and back down to 1.

    After ``scale(5)`` the test waits for at least 3 workers; after
    ``scale(1)`` it waits for exactly 1, then shuts the cluster down.
    """
    cfg = Config()
    cfg.DaskGateway.cluster_manager_class = InProcessClusterManager
    cfg.DaskGateway.temp_dir = str(tmpdir)

    async with temp_gateway(config=cfg) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # Start a cluster
        cluster = await gateway.new_cluster()

        # Scale up
        await cluster.scale(5)
        await wait_for_workers(cluster, atleast=3)

        # Scale down
        await cluster.scale(1)
        await wait_for_workers(cluster, exact=1)

        await cluster.shutdown()
Example #26
0
async def test_gateway_stop_clusters_on_shutdown(tmpdir):
    """Check that no schedulers stay active once the gateway context exits.

    Two clusters are created and the second is shut down explicitly, so the
    manager must still report active schedulers inside the context and none
    after it.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=InProcessClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_url,
        asynchronous=True,
    ) as gateway:
        manager = gateway_proc.cluster_manager

        await gateway.new_cluster()
        second = await gateway.new_cluster()
        await second.shutdown()

        # There are active clusters
        assert manager.active_schedulers

    # Active clusters are stopped on shutdown
    assert not manager.active_schedulers
Example #27
0
async def test_successful_cluster(tmpdir):
    """Walk a cluster through list, create, scale, compute and shutdown.

    The cluster list must start empty and then have the new cluster as its
    first entry. A task is submitted after scaling to 2 workers and again
    after scaling down to 1; both must return 2.
    """
    async with temp_gateway(
        cluster_manager_class=InProcessClusterManager,
        temp_dir=str(tmpdir.join("dask-gateway")),
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_urls.connect_url,
        proxy_address=gateway_proc.gateway_urls.connect_url,
        asynchronous=True,
    ) as gateway:
        # No currently running clusters
        listed = await gateway.list_clusters()
        assert listed == []

        # Start a cluster
        cluster = await gateway.new_cluster()

        # Cluster is now present in list
        listed = await gateway.list_clusters()
        assert len(listed)
        assert listed[0].name == cluster.name

        # Scale up, connect, and compute
        await cluster.scale(2)

        with cluster.get_client(set_as_default=False) as client:
            result = await client.submit(lambda x: x + 1, 1)
            assert result == 2

        # Scale down
        await cluster.scale(1)

        # Can still compute
        with cluster.get_client(set_as_default=False) as client:
            result = await client.submit(lambda x: x + 1, 1)
            assert result == 2

        # Shutdown the cluster
        await cluster.shutdown()
Example #28
0
async def test_failing_cluster_start(tmpdir, fail_stage):
    """Check cleanup when a cluster fails at ``fail_stage`` while starting.

    ``fail_stage`` is assigned onto the gateway's cluster manager. Connecting
    to the submitted cluster must raise an exception naming the cluster, and
    ``stop_cluster_state`` must match the value expected for that stage.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=FailStartClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc:
        manager = gateway_proc.cluster_manager
        manager.fail_stage = fail_stage

        async with Gateway(
            address=gateway_proc.public_url,
            asynchronous=True,
        ) as gateway:
            # Submission fails due to error during start
            cluster_id = await gateway.submit()
            with pytest.raises(Exception) as excinfo:
                await gateway.connect(cluster_id)
            assert cluster_id in str(excinfo.value)

            # Stop cluster called with last reported state
            if fail_stage == 0:
                expected = {}
            else:
                expected = {"i": fail_stage - 1}
            assert manager.stop_cluster_state == expected
Example #29
0
async def test_slow_cluster_start(tmpdir, start_timeout, state):
    """Check cleanup when a cluster start exceeds ``start_timeout``.

    ``start_timeout`` is assigned to the manager's ``cluster_start_timeout``.
    Connecting to the submitted cluster must raise an exception naming the
    cluster, and ``stop_cluster_state`` must equal the manager attribute
    named by ``state``.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=SlowStartClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc:
        manager = gateway_proc.cluster_manager
        manager.cluster_start_timeout = start_timeout

        async with Gateway(
            address=gateway_proc.public_url,
            asynchronous=True,
        ) as gateway:
            # Submission fails due to start timeout
            cluster_id = await gateway.submit()
            with pytest.raises(Exception) as excinfo:
                await gateway.connect(cluster_id)
            assert cluster_id in str(excinfo.value)

            # Stop cluster called with last reported state
            expected = getattr(manager, state)
            assert manager.stop_cluster_state == expected
Example #30
0
async def test_successful_cluster(tmpdir):
    """Create a cluster, compute at 2 workers and at 1, then shut it down.

    A task is submitted after scaling to 2 workers and again after scaling
    down to 1; both must return 2.
    """
    gateway_dir = str(tmpdir.join("dask-gateway"))

    async with temp_gateway(
        cluster_manager_class=InProcessClusterManager,
        temp_dir=gateway_dir,
    ) as gateway_proc, Gateway(
        address=gateway_proc.public_url,
        asynchronous=True,
    ) as gateway:
        cluster = await gateway.new_cluster()

        # Scale up and compute
        await cluster.scale(2)
        with cluster.get_client(set_as_default=False) as client:
            result = await client.submit(lambda x: x + 1, 1)
            assert result == 2

        # Scale down and compute again
        await cluster.scale(1)
        with cluster.get_client(set_as_default=False) as client:
            result = await client.submit(lambda x: x + 1, 1)
            assert result == 2

        await cluster.shutdown()