Code example #1
File: test_local.py Project: chagge/distributed
def test_scale_up_and_down():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, processes=False, silence_logs=False,
                           diagnostics_port=None, loop=loop, start=False)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    assert not cluster.workers

    yield cluster.scale_up(2)
    assert len(cluster.workers) == 2
    assert len(cluster.scheduler.ncores) == 2

    addr = cluster.workers[0].address
    yield cluster.scale_down([addr])

    assert len(cluster.workers) == 1
    assert addr not in cluster.scheduler.ncores

    yield c._shutdown()
    yield cluster._close()
Code example #2
def test_local_tls_restart(loop):
    from distributed.utils_test import tls_only_security

    security = tls_only_security()
    with LocalCluster(
            n_workers=1,
            scheduler_port=8786,
            silence_logs=False,
            security=security,
            dashboard_address=False,
            host="tls://0.0.0.0",
            loop=loop,
    ) as c:
        with Client(c.scheduler.address, loop=loop,
                    security=security) as client:
            workers_before = set(client.scheduler_info()["workers"])
            assert client.submit(inc, 1).result() == 2
            client.restart()
            workers_after = set(client.scheduler_info()["workers"])
            assert client.submit(inc, 2).result() == 3
            assert workers_before != workers_after
Code example #3
File: test_local.py Project: dailu/distributed
def test_bokeh(loop):
    pytest.importorskip('bokeh')
    from distributed.http import HTTPScheduler
    import requests
    with LocalCluster(scheduler_port=0, silence_logs=False, loop=loop,
            diagnostics_port=4724, services={('http', 0): HTTPScheduler},
            ) as c:
        start = time()
        while True:
            with ignoring(Exception):
                response = requests.get('http://127.0.0.1:%d/status/' %
                                        c.diagnostics.port)
                if response.ok:
                    break
            assert time() < start + 20
            sleep(0.01)

    start = time()
    while not raises(lambda: requests.get('http://127.0.0.1:%d/status/' % 4724)):
        assert time() < start + 10
        sleep(0.01)
Code example #4
File: test_local.py Project: victor-ab/distributed
def test_scale_up_and_down():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, processes=False,
                                 silence_logs=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    c = yield Client(cluster, asynchronous=True)

    assert not cluster.workers

    yield cluster.scale_up(2)
    assert len(cluster.workers) == 2
    assert len(cluster.scheduler.ncores) == 2

    addr = cluster.workers[0].address
    yield cluster.scale_down([addr])

    assert len(cluster.workers) == 1
    assert addr not in cluster.scheduler.ncores

    yield c.close()
    yield cluster.close()
Code example #5
async def test_repr(memory_limit, cleanup):
    async with LocalCluster(
            n_workers=2,
            processes=False,
            threads_per_worker=2,
            memory_limit=memory_limit,
            asynchronous=True,
            dashboard_address=":0",
    ) as cluster:
        # __repr__ uses cluster.scheduler_info, which slightly lags behind
        # cluster.scheduler.workers and client.wait_for_workers.
        while len(cluster.scheduler_info["workers"]) < 2:
            await asyncio.sleep(0.01)

        text = repr(cluster)
        assert cluster.scheduler_address in text
        assert "workers=2, threads=4" in text
        if memory_limit:
            assert "memory=4.00 GiB" in text
        else:
            assert "memory" not in text
Code example #6
async def test_scale_up_and_down():
    async with LocalCluster(
            n_workers=0,
            processes=False,
            silence_logs=False,
            dashboard_address=":0",
            asynchronous=True,
    ) as cluster:
        async with Client(cluster, asynchronous=True) as c:

            assert not cluster.workers

            cluster.scale(2)
            await cluster
            assert len(cluster.workers) == 2
            assert len(cluster.scheduler.nthreads) == 2

            cluster.scale(1)
            await cluster

            assert len(cluster.workers) == 1
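
The scale_up/scale_down examples above drive LocalCluster through its internal coroutine interface. For comparison, here is a minimal synchronous sketch of the same flow using only the public API; the worker counts are illustrative and this is not part of the original test suite:

from distributed import Client, LocalCluster

if __name__ == "__main__":
    # Start with no workers, then scale explicitly.
    cluster = LocalCluster(n_workers=0, processes=False, dashboard_address=":0")
    client = Client(cluster)

    cluster.scale(2)              # request two workers
    client.wait_for_workers(2)    # block until both have registered

    cluster.scale(1)              # release one worker again

    client.close()
    cluster.close()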
Code example #7
File: test_local.py Project: bmaisonn/distributed
def test_defaults():
    from distributed.worker import _ncores

    with LocalCluster(scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert sum(w.ncores for w in c.workers) == _ncores
        assert all(isinstance(w, Nanny) for w in c.workers)
        assert all(w.ncores == 1 for w in c.workers)

    with LocalCluster(processes=False,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert sum(w.ncores for w in c.workers) == _ncores
        assert all(isinstance(w, Worker) for w in c.workers)
        assert len(c.workers) == 1

    with LocalCluster(n_workers=2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        if _ncores % 2 == 0:
            expected_total_threads = max(2, _ncores)
        else:
            # n_workers not a divisor of _ncores => threads are overcommitted
            expected_total_threads = max(2, _ncores + 1)
        assert sum(w.ncores for w in c.workers) == expected_total_threads

    with LocalCluster(threads_per_worker=_ncores * 2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert len(c.workers) == 1

    with LocalCluster(n_workers=_ncores * 2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert all(w.ncores == 1 for w in c.workers)
    with LocalCluster(threads_per_worker=2,
                      n_workers=3,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert len(c.workers) == 3
        assert all(w.ncores == 2 for w in c.workers)
Code example #8
File: test_ucx.py Project: GueroudjiAmal/distributed
async def test_stress():
    da = pytest.importorskip("dask.array")

    chunksize = "10 MB"

    async with LocalCluster(
            protocol="ucx",
            dashboard_address=":0",
            asynchronous=True,
            host=HOST,
    ) as cluster:
        async with Client(cluster, asynchronous=True):
            rs = da.random.RandomState()
            x = rs.random((10000, 10000), chunks=(-1, chunksize))
            x = x.persist()
            await wait(x)

            for i in range(10):
                x = x.rechunk((chunksize, -1))
                x = x.rechunk((-1, chunksize))
                x = x.persist()
                await wait(x)
Code example #9
def _test_local_cluster(protocol):
    dask.config.update(
        dask.config.global_config,
        {
            "ucx": {
                "tcp": True,
                "cuda_copy": True,
            },
        },
        priority="new",
    )

    with LocalCluster(
            protocol=protocol,
            dashboard_address=None,
            n_workers=4,
            threads_per_worker=1,
            processes=True,
    ) as cluster:
        with Client(cluster) as client:
            c = comms.CommsContext(client)
            assert sum(c.run(my_rank, 0)) == sum(range(4))
Code example #10
def _test_dataframe_merge_empty_partitions(nrows, npartitions):
    with LocalCluster(
            protocol="tcp",
            dashboard_address=None,
            n_workers=npartitions,
            threads_per_worker=1,
            processes=True,
    ) as cluster:
        with Client(cluster):
            df1 = pd.DataFrame({
                "key": np.arange(nrows),
                "payload1": np.arange(nrows)
            })
            key = np.arange(nrows)
            np.random.shuffle(key)
            df2 = pd.DataFrame({"key": key, "payload2": np.arange(nrows)})
            expected = df1.merge(df2).set_index("key")
            ddf1 = dd.from_pandas(df1, npartitions=npartitions)
            ddf2 = dd.from_pandas(df2, npartitions=npartitions)
            ddf3 = dataframe_merge(ddf1, ddf2, on="key").set_index("key")
            got = ddf3.compute()
            pd.testing.assert_frame_equal(got, expected)
Code example #11
def test_no_ipywidgets(loop, monkeypatch):
    from unittest.mock import MagicMock

    mock_display = MagicMock()

    monkeypatch.setitem(sys.modules, "ipywidgets", None)
    monkeypatch.setitem(sys.modules, "IPython.display", mock_display)

    with LocalCluster(
            n_workers=0,
            silence_logs=False,
            loop=loop,
            dashboard_address=":0",
            processes=False,
    ) as cluster:
        cluster._ipython_display_()
        args, kwargs = mock_display.display.call_args
        res = args[0]
        assert kwargs == {"raw": True}
        assert isinstance(res, dict)
        assert "text/plain" in res
        assert "text/html" in res
Code example #12
File: test_local.py Project: bmaisonn/distributed
def test_bokeh(loop, processes):
    pytest.importorskip('bokeh')
    import requests
    with LocalCluster(scheduler_port=0,
                      silence_logs=False,
                      loop=loop,
                      processes=processes,
                      diagnostics_port=0) as c:
        bokeh_port = c.scheduler.services['bokeh'].port
        url = 'http://127.0.0.1:%d/status/' % bokeh_port
        start = time()
        while True:
            response = requests.get(url)
            if response.ok:
                break
            assert time() < start + 20
            sleep(0.01)
        # 'localhost' also works
        response = requests.get('http://localhost:%d/status/' % bokeh_port)
        assert response.ok

    with pytest.raises(requests.RequestException):
        requests.get(url, timeout=0.2)
Code example #13
def _test_global_option(seg_size):
    """Test setting UCX options through dask's global config"""
    dask.config.set({
        "ucx": {
            "SEG_SIZE": seg_size,
            "TLS": "tcp,sockcm,cuda_copy",
            "SOCKADDR_TLS_PRIORITY": "sockcm",
        }
    })

    with LocalCluster(
            protocol="ucx",
            dashboard_address=None,
            n_workers=1,
            threads_per_worker=1,
            processes=True,
    ) as cluster:
        with Client(cluster):
            res = da.from_array(numpy.arange(10000), chunks=(1000, ))
            res = res.sum().compute()
            assert res == 49995000
            conf = ucp.get_config()
            assert conf["SEG_SIZE"] == seg_size
Code example #14
async def test_cluster_info_sync():
    async with LocalCluster(processes=False,
                            asynchronous=True,
                            scheduler_sync_interval="1ms") as cluster:
        assert cluster._cluster_info["name"] == cluster.name

        while "name" not in cluster.scheduler.get_metadata(
                keys=["cluster-manager-info"]):
            await asyncio.sleep(0.01)

        info = await cluster.scheduler_comm.get_metadata(
            keys=["cluster-manager-info"])
        assert info["name"] == cluster.name
        info = cluster.scheduler.get_metadata(keys=["cluster-manager-info"])
        assert info["name"] == cluster.name

        cluster._cluster_info["foo"] = "bar"
        while "foo" not in cluster.scheduler.get_metadata(
                keys=["cluster-manager-info"]):
            await asyncio.sleep(0.01)

        info = cluster.scheduler.get_metadata(keys=["cluster-manager-info"])
        assert info["foo"] == "bar"
Code example #15
def test_adapt(loop):
    with LocalCluster(scheduler_port=0, silence_logs=False, loop=loop,
                      diagnostics_port=0, processes=False, n_workers=0) as cluster:
        cluster.adapt(minimum=0, maximum=2, interval='10ms')
        assert cluster._adaptive.minimum == 0
        assert cluster._adaptive.maximum == 2
        ref = weakref.ref(cluster._adaptive)

        cluster.adapt(minimum=1, maximum=2, interval='10ms')
        assert cluster._adaptive.minimum == 1
        gc.collect()

        # the old Adaptive class sticks around, not sure why
        # start = time()
        # while ref():
        #     sleep(0.01)
        #     gc.collect()
        #     assert time() < start + 5

        start = time()
        while len(cluster.scheduler.workers) != 1:
            sleep(0.01)
            assert time() < start + 5
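
Outside a test, adaptive scaling is normally configured once and left to track the scheduler's load. A brief sketch of that pattern, with illustrative minimum/maximum bounds, not taken from the original tests:

from distributed import Client, LocalCluster

cluster = LocalCluster(n_workers=0, processes=False)
cluster.adapt(minimum=0, maximum=4)   # grow and shrink with pending work
client = Client(cluster)

# Submitting work prompts the adaptive controller to request workers.
print(client.submit(sum, [1, 2, 3]).result())

client.close()
cluster.close()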
Code example #16
def _test_initialize_ucx_tcp():
    initialize(enable_tcp_over_ucx=True)
    with LocalCluster(
            protocol="ucx",
            dashboard_address=None,
            n_workers=1,
            threads_per_worker=1,
            processes=True,
    ) as cluster:
        with Client(cluster) as client:
            res = da.from_array(numpy.arange(10000), chunks=(1000, ))
            res = res.sum().compute()
            assert res == 49995000

            def check_ucx_options():
                conf = ucp.get_config()
                assert "TLS" in conf
                assert "tcp" in conf["TLS"]
                assert "sockcm" in conf["TLS"]
                assert "cuda_copy" in conf["TLS"]
                assert "sockcm" in conf["SOCKADDR_TLS_PRIORITY"]
                return True

            assert all(client.run(check_ucx_options).values())
Code example #17
def test_scale(loop):
    """ Directly calling scale both up and down works as expected """
    with LocalCluster(scheduler_port=0,
                      silence_logs=False,
                      loop=loop,
                      diagnostics_port=0,
                      processes=False,
                      n_workers=0) as cluster:
        assert not cluster.scheduler.workers
        cluster.scale(3)

        start = time()
        while len(cluster.scheduler.workers) != 3:
            sleep(0.01)
            assert time() < start + 5, len(cluster.scheduler.workers)

        sleep(0.2)  # let workers settle # TODO: remove need for this

        cluster.scale(2)

        start = time()
        while len(cluster.scheduler.workers) != 2:
            sleep(0.01)
            assert time() < start + 5, len(cluster.scheduler.workers)
Code example #18
File: test_local.py Project: rcthomas/distributed
def test_local_tls(loop):
    from distributed.utils_test import tls_only_security
    security = tls_only_security()
    with LocalCluster(scheduler_port=8786,
                      silence_logs=False,
                      security=security,
                      diagnostics_port=False,
                      ip='tls://0.0.0.0',
                      loop=loop) as c:
        sync(loop,
             assert_can_connect_from_everywhere_4,
             c.scheduler.port,
             connection_args=security.get_connection_args('client'),
             protocol='tls',
             timeout=3)

        # Connecting to the TLS-only scheduler over plain TCP should fail
        sync(
            loop,
            assert_cannot_connect,
            addr='tcp://127.0.0.1:%d' % c.scheduler.port,
            connection_args=security.get_connection_args('client'),
            exception_class=RuntimeError,
        )
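
The TLS examples rely on the tls_only_security() helper from distributed.utils_test. In user code the equivalent is a distributed.security.Security object; the sketch below uses Security.temporary(), which generates throwaway in-memory credentials, and is an illustration rather than part of the original tests:

from distributed import Client, LocalCluster
from distributed.security import Security

# Temporary self-signed TLS credentials, analogous to tls_only_security().
security = Security.temporary()

with LocalCluster(n_workers=1, processes=False, security=security) as cluster:
    with Client(cluster, security=security) as client:
        print(client.submit(sum, [1, 2, 3]).result())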
Code example #19
def _test_dataframe_merge(backend, protocol, n_workers):
    with LocalCluster(
        protocol=protocol,
        dashboard_address=None,
        n_workers=n_workers,
        threads_per_worker=1,
        processes=True,
    ) as cluster:
        with Client(cluster):
            nrows = n_workers * 10

            # Let's make some dataframes that we can join on the "key" column
            df1 = pd.DataFrame({"key": np.arange(nrows), "payload1": np.arange(nrows)})
            key = np.arange(nrows)
            np.random.shuffle(key)
            df2 = pd.DataFrame(
                {"key": key[nrows // 3 :], "payload2": np.arange(nrows)[nrows // 3 :]}
            )
            expected = df1.merge(df2).set_index("key")

            if backend == "cudf":
                df1 = cudf.DataFrame.from_pandas(df1)
                df2 = cudf.DataFrame.from_pandas(df2)

            ddf1 = dd.from_pandas(df1, npartitions=n_workers + 1)
            ddf2 = dd.from_pandas(
                df2, npartitions=n_workers - 1 if n_workers > 1 else 1
            )
            ddf3 = dataframe_merge(ddf1, ddf2, on="key").set_index("key")
            got = ddf3.compute()

            if backend == "cudf":
                got = got.to_pandas()
                got.index.names = ["key"]  # TODO: this shouldn't be needed

            pd.testing.assert_frame_equal(got, expected)
Code example #20
def test_ucx_localcluster(loop, processes):
    if processes:
        kwargs = {"env": {"UCX_MEMTYPE_CACHE": "n"}}
    else:
        kwargs = {}

    ucx_addr = ucp.get_address()
    with LocalCluster(
        protocol="ucx",
        interface="ib0",
        dashboard_address=None,
        n_workers=2,
        threads_per_worker=1,
        processes=processes,
        loop=loop,
        **kwargs
    ) as cluster:
        with Client(cluster) as client:
            x = client.submit(inc, 1)
            x.result()
            assert x.key in cluster.scheduler.tasks
            if not processes:
                assert any(w.data == {x.key: 2} for w in cluster.workers.values())
            assert len(cluster.scheduler.workers) == 2
Code example #21
def test_defaults():
    from distributed.worker import _ncores

    with LocalCluster(scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert sum(w.ncores for w in c.workers) == _ncores
        assert all(isinstance(w, Nanny) for w in c.workers)
        assert all(w.ncores == 1 for w in c.workers)

    with LocalCluster(nanny=False,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert sum(w.ncores for w in c.workers) == _ncores
        assert all(isinstance(w, Worker) for w in c.workers)
        assert len(c.workers) == 1

    with LocalCluster(n_workers=2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert sum(w.ncores for w in c.workers) == max(2, _ncores)

    with LocalCluster(threads_per_worker=_ncores * 2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert len(c.workers) == 1

    with LocalCluster(n_workers=_ncores * 2,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert all(w.ncores == 1 for w in c.workers)
    with LocalCluster(threads_per_worker=2,
                      n_workers=3,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None) as c:
        assert len(c.workers) == 3
        assert all(w.ncores == 2 for w in c.workers)
Code example #22
def test_memory(loop):
    with LocalCluster(scheduler_port=0, processes=False, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        assert sum(w.memory_limit for w in cluster.workers) <= TOTAL_MEMORY
Code example #23
File: test_local.py Project: bmaisonn/distributed
def test_only_local_access(loop):
    with LocalCluster(scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None,
                      loop=loop) as c:
        sync(loop, assert_can_connect_locally_4, c.scheduler.port)
Code example #24
File: test_local.py Project: PhanidharJammula/py
def test_protocol_ip(loop):
    with LocalCluster(
        host="tcp://127.0.0.2", loop=loop, n_workers=0, processes=False
    ) as cluster:
        assert cluster.scheduler.address.startswith("tcp://127.0.0.2")
Code example #25
def test_death_timeout_raises(loop):
    with pytest.raises(gen.TimeoutError):
        with LocalCluster(scheduler_port=0, silence_logs=False,
                          death_timeout=1e-10, diagnostics_port=None,
                          loop=loop) as cluster:
            pass
Code example #26
File: test_local.py Project: PhanidharJammula/py
def test_protocol_tcp(loop):
    with LocalCluster(
        protocol="tcp", loop=loop, n_workers=0, processes=False
    ) as cluster:
        assert cluster.scheduler.address.startswith("tcp://")
Code example #27
File: test_local.py Project: PhanidharJammula/py
def test_protocol_inproc(loop):
    with LocalCluster(protocol="inproc://", loop=loop, processes=False) as cluster:
        assert cluster.scheduler.address.startswith("inproc://")
Code example #28
File: test_local.py Project: PhanidharJammula/py
def test_only_local_access(loop):
    with LocalCluster(
        0, scheduler_port=0, silence_logs=False, dashboard_address=None, loop=loop
    ) as c:
        sync(loop, assert_can_connect_locally_4, c.scheduler.port)
Code example #29
File: test_local.py Project: PhanidharJammula/py
async def test_defaults(cleanup):
    async with LocalCluster(
        scheduler_port=0, silence_logs=False, dashboard_address=None, asynchronous=True
    ) as c:
        assert sum(w.nthreads for w in c.workers.values()) == CPU_COUNT
        assert all(isinstance(w, Nanny) for w in c.workers.values())
Code example #30
File: test_local.py Project: PhanidharJammula/py
async def test_async_with():
    async with LocalCluster(processes=False, asynchronous=True) as cluster:
        w = cluster.workers
        assert w

    assert not w
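
For completeness, the asynchronous pattern these last examples exercise can be run standalone like this; a sketch, not part of the test suite:

import asyncio

from distributed import Client, LocalCluster


async def main():
    # asynchronous=True keeps the cluster and client on the caller's event loop.
    async with LocalCluster(processes=False, asynchronous=True) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            future = client.submit(sum, [1, 2, 3])
            print(await future)   # futures are awaitable with an async client


asyncio.run(main())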