Beispiel #1
0
async def run():
    """Time 100 computations of ``a + a.T`` on an asynchronous DGX cluster.

    The ``enable_tcp_over_ucx``, ``enable_infiniband`` and ``enable_nvlink``
    flags are not parameters; they are resolved from the enclosing scope and
    passed both to ``initialize`` and to the ``DGX`` cluster constructor.

    The array is a 4000x4000 normal sample (mean 10, std 1) in 1000x1000
    chunks, drawn from a Dask random state that uses
    ``cupy.random.RandomState``. Each iteration prints its index and the
    elapsed time of ``client.compute``.
    """
    initialize(
        create_cuda_context=True,
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
    )

    async with DGX(
        interface="enp1s0f0",
        protocol="ucx",
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
        asynchronous=True,
    ) as dgx:
        async with Client(dgx, asynchronous=True) as client:
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            a = rs.normal(
                10, 1, (int(4e3), int(4e3)), chunks=(int(1e3), int(1e3))
            )
            x = a + a.T

            for i in range(100):
                print("Running iteration:", i)
                # perf_counter is monotonic, so the measured interval cannot
                # be skewed by wall-clock adjustments the way time.time() can.
                start = time.perf_counter()
                await client.compute(x)
                print("Time for iteration", i, ":",
                      time.perf_counter() - start)
Beispiel #2
0
def _test_tcp_only():
    """Sum ``arange(10000)`` on a TCP-protocol DGX cluster.

    The whole cluster/client lifetime is wrapped in ``pytest.warns`` and is
    therefore expected to emit a ``DeprecationWarning``.
    """
    with pytest.warns(DeprecationWarning):
        with DGX(protocol="tcp") as cluster, Client(cluster):
            values = da.from_array(numpy.arange(10000), chunks=(1000,))
            total = values.sum().compute()
            assert total == 49995000
Beispiel #3
0
async def test_dgx_ucx_infiniband_nvlink(params):
    """Compute ``a + a.T`` on a UCX DGX cluster configured from ``params``.

    ``params`` is a mapping with the keys ``"enable_tcp"``,
    ``"enable_infiniband"`` and ``"enable_nvlink"``; the same three values
    are handed to ``initialize`` and to the ``DGX`` constructor. The test is
    skipped when ``ucp`` cannot be imported. It only checks that the
    computation completes; the result is not inspected.
    """
    pytest.importorskip("ucp")

    # Looked up in the same order as before so a missing key fails identically.
    ucx_flags = {
        "enable_tcp_over_ucx": params["enable_tcp"],
        "enable_infiniband": params["enable_infiniband"],
        "enable_nvlink": params["enable_nvlink"],
    }

    initialize(create_cuda_context=True, **ucx_flags)

    cluster_cm = DGX(
        interface="enp1s0f0",
        protocol="ucx",
        asynchronous=True,
        **ucx_flags,
    )
    async with cluster_cm as cluster, \
            Client(cluster, asynchronous=True) as client:
        rng = da.random.RandomState(RandomState=cupy.random.RandomState)
        side = int(1e4)
        chunk = int(2.5e3)
        a = rng.normal(10, 1, (side, side), chunks=(chunk, chunk))
        symmetric = a + a.T

        await client.compute(symmetric)
Beispiel #4
0
async def run():
    """Time 100 computations of a dask_cudf column sum on an async DGX cluster.

    The ``enable_tcp_over_ucx``, ``enable_infiniband`` and ``enable_nvlink``
    flags are not parameters; they are resolved from the enclosing scope and
    passed both to ``initialize`` and to the ``DGX`` cluster constructor.

    The workload is the sum of a single-column cuDF frame
    (``"a": range(2**16)``) split into two partitions. Each iteration prints
    its index and the elapsed time of ``client.compute``.
    """
    initialize(
        create_cuda_context=True,
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
    )

    async with DGX(
        interface="enp1s0f0",
        protocol="ucx",
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
        asynchronous=True,
    ) as dgx:
        async with Client(dgx, asynchronous=True) as client:
            d = dask_cudf.from_cudf(
                cudf.DataFrame({"a": range(2**16)}), npartitions=2
            )
            r = d.sum()

            for i in range(100):
                print("Running iteration:", i)
                # perf_counter is monotonic, so the measured interval cannot
                # be skewed by wall-clock adjustments the way time.time() can.
                start = time.perf_counter()
                await client.compute(r)
                print("Time for iteration", i, ":",
                      time.perf_counter() - start)
Beispiel #5
0
async def test_dgx_tcp_over_ucx():
    """Start and stop a UCX DGX cluster with TCP-over-UCX enabled.

    The test passes when the cluster and an asynchronous client can both be
    entered and exited; no work is submitted. It is skipped when ``ucp``
    cannot be imported.
    """
    # The UCX environment is exported before ucp is imported; this ordering
    # is preserved from the original (presumably so UCX sees the variables
    # at import time -- confirm before reordering).
    os.environ.update(get_ucx_env(enable_tcp=True))

    pytest.importorskip("ucp")

    cluster_cm = DGX(
        protocol="ucx", enable_tcp_over_ucx=True, asynchronous=True
    )
    async with cluster_cm as cluster, Client(cluster, asynchronous=True):
        pass
Beispiel #6
0
def _test_tcp_over_ucx():
    """Check a TCP-over-UCX DGX cluster computes correctly and is configured.

    Sums ``arange(10000)`` through the cluster, then runs a configuration
    check on every worker via ``client.run``. The whole cluster/client
    lifetime is expected to emit a ``DeprecationWarning``.
    """

    def check_ucx_options():
        """Assert the UCX config seen by ``ucp`` lists the expected transports."""
        conf = ucp.get_config()
        assert "TLS" in conf
        transports = conf["TLS"]
        for required in ("tcp", "sockcm", "cuda_copy"):
            assert required in transports
        assert "sockcm" in conf["SOCKADDR_TLS_PRIORITY"]
        return True

    with pytest.warns(DeprecationWarning):
        with DGX(enable_tcp_over_ucx=True) as cluster, \
                Client(cluster) as client:
            values = da.from_array(numpy.arange(10000), chunks=(1000,))
            total = values.sum().compute()
            assert total == 49995000

            per_worker = client.run(check_ucx_options)
            assert all(per_worker.values())
Beispiel #7
0
def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink):
    """Check a UCX DGX cluster with optional InfiniBand and NVLink.

    Sums a CuPy ``arange(10000)`` through the cluster and runs a UCX
    configuration check on every worker. When ``enable_infiniband`` is true,
    the cluster is created with ``ucx_net_devices="auto"`` and each worker
    spec's ``UCX_NET_DEVICES`` environment entry is compared with the value
    returned by ``_get_dgx_net_devices()`` for that worker key.

    The test is skipped when ``cupy`` cannot be imported, and the whole
    cluster/client lifetime is expected to emit a ``DeprecationWarning``.
    """
    cupy = pytest.importorskip("cupy")

    net_devices = _get_dgx_net_devices()

    if enable_infiniband:
        ucx_net_devices = "auto"
    else:
        ucx_net_devices = None

    def check_ucx_options():
        """Assert the UCX config seen by ``ucp`` matches the enabled features."""
        conf = ucp.get_config()
        assert "TLS" in conf
        transports = conf["TLS"]
        for required in ("tcp", "sockcm", "cuda_copy"):
            assert required in transports
        assert "sockcm" in conf["SOCKADDR_TLS_PRIORITY"]
        if enable_nvlink:
            assert "cuda_ipc" in transports
        if enable_infiniband:
            assert "rc" in transports
        return True

    with pytest.warns(DeprecationWarning):
        with DGX(
            enable_tcp_over_ucx=True,
            enable_infiniband=enable_infiniband,
            enable_nvlink=enable_nvlink,
            ucx_net_devices=ucx_net_devices,
        ) as cluster, Client(cluster) as client:
            values = da.from_array(
                cupy.arange(10000), chunks=(1000,), asarray=False
            )
            total = values.sum().compute()
            assert total == 49995000

            if enable_infiniband:
                # Built eagerly as a list, like the original, so every
                # worker spec is inspected before the assertion runs.
                devices_match = [
                    cluster.worker_spec[name]["options"]["env"][
                        "UCX_NET_DEVICES"
                    ] == net_devices[name]
                    for name in cluster.worker_spec.keys()
                ]
                assert all(devices_match)

            per_worker = client.run(check_ucx_options)
            assert all(per_worker.values())
Beispiel #8
0
async def test_dgx():
    """Start and stop an async DGX cluster with InfiniBand and NVLink off.

    Passes when the cluster and an asynchronous client can both be entered
    and exited; no work is submitted.
    """
    cluster_cm = DGX(
        enable_infiniband=False, enable_nvlink=False, asynchronous=True
    )
    async with cluster_cm as cluster, Client(cluster, asynchronous=True):
        pass
Beispiel #9
0
async def test_dgx():
    """Start and stop an asynchronous DGX cluster with default options.

    Passes when the cluster and an asynchronous client can both be entered
    and exited; no work is submitted.
    """
    cluster_cm = DGX(asynchronous=True)
    async with cluster_cm as cluster, Client(cluster, asynchronous=True):
        pass