예제 #1
0
파일: utils.py 프로젝트: rapidsai/dask-cuda
def get_ucx_config(
    enable_tcp_over_ucx=None,
    enable_infiniband=None,
    enable_nvlink=None,
    enable_rdmacm=None,
    net_devices=None,
    cuda_device_index=None,
):
    """Fill in and return the ``distributed.comm.ucx`` Dask configuration.

    Parameters
    ----------
    enable_tcp_over_ucx, enable_infiniband, enable_nvlink : bool or None
        Requested state of each transport. When at least one of them is
        explicitly ``False`` and none is explicitly ``True``, every transport
        left as ``None`` is switched to ``True`` before being stored.
    enable_rdmacm : bool or None
        Stored under the ``rdmacm`` key unchanged.
    net_devices : str or None
        When neither ``None`` nor the empty string, it is forwarded together
        with ``cuda_device_index`` to ``get_ucx_net_devices`` and the result
        is stored under the ``net-devices`` key.
    cuda_device_index : optional
        Only used as the first argument to ``get_ucx_net_devices``.

    Returns
    -------
    The mapping obtained from ``dask.config.get("distributed.comm.ucx")``
    after the keys above have been written to it.

    Raises
    ------
    ValueError
        If ``net_devices == "auto"`` while ``enable_infiniband is False``.

    Notes
    -----
    NOTE(review): the mapping returned by ``dask.config.get`` is modified in
    place; confirm whether that aliases the global Dask configuration.
    """
    if net_devices == "auto" and enable_infiniband is False:
        raise ValueError(
            "Using ucx_net_devices='auto' is currently only "
            "supported when enable_infiniband=True."
        )

    ucx_config = dask.config.get("distributed.comm.ucx")

    def _assign(option, value):
        # Store under whichever spelling of the key canonical_name selects
        # for the current contents of the configuration mapping.
        ucx_config[canonical_name(option, ucx_config)] = value

    _assign("create-cuda-context", True)
    _assign("reuse-endpoints", not _ucx_111)

    # An explicit opt-out with no explicit opt-in means "everything else on":
    # unspecified transports are promoted to True. Explicit opt-ins (alone or
    # mixed with opt-outs) are taken as the user's complete intent, so
    # unspecified transports stay None in that case.
    #
    # This may be handled more gracefully in Distributed in the future.
    requested = (enable_tcp_over_ucx, enable_infiniband, enable_nvlink)
    some_disabled = any(flag is False for flag in requested)
    some_enabled = any(flag is True for flag in requested)
    if some_disabled and not some_enabled:
        enable_tcp_over_ucx, enable_infiniband, enable_nvlink = (
            True if flag is None else flag for flag in requested
        )

    _assign("tcp", enable_tcp_over_ucx)
    _assign("infiniband", enable_infiniband)
    _assign("nvlink", enable_nvlink)
    _assign("rdmacm", enable_rdmacm)

    # cuda-copy is turned on as soon as any transport ends up truthy;
    # otherwise it is explicitly reset to None.
    any_transport_on = enable_tcp_over_ucx or enable_infiniband or enable_nvlink
    _assign("cuda-copy", True if any_transport_on else None)

    if net_devices is not None and net_devices != "":
        _assign(
            "net-devices",
            get_ucx_net_devices(cuda_device_index, net_devices),
        )
    return ucx_config
예제 #2
0
def test_canonical_name():
    """Check how ``canonical_name`` resolves hyphen/underscore key variants.

    A query whose hyphen or underscore variant is already present in the
    mapping must resolve to the spelling that is present; a query with no
    variant in the mapping must come back unchanged.
    """
    config = {"foo-bar": 1, "fizz_buzz": 2}

    # (query, expected resolved key)
    cases = [
        ("foo-bar", "foo-bar"),
        ("foo_bar", "foo-bar"),
        ("fizz-buzz", "fizz_buzz"),
        ("fizz_buzz", "fizz_buzz"),
        ("new-key", "new-key"),
        ("new_key", "new_key"),
    ]
    for query, expected in cases:
        assert canonical_name(query, config) == expected
예제 #3
0
def test_canonical_name():
    """Verify ``canonical_name`` against existing and unknown keys.

    Keys that exist in the mapping under either spelling (hyphen or
    underscore) resolve to the stored spelling; keys absent under both
    spellings are returned as given.
    """
    existing = {'foo-bar': 1, 'fizz_buzz': 2}

    # Maps each queried name to the key canonical_name is expected to return.
    expected_by_query = {
        'foo-bar': 'foo-bar',
        'foo_bar': 'foo-bar',
        'fizz-buzz': 'fizz_buzz',
        'fizz_buzz': 'fizz_buzz',
        'new-key': 'new-key',
        'new_key': 'new_key',
    }
    for query, expected in expected_by_query.items():
        assert canonical_name(query, existing) == expected
예제 #4
0
def test_get_ucx_config(enable_tcp_over_ucx, enable_infiniband, enable_nvlink,
                        net_devices):
    """Check the configuration produced by ``get_ucx_config``.

    The arguments are the transport flags and ``net_devices`` value forwarded
    to ``get_ucx_config`` (with ``cuda_device_index=0``). The test is skipped
    when ``ucp`` cannot be imported.
    """
    pytest.importorskip("ucp")

    kwargs = dict(
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
        net_devices=net_devices,
        cuda_device_index=0,
    )

    # "auto" net devices with InfiniBand explicitly disabled must be rejected;
    # there is nothing else to verify in that case.
    if net_devices == "auto" and enable_infiniband is False:
        with pytest.raises(ValueError):
            get_ucx_config(**kwargs)
        return

    ucx_config = get_ucx_config(**kwargs)

    def stored(key):
        # Read a value through the same key normalisation the config uses.
        return ucx_config[canonical_name(key, ucx_config)]

    def expected_flag(requested, other_a, other_b):
        # An explicit request is stored as-is. An unspecified transport is
        # expected to be True only when neither other transport is explicitly
        # enabled and they are not both unspecified; otherwise it stays None.
        if requested is not None:
            return requested
        others = (other_a, other_b)
        none_enabled = all(flag is not True for flag in others)
        both_unset = all(flag is None for flag in others)
        return True if none_enabled and not both_unset else None

    assert stored("create_cuda_context") is True

    assert stored("tcp") is expected_flag(
        enable_tcp_over_ucx, enable_infiniband, enable_nvlink)
    assert stored("infiniband") is expected_flag(
        enable_infiniband, enable_tcp_over_ucx, enable_nvlink)
    assert stored("nvlink") is expected_flag(
        enable_nvlink, enable_tcp_over_ucx, enable_infiniband)

    # cuda-copy is expected whenever something was specified and not every
    # transport was explicitly turned off.
    transports = [enable_tcp_over_ucx, enable_infiniband, enable_nvlink]
    anything_specified = any(flag is not None for flag in transports)
    everything_disabled = all(flag is False for flag in transports)
    expected_cuda_copy = (
        True if anything_specified and not everything_disabled else None)
    assert stored("cuda-copy") is expected_cuda_copy

    if net_devices == "auto":
        # Since the actual device is system-dependent, we don't do any
        # checks at the moment. If any InfiniBand devices are available,
        # that will be the value of "net-devices", otherwise an empty string.
        return

    if net_devices == "eth0":
        assert stored("net-devices") == "eth0"
    else:
        assert stored("net-devices") is None