Example #1
0
async def test_send_recv_am(size, blocking_progress_mode, recv_wait, data):
    """Send an active message to a local listener and check what it received.

    ``data`` is a mapping supplying the ``"allocator"``, ``"memory_type"``,
    ``"generator"`` and ``"validator"`` entries used below. ``recv_wait``
    controls whether the sender sleeps before sending.
    """
    rndv_thresh = 8192
    ucp.init(
        options={"RNDV_THRESH": str(rndv_thresh)},
        blocking_progress_mode=blocking_progress_mode,
    )

    ucp.register_am_allocator(data["allocator"], data["memory_type"])
    payload = data["generator"](size)

    # The server callback appends whatever it receives to this list.
    received = []
    listener = ucp.create_listener(simple_server(size, received))

    num_clients = 1
    endpoints = []
    for _ in range(num_clients):
        endpoint = await ucp.create_endpoint(ucp.get_address(), listener.port)
        endpoints.append(endpoint)

    for endpoint in endpoints:
        if recv_wait:
            # Sleeping first guarantees the listener's ep.am_recv call is
            # already pending when the data shows up, instead of returning
            # immediately because the message was waiting for it.
            await asyncio.sleep(1)
        await endpoint.am_send(payload)

    for endpoint in endpoints:
        await endpoint.close()
    listener.close()

    eager_cuda = data["memory_type"] == "cuda" and payload.nbytes < rndv_thresh
    if eager_cuda:
        # Eager messages always land on the host; with no host allocator
        # registered UCX-Py falls back to `bytearray`.
        assert received[0] == bytearray(payload.get())
    else:
        data["validator"](received[0], payload)
Example #2
0
async def _func_ucp_create_listener(sessionId, r):
    """
    Start a UCP listener for incoming endpoint connections, unless the
    worker state for this session already holds one.

    Once started, the function keeps awaiting the listener's coroutine
    (pausing one second between rounds) until the listener reports it is
    done, then removes it from the worker state and finalizes UCP.
    :param sessionId: uuid Unique id for current instance
    :param r: float a random number to stop the function from being cached
    """
    if "ucp_listener" in worker_state(sessionId):
        print("Listener already started for sessionId=" + str(sessionId))
        return

    ucp.init()
    listener = ucp.start_listener(_connection_func, 0, is_coroutine=True)

    # Publish the listener so later calls for this session see it.
    worker_state(sessionId)["ucp_listener"] = listener

    while not listener.done():
        await listener.coroutine
        await asyncio.sleep(1)

    # Drop every reference to the listener before shutting UCP down.
    del worker_state(sessionId)["ucp_listener"]
    del listener

    ucp.fin()
Example #3
0
def init_once():
    """Import and initialise ``ucp`` on first use and pick a CUDA allocator.

    Later calls return immediately once the module-level ``ucp`` is set.
    The module-level ``cuda_array`` is bound to a one-argument callable
    backed by RMM when importable, otherwise Numba, otherwise a stub that
    raises ``RuntimeError`` when called.
    """
    global ucp, cuda_array
    if ucp is not None:
        return

    import ucp as _ucp

    ucp = _ucp
    ucp.init(options=dask.config.get("ucx"), env_takes_precedence=True)

    # Select the implementation behind `cuda_array()`, which allocates new
    # CUDA arrays of `n` bytes.
    try:
        import rmm

        if hasattr(rmm, "DeviceBuffer"):

            def _rmm_device_buffer(n):
                return rmm.DeviceBuffer(size=n)

            cuda_array = _rmm_device_buffer
        else:  # pre-0.11.0

            def _rmm_device_array(n):
                return rmm.device_array(n, dtype=np.uint8)

            cuda_array = _rmm_device_array
    except ImportError:
        try:
            import numba.cuda

            def _numba_device_array(n):
                return numba.cuda.device_array((n, ), dtype=np.uint8)

            cuda_array = _numba_device_array
        except ImportError:

            def _no_cuda_backend(n):
                raise RuntimeError(
                    "In order to send/recv CUDA arrays, Numba or RMM is required"
                )

            cuda_array = _no_cuda_backend
Example #4
0
def test_init_options():
    """Options passed to ``ucp.init`` must show up in ``ucp.get_config``."""
    ucp.reset()
    requested = {"SEG_SIZE": "3M"}
    # Any environment specification is expected to be ignored here.
    ucp.init(requested)
    effective = ucp.get_config()
    assert effective["SEG_SIZE"] == requested["SEG_SIZE"]
Example #5
0
def client(port, func, endpoint_error_handling):
    """Connect to the server, receive one message, then kill this process.

    Steps: wait for the server to come up, receive an object, and finally
    send SIGKILL to the current process so that the server observes an
    "Endpoint timeout".

    :param port: port forwarded to ``get_ep``
    :param func: not used by this function
    :param endpoint_error_handling: forwarded to ``get_ep``
    """
    ucp.init()

    # must create context before importing
    # cudf/cupy/etc

    async def read():
        # Give the server a moment to start listening.
        await asyncio.sleep(1)
        ep = await get_ep("client", port, endpoint_error_handling)
        import cupy

        cupy.cuda.set_allocator(None)

        # `recv` is awaited by the other client in this file; without the
        # `await`, tuple-unpacking the bare coroutine raises TypeError and
        # the kill below is never reached.
        frames, msg = await recv(ep)

        # Client process suicides to force an "Endpoint timeout"
        # on the server
        os.kill(os.getpid(), signal.SIGKILL)

    asyncio.get_event_loop().run_until_complete(read())
def server(port, func, comm_api):
    """Serve one client: send a serialized CUDA object, then await a receipt.

    A listener is created on ``port``. For each connection the object
    produced by the unpickled ``func`` is serialized and sent
    ``ITERATIONS`` times, using the tag API when ``comm_api == "tag"`` and
    the active-message API otherwise. The server then waits for the
    ``b"shutdown listener"`` message, closes the endpoint and the listener.
    """
    from distributed.comm.utils import to_frames
    from distributed.protocol import to_serialize

    ucp.init()

    if comm_api == "am":
        register_am_allocators()

    async def f(listener_port):
        # `write` is invoked when the client asks to connect.
        async def write(ep):
            import cupy

            cupy.cuda.set_allocator(None)

            print("CREATING CUDA OBJECT IN SERVER...")
            cuda_obj = cloudpickle.loads(func)()
            frames = await to_frames(
                {"data": to_serialize(cuda_obj)},
                serializers=("cuda", "dask", "pickle"),
            )
            for _ in range(ITERATIONS):
                # Send meta data
                if comm_api == "tag":
                    await send(ep, frames)
                else:
                    await am_send(ep, frames)

            print("CONFIRM RECEIPT")
            close_msg = b"shutdown listener"

            if comm_api != "tag":
                reply = await ep.am_recv()
            else:
                # Tag API: the size arrives first, then the payload itself.
                reply_size = np.empty(1, dtype=np.uint64)
                await ep.recv(reply_size)

                reply = np.empty(reply_size[0], dtype=np.uint8)
                await ep.recv(reply)

            assert reply.tobytes() == close_msg
            print("Shutting Down Server...")
            await ep.close()
            listener.close()

        listener = ucp.create_listener(write, port=listener_port)
        try:
            # Poll until `write` closes the listener.
            while not listener.closed():
                await asyncio.sleep(0.1)
        except ucp.UCXCloseError:
            pass

    event_loop = get_event_loop()
    event_loop.run_until_complete(f(port))
Example #7
0
async def test_fence(blocking_progress_mode):
    """Calling ``ucp.fence`` right after ``ucp.init`` must not raise.

    The test is a coroutine on purpose: that way progress tasks get
    cleared and no warnings are emitted.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    # Expected to succeed unconditionally.
    ucp.fence()
Example #8
0
def test_init_options():
    """Explicit options must win over a conflicting ``UCX_*`` variable."""
    ucp.reset()
    # This environment value is expected to be ignored.
    os.environ["UCX_SEG_SIZE"] = "2M"
    requested = {"SEG_SIZE": "3M"}
    ucp.init(requested)
    effective = ucp.get_config()
    assert effective["SEG_SIZE"] == requested["SEG_SIZE"]
Example #9
0
def test_init_options_and_env():
    """With ``env_takes_precedence=True`` the environment value must win.

    ``UCX_SEG_SIZE`` is set to "4M" while the options dict asks for "3M";
    the resulting configuration is expected to report the environment's
    value, not the one from the options dict.
    """
    ucp.reset()
    os.environ["UCX_SEG_SIZE"] = "4M"
    options = {"SEG_SIZE": "3M"}  # Should be ignored
    ucp.init(options, env_takes_precedence=True)
    config = ucp.get_config()
    # Compare against the environment: the options value is the one that
    # is supposed to lose.
    assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]
Example #10
0
    def start(self):
        """Initialise UCP, start the listener and schedule its coroutine.

        The listener is stored on ``self.ucp_server`` and the scheduled
        task on ``self._task``.
        """

        async def serve_forever(client_ep, listener_instance):
            # Wrap the freshly connected endpoint in a UCX comm object.
            comm = UCX(
                client_ep,
                local_addr=self.address,
                # TODO: https://github.com/Akshay-Venkatesh/ucx-py/issues/111
                peer_addr=self.address,
                deserialize=self.deserialize,
            )
            self.listener_instance = listener_instance
            if self.comm_handler:
                await self.comm_handler(comm)

        ucp.init()
        self.ucp_server = ucp.start_listener(
            serve_forever,
            listener_port=self._input_port,
            is_coroutine=True,
        )

        # Prefer the running loop; fall back when there is none or when
        # `asyncio.get_running_loop` is unavailable.
        try:
            event_loop = asyncio.get_running_loop()
        except (RuntimeError, AttributeError):
            event_loop = asyncio.get_event_loop()

        self._task = event_loop.create_task(self.ucp_server.coroutine)
Example #11
0
def _worker_process(queue, rank, server_address, n_workers, ucx_options_list,
                    func, args):
    """Connect this worker to every other worker, run ``func``, report back.

    The worker publishes its listener port on ``queue``, reads the list of
    all ports back from it, connects to every higher-ranked worker and
    waits for the lower-ranked ones to connect to it. ``func(rank, eps,
    args)`` is then executed (awaited when it is a coroutine function) and
    its result is put on ``queue``.
    """
    import ucp

    if ucx_options_list is not None:
        ucp.init(ucx_options_list[rank])

    async def run():
        # Maps a peer's rank to the endpoint connected to it.
        eps = {}

        async def server_handler(ep):
            # The connecting peer announces its rank first.
            rank_buf = np.empty((1, ), dtype=np.uint64)
            await ep.recv(rank_buf)
            assert rank_buf[0] not in eps
            eps[rank_buf[0]] = ep

        listener = ucp.create_listener(server_handler)
        queue.put(listener.port)
        port_list = queue.get()

        # Only connect "upwards"; lower ranks connect to this worker.
        for peer in range(rank + 1, n_workers):
            assert peer not in eps
            eps[peer] = await ucp.create_endpoint(server_address,
                                                  port_list[peer])
            await eps[peer].send(np.array([rank], dtype=np.uint64))

        expected_peers = n_workers - 1
        while len(eps) != expected_peers:
            await asyncio.sleep(0.1)

        outcome = func(rank, eps, args)
        if asyncio.iscoroutinefunction(func):
            outcome = await outcome
        return outcome

    event_loop = asyncio.get_event_loop()
    queue.put(event_loop.run_until_complete(run()))
Example #12
0
def ucp_init():
    """Generator that sets up UCP, yields once, and always finalizes it.

    ``ucp.fin()`` runs in the ``finally`` clause, so it is also called when
    ``set_env()`` or ``ucp.init()`` raises.
    """
    try:
        # Environment first, then UCP itself.
        set_env()
        ucp.init()

        yield
    finally:
        ucp.fin()
Example #13
0
def test_init_options_and_env():
    """The environment wins when ``env_takes_precedence=True`` is passed."""
    ucp.reset()
    # This value is expected to be ignored in favour of UCX_SEG_SIZE.
    requested = {"SEG_SIZE": "3M"}
    ucp.init(requested, env_takes_precedence=True)
    effective = ucp.get_config()
    assert effective["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]
    # The caller's options dict must be left untouched.
    assert requested == {"SEG_SIZE": "3M"}
Example #14
0
def server(port, func, endpoint_error_handling):
    """Send a CUDA object to one client and exit based on the logged output.

    Steps: create the listener, attach a queue logger, write the object,
    tear down the endpoint and listener, then scan the log queue for an
    "Endpoint timeout" message.

    The process exits with status -80 if that message was logged and 0
    otherwise.
    """
    ucp.init()

    log_queue, log_listener = get_log_queue_handler()
    log_listener.start()

    async def f(listener_port):
        # `write` is invoked when the client asks to connect.
        async def write(ep):
            import cupy

            cupy.cuda.set_allocator(None)

            print("CREATING CUDA OBJECT IN SERVER...")
            cuda_obj = cloudpickle.loads(func)()
            frames = await to_frames(
                {"data": to_serialize(cuda_obj)},
                serializers=("cuda", "dask", "pickle"),
            )

            # Send meta data
            try:
                await send(ep, frames)
            except Exception:
                # Swallowed on purpose: avoids the process hanging on
                # "Endpoint timeout".
                pass

            print("Shutting Down Server...")
            await ep.close()
            listener.close()

        listener = ucp.create_listener(
            write,
            port=listener_port,
            endpoint_error_handling=endpoint_error_handling,
        )
        try:
            # Poll until `write` closes the listener.
            while not listener.closed():
                await asyncio.sleep(0.1)
        except ucp.UCXCloseError:
            pass

    # NOTE(review): the log listener is stopped before the event loop is
    # driven below -- confirm this ordering is intended.
    log_listener.stop()

    asyncio.get_event_loop().run_until_complete(f(port))

    # Check log for the expected "Endpoint timeout" and exits with
    # status -80 if encountered, 0 otherwise. The process will exit
    # with status -6 when endpoint_error_callback=False.
    while not log_queue.empty():
        record = log_queue.get()
        if "Endpoint timeout" in record.getMessage():
            sys.exit(-80)
    sys.exit(0)
Example #15
0
def client(queue, port, args):
    """Benchmark client: time ``args.n_iter`` send/recv round trips.

    The array module is chosen from ``args.object_type`` (NumPy, CuPy, or
    CuPy backed by an RMM pool). The list of per-iteration durations is
    put on ``queue`` at the end.
    """
    import ucp

    ucp.init()

    if args.object_type == "numpy":
        import numpy as np
    elif args.object_type == "cupy":
        import cupy as np

        np.cuda.runtime.setDevice(args.client_dev)
    else:
        import cupy as np
        import rmm

        rmm.reinitialize(
            pool_allocator=True,
            managed_memory=False,
            initial_pool_size=args.rmm_init_pool_size,
            devices=[args.client_dev],
        )
        np.cuda.runtime.setDevice(args.client_dev)
        np.cuda.set_allocator(rmm.rmm_cupy_allocator)

    async def run():
        ep = await ucp.create_endpoint(args.server_address, port)

        if args.reuse_alloc:
            # One send buffer and one receive buffer shared by all iterations.
            shared_send = np.arange(args.n_bytes, dtype="u1")
            shared_recv = np.zeros(args.n_bytes, dtype="u1")
            msg_send_list = [shared_send] * args.n_iter
            msg_recv_list = [shared_recv] * args.n_iter
        else:
            # Fresh buffers for every iteration.
            msg_send_list = []
            msg_recv_list = []
            for _ in range(args.n_iter):
                msg_send_list.append(np.arange(args.n_bytes, dtype="u1"))
                msg_recv_list.append(np.zeros(args.n_bytes, dtype="u1"))

        assert msg_send_list[0].nbytes == args.n_bytes
        assert msg_recv_list[0].nbytes == args.n_bytes

        if args.cuda_profile:
            np.cuda.profiler.start()

        times = []
        for outgoing, incoming in zip(msg_send_list, msg_recv_list):
            began = clock()
            await ep.send(outgoing, args.n_bytes)
            await ep.recv(incoming, args.n_bytes)
            times.append(clock() - began)

        if args.cuda_profile:
            np.cuda.profiler.stop()
        queue.put(times)

    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(run())
    event_loop.close()
Example #16
0
def client(port, func, comm_api):
    """Receive a CUDA object from the server, acknowledge it and verify it.

    Steps: wait for the server to come up, receive the object
    ``ITERATIONS`` times (tag API when ``comm_api == "tag"``, active
    messages otherwise), send the ``b"shutdown listener"`` receipt, then
    compare the received object against one built locally from ``func``.
    """
    from distributed.utils import nbytes

    ucp.init()

    if comm_api == "am":
        register_am_allocators()

    # must create context before importing
    # cudf/cupy/etc

    async def read():
        # Give the server a moment to start listening.
        await asyncio.sleep(1)
        ep = await get_ep("client", port)
        msg = None
        import cupy

        cupy.cuda.set_allocator(None)
        for _ in range(ITERATIONS):
            if comm_api != "tag":
                frames, msg = await am_recv(ep)
            else:
                frames, msg = await recv(ep)

        close_msg = b"shutdown listener"

        if comm_api != "tag":
            await ep.am_send(close_msg)
        else:
            # Tag API: announce the size first, then send the payload.
            close_msg_size = np.array([len(close_msg)], dtype=np.uint64)

            await ep.send(close_msg_size)
            await ep.send(close_msg)

        print("Shutting Down Client...")
        return msg["data"]

    rx_cuda_obj = asyncio.get_event_loop().run_until_complete(read())
    # Exercise the received object; the result itself is discarded.
    rx_cuda_obj + rx_cuda_obj
    num_bytes = nbytes(rx_cuda_obj)
    print(f"TOTAL DATA RECEIVED: {num_bytes}")

    # Build the reference object locally from the same generator.
    pure_cuda_obj = cloudpickle.loads(func)()

    if not isinstance(rx_cuda_obj, cupy.ndarray):
        from cudf.testing._utils import assert_eq

        assert_eq(rx_cuda_obj, pure_cuda_obj)
    else:
        cupy.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj)
Example #17
0
 async def connect(self, address: str, deserialize=True, **connection_args) -> UCX:
     """Open a UCX endpoint to ``address`` and wrap it in ``self.comm_class``.

     ``address`` is split with ``parse_host_port``; ``connection_args`` is
     accepted but not used here.
     """
     logger.debug("UCXConnector.connect: %s", address)
     ucp.init()
     host, port = parse_host_port(address)
     endpoint = await ucp.get_endpoint(host.encode(), port)
     return self.comm_class(
         endpoint,
         local_addr=None,
         peer_addr=self.prefix + address,
         deserialize=deserialize,
     )
Example #18
0
async def test_zero_port():
    """Requesting port 0 must yield a listener bound to a real port."""
    ucp.init()
    listener = ucp.start_listener(
        talk_to_client,
        listener_port=0,
        is_coroutine=True,
    )
    assert 0 < listener.port < 2**16

    host = ucp.get_address()
    # Run the server side and the client side concurrently.
    await asyncio.gather(
        listener.coroutine,
        talk_to_server(host.encode(), listener.port),
    )
    ucp.fin()
Example #19
0
def server(queue, args):
    """Benchmark server: echo ``args.n_iter`` messages back to one client.

    The listener's port is put on ``queue``. The array module is chosen
    from ``args.object_type`` (NumPy, CuPy, or CuPy backed by an RMM pool).
    """
    if args.server_cpu_affinity >= 0:
        os.sched_setaffinity(0, [args.server_cpu_affinity])

    ucp.init()

    if args.object_type == "numpy":
        import numpy as np
    elif args.object_type == "cupy":
        import cupy as np

        np.cuda.runtime.setDevice(args.server_dev)
    else:
        import cupy as np

        import rmm

        rmm.reinitialize(
            pool_allocator=True,
            managed_memory=False,
            initial_pool_size=args.rmm_init_pool_size,
            devices=[args.server_dev],
        )
        np.cuda.runtime.setDevice(args.server_dev)
        np.cuda.set_allocator(rmm.rmm_cupy_allocator)

    async def run():
        async def server_handler(ep):

            if args.reuse_alloc:
                # A single buffer shared by every iteration.
                shared = np.zeros(args.n_bytes, dtype="u1")
                buffers = [shared] * args.n_iter
            else:
                buffers = [
                    np.zeros(args.n_bytes, dtype="u1")
                    for _ in range(args.n_iter)
                ]

            assert buffers[0].nbytes == args.n_bytes
            for buf in buffers:
                # Echo: receive into the buffer, then send it straight back.
                await ep.recv(buf, args.n_bytes)
                await ep.send(buf, args.n_bytes)
            await ep.close()
            listener.close()

        listener = ucp.create_listener(server_handler)
        queue.put(listener.port)

        # Poll until the handler closes the listener.
        while not listener.closed():
            await asyncio.sleep(0.5)

    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(run())
    event_loop.close()
Example #20
0
async def test_send_recv_bytes(size, blocking_progress_mode):
    """Round-trip a ``bytearray`` of ``size`` bytes through an echo server."""
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    payload = bytearray(b"m" * size)
    payload_size = np.array([len(payload)], dtype=np.uint64)

    echo_handler = make_echo_server(lambda n: bytearray(n))
    listener = ucp.create_listener(echo_handler)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # The size goes first, the payload second.
    await client.send(payload_size)
    await client.send(payload)

    echoed = bytearray(size)
    await client.recv(echoed)
    assert echoed == payload
Example #21
0
def test_check_transport(transports):
    """Requested transports must be active; the other known ones must not.

    :param transports: comma-separated transport names, passed as the
        ``TLS`` option
    """
    transports_list = transports.split(",")
    # Of the two transports this test knows about, those not requested.
    inactive_transports = list(set(["posix", "tcp"]) - set(transports_list))

    ucp.reset()
    options = {"TLS": transports, "NET_DEVICES": "all"}
    ucp.init(options)

    active_transports = ucp.get_active_transports()
    for t in transports_list:
        assert any(at.startswith(t) for at in active_transports)
    for it in inactive_transports:
        # No active transport may carry the inactive prefix. The previous
        # `any(not ...)` form passed as soon as a single active transport
        # had a different prefix, so it could never catch a violation.
        assert not any(at.startswith(it) for at in active_transports)
Example #22
0
async def test_send_recv_numpy(size, dtype, blocking_progress_mode):
    """Round-trip a NumPy array through an echo server."""
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    payload = np.arange(size, dtype=dtype)
    payload_size = np.array([payload.nbytes], dtype=np.uint64)

    echo_handler = make_echo_server(lambda n: np.empty(n, dtype=np.uint8))
    listener = ucp.create_listener(echo_handler)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # The size goes first, the payload second.
    await client.send(payload_size)
    await client.send(payload)

    echoed = np.empty_like(payload)
    await client.recv(echoed)
    np.testing.assert_array_equal(echoed, payload)
Example #23
0
async def test_send_recv_error(blocking_progress_mode):
    """Receiving into a buffer of the wrong size must raise ``UCXError``."""
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    async def say_hey_server(ep):
        # The server sends exactly three bytes.
        await ep.send(bytearray(b"Hey"))

    listener = ucp.create_listener(say_hey_server)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # A 100-byte buffer cannot match the 3-byte message.
    oversized = bytearray(100)
    expected_error = r"length mismatch: 3 \(got\) != 100 \(expected\)"
    with pytest.raises(ucp.exceptions.UCXError, match=expected_error):
        await client.recv(oversized)
Example #24
0
async def echo_pair(cuda_info=None):
    """Async generator yielding a ``(listener, client)`` pair.

    After the consumer is done, the client endpoint is destroyed, the
    listener task is awaited and UCP is finalized.
    """
    ucp.init()
    event_loop = asyncio.get_event_loop()
    listener = ucp.start_listener(
        ucp.make_server(cuda_info),
        is_coroutine=True,
    )
    # Run the listener in the background while the pair is in use.
    listener_task = event_loop.create_task(listener.coroutine)
    host = ucp.get_address()
    client = await ucp.get_endpoint(host.encode(), listener.port)
    try:
        yield listener, client
    finally:
        ucp.destroy_ep(client)
        await listener_task
        ucp.fin()
Example #25
0
async def test_send_recv_obj(blocking_progress_mode):
    """Round-trip an object via ``send_obj`` / ``recv_obj``."""
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    async def echo_obj_server(ep):
        # Send back whatever object was received.
        await ep.send_obj(await ep.recv_obj())

    listener = ucp.create_listener(echo_obj_server)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    payload = bytearray(b"hello")
    await client.send_obj(payload)
    echoed = await client.recv_obj()
    assert payload == echoed
Example #26
0
async def test_send_recv_numba(size, dtype, blocking_progress_mode):
    """Round-trip a Numba device array through an echo server.

    The test is skipped when ``numba.cuda`` cannot be imported.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    cuda = pytest.importorskip("numba.cuda")

    host_array = np.arange(size, dtype=dtype)
    payload = cuda.to_device(host_array)
    payload_size = np.array([payload.nbytes], dtype=np.uint64)

    echo_handler = make_echo_server(
        lambda n: cuda.device_array((n, ), dtype=np.uint8))
    listener = ucp.create_listener(echo_handler)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # The size goes first, the payload second.
    await client.send(payload_size)
    await client.send(payload)

    echoed = cuda.device_array_like(payload)
    await client.recv(echoed)
    np.testing.assert_array_equal(np.array(echoed), np.array(payload))
Example #27
0
def initialize(
    create_cuda_context=True,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    net_devices="",
):
    """Optionally create a CUDA context and configure UCX transports.

    When ``create_cuda_context`` is true a Numba CUDA context is requested;
    a failure is logged, not raised. When any of the ``enable_*`` flags is
    set, ``ucp`` is reset and initialised with the matching ``TLS`` options
    (plus ``NET_DEVICES`` when ``net_devices`` is non-empty), and every
    string-valued entry of the resulting configuration is exported to
    ``os.environ`` with a ``UCX_`` prefix.
    """
    if create_cuda_context:
        try:
            numba.cuda.current_context()
        except Exception:
            logger.error("Unable to start CUDA Context", exc_info=True)

    # Nothing UCX-related was asked for.
    if not (enable_tcp_over_ucx or enable_infiniband or enable_nvlink):
        return

    try:
        import ucp
    except ImportError:
        logger.error(
            "UCX protocol requested but ucp module is not available",
            exc_info=True)
        return

    # Base transports, extended according to the requested features.
    tls = "tcp,sockcm,cuda_copy"
    if enable_infiniband:
        tls = "rc," + tls
    if enable_nvlink:
        tls = tls + ",cuda_ipc"

    options = {"TLS": tls, "SOCKADDR_TLS_PRIORITY": "sockcm"}
    if net_devices is not None and net_devices != "":
        options["NET_DEVICES"] = net_devices

    ucp.reset()
    ucp.init(options=options)

    # Skip values that aren't actual environment variables (i.e., not strings)
    ucx_env = {
        "UCX_" + key: value
        for key, value in ucp.get_config().items()
        if isinstance(value, str)
    }

    # Also set the UCX environment variables: required by the Dask client.
    # It may be best to have the client ask the scheduler for the proper
    # variables.
    os.environ.update(ucx_env)
Example #28
0
async def test_send_recv_obj_numpy(blocking_progress_mode):
    """Round-trip an object via ``send_obj`` / ``recv_obj`` with an allocator.

    Both sides receive into buffers created by ``np.empty(..., dtype=uint8)``.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    allocator = functools.partial(np.empty, dtype=np.uint8)

    async def echo_obj_server(ep):
        # Send back whatever object was received.
        await ep.send_obj(await ep.recv_obj(allocator=allocator))

    listener = ucp.create_listener(echo_obj_server)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    payload = bytearray(b"hello")
    await client.send_obj(payload)
    echoed = await client.recv_obj(allocator=allocator)
    assert payload == echoed
Example #29
0
async def test_send_recv_cupy(size, dtype, blocking_progress_mode):
    """Round-trip a CuPy array through an echo server.

    The test is skipped when ``cupy`` cannot be imported.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    cupy = pytest.importorskip("cupy")

    payload = cupy.arange(size, dtype=dtype)
    payload_size = np.array([payload.nbytes], dtype=np.uint64)

    echo_handler = make_echo_server(
        lambda n: cupy.empty((n, ), dtype=np.uint8))
    listener = ucp.create_listener(echo_handler)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # The size goes first, the payload second.
    await client.send(payload_size)
    await client.send(payload)

    echoed = cupy.empty_like(payload)
    await client.recv(echoed)
    np.testing.assert_array_equal(cupy.asnumpy(echoed), cupy.asnumpy(payload))
Example #30
0
async def test_send_recv_error(blocking_progress_mode):
    """Receiving into a buffer of the wrong size must raise ``UCXError``.

    Afterwards the client and the listener are closed explicitly and the
    listener is checked to report itself as closed.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    async def say_hey_server(ep):
        # The server sends exactly three bytes.
        await ep.send(bytearray(b"Hey"))

    listener = ucp.create_listener(say_hey_server)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    # A 100-byte buffer cannot match the 3-byte message.
    oversized = bytearray(100)
    expected_error = r"length mismatch: 3 \(got\) != 100 \(expected\)"
    with pytest.raises(ucp.exceptions.UCXError, match=expected_error):
        await client.recv(oversized)

    # Explicit teardown, dropping the references as well.
    await client.close()
    listener.close()
    del client
    assert listener.closed() is True
    del listener