Example #1
0
def test_nbytes():
    """``nbytes`` must agree with NumPy's own ``.nbytes`` for arrays and
    for memoryviews wrapping those arrays."""
    arrays = [np.array(1), np.ones(shape=(10, 10))]

    for arr in arrays:
        # Direct array and its zero-copy memoryview must report the same size.
        assert nbytes(arr) == arr.nbytes
        assert nbytes(memoryview(arr)) == arr.nbytes
Example #2
0
    async def write(
        self,
        msg: dict,
        serializers=("cuda", "dask", "pickle", "error"),
        on_error: str = "message",
    ):
        """Serialize ``msg`` into frames and send them over the UCX endpoint.

        Parameters
        ----------
        msg : dict
            Message to send; per the inline note it may also be a list of
            dicts when sending batched messages.
        serializers : tuple, optional
            Serializer names forwarded to ``to_frames``; ``None`` is replaced
            by the default ``("cuda", "dask", "pickle", "error")``.
        on_error : str, optional
            Error-handling mode forwarded to ``to_frames``.

        Returns
        -------
        int
            Total number of bytes across all serialized frames.

        Raises
        ------
        CommClosedError
            If the endpoint is already closed, or if a UCX exception occurs
            while writing (the endpoint is aborted first in that case).
        """
        with log_errors():
            if self.closed():
                raise CommClosedError(
                    "Endpoint is closed -- unable to send message")
            try:
                if serializers is None:
                    serializers = ("cuda", "dask", "pickle", "error")
                # msg can also be a list of dicts when sending batched messages
                frames = await to_frames(
                    msg,
                    serializers=serializers,
                    on_error=on_error,
                    allow_offload=self.allow_offload,
                )
                nframes = len(frames)
                # One flag per frame: True when the frame is a CUDA device
                # buffer (exposes __cuda_array_interface__).
                cuda_frames = tuple(
                    hasattr(f, "__cuda_array_interface__") for f in frames)
                sizes = tuple(nbytes(f) for f in frames)
                # Zero-length frames are announced in the header but never
                # actually sent, so filter them out here.
                cuda_send_frames, send_frames = zip(
                    *((is_cuda, each_frame)
                      for is_cuda, each_frame in zip(cuda_frames, frames)
                      if nbytes(each_frame) > 0))

                # Send meta data

                # Send close flag and number of frames (_Bool, int64)
                await self.ep.send(struct.pack("?Q", False, nframes))
                # Send which frames are CUDA (bool) and
                # how large each frame is (uint64)
                await self.ep.send(
                    struct.pack(nframes * "?" + nframes * "Q", *cuda_frames,
                                *sizes))

                # Send frames

                # It is necessary to first synchronize the default stream before start
                # sending We synchronize the default stream because UCX is not
                # stream-ordered and syncing the default stream will wait for other
                # non-blocking CUDA streams. Note this is only sufficient if the memory
                # being sent is not currently in use on non-blocking CUDA streams.
                if any(cuda_send_frames):
                    synchronize_stream(0)

                for each_frame in send_frames:
                    await self.ep.send(each_frame)
                return sum(sizes)
            except (ucp.exceptions.UCXBaseException):
                self.abort()
                raise CommClosedError(
                    "While writing, the connection was closed")
Example #3
0
async def send(ep, frames):
    """Send ``frames`` over endpoint ``ep``, preceded by metadata.

    Wire order: frame count (uint64), per-frame CUDA flags (bool),
    per-frame sizes (uint64), then each non-empty frame body.

    Parameters
    ----------
    ep
        Endpoint-like object with an awaitable ``send`` method.
    frames : list
        Frames to transmit; zero-length frames are announced in the
        metadata but their bodies are skipped.
    """
    await ep.send(np.array([len(frames)], dtype=np.uint64))
    # FIX: ``np.bool`` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``bool`` produces the same bool_ dtype array.
    await ep.send(
        np.array([hasattr(f, "__cuda_array_interface__") for f in frames],
                 dtype=bool))
    await ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64))
    # Send frames
    for frame in frames:
        if nbytes(frame) > 0:
            await ep.send(frame)
Example #4
0
async def send(ep, frames):
    """Send ``frames`` over endpoint ``ep``, preceded by metadata.

    Wire order: frame count (uint64), per-frame CUDA flags (bool),
    per-frame sizes (uint64), then each non-empty frame body.
    Skips the test when ``distributed`` is not installed.
    """
    pytest.importorskip("distributed")
    from distributed.utils import nbytes

    await ep.send(np.array([len(frames)], dtype=np.uint64))
    # FIX: ``np.bool`` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``bool`` produces the same bool_ dtype array.
    await ep.send(
        np.array([hasattr(f, "__cuda_array_interface__") for f in frames],
                 dtype=bool))
    await ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64))
    # Send frames
    for frame in frames:
        if nbytes(frame) > 0:
            await ep.send(frame)
Example #5
0
def client(env, port, func):
    """Client half of a UCX transfer test: receive a CUDA object from the
    server, echo its frames back, then validate it against a locally
    reconstructed copy and (for large payloads) assert NVLink traffic grew.

    Parameters
    ----------
    env : dict
        Environment variables to inject before communicating (UCX config).
    port : int
        Port of the listening server endpoint.
    func : bytes
        cloudpickle-serialized zero-arg callable that recreates the same
        CUDA object locally for comparison.
    """
    # wait for server to come up
    # receive cudf object
    # deserialize
    # assert deserialized msg is cdf
    # send receipt

    os.environ.update(env)
    # Snapshot NVLink counters before any transfer so growth can be measured.
    before_rx, before_tx = total_nvlink_transfer()

    async def read():
        # Coroutine that performs the full receive/echo loop and returns the
        # deserialized payload from the last iteration.
        await asyncio.sleep(1)  # give the server time to start listening
        ep = await get_ep("client", port)
        import cupy as cp

        # Disable CuPy's memory pool so GPU usage reflects live data only.
        cp.cuda.set_allocator(None)

        for i in range(ITERATIONS):
            bytes_used = pynvml.nvmlDeviceGetMemoryInfo(
                pynvml.nvmlDeviceGetHandleByIndex(0)).used
            print("Bytes Used:", bytes_used, i)

            frames, msg = await recv(ep)

            # Send meta data
            await send(ep, frames)

        # Tell the server's listener to shut down: size first, then payload.
        close_msg = b"shutdown listener"
        close_msg_size = np.array([len(close_msg)], dtype=np.uint64)

        await ep.send(close_msg_size)
        await ep.send(close_msg)

        print("Shutting Down Client...")
        return msg["data"]

    rx_cuda_obj = asyncio.get_event_loop().run_until_complete(read())

    num_bytes = nbytes(rx_cuda_obj)
    print(f"TOTAL DATA RECEIVED: {num_bytes}")
    # nvlink only measures in KBs
    if num_bytes > 90000:
        rx, tx = total_nvlink_transfer()
        msg = f"RX BEFORE SEND: {before_rx} -- RX AFTER SEND: {rx} \
               -- TOTAL DATA: {num_bytes}"

        print(msg)
        assert rx > before_rx

    # Rebuild the same object locally and compare it with what was received.
    cuda_obj_generator = cloudpickle.loads(func)
    pure_cuda_obj = cuda_obj_generator()

    from cudf.tests.utils import assert_eq
    import cupy as cp

    if isinstance(rx_cuda_obj, cp.ndarray):
        cp.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj)
    else:
        assert_eq(rx_cuda_obj, pure_cuda_obj)
Example #6
0
        async def write(self, cdf):
            """Serialize ``cdf`` and send it over ``self.ep``.

            Wire order: pickled header + data frames are counted (uint64),
            flagged as CUDA or host (bool), sized (uint64), then each
            non-empty frame body is sent.

            Parameters
            ----------
            cdf
                Object exposing a ``serialize() -> (header, frames)`` method
                (e.g. a cuDF DataFrame).
            """
            header, _frames = cdf.serialize()
            frames = [pickle.dumps(header)] + _frames

            # Send meta data
            await self.ep.send(np.array([len(frames)], dtype=np.uint64))
            # FIX: ``np.bool`` was deprecated in NumPy 1.20 and removed in
            # 1.24; the builtin ``bool`` produces the same bool_ dtype.
            await self.ep.send(
                np.array(
                    [hasattr(f, "__cuda_array_interface__") for f in frames],
                    dtype=bool,
                )
            )
            await self.ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64))
            # Send frames
            for frame in frames:
                if nbytes(frame) > 0:
                    await self.ep.send(frame)
Example #7
0
def client(port, func, comm_api):
    """Client half of a UCX transfer test: repeatedly receive a CUDA object
    from the server over either the tag or active-message API, then validate
    it against a locally reconstructed copy.

    Parameters
    ----------
    port : int
        Port of the listening server endpoint.
    func : bytes
        cloudpickle-serialized zero-arg callable that recreates the same
        CUDA object locally for comparison.
    comm_api : str
        Either ``"tag"`` (tag-matching recv) or ``"am"`` (active messages).
    """
    # wait for server to come up
    # receive cudf object
    # deserialize
    # assert deserialized msg is cdf
    # send receipt
    from distributed.utils import nbytes

    ucp.init()

    if comm_api == "am":
        register_am_allocators()

    # must create context before importing
    # cudf/cupy/etc

    async def read():
        # Coroutine performing the receive loop; returns the deserialized
        # payload from the last iteration.
        await asyncio.sleep(1)  # give the server time to start listening
        ep = await get_ep("client", port)
        msg = None
        import cupy

        # Disable CuPy's memory pool so GPU usage reflects live data only.
        cupy.cuda.set_allocator(None)
        for i in range(ITERATIONS):
            if comm_api == "tag":
                frames, msg = await recv(ep)
            else:
                frames, msg = await am_recv(ep)

        close_msg = b"shutdown listener"

        # Tell the listener to shut down; the tag API sends the size first,
        # the AM API sends the message in one shot.
        if comm_api == "tag":
            close_msg_size = np.array([len(close_msg)], dtype=np.uint64)

            await ep.send(close_msg_size)
            await ep.send(close_msg)
        else:
            await ep.am_send(close_msg)

        print("Shutting Down Client...")
        return msg["data"]

    rx_cuda_obj = asyncio.get_event_loop().run_until_complete(read())
    # Touch the received object with an arithmetic op to make sure it is a
    # usable device object, not just opaque bytes.
    rx_cuda_obj + rx_cuda_obj
    num_bytes = nbytes(rx_cuda_obj)
    print(f"TOTAL DATA RECEIVED: {num_bytes}")

    # Rebuild the same object locally and compare it with what was received.
    cuda_obj_generator = cloudpickle.loads(func)
    pure_cuda_obj = cuda_obj_generator()

    if isinstance(rx_cuda_obj, cupy.ndarray):
        cupy.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj)
    else:
        from cudf.testing._utils import assert_eq

        assert_eq(rx_cuda_obj, pure_cuda_obj)
Example #8
0
    async def read(self, deserializers=("cuda", "dask", "pickle", "error")):
        """Receive one framed message from the UCX endpoint and deserialize it.

        Parameters
        ----------
        deserializers : tuple, optional
            Deserializer names forwarded to ``from_frames``; ``None`` is
            replaced by the default ``("cuda", "dask", "pickle", "error")``.

        Returns
        -------
        The deserialized message produced by ``from_frames``.

        Raises
        ------
        CommClosedError
            If the writer sent the shutdown flag, or if a UCX close/cancel
            error occurs while reading the header (the endpoint is aborted
            first in that case).
        """
        with log_errors():
            if deserializers is None:
                deserializers = ("cuda", "dask", "pickle", "error")

            try:
                # Recv meta data

                # Recv close flag and number of frames (_Bool, int64)
                msg = host_array(struct.calcsize("?Q"))
                await self.ep.recv(msg)
                (shutdown, nframes) = struct.unpack("?Q", msg)

                if shutdown:  # The writer is closing the connection
                    raise CommClosedError("Connection closed by writer")

                # Recv which frames are CUDA (bool) and
                # how large each frame is (uint64)
                header_fmt = nframes * "?" + nframes * "Q"
                header = host_array(struct.calcsize(header_fmt))
                await self.ep.recv(header)
                header = struct.unpack(header_fmt, header)
                cuda_frames, sizes = header[:nframes], header[nframes:]
            except (
                    ucp.exceptions.UCXCloseError,
                    ucp.exceptions.UCXCanceled,
            ) + (getattr(ucp.exceptions, "UCXConnectionReset", ()), ):
                self.abort()
                raise CommClosedError("Connection closed by writer")
            else:
                # Recv frames

                # Allocate a device buffer for CUDA frames, a host buffer
                # otherwise, sized from the header we just received.
                frames = [
                    device_array(each_size)
                    if is_cuda else host_array(each_size)
                    for is_cuda, each_size in zip(cuda_frames, sizes)
                ]
                # Zero-length frames were never sent by the writer, so skip
                # them on the receiving side as well.
                cuda_recv_frames, recv_frames = zip(
                    *((is_cuda, each_frame)
                      for is_cuda, each_frame in zip(cuda_frames, frames)
                      if nbytes(each_frame) > 0))

                # It is necessary to first populate `frames` with CUDA arrays and synchronize
                # the default stream before starting receiving to ensure buffers have been allocated
                if any(cuda_recv_frames):
                    synchronize_stream(0)

                for each_frame in recv_frames:
                    await self.ep.recv(each_frame)
                msg = await from_frames(
                    frames,
                    deserialize=self.deserialize,
                    deserializers=deserializers,
                    allow_offload=self.allow_offload,
                )
                return msg
Example #9
0
        async def write(ep):
            """Server-side handler: build a CUDA object from ``func``, send
            it to the client ``ITERATIONS`` times, then wait for the client's
            shutdown receipt and close the listener.

            Parameters
            ----------
            ep
                Endpoint-like object with awaitable ``send``/``recv``/``close``.
            """
            import cupy

            # Disable CuPy's memory pool so GPU usage reflects live data only.
            cupy.cuda.set_allocator(None)

            print("CREATING CUDA OBJECT IN SERVER...")
            cuda_obj_generator = cloudpickle.loads(func)
            cuda_obj = cuda_obj_generator()
            msg = {"data": to_serialize(cuda_obj)}
            frames = await to_frames(msg,
                                     serializers=("cuda", "dask", "pickle"))
            for i in range(ITERATIONS):
                # Send meta data
                await ep.send(np.array([len(frames)], dtype=np.uint64))
                # FIX: ``np.bool`` was deprecated in NumPy 1.20 and removed
                # in 1.24; the builtin ``bool`` produces the same bool_ dtype.
                await ep.send(
                    np.array(
                        [
                            hasattr(f, "__cuda_array_interface__")
                            for f in frames
                        ],
                        dtype=bool,
                    ))
                await ep.send(
                    np.array([nbytes(f) for f in frames], dtype=np.uint64))
                # Send frames
                for frame in frames:
                    if nbytes(frame) > 0:
                        await ep.send(frame)

            print("CONFIRM RECEIPT")
            # The client acknowledges by sending a size-prefixed close message.
            close_msg = b"shutdown listener"
            msg_size = np.empty(1, dtype=np.uint64)
            await ep.recv(msg_size)

            msg = np.empty(msg_size[0], dtype=np.uint8)
            await ep.recv(msg)
            recv_msg = msg.tobytes()
            assert recv_msg == close_msg
            print("Shutting Down Server...")
            await ep.close()
            lf.close()
Example #10
0
 def check(obj, expected):
     """Assert that ``nbytes`` reports ``expected`` for ``obj`` both
     directly and when wrapped in a memoryview."""
     for candidate in (obj, memoryview(obj)):
         assert nbytes(candidate) == expected
Example #11
0
def client(env, port, func):
    """Client half of a UCX transfer test: manually receive framed CUDA
    objects from the server, then validate the result against a locally
    reconstructed copy and (for large payloads) assert NVLink traffic grew.

    Parameters
    ----------
    env : dict
        Environment variables to inject before ``ucp.init()`` (UCX config).
    port : int
        Port of the listening server endpoint.
    func : bytes
        cloudpickle-serialized zero-arg callable that recreates the same
        CUDA object locally for comparison.
    """
    # wait for server to come up
    # receive cudf object
    # deserialize
    # assert deserialized msg is cdf
    # send receipt

    ucp.reset()
    os.environ.update(env)
    ucp.init()

    # must create context before importing
    # cudf/cupy/etc
    # Snapshot NVLink counters before any transfer so growth can be measured.
    before_rx, before_tx = total_nvlink_transfer()

    async def read():
        # Coroutine performing the receive loop; returns the deserialized
        # payload from the last iteration.
        await asyncio.sleep(1)  # give the server time to start listening
        ep = await get_ep("client", port)
        msg = None
        import cupy

        # Disable CuPy's memory pool so GPU usage reflects live data only.
        cupy.cuda.set_allocator(None)
        for i in range(ITERATIONS):
            # storing cu objects in msg
            # we delete to minimize GPU memory usage
            # del msg
            try:
                # Recv meta data: frame count, CUDA flags, then frame sizes.
                nframes = np.empty(1, dtype=np.uint64)
                await ep.recv(nframes)
                # FIX: ``np.bool`` was deprecated in NumPy 1.20 and removed
                # in 1.24; the builtin ``bool`` produces the same bool_ dtype.
                is_cudas = np.empty(nframes[0], dtype=bool)
                await ep.recv(is_cudas)
                sizes = np.empty(nframes[0], dtype=np.uint64)
                await ep.recv(sizes)
            except (ucp.exceptions.UCXCanceled,
                    ucp.exceptions.UCXCloseError) as e:
                msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST"
                # FIX: ``raise e(msg)`` called the exception *instance*,
                # which raises TypeError instead of the intended error.
                # Re-raise the same exception type with the message, chained
                # to the original.
                raise type(e)(msg) from e
            else:
                # Recv frames: allocate device or host buffers per the flags;
                # zero-length frames were never sent, so only placeholders
                # are appended for them.
                frames = []
                for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()):
                    if size > 0:
                        if is_cuda:
                            frame = cuda_array(size)
                        else:
                            frame = np.empty(size, dtype=np.uint8)
                        await ep.recv(frame)
                        frames.append(frame)
                    else:
                        if is_cuda:
                            frames.append(cuda_array(size))
                        else:
                            frames.append(b"")

            msg = await from_frames(frames)

        # Tell the server's listener to shut down: size first, then payload.
        close_msg = b"shutdown listener"
        close_msg_size = np.array([len(close_msg)], dtype=np.uint64)

        await ep.send(close_msg_size)
        await ep.send(close_msg)

        print("Shutting Down Client...")
        return msg["data"]

    rx_cuda_obj = asyncio.get_event_loop().run_until_complete(read())
    # Touch the received object with an arithmetic op to make sure it is a
    # usable device object, not just opaque bytes.
    rx_cuda_obj + rx_cuda_obj
    num_bytes = nbytes(rx_cuda_obj)
    print(f"TOTAL DATA RECEIVED: {num_bytes}")
    # nvlink only measures in KBs
    if num_bytes > 90000:
        rx, tx = total_nvlink_transfer()
        msg = f"RX BEFORE SEND: {before_rx} -- RX AFTER SEND: {rx} \
               -- TOTAL DATA: {num_bytes}"

        print(msg)
        assert rx > before_rx

    # Rebuild the same object locally and compare it with what was received.
    cuda_obj_generator = cloudpickle.loads(func)
    pure_cuda_obj = cuda_obj_generator()

    if isinstance(rx_cuda_obj, cupy.ndarray):
        cupy.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj)
    else:
        cudf.tests.utils.assert_eq(rx_cuda_obj, pure_cuda_obj)
Example #12
0
 def check(obj, expected):
     """Assert that ``nbytes`` reports ``expected`` for ``obj`` both
     directly and when wrapped in a memoryview."""
     for candidate in (obj, memoryview(obj)):
         assert nbytes(candidate) == expected
Example #13
0
    async def write(self, msg, serializers=None, on_error="message"):
        """Serialize ``msg`` into frames and write them to the tornado stream.

        A length-prefixed header (total payload size as uint64, then the
        frames prelude) is prepended; small payloads (< 128 kiB) are coalesced
        into a single buffer before writing.

        Parameters
        ----------
        msg
            Message to serialize via ``to_frames``.
        serializers : optional
            Serializer names forwarded to ``to_frames``.
        on_error : str, optional
            Error-handling mode forwarded to ``to_frames``.

        Returns
        -------
        int
            Total number of bytes enqueued (header plus all frames).

        Raises
        ------
        CommClosedError
            If the stream is already gone, or (via
            ``convert_stream_closed_error``) when the stream closes mid-write.
        """
        stream = self.stream
        if stream is None:
            raise CommClosedError()

        frames = await to_frames(
            msg,
            allow_offload=self.allow_offload,
            serializers=serializers,
            on_error=on_error,
            context={
                "sender": self.local_info,
                "recipient": self.remote_info,
                **self.handshake_options,
            },
            frame_split_size=self.max_shard_size,
        )
        frames_nbytes = [nbytes(f) for f in frames]
        frames_nbytes_total = sum(frames_nbytes)

        # Header = uint64 total payload size followed by the frames prelude.
        header = pack_frames_prelude(frames)
        header = struct.pack("Q",
                             nbytes(header) + frames_nbytes_total) + header

        frames = [header, *frames]
        frames_nbytes = [nbytes(header), *frames_nbytes]
        frames_nbytes_total += frames_nbytes[0]

        if frames_nbytes_total < 2**17:  # 128kiB
            # small enough, send in one go
            frames = [b"".join(frames)]
            frames_nbytes = [frames_nbytes_total]

        try:
            # trick to enque all frames for writing beforehand
            # NOTE(review): this appends directly to tornado's private
            # stream._write_buffer to avoid per-frame copies — it depends on
            # tornado internals.
            for each_frame_nbytes, each_frame in zip(frames_nbytes, frames):
                if each_frame_nbytes:
                    if stream._write_buffer is None:
                        raise StreamClosedError()

                    if isinstance(each_frame, memoryview):
                        # Make sure that `len(data) == data.nbytes`
                        # See <https://github.com/tornadoweb/tornado/pull/2996>
                        each_frame = memoryview(each_frame).cast("B")

                    stream._write_buffer.append(each_frame)
                    stream._total_write_index += each_frame_nbytes

            # start writing frames
            stream.write(b"")
        except StreamClosedError as e:
            self.stream = None
            self._closed = True
            # During interpreter shutdown the error conversion itself can
            # fail; skip it then.
            if not sys.is_finalizing():
                convert_stream_closed_error(self, e)
        except Exception:
            # Some OSError or a another "low-level" exception. We do not really know
            # what was already written to the underlying socket, so it is not even safe
            # to retry here using the same stream. The only safe thing to do is to
            # abort. (See also GitHub #4133).
            self.abort()
            raise
        self.abort()
            raise

        return frames_nbytes_total