def test_send_recv_cu(cuda_obj_generator, enable_rmm):
    """Spawn one server and one client per CUDA device and check they exit cleanly.

    ``cuda_obj_generator`` is serialized with ``cloudpickle`` and handed to
    the ``server`` and ``client`` process targets together with
    ``enable_rmm``.  The test passes when the server reports
    ``"initialized"`` over a pipe and every spawned process finishes with
    exit code 0.

    ``os.environ`` is temporarily updated before each spawn so the child
    inherits the per-device settings; the original ``CUDA_VISIBLE_DEVICES``
    and ``UCX_NET_DEVICES`` values are handed back to
    ``restore_environment_variables`` even when the handshake or a spawn
    fails.
    """
    cuda_visible_devices_base = os.environ.get("CUDA_VISIBLE_DEVICES")
    ucx_net_devices_base = os.environ.get("UCX_NET_DEVICES")

    # One client is spawned per device returned by get_cuda_devices().
    # NOTE(review): an earlier comment said "grab first two devices" --
    # confirm whether the helper truncates the list.
    cuda_visible_devices = get_cuda_devices()
    num_workers = len(cuda_visible_devices)

    port = random.randint(13000, 15500)
    # serialize function and send to the client and server
    # server will use the return value of the contents,
    # serialize the values, then send serialized values to client.
    # client will compare return values of the deserialized
    # data sent from the server

    server_env = get_environment_variables(cuda_visible_devices[0])

    func = cloudpickle.dumps(cuda_obj_generator)
    ctx = multiprocessing.get_context("spawn")

    client_processes = []
    try:
        os.environ.update(server_env)
        # Create the pipe from the same context that creates the processes.
        parent_conn, child_conn = ctx.Pipe()
        server_process = ctx.Process(
            name="server",
            target=server,
            args=[server_env, port, func, enable_rmm, num_workers, child_conn],
        )
        server_process.start()

        # Block until the server signals it is ready before starting clients.
        server_msg = parent_conn.recv()
        assert server_msg == "initialized"

        print(cuda_visible_devices)
        for i, device in enumerate(cuda_visible_devices):
            # cudf will ping the driver for validity of device
            # this will influence device on which a cuda context is created.
            # work around is to update env with new CVD before spawning
            client_env = get_environment_variables(device)
            os.environ.update(client_env)

            proc = ctx.Process(
                name="client_" + str(i),
                target=client,
                args=[client_env, port, func, enable_rmm],
            )
            proc.start()
            client_processes.append(proc)
    finally:
        # Restore the environment no matter how the spawning phase ended, so
        # a failed handshake or spawn cannot leak modified variables into
        # later tests.
        restore_environment_variables(cuda_visible_devices_base,
                                      ucx_net_devices_base)

    # Join everything before asserting so a failing client does not leave
    # the remaining processes un-joined.
    server_process.join()
    for proc in client_processes:
        proc.join()

    for proc in client_processes:
        assert proc.exitcode == 0, "%s exited with code %s" % (
            proc.name,
            proc.exitcode,
        )

    assert server_process.exitcode == 0, "server exited with code %s" % (
        server_process.exitcode,
    )

def empty_dataframe():
    """Return ``head(0)`` of a one-row cudf DataFrame with columns "a" and "b"."""
    # cudf is imported lazily, inside the function body.
    import cudf

    single_row = cudf.DataFrame({"a": [1.0], "b": [1.0]})
    return single_row.head(0)


def cupy_obj():
    """Return ``cupy.arange`` called with 10**8."""
    # cupy is imported lazily, inside the function body.
    import cupy

    n_elements = 10**8
    result = cupy.arange(n_elements)
    return result


@pytest.mark.skipif(len(get_cuda_devices()) < 2,
                    reason="A minimum of two GPUs is required")
@pytest.mark.parametrize("cuda_obj_generator",
                         [dataframe, empty_dataframe, series, cupy_obj])
@pytest.mark.parametrize("enable_rmm", [True, False])
def test_send_recv_cu(cuda_obj_generator, enable_rmm):
    cuda_visible_devices_base = os.environ.get("CUDA_VISIBLE_DEVICES")
    ucx_net_devices_base = os.environ.get("UCX_NET_DEVICES")

    # grab first two devices
    cuda_visible_devices = get_cuda_devices()
    num_workers = len(cuda_visible_devices)

    port = random.randint(13000, 15500)
    # serialize function and send to the client and server
    # server will use the return value of the contents,