def test_take_put(ctx_factory):
    """Smoke-test ``cl_array.multi_take_put`` for several array counts.

    For each count ``n``, one float32 device array of ``n * 8`` elements is
    created with ``cl_array.zeros`` and passed ``n`` times as the ``arrays``
    argument, together with one source offset per entry (multiples of 8).
    The return value is discarded and nothing is asserted; the test passes
    as long as the call does not raise.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    for n in [5, 17, 333]:
        one_field_size = 8
        buf_gpu = cl_array.zeros(queue, n * one_field_size, dtype=np.float32)

        # Index arrays are built on the host, then copied to the device.
        dest_host = np.array([0, 1, 2, 3, 32, 33, 34, 35], dtype=np.uint32)
        dest_indices = cl_array.to_device(queue, dest_host)

        src_host = np.array([7, 6, 5, 4, 3, 2, 1, 0], dtype=np.uint32)
        read_map = cl_array.to_device(queue, src_host)

        # The same buffer object is deliberately listed n times.
        source_arrays = [buf_gpu for _ in range(n)]
        offsets = [field * one_field_size for field in range(n)]

        cl_array.multi_take_put(
            arrays=source_arrays,
            dest_indices=dest_indices,
            src_indices=read_map,
            src_offsets=offsets,
            dest_shape=(96,),
        )
def test_take_put(ctx_factory):
    """Exercise ``cl_array.multi_take_put`` with a repeated source buffer.

    Runs once for each of 5, 17 and 333 arrays.  Every run hands the same
    float32 device array to ``multi_take_put`` that many times, with source
    offsets spaced 8 elements apart and a destination shape of ``(96,)``.
    No result is checked: the test only verifies that the call completes
    without raising.
    """
    queue = cl.CommandQueue(ctx_factory())

    for n in (5, 17, 333):
        one_field_size = 8
        total_size = n * one_field_size
        buf_gpu = cl_array.zeros(queue, total_size, dtype=np.float32)

        # Host-side index tables, uploaded to the device below.
        dest_positions = [0, 1, 2, 3, 32, 33, 34, 35]
        dest_indices = cl_array.to_device(
            queue, np.array(dest_positions, dtype=np.uint32))

        src_positions = [7, 6, 5, 4, 3, 2, 1, 0]
        read_map = cl_array.to_device(
            queue, np.array(src_positions, dtype=np.uint32))

        cl_array.multi_take_put(
            # n references to the one buffer, as in a list comprehension.
            arrays=[buf_gpu] * n,
            dest_indices=dest_indices,
            src_indices=read_map,
            # 0, 8, 16, ... -- one offset per listed array.
            src_offsets=list(range(0, total_size, one_field_size)),
            dest_shape=(96,),
        )