Example #1
    def _maybe_transpose(self, d_ary, d_out):
        """Transpose device arrays into row-major format if needed, as cuFFT
        can't handle column-major data."""

        transpose_in = len(d_ary.shape) == 2 and d_ary.is_f_contiguous()
        transpose_out = len(d_out.shape) == 2 and d_out.is_f_contiguous()
        if transpose_in:
            # Create a row-major device array
            used_in = DeviceNDArray(
                shape=(d_ary.shape[1], d_ary.shape[0]),
                strides=(d_ary.dtype.itemsize,
                         d_ary.dtype.itemsize * d_ary.shape[1]),
                dtype=d_ary.dtype)
            transpose(d_ary, used_in)
        else:
            used_in = d_ary
        if transpose_out:
            # Create a row-major device array
            used_out = DeviceNDArray(
                shape=d_out.shape,
                strides=(d_out.dtype.itemsize * d_out.shape[1],
                         d_out.dtype.itemsize),
                dtype=d_out.dtype)
        else:
            used_out = d_out
        return used_in, used_out, transpose_out
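The check above relies on numba's device-array contiguity queries. A minimal sketch (not from the original project, assuming a CUDA-capable GPU) of detecting a column-major 2-D device array and allocating a row-major target of the transposed shape:

import numpy as np
from numba import cuda

def needs_transpose(d_ary):
    # True for 2-D device arrays stored in column-major (Fortran) order
    return d_ary.ndim == 2 and d_ary.is_f_contiguous()

host = np.asfortranarray(np.arange(12, dtype=np.float32).reshape(3, 4))
d_in = cuda.to_device(host)          # keeps the Fortran layout on the GPU
assert needs_transpose(d_in)
# Row-major (C-ordered) scratch array with the transposed shape
d_scratch = cuda.device_array(d_in.shape[::-1], dtype=d_in.dtype)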
Example #3
def test_pyarrow_memalloc(c, dtype):
    ctx, nb_ctx = context_choices[c]
    size = 10
    arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx)

    # wrap the CudaBuffer in a numba device array
    mem = cbuf.to_numba()
    darr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=mem)
    np.testing.assert_equal(darr.copy_to_host(), arr)
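context_choices and make_random_buffer are fixtures from the surrounding test suite. A minimal standalone sketch of the same wrapping step, assuming one CUDA-capable GPU and a CUDA-enabled pyarrow build:

import numpy as np
from numba.cuda.cudadrv.devicearray import DeviceNDArray
from pyarrow import cuda

ctx = cuda.Context(0)                     # pyarrow CUDA context on device 0
host = np.arange(10, dtype=np.uint8)
cbuf = ctx.buffer_from_data(host)         # copies the host bytes to the device
darr = DeviceNDArray(host.shape, host.strides, host.dtype,
                     gpu_data=cbuf.to_numba())
np.testing.assert_equal(darr.copy_to_host(), host)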
Example #5
def test_numba_context(c, dtype):
    ctx, nb_ctx = context_choices[c]
    size = 10
    with nb_cuda.gpus[0]:
        arr, cbuf = make_random_buffer(size,
                                       target='device',
                                       dtype=dtype,
                                       ctx=ctx)
        assert cbuf.context.handle == nb_ctx.handle.value
        mem = cbuf.to_numba()
        darr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=mem)
        np.testing.assert_equal(darr.copy_to_host(), arr)
        darr[0] = 99
        cbuf.context.synchronize()
        arr2 = np.frombuffer(cbuf.copy_to_host(), dtype=dtype)
        assert arr2[0] == 99
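The same round trip can be written without the test fixtures: a write made through the numba view is visible when the CudaBuffer is copied back to the host. A hedged sketch assuming device 0:

import numpy as np
from numba import cuda as nb_cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
from pyarrow import cuda

ctx = cuda.Context(0)
host = np.zeros(10, dtype=np.uint8)
cbuf = ctx.buffer_from_data(host)
with nb_cuda.gpus[0]:
    darr = DeviceNDArray(host.shape, host.strides, host.dtype,
                         gpu_data=cbuf.to_numba())
    darr[0] = 99                          # write through the numba view
    cbuf.context.synchronize()
    arr2 = np.frombuffer(cbuf.copy_to_host(), dtype=host.dtype)
    assert arr2[0] == 99                  # the write is visible via the CudaBuffer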
Example #6
def test_numba_memalloc(c, dtype):
    ctx, nb_ctx = context_choices[c]
    dtype = np.dtype(dtype)
    # Allocate memory using the numba context.
    # Warning: this is not reflected in the pyarrow context manager
    # (e.g. bytes_allocated does not change).
    size = 10
    mem = nb_ctx.memalloc(size * dtype.itemsize)
    darr = DeviceNDArray((size, ), (dtype.itemsize, ), dtype, gpu_data=mem)
    darr[:5] = 99
    darr[5:] = 88
    np.testing.assert_equal(darr.copy_to_host()[:5], 99)
    np.testing.assert_equal(darr.copy_to_host()[5:], 88)

    # wrap the numba-allocated memory with a CudaBuffer
    cbuf = cuda.CudaBuffer.from_numba(mem)
    arr2 = np.frombuffer(cbuf.copy_to_host(), dtype=dtype)
    np.testing.assert_equal(arr2, darr.copy_to_host())
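Going the other way, memory allocated through numba's driver context can be exposed to pyarrow with CudaBuffer.from_numba. A hedged standalone sketch, assuming one GPU:

import numpy as np
from numba import cuda as nb_cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
from pyarrow import cuda

dtype = np.dtype(np.uint8)
size = 10
with nb_cuda.gpus[0]:
    nb_ctx = nb_cuda.current_context()
    mem = nb_ctx.memalloc(size * dtype.itemsize)
    darr = DeviceNDArray((size,), (dtype.itemsize,), dtype, gpu_data=mem)
    darr[:] = 7                              # fill through the numba view
    cbuf = cuda.CudaBuffer.from_numba(mem)   # zero-copy view of the same memory
    np.testing.assert_equal(np.frombuffer(cbuf.copy_to_host(), dtype=dtype), 7)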
def numba_cuda_DeviceNDArray(cbuf):
    """Return numba DeviceNDArray view of a pyarrow.cuda.CudaBuffer.
    """
    import numpy as np
    from numba.cuda.cudadrv.devicearray import DeviceNDArray
    dtype = np.dtype('uint8')
    return DeviceNDArray((cbuf.size, ), (dtype.itemsize, ),
                         dtype,
                         gpu_data=cbuf.to_numba())
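Hypothetical usage of the helper above, viewing an 8-byte CudaBuffer as uint8 without copying:

import numpy as np
from pyarrow import cuda

ctx = cuda.Context(0)
data = np.arange(8, dtype=np.uint8)
cbuf = ctx.buffer_from_data(data)
view = numba_cuda_DeviceNDArray(cbuf)     # shape (8,), dtype uint8
np.testing.assert_equal(view.copy_to_host(), data)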
Example #9
def gpu_view_as(arr, dtype, shape=None, strides=None):
    dtype = np.dtype(dtype)
    if strides is None:
        strides = (arr.strides if arr.dtype == dtype else dtype.itemsize)
    if shape is None:
        shape = (arr.shape if arr.dtype == dtype else arr.size //
                 dtype.itemsize)
    return DeviceNDArray(shape=shape,
                         strides=strides,
                         dtype=dtype,
                         gpu_data=arr.gpu_data)
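Note that the default shape is arr.size // dtype.itemsize, i.e. an element count rather than a byte count, so the natural direction is viewing a 1-byte (uint8) device array as a wider dtype. Hypothetical usage, assuming a contiguous 1-D byte array:

import numpy as np
from numba import cuda

host = np.arange(4, dtype=np.float32)
d_bytes = cuda.to_device(host.view(np.uint8))   # 16 raw bytes on the GPU
d_f32 = gpu_view_as(d_bytes, np.float32)        # shape 16 // 4 -> (4,)
np.testing.assert_equal(d_f32.copy_to_host(), host)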
Example #10
    def get_column(schema):
        offset = schema['data_buffer']['offset']
        raw_size = schema['data_buffer']['length']
        size = schema['length']
        assert schema['dtype']['bitwidth'] == 32
        assert schema['dtype']['name'] == 'FloatingPoint'
        raw_data_col1 = data_region[offset:offset + raw_size]
        assert raw_data_col1.size == raw_size

        dtype = np.dtype(np.float32)
        itemsize = dtype.itemsize
        ary = DeviceNDArray(shape=(raw_size // itemsize, ),
                            strides=(itemsize, ),
                            dtype=dtype,
                            gpu_data=raw_data_col1.gpu_data)
        hary = ary[:size].copy_to_host()
        return hary
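get_column depends on a schema dict and a device-resident data_region from the surrounding IPC code. A hedged sketch of just the reinterpretation step, with a hypothetical on-GPU byte region standing in for data_region:

import numpy as np
from numba import cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray

values = np.linspace(0.0, 1.0, 8, dtype=np.float32)
data_region = cuda.to_device(values.view(np.uint8))   # raw column bytes
itemsize = np.dtype(np.float32).itemsize
col = DeviceNDArray(shape=(data_region.size // itemsize,),
                    strides=(itemsize,),
                    dtype=np.float32,
                    gpu_data=data_region.gpu_data)
np.testing.assert_equal(col.copy_to_host(), values)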
Example #11
def test_pyarrow_jit(c, dtype):
    ctx, nb_ctx = context_choices[c]

    @nb_cuda.jit
    def increment_by_one(an_array):
        pos = nb_cuda.grid(1)
        if pos < an_array.size:
            an_array[pos] += 1

    # apply a numba.cuda kernel to memory held by the CudaBuffer
    size = 10
    arr, cbuf = make_random_buffer(size, target='device', dtype=dtype, ctx=ctx)
    threadsperblock = 32
    blockspergrid = (arr.size + (threadsperblock - 1)) // threadsperblock
    mem = cbuf.to_numba()
    darr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=mem)
    increment_by_one[blockspergrid, threadsperblock](darr)
    cbuf.context.synchronize()
    arr1 = np.frombuffer(cbuf.copy_to_host(), dtype=arr.dtype)
    np.testing.assert_equal(arr1, arr + 1)
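The same kernel launch can be written without the test fixtures. A minimal sketch, assuming one GPU and a CUDA-enabled pyarrow build:

import numpy as np
from numba import cuda as nb_cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
from pyarrow import cuda

@nb_cuda.jit
def increment_by_one(an_array):
    pos = nb_cuda.grid(1)
    if pos < an_array.size:
        an_array[pos] += 1

ctx = cuda.Context(0)
host = np.arange(10, dtype=np.float32)
cbuf = ctx.buffer_from_data(host)
darr = DeviceNDArray(host.shape, host.strides, host.dtype,
                     gpu_data=cbuf.to_numba())
increment_by_one[1, 32](darr)             # one block of 32 threads covers 10 items
cbuf.context.synchronize()
arr1 = np.frombuffer(cbuf.copy_to_host(), dtype=host.dtype)
np.testing.assert_equal(arr1, host + 1)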