Example #1
def mapped_array(shape,
                 dtype=np.float_,
                 strides=None,
                 order='C',
                 stream=0,
                 portable=False,
                 wc=False):
    """mapped_array(shape, dtype=np.float_, strides=None, order='C', stream=0,
                    portable=False, wc=False)

    Allocate a mapped ndarray with a buffer that is pinned and mapped onto
    the device. Similar to np.empty().

    :param portable: a boolean flag to allow the allocated device memory to be
                     usable in multiple devices.
    :param wc: a boolean flag to enable write-combined allocation, which is
               faster to write by the host and to read by the device, but
               slower to read by the host.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order)
    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    buffer = current_context().memhostalloc(bytesize, mapped=True)
    npary = np.ndarray(shape=shape,
                       strides=strides,
                       dtype=dtype,
                       order=order,
                       buffer=buffer)
    mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
    mappedview.device_setup(buffer, stream=stream)
    return mappedview
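
A minimal usage sketch for mapped_array (assumes a CUDA-capable GPU; the kernel and sizes below are illustrative, not part of the original):

import numpy as np
from numba import cuda

@cuda.jit
def double(arr):
    i = cuda.grid(1)
    if i < arr.shape[0]:
        arr[i] *= 2

m = cuda.mapped_array(16, dtype=np.float32)   # pinned host buffer, mapped into the device address space
m[:] = np.arange(16, dtype=np.float32)        # host writes directly, no explicit transfer
double[1, 16](m)                              # kernel reads/writes the same buffer
cuda.synchronize()                            # make sure the kernel finished before the host reads m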
Example #2
def managed_array(shape,
                  dtype=np.float_,
                  strides=None,
                  order='C',
                  stream=0,
                  attach_global=True):
    """managed_array(shape, dtype=np.float_, strides=None, order='C', stream=0,
                     attach_global=True)

    Allocate a np.ndarray with a buffer that is managed.
    Similar to np.empty().

    Managed memory is supported on Linux / x86 and PowerPC, and is considered
    experimental on Windows and Linux / AArch64.

    :param attach_global: A flag indicating whether to attach globally. Global
                          attachment implies that the memory is accessible from
                          any stream on any device. If ``False``, attachment is
                          *host*, and memory is only accessible by devices
                          with Compute Capability 6.0 and later.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order)
    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    buffer = current_context().memallocmanaged(bytesize,
                                               attach_global=attach_global)
    npary = np.ndarray(shape=shape,
                       strides=strides,
                       dtype=dtype,
                       order=order,
                       buffer=buffer)
    managedview = np.ndarray.view(npary, type=devicearray.ManagedNDArray)
    managedview.device_setup(buffer, stream=stream)
    return managedview
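
A minimal usage sketch for managed_array (assumes a supported platform such as Linux / x86 with a CUDA GPU; the kernel is illustrative):

import numpy as np
from numba import cuda

@cuda.jit
def increment(arr):
    i = cuda.grid(1)
    if i < arr.shape[0]:
        arr[i] += 1

m = cuda.managed_array(32, dtype=np.float64)  # unified memory, visible to host and device
m[:] = 0.0                                    # host initializes it in place
increment[1, 32](m)
cuda.synchronize()                            # synchronize before the host touches managed memory again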
Example #3
    def _do_getitem(self, item, stream=0):
        stream = self._default_stream(stream)
        # Locate the requested field within the record and view the device
        # buffer at that field's byte offset.
        typ, offset = self.dtype.fields[item]
        newdata = self.gpu_data.view(offset)

        if typ.shape == ():
            if typ.names is not None:
                # Nested record: return a device-side view without copying.
                return DeviceRecord(dtype=typ, stream=stream, gpu_data=newdata)
            else:
                # Plain scalar field: copy the single element back to the host.
                hostary = np.empty(1, dtype=typ)
                _driver.device_to_host(dst=hostary,
                                       src=newdata,
                                       size=typ.itemsize,
                                       stream=stream)
            return hostary[0]
        else:
            # Array field: wrap the device buffer in a DeviceNDArray view.
            shape, strides, dtype = \
                prepare_shape_strides_dtype(typ.shape,
                                            None,
                                            typ.subdtype[0], 'C')
            return DeviceNDArray(shape=shape,
                                 strides=strides,
                                 dtype=dtype,
                                 gpu_data=newdata,
                                 stream=stream)
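
_do_getitem backs field access on a DeviceRecord: scalar fields are copied back to the host, while array fields come back as device views. A minimal sketch (the record dtype is illustrative; assumes a CUDA-capable GPU):

import numpy as np
from numba import cuda

rec_dtype = np.dtype([('mass', np.float64), ('pos', np.float32, (3,))])
d_arr = cuda.to_device(np.zeros(4, dtype=rec_dtype))

d_rec = d_arr[0]        # indexing with an int yields a DeviceRecord
mass = d_rec['mass']    # scalar field: copied to the host as a np.float64
pos = d_rec['pos']      # array field: returned as a DeviceNDArray view, no copy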
Example #4
def device_array(shape, dtype=np.float_, strides=None, order='C', stream=0):
    """device_array(shape, dtype=np.float_, strides=None, order='C', stream=0)

    Allocate an empty device ndarray. Similar to :func:`numpy.empty`.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order)
    return devicearray.DeviceNDArray(shape=shape,
                                     strides=strides,
                                     dtype=dtype,
                                     stream=stream)
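
A minimal usage sketch for device_array (assumes a CUDA-capable GPU):

import numpy as np
from numba import cuda

d_out = cuda.device_array((2, 3), dtype=np.float32)  # uninitialized device memory, like np.empty
# ... fill d_out from a kernel ...
host_out = d_out.copy_to_host()                      # explicit copy back to a new NumPy array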
Example #5
def pinned_array(shape, dtype=np.float_, strides=None, order='C'):
    """pinned_array(shape, dtype=np.float_, strides=None, order='C')

    Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
    (pagelocked).  Similar to :func:`np.empty() <numpy.empty>`.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order)
    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    buffer = current_context().memhostalloc(bytesize)
    return np.ndarray(shape=shape,
                      strides=strides,
                      dtype=dtype,
                      order=order,
                      buffer=buffer)
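
A minimal usage sketch for pinned_array (assumes a CUDA-capable GPU; the stream usage is illustrative):

import numpy as np
from numba import cuda

h = cuda.pinned_array(1 << 20, dtype=np.float32)  # page-locked host buffer
h[:] = 1.0

stream = cuda.stream()
d = cuda.to_device(h, stream=stream)   # transfers from pinned memory can be asynchronous
d.copy_to_host(h, stream=stream)
stream.synchronize()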
Example #6
def from_cuda_array_interface(desc, owner=None, sync=True):
    """Create a DeviceNDArray from a cuda-array-interface description.
    The ``owner`` is the owner of the underlying memory.
    The resulting DeviceNDArray will acquire a reference from it.

    If ``sync`` is ``True``, then the imported stream (if present) will be
    synchronized.
    """
    version = desc.get('version')
    # Mask introduced in version 1
    if 1 <= version:
        mask = desc.get('mask')
        # Would ideally be better to detect if the mask is all valid
        if mask is not None:
            raise NotImplementedError('Masked arrays are not supported')

    shape = desc['shape']
    strides = desc.get('strides')
    dtype = np.dtype(desc['typestr'])

    shape, strides, dtype = prepare_shape_strides_dtype(shape,
                                                        strides,
                                                        dtype,
                                                        order='C')
    size = driver.memory_size_from_info(shape, strides, dtype.itemsize)

    devptr = driver.get_devptr_for_active_ctx(desc['data'][0])
    data = driver.MemoryPointer(current_context(),
                                devptr,
                                size=size,
                                owner=owner)
    stream_ptr = desc.get('stream', None)
    if stream_ptr is not None:
        stream = external_stream(stream_ptr)
        if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
            stream.synchronize()
    else:
        stream = 0  # No "Numba default stream", not the CUDA default stream
    da = devicearray.DeviceNDArray(shape=shape,
                                   strides=strides,
                                   dtype=dtype,
                                   gpu_data=data,
                                   stream=stream)
    return da
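
from_cuda_array_interface is used by cuda.as_cuda_array() to import foreign GPU buffers. A minimal sketch (assumes CuPy is installed and a GPU is available; CuPy is just one example of a producer of __cuda_array_interface__):

import cupy as cp
from numba import cuda

cp_arr = cp.arange(10, dtype=cp.float32)
d_arr = cuda.as_cuda_array(cp_arr)   # wraps the CuPy allocation, no copy is made
print(d_arr.copy_to_host())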