Example 1
    def copy_to_device(self, ary, stream=0):
        """Copy `ary` to `self`.

        If `ary` is CUDA memory, perform a device-to-device transfer.
        Otherwise, perform a host-to-device transfer.
        """
        if ary.size == 0:
            # Nothing to do
            return

        sentry_contiguous(self)
        stream = self._default_stream(stream)

        self_core, ary_core = array_core(self), array_core(ary)
        if _driver.is_device_memory(ary):
            sentry_contiguous(ary)
            check_array_compatibility(self_core, ary_core)
            _driver.device_to_device(self, ary, self.alloc_size, stream=stream)
        else:
            # Ensure same contiguity. Only makes a host-side copy if necessary
            # (i.e., in order to materialize a writable strided view)
            ary_core = np.array(
                ary_core,
                order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
                subok=True,
                copy=not ary_core.flags['WRITEABLE'])
            check_array_compatibility(self_core, ary_core)
            _driver.host_to_device(self,
                                   ary_core,
                                   self.alloc_size,
                                   stream=stream)
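
A minimal usage sketch for the method above (assumes a CUDA-capable device; the array names are illustrative): copying from a host ndarray takes the host-to-device branch, copying from another device array takes the device-to-device branch.

import numpy as np
from numba import cuda

host = np.arange(16, dtype=np.float32)

d_a = cuda.device_array_like(host)   # uninitialized device buffer
d_a.copy_to_device(host)             # host-to-device transfer

d_b = cuda.device_array_like(host)
d_b.copy_to_device(d_a)              # device-to-device transfer

assert np.array_equal(d_b.copy_to_host(), host)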
Example 2
def auto_device(obj, stream=0, copy=True):
    """
    Create a DeviceRecord or DeviceArray like obj and optionally copy data from
    host to device. If obj already represents device memory, it is returned and
    no copy is made.
    """
    if _driver.is_device_memory(obj):
        return obj, False
    elif hasattr(obj, '__cuda_array_interface__'):
        return numba.cuda.as_cuda_array(obj), False
    else:
        if isinstance(obj, np.void):
            devobj = from_record_like(obj, stream=stream)
        else:
            # This allows you to pass non-array objects like constants and
            # objects implementing the array interface
            # https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html
            # into this function (with no copy overhead for `obj`s that are
            # already `ndarray`s).
            obj = np.array(obj, copy=False, subok=True)
            sentry_contiguous(obj)
            devobj = from_array_like(obj, stream=stream)
        if copy:
            devobj.copy_to_device(obj, stream=stream)
        return devobj, True
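
A hedged sketch of how auto_device behaves for host and device inputs (assumes a CUDA-capable device; the module path numba.cuda.cudadrv.devicearray is where this helper lives in recent Numba versions, treat it as an assumption):

import numpy as np
from numba import cuda
from numba.cuda.cudadrv import devicearray

host = np.ones(8, dtype=np.float32)
devobj, converted = devicearray.auto_device(host)
print(converted)                    # True: a fresh device copy was made

d_arr = cuda.to_device(host)
same, converted = devicearray.auto_device(d_arr)
print(converted, same is d_arr)     # False True: device memory is returned unchanged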
Example 3
def auto_device(obj, stream=0, copy=True, user_explicit=False):
    """
    Create a DeviceRecord or DeviceArray like obj and optionally copy data from
    host to device. If obj already represents device memory, it is returned and
    no copy is made.
    """
    if _driver.is_device_memory(obj):
        return obj, False
    elif hasattr(obj, '__cuda_array_interface__'):
        return numba.cuda.as_cuda_array(obj), False
    else:
        if isinstance(obj, np.void):
            devobj = from_record_like(obj, stream=stream)
        else:
            # This allows you to pass non-array objects like constants and
            # objects implementing the array interface
            # https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html
            # into this function (with no copy overhead for `obj`s that are
            # already `ndarray`s).
            obj = np.array(obj, copy=False, subok=True)
            sentry_contiguous(obj)
            devobj = from_array_like(obj, stream=stream)
        if copy:
            if config.CUDA_WARN_ON_IMPLICIT_COPY:
                if (not user_explicit and (not isinstance(obj, DeviceNDArray)
                                           and isinstance(obj, np.ndarray))):
                    msg = ("Host array used in CUDA kernel will incur "
                           "copy overhead to/from device.")
                    warn(NumbaPerformanceWarning(msg))
            devobj.copy_to_device(obj, stream=stream)
        return devobj, True
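
A sketch of the warning path this variant adds (assumes a CUDA-capable device, that `config` here is numba.core.config as in the Numba source, and that setting the flag directly is enough for illustration; it is often already enabled by default):

import warnings
import numpy as np
from numba.core import config
from numba.cuda.cudadrv import devicearray

config.CUDA_WARN_ON_IMPLICIT_COPY = 1       # the flag checked above (assumed settable this way)
host = np.zeros(32, dtype=np.float32)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    devicearray.auto_device(host, user_explicit=False)   # host ndarray: implicit copy
print([str(w.message) for w in caught])     # expect the copy-overhead NumbaPerformanceWarning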
Example 4
    def _template(self, obj):
        self.assertTrue(driver.is_device_memory(obj))
        driver.require_device_memory(obj)
        if driver.USE_NV_BINDING:
            expected_class = driver.binding.CUdeviceptr
        else:
            expected_class = drvapi.cu_device_ptr
        self.assertTrue(isinstance(obj.device_ctypes_pointer, expected_class))
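
The checks exercised by this test can be reproduced directly (requires a CUDA context; a sketch): is_device_memory is truthy only for objects backed by device memory, and require_device_memory raises for anything else.

import numpy as np
from numba import cuda
from numba.cuda.cudadrv import driver

d_arr = cuda.to_device(np.arange(4, dtype=np.float32))
host = np.arange(4, dtype=np.float32)

print(driver.is_device_memory(d_arr))    # True
print(driver.is_device_memory(host))     # False

driver.require_device_memory(d_arr)      # passes silently
try:
    driver.require_device_memory(host)   # host memory is rejected
except Exception as exc:
    print(type(exc).__name__, exc)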
Example 5
    def _get_params(self):
        params = KernelParams()
        params.blockDimX = self.params.get('blockDimX', 1)
        params.blockDimY = self.params.get('blockDimY', 1)
        params.blockDimZ = self.params.get('blockDimZ', 1)
        params.gridDimX = self.params.get('gridDimX', 1)
        params.gridDimY = self.params.get('gridDimY', 1)
        params.gridDimZ = self.params.get('gridDimZ', 1)
        params.sharedMemBytes = self.params.get('sharedMemBytes', 0)

        if isinstance(self.kernel, AutoJitCUDAKernel):
            kernel = self.kernel.specialize(*self.args)
        elif isinstance(self.kernel, CUDAKernel):
            kernel = self.kernel
        else:
            raise Exception('invalid kernel type "%s"' %
                            type(self.kernel).__name__)
        params.func = kernel._func.get().handle

        retr, kernel_args = [], []
        for t, v in zip(kernel.argument_types, self.args):
            kernel._prepare_args(t, v, 0, retr, kernel_args)

        # TODO: take care of retr after graph launched
        if len(retr):
            raise Exception('host array as kernel node args not supported yet')

        param_vals = []
        for arg in kernel_args:
            if is_device_memory(arg):
                param_vals.append(addressof(device_ctypes_pointer(arg)))
            else:
                param_vals.append(addressof(arg))

        params.kernelParams = (c_void_p * len(param_vals))(
            *param_vals) if len(param_vals) else None
        params.extra = self.params.get('extra', None)
        return params
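
The pointer-marshalling loop above mirrors what driver.launch_kernel does with the prepared arguments. A small sketch of the same pattern for one device array and one ctypes scalar, using the ctypes driver binding (the argument layout is illustrative, not a complete Numba array signature):

from ctypes import addressof, c_int, c_void_p
import numpy as np
from numba import cuda
from numba.cuda.cudadrv.driver import device_ctypes_pointer, is_device_memory

d_x = cuda.to_device(np.zeros(8, dtype=np.float32))
args = [d_x, c_int(8)]                  # keep the list alive so addressof() stays valid

param_vals = []
for arg in args:
    if is_device_memory(arg):
        # device objects contribute the address of their device pointer handle
        param_vals.append(addressof(device_ctypes_pointer(arg)))
    else:
        # plain ctypes values contribute their own address
        param_vals.append(addressof(arg))

params = (c_void_p * len(param_vals))(*param_vals)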
Example 6
    def _template(self, obj):
        self.assertTrue(driver.is_device_memory(obj))
        driver.require_device_memory(obj)
        self.assertTrue(isinstance(obj.device_ctypes_pointer,
                                   drvapi.cu_device_ptr))
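
This is the pre-USE_NV_BINDING form of the test in Example 4: with the ctypes binding, device_ctypes_pointer is a drvapi.cu_device_ptr, a ctypes integer whose .value holds the raw device address. An illustrative sketch (requires a CUDA device):

import numpy as np
from numba import cuda

d_arr = cuda.to_device(np.arange(4, dtype=np.float32))
ptr = d_arr.device_ctypes_pointer   # drvapi.cu_device_ptr under the ctypes binding
print(hex(ptr.value))               # raw device address held by the handle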
Example 7
def make_array_args(arr):
    # Assumed reconstruction (only the final `return args` survives verbatim):
    # Numba passes each array argument flattened as
    # (meminfo, parent, nitems, itemsize, data, shape..., strides...).
    args = [c_void_p(0), c_void_p(0),                # meminfo and parent are unused here
            c_ssize_t(arr.size), c_ssize_t(arr.dtype.itemsize),
            arr]                                     # the device array; resolved to its pointer below
    args += [c_ssize_t(s) for s in arr.shape]
    args += [c_ssize_t(s) for s in arr.strides]
    return args


# Create the list of arguments - we compiled for float32[:], int32, float32[:],
# float32[:]
args = []
args += make_array_args(d_r)
args += [ctypes.c_int(13)]
args += make_array_args(d_x)
args += make_array_args(d_y)

# Make a list of pointers to the arguments
param_vals = []
for arg in args:
    if is_device_memory(arg):
        param_vals.append(addressof(device_ctypes_pointer(arg)))
    else:
        param_vals.append(addressof(arg))

params = (c_void_p * len(param_vals))(*param_vals)

# (see cudadrv.launch_kernel)

# CUresult cuLaunchKernel(CUfunction f,
#                        unsigned int gridDimX,
#                        unsigned int gridDimY,
#                        unsigned int gridDimZ,
#                        unsigned int blockDimX,
#                        unsigned int blockDimY,
#                        unsigned int blockDimZ,