def memalloc(self, nbytes):
    # Allocate from the CuPy pool and wrap the result in a MemoryPointer as
    # required by Numba.
    cp_mp = self._mp.malloc(nbytes)
    if self._logging:
        print("Allocated %d bytes at %x" % (nbytes, cp_mp.ptr))
    self._allocations[cp_mp.ptr] = cp_mp
    return MemoryPointer(
        cuda.current_context(),
        ctypes.c_uint64(int(cp_mp.ptr)),
        nbytes,
        finalizer=self._make_finalizer(cp_mp, nbytes),
    )
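The `_make_finalizer` helper referenced above is not shown here. A minimal sketch of what it might look like, assuming the CuPy pool reclaims the block once the last reference to `cp_mp` is dropped:

def _make_finalizer(self, cp_mp, nbytes):
    def finalizer():
        if self._logging:
            print("Freeing %d bytes at %x" % (nbytes, cp_mp.ptr))
        # Dropping the mapping entry releases the last reference to the
        # CuPy MemoryPointer, returning the block to the pool.
        del self._allocations[cp_mp.ptr]

    return finalizer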
def memalloc(self, size): """ Allocate an on-device array from the RMM pool. """ buf = librmm.DeviceBuffer(size=size) ctx = self.context ptr = ctypes.c_uint64(int(buf.ptr)) finalizer = _make_emm_plugin_finalizer(ptr.value, self.allocations) # self.allocations is initialized by the parent, HostOnlyCUDAManager, # and cleared upon context reset, so although we insert into it here # and delete from it in the finalizer, we need not do any other # housekeeping elsewhere. self.allocations[ptr.value] = buf return MemoryPointer(ctx, ptr, size, finalizer=finalizer)
def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
    """
    device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)

    Allocate an empty Numba device array. Clone of Numba `cuda.device_array`,
    but uses RMM for device memory management.
    """
    shape, strides, dtype = cuda.api._prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    datasize = cuda.driver.memory_size_from_info(
        shape, strides, dtype.itemsize
    )
    buf = librmm.DeviceBuffer(size=datasize, stream=stream)
    ctx = cuda.current_context()
    ptr = ctypes.c_uint64(int(buf.ptr))
    mem = MemoryPointer(ctx, ptr, datasize, owner=buf)
    return cuda.cudadrv.devicearray.DeviceNDArray(
        shape, strides, dtype, gpu_data=mem
    )
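A brief usage sketch, assuming RMM is installed and the `device_array` above is importable; the returned `DeviceNDArray` behaves like any Numba device array:

import numpy as np
from numba import cuda

@cuda.jit
def double(x):
    i = cuda.grid(1)
    if i < x.size:
        x[i] *= 2

d_arr = device_array(1024, dtype=np.float64)
d_arr.copy_to_device(np.arange(1024, dtype=np.float64))
double.forall(d_arr.size)(d_arr)
result = d_arr.copy_to_host()  # [0., 2., 4., ...]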
def memalloc(self, size): """ Allocate an on-device array from the RMM pool. """ buf = librmm.DeviceBuffer(size=size) ctx = self.context if config.CUDA_USE_NVIDIA_BINDING: ptr = CUdeviceptr(int(buf.ptr)) else: # expect ctypes bindings in numba ptr = ctypes.c_uint64(int(buf.ptr)) finalizer = _make_emm_plugin_finalizer(int(buf.ptr), self.allocations) # self.allocations is initialized by the parent, HostOnlyCUDAManager, # and cleared upon context reset, so although we insert into it here # and delete from it in the finalizer, we need not do any other # housekeeping elsewhere. self.allocations[int(buf.ptr)] = buf return MemoryPointer(ctx, ptr, size, finalizer=finalizer)
def device_array_from_ptr(ptr, nelem, dtype=np.float64, finalizer=None):
    """
    device_array_from_ptr(ptr, nelem, dtype=np.float64, finalizer=None)

    Create a Numba device array from a ptr, element count, and dtype.
    """
    # Handle datetime columns specially; otherwise normalize the dtype.
    if dtype == np.datetime64:
        dtype = np.dtype("datetime64[ms]")
    else:
        dtype = np.dtype(dtype)

    elemsize = dtype.itemsize
    datasize = elemsize * nelem
    shape = (nelem,)
    strides = (elemsize,)
    # Note: by default there is no finalizer -- the memory is freed
    # externally by whoever owns `ptr`.
    ctx = cuda.current_context()
    ptr = ctypes.c_uint64(int(ptr))
    mem = MemoryPointer(ctx, ptr, datasize, finalizer=finalizer)
    return cuda.cudadrv.devicearray.DeviceNDArray(
        shape, strides, dtype, gpu_data=mem
    )
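A usage sketch, assuming RMM provides the backing allocation; since no finalizer is passed, the buffer must outlive the array:

import numpy as np
import rmm

buf = rmm.DeviceBuffer(size=16 * np.dtype(np.float64).itemsize)
arr = device_array_from_ptr(buf.ptr, 16, dtype=np.float64)
# `buf` owns the memory and will free it; keep it alive while `arr` is in use.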
def memalloc(self, size):
    ptr = my_alloc(size)
    ctx = self.context
    finalizer = make_finalizer(ptr.value)
    return MemoryPointer(ctx, ptr, size, finalizer=finalizer)
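`my_alloc` and `make_finalizer` above are placeholders for the host library's own allocator. A matching sketch of the finalizer factory, where `my_free` is a hypothetical deallocation routine paired with `my_alloc`:

def make_finalizer(ptr_value):
    def finalizer():
        # Invoked by Numba once the MemoryPointer is no longer referenced.
        my_free(ptr_value)  # hypothetical: return the block to the allocator

    return finalizer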