def test_host_alloc_mapped(self):
    ary = cuda.mapped_array(10, dtype=np.uint32)
    ary.fill(123)
    self.assertTrue(all(ary == 123))
    driver.device_memset(ary, 0, driver.device_memory_size(ary))
    self.assertTrue(all(ary == 0))
    self.assertTrue(sum(ary != 0) == 0)
def test_host_alloc_pinned(self):
    ary = cuda.pinned_array(10, dtype=np.uint32)
    ary.fill(123)
    self.assertTrue(all(ary == 123))
    devary = cuda.to_device(ary)
    driver.device_memset(devary, 0, driver.device_memory_size(devary))
    # The memset touched only the device copy; the pinned host array
    # keeps its original contents until it is explicitly copied back.
    self.assertTrue(all(ary == 123))
    devary.copy_to_host(ary)
    self.assertTrue(all(ary == 0))
def test_memset(self):
    dtype = np.dtype('uint32')
    n = 10
    sz = dtype.itemsize * n
    devary = self.context.memalloc(sz)
    driver.device_memset(devary, 0xab, sz)
    hstary = np.empty(n, dtype=dtype)
    driver.device_to_host(hstary, devary, sz)
    # Each uint32 element consists of four 0xab bytes, i.e. 0xabababab.
    hstary2 = np.array([0xabababab] * n, dtype=np.dtype('uint32'))
    self.assertTrue(np.all(hstary == hstary2))
def test_host_alloc_driver(self):
    n = 32
    mem = cuda.current_context().memhostalloc(n, mapped=True)
    dtype = np.dtype(np.uint8)
    ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype, buffer=mem)
    magic = 0xab
    driver.device_memset(mem, magic, n)
    self.assertTrue(np.all(ary == magic))
    ary.fill(n)
    recv = np.empty_like(ary)
    driver.device_to_host(recv, mem, ary.size)
    self.assertTrue(np.all(ary == recv))
    self.assertTrue(np.all(recv == n))
def _test_managed_alloc_driver(self, memory_factor, attach_global=True):
    # Verify that we can allocate and operate on managed
    # memory through the CUDA driver interface.
    total_mem_size = self.get_total_gpu_memory()
    n_bytes = int(memory_factor * total_mem_size)
    ctx = cuda.current_context()
    mem = ctx.memallocmanaged(n_bytes, attach_global=attach_global)
    dtype = np.dtype(np.uint8)
    n_elems = n_bytes // dtype.itemsize
    ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)
    magic = 0xab
    driver.device_memset(mem, magic, n_bytes)
    ctx.synchronize()
    # Note that this assertion operates on the CPU, so this
    # test effectively drives both the CPU and the GPU on
    # managed memory.
    self.assertTrue(np.all(ary == magic))
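# Illustrative sketch (not part of the original tests): a concrete test
# method would call the parametrized helper above with a chosen memory
# factor. The method name and the 0.01 factor here are hypothetical.
def test_managed_alloc_driver_small(self):
    # Allocate roughly 1% of total GPU memory as managed memory,
    # attached globally by default.
    self._test_managed_alloc_driver(0.01)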