def test_callback(self):
    """Verify that a callback added to a marker event gets invoked.

    A user event is enqueued as a wait on the queue and a marker is placed
    behind it.  The callback attached to the marker must not have run while
    the user event is still pending; after the user event is completed and
    the marker has been waited on, the callback must signal
    ``self.py_event`` and set ``self.callback_called``.
    """
    self.callback_called = False
    self.py_event = PyEvent()

    def callback(event, status):
        # Record the invocation, then release the waiter further down.
        self.callback_called = True
        self.py_event.set()

    gate = UserEvent(ctx)
    queue = Queue(ctx, ctx.devices[0])
    queue.enqueue_wait_for_events(gate)

    marker = queue.marker()
    marker.add_callback(callback)

    # The user event has not been completed yet, so nothing has run.
    self.assertEqual(gate.status, Event.SUBMITTED)
    self.assertEqual(marker.status, Event.QUEUED)
    self.assertFalse(self.callback_called)

    gate.complete()
    self.assertEqual(gate.status, Event.COMPLETE)

    marker.wait()
    self.assertEqual(marker.status, Event.COMPLETE)

    # Bounded wait (timeout argument 2) for the callback to signal.
    signalled = self.py_event.wait(2)
    self.assertTrue(signalled, 'timed out waiting for callback')
    self.assertTrue(self.callback_called)
def test_copy_contig(self):
    """Copy a whole device buffer and compare the copy with the host array."""
    queue = Queue(ctx, ctx.devices[0])
    host_data = np.array([[1, 2], [3, 4]])
    device_view = DeviceMemoryView.from_host(ctx, host_data)

    duplicate = device_view.copy(queue)
    queue.barrier()

    with duplicate.map(queue) as mapped:
        round_trip = np.asarray(mapped)
        identical = np.all(host_data == round_trip)
        self.assertTrue(identical)
def test_dim_reduce(self):
    """Index a 2-D device view with an integer and check the reduced view.

    The first column is additionally compared against the equivalent NumPy
    view for ``ndim``, ``shape`` and ``strides`` and is expected to have an
    ``offset_`` of 0.  Every reduced view is then mapped and its contents
    compared with the same index applied to the host array.
    """
    queue = Queue(ctx, ctx.devices[0])
    a = np.array([[1, 2], [3, 4], [5, 6]])
    view = DeviceMemoryView.from_host(ctx, a)

    # ``view[:, 0]`` spelled as an explicit index tuple.
    every = slice(None, None, None)
    first_column = (every, 0)

    new_view = view[first_column]
    self.assertEqual(new_view.ndim, a[first_column].ndim)
    self.assertEqual(new_view.shape, a[first_column].shape)
    self.assertEqual(new_view.offset_, 0)
    self.assertEqual(new_view.strides, a[first_column].strides)
    with new_view.map(queue) as buf:
        b = np.asarray(buf)
        self.assertTrue(np.all(b == a[first_column]))

    # Second column, then first and second row.
    for index in ((every, 1), (0, every), (1, every)):
        new_view = view[index]
        with new_view.map(queue) as buf:
            b = np.asarray(buf)
            self.assertTrue(np.all(b == a[index]))
def test_copy_2D(self):
    """Copy strided and offset 2-D sub-views and compare with NumPy slicing.

    For every pair of slices the sub-view of the device buffer is copied,
    the copy is mapped to the host, and its contents are compared with the
    same slices applied to the source array.  The slice pair is passed as
    the assertion message so a failure identifies the offending case.
    """
    queue = Queue(ctx, ctx.devices[0])
    a = np.arange(6 * 6).reshape([6, 6])
    clbuf = DeviceMemoryView.from_host(ctx, a)

    slices = [
        (slice(None, None, 2), slice(None, None, 2)),
        (slice(1, None, None), slice(1, None, None)),
        (slice(None, None, None), slice(1, None, None)),
        (slice(1, None, None), slice(None, None, None)),
        (slice(1, None, None), slice(0, None, 2)),
        (slice(None, None, 2), slice(1, None, 2)),
        (slice(1, None, 2), slice(None, None, 2)),
    ]

    for idx0, idx1 in slices:
        # Computed once per case; the host array is never modified.
        expected = a[idx0, idx1]

        sub_buf = clbuf[idx0, idx1]
        copy_of = sub_buf.copy(queue)

        with copy_of.map(queue) as cpy:
            b = np.asarray(cpy)
            self.assertTrue(np.all(expected == b), (idx0, idx1))
def test_get_slice(self):
    """Slice a 1-D device buffer with steps and compare with NumPy slicing."""
    queue = Queue(ctx, ctx.devices[0])
    a = np.array([1, 2, 3, 4])
    clbuf = DeviceMemoryView.from_host(ctx, a)
    self.assertEqual(clbuf._refcount, 1)

    # Equivalent to [::2], [1::2] and [::-1], in that order.
    steps = (
        slice(None, None, 2),
        slice(1, None, 2),
        slice(None, None, -1),
    )
    for index in steps:
        new_buf = clbuf[index]
        with new_buf.map(queue) as buf:
            b = np.asanyarray(buf)
            self.assertTrue(np.all(b == a[index]))
def test_copy_1D(self):
    """Copy sliced 1-D sub-views and compare each copy with NumPy slicing."""
    queue = Queue(ctx, ctx.devices[0])
    a = np.array([1, 2, 3, 4])
    clbuf = DeviceMemoryView.from_host(ctx, a)

    # Each selector is applied to both the device buffer and the host array
    # so the two sides are always sliced the same way.
    selectors = (
        lambda seq: seq[::2],
        lambda seq: seq[1::2],
        lambda seq: seq[1:-1],
    )
    for select in selectors:
        copy_of = select(clbuf).copy(queue)
        with copy_of.map(queue) as cpy:
            b = np.asarray(cpy)
            self.assertTrue(np.all(select(a) == b))
def test_refcount(self):
    """Check ``_refcount`` of a device buffer across views and mapping.

    The statement order matters here: each ``del`` / ``gc.collect()`` pair
    is followed by an assertion on the count that is expected afterwards.
    """
    a = np.array([[1, 2], [3, 4]])
    clbuf = DeviceMemoryView.from_host(ctx, a)

    # A freshly created buffer is expected to report a count of 1.
    self.assertEqual(clbuf._refcount, 1)

    # While a sliced view exists the count is expected to be 2.
    new_buf = clbuf[:, :-1]
    self.assertEqual(clbuf._refcount, 2)

    # Dropping the view and collecting brings the count back to 1.
    del new_buf
    gc.collect()
    self.assertEqual(clbuf._refcount, 1)
    self.assertEqual(clbuf.base, None)

    # Create a sub-buffer.
    new_buf = clbuf[1, :]
    self.assertEqual(clbuf._refcount, 2)

    del new_buf
    gc.collect()
    self.assertEqual(clbuf._refcount, 1)

    queue = Queue(ctx)
    with clbuf.map(queue) as host:
        # While mapped the count is still expected to be 1 ...
        self.assertEqual(clbuf._refcount, 1)

    # ... and 2 once the ``with`` block has exited.
    self.assertEqual(clbuf._refcount, 2, "unmap increments the refcount")

    del host
    gc.collect()

    # The GPU may not decrement the ref count unless finish is called.
    queue.finish()
    self.assertEqual(clbuf._refcount, 1)

    event = PyEvent()

    def callback(mem):
        event.set()
    # NOTE(review): ``callback`` and ``event`` are not used any further in
    # the code visible here -- the tail of this test may be missing; confirm
    # against the full file.
def test_wait(self):
    """Check event statuses before and after a user event is completed.

    A marker is queued behind a wait on a user event.  Before completion
    the user event reports ``SUBMITTED`` and the marker ``QUEUED``; after
    completing the user event and waiting on the marker both report
    ``COMPLETE``.
    """
    gate = UserEvent(ctx)
    queue = Queue(ctx, ctx.devices[0])
    queue.enqueue_wait_for_events(gate)
    marker = queue.marker()

    # The user event has not been completed yet.
    self.assertEqual(gate.status, Event.SUBMITTED)
    self.assertEqual(marker.status, Event.QUEUED)

    gate.complete()
    self.assertEqual(gate.status, Event.COMPLETE)

    marker.wait()
    self.assertEqual(marker.status, Event.COMPLETE)
def test_enqueue_native_kernel(self):
    """Enqueue a Python callable twice as a native kernel and check the result.

    The callable folds its argument into the module-level ``foo`` using the
    ``op`` keyword it is given.  Skipped when the first device reports no
    native kernel support.
    """
    if not ctx.devices[0].has_native_kernel:
        self.skipTest("Device does not support native kernels")

    queue = Queue(ctx, ctx.devices[0])

    global foo
    foo = 0

    def incfoo(arg, op=lambda a, b: 0):
        global foo
        foo = op(foo, arg)

    def add(a, b):
        return a + b

    def multiply(a, b):
        return a * b

    # Applied in order this yields (0 + 4) * 3 == 12.
    queue.enqueue_native_kernel(incfoo, 4, op=add)
    queue.enqueue_native_kernel(incfoo, 3, op=multiply)
    queue.finish()

    self.assertEqual(foo, 12)
def test_read_write(self):
    """Round-trip data through blocking ``read`` and ``write`` calls."""
    a = np.array([[1, 2], [3, 4]])
    clbuf = DeviceMemoryView.from_host(ctx, a)
    queue = Queue(ctx, ctx.devices[0])

    # Read the initial contents back into a zeroed host array.
    out = np.zeros_like(a)
    clbuf.read(queue, out, blocking=True)
    self.assertTrue(np.all(out == a))

    # Overwrite the buffer and read it back again.
    incremented = a + 1
    clbuf.write(queue, incremented, blocking=True)
    clbuf.read(queue, out, blocking=True)
    self.assertTrue(np.all(out == incremented))
def test_map(self):
    """Map a 4x4 RGBA/uint8 image, write the red channel and read it back."""
    pixel_format = cl.ImageFormat('CL_RGBA', 'CL_UNSIGNED_INT8')
    image = cl.empty_image(ctx, [4, 4], pixel_format)
    queue = Queue(ctx)

    # First mapping: check the exposed buffer layout, then fill 'r'.
    with image.map(queue) as img:
        self.assertEqual(img.format, 'T{B:r:B:g:B:b:B:a:}')
        self.assertEqual(img.ndim, 2)
        self.assertEqual(img.shape, (4, 4))

        pixels = np.asarray(img)
        pixels['r'] = 1

    # Second mapping: the written values must be visible.
    with image.map(queue) as img:
        pixels = np.asarray(img)
        red_is_one = np.all(pixels['r'] == 1)
        self.assertTrue(red_is_one)
def test_set_args(self):
    """Set kernel arguments positionally, by keyword and through defaults.

    The ``generate_sin`` kernel is run three times with the same effective
    arguments and its output is compared with a NumPy reference each time.
    Omitting ``scale`` before ``__defaults__`` is assigned must raise
    ``TypeError``.
    """
    program = Program(ctx, source=source)
    program.build()

    generate_sin = program.kernel('generate_sin')
    generate_sin.argtypes = [global_memory(), ctypes.c_float]

    buf = empty(ctx, [10], ctype=cl.cl_float2)
    queue = Queue(ctx, ctx.devices[0])

    # 1) positional arguments
    generate_sin.set_args(buf, 1.0)
    queue.enqueue_nd_range_kernel(generate_sin, 1, global_work_size=[buf.size])

    expected = np.zeros([10], dtype=[('x', np.float32), ('y', np.float32)])
    expected['x'] = np.arange(10)
    expected['y'] = np.sin(expected['x'] / 10)

    with buf.map(queue) as host:
        result = np.asarray(host)
        self.assertTrue(np.all(expected['x'] == result[:, 0]))
        self.assertTrue(np.allclose(expected['y'], result[:, 1]))

    # 2) keyword arguments, available once ``argnames`` is set
    generate_sin.argnames = ['a', 'scale']
    generate_sin.set_args(a=buf, scale=1.0)
    queue.enqueue_nd_range_kernel(generate_sin, 1, global_work_size=[buf.size])

    with buf.map(queue) as host:
        result = np.asarray(host)
        self.assertTrue(np.all(expected['x'] == result[:, 0]))
        self.assertTrue(np.allclose(expected['y'], result[:, 1]))

    # Without a default, leaving out ``scale`` is an error.
    with self.assertRaises(TypeError):
        generate_sin.set_args(a=buf)

    # 3) ``scale`` supplied through ``__defaults__``
    generate_sin.__defaults__ = [1.0]
    generate_sin.set_args(a=buf)
    queue.enqueue_nd_range_kernel(generate_sin, 1, global_work_size=[buf.size])

    with buf.map(queue) as host:
        result = np.asarray(host)
        self.assertTrue(np.all(expected['x'] == result[:, 0]))
        self.assertTrue(np.allclose(expected['y'], result[:, 1]))
def test_map(self):
    """Map a device buffer read-only and write-only and check the contents.

    The buffer is mapped three times: read-only to verify layout and data,
    with ``readable=False`` to store the row-reversed array, and read-only
    again to verify that the write is visible.  ``_mapcount`` is expected
    to be 0 before the first mapping and 1 inside every mapping.
    """
    a = np.array([[1, 2], [3, 4]])
    host_view = memoryview(a)
    clbuf = DeviceMemoryView.from_host(ctx, a)
    queue = Queue(ctx, ctx.devices[0])

    self.assertEqual(clbuf._mapcount, 0)

    # Read-only mapping: layout must match the host memoryview.
    with clbuf.map(queue, writeable=False) as buf:
        self.assertEqual(clbuf._mapcount, 1)
        self.assertEqual(buf.format, host_view.format)
        self.assertEqual(buf.shape, host_view.shape)
        self.assertEqual(buf.strides, host_view.strides)

        b = np.asarray(buf)
        self.assertTrue(np.all(b == a))
        self.assertTrue(buf.readonly)

    # NOTE: the check that ``_mapcount`` returns to 0 after unmapping is
    # disabled in this test.

    # Mapping with readable=False: store the rows in reverse order.
    with clbuf.map(queue, readable=False) as buf:
        self.assertEqual(clbuf._mapcount, 1)
        b = np.asarray(buf)
        b[::] = a[::-1]
        self.assertFalse(buf.readonly)

    # NOTE: the same post-unmap ``_mapcount`` check is disabled here too.

    # Read-only mapping again: the reversed rows must be visible.
    with clbuf.map(queue, writeable=False) as buf:
        self.assertEqual(clbuf._mapcount, 1)
        b = np.asarray(buf)
        self.assertTrue(np.all(b == a[::-1]))
def test_getitem(self):
    """Index a 2-D device buffer with slices and compare with NumPy.

    Also checks that indexing with too many indices raises ``IndexError``
    and that ``_refcount`` goes from 1 to 2 when a sliced view is created.
    """
    queue = Queue(ctx, ctx.devices[0])
    a = np.array([[1, 2], [3, 4]])
    clbuf = DeviceMemoryView.from_host(ctx, a)

    # Three indices on a two-dimensional buffer must be rejected.
    with self.assertRaises(IndexError):
        clbuf[1, 1, 1]

    self.assertEqual(clbuf._refcount, 1)
    new_buf = clbuf[:, :-1]
    self.assertEqual(clbuf._refcount, 2)

    # The mapping object is created first and entered separately.
    mapp = new_buf.map(queue)
    with mapp as buf:
        b = np.asanyarray(buf)
        self.assertTrue(np.all(b == a[:, :-1]))

    # Drop the mapped view and the sub-view before the next slices.
    del buf
    del new_buf
    gc.collect()

    # All rows, last column.
    new_buf = clbuf[:, 1:]
    with new_buf.map(queue) as buf:
        b = np.asanyarray(buf)
        self.assertTrue(np.all(b == a[:, 1:]))

    # Last row, all columns.
    new_buf = clbuf[1:, :]
    with new_buf.map(queue) as buf:
        b = np.asanyarray(buf)
        self.assertTrue(np.all(b == a[1:, :]))
def test_enqueue_native_kernel_refcount(self):
    """Check the reference count of a callable enqueued as a native kernel.

    The count is expected to rise by one while the kernel is pending and to
    return to its starting value after the queue has finished.  Skipped
    when the first device reports no native kernel support.
    """
    if not ctx.devices[0].has_native_kernel:
        self.skipTest("Device does not support native kernels")

    queue = Queue(ctx, ctx.devices[0])

    def incfoo():
        pass

    # Baseline of 2: presumably the local name plus the temporary reference
    # held by the getrefcount() call itself.  Do not bind ``incfoo`` to any
    # other name in this test, or every expected count below shifts.
    self.assertEqual(sys.getrefcount(incfoo), 2)

    # Queue a wait on a user event first so the kernel enqueued after it is
    # still pending when the count is checked.
    e = cl.UserEvent(ctx)
    queue.enqueue_wait_for_events(e)
    queue.enqueue_native_kernel(incfoo)

    # One additional reference while the kernel is enqueued.
    self.assertEqual(sys.getrefcount(incfoo), 3)

    e.complete()
    queue.finish()

    # Back to the baseline once the queue has finished.
    self.assertEqual(sys.getrefcount(incfoo), 2)
def test_copy_2D_negative_stride(self):
    """Copy 2-D sub-views with negative steps and compare with NumPy slicing.

    For every pair of slices the sub-view of the device buffer is copied,
    the copy is mapped to the host, and its contents are compared with the
    same slices applied to the source array.  The slice pair is passed as
    the assertion message so a failure identifies the offending case.
    """
    queue = Queue(ctx, ctx.devices[0])
    a = np.arange(4 * 4).reshape([4, 4])
    clbuf = DeviceMemoryView.from_host(ctx, a)

    slices = [
        (slice(None, None, -2), slice(None, None, -2)),
        (slice(1, None, -1), slice(1, None, -1)),
        (slice(None, None, None), slice(1, None, -1)),
        (slice(1, None, -1), slice(None, None, -1)),
        (slice(1, None, -2), slice(1, None, -2)),
        (slice(None, None, -2), slice(1, None, -2)),
        (slice(1, None, -2), slice(None, None, -2)),
    ]

    for idx0, idx1 in slices:
        copy_of = clbuf[idx0, idx1].copy(queue)
        with copy_of.map(queue) as cpy:
            b = np.asarray(cpy)
            expected = a[idx0, idx1]
            # Report which slice pair failed, as test_copy_2D does.
            self.assertTrue(np.all(expected == b), (idx0, idx1))