def asarray(ctx, other, queue=None, copy=True):
    if not isinstance(other, cl.DeviceMemoryView):
        other = cl.from_host(ctx, other, copy=copy)

    array = CLArray._view_as_this(other)
    array.__array_init__(ctx, queue)
    return array
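# Hypothetical usage sketch for asarray() above -- not part of the original
# source. Assumes `cl`, `np`, and CLArray are importable as in the
# surrounding snippets. Host data goes through cl.from_host; an existing
# DeviceMemoryView is wrapped as a CLArray without another upload.
def example_asarray():
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    host = np.arange(10, dtype='f')
    arr = asarray(ctx, host, queue=queue)   # host buffer uploaded to the device
    same = asarray(ctx, arr, queue=queue)   # already a DeviceMemoryView: no upload
    with arr.map(queue) as view:
        print "asarray round-trip:", np.asarray(view)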
def test_from_host_no_copy(self):
    a = np.array([[1, 2], [3, 4]])
    refcount = sys.getrefcount(a)
    clmem = cl.from_host(ctx, a, copy=False)

    # event = PyEvent()
    # def set_event(mem):
    #     event.set()
    # clmem.add_destructor_callback(set_event)

    self.assertEqual(refcount + 1, sys.getrefcount(a))

    del clmem
    gc.collect()

    # self.assertTrue(event.wait(1), 'event timed out. destructor_callback not called')
    self.assertEqual(refcount, sys.getrefcount(a))

    clmem = cl.from_host(ctx, a, copy=False)
    view_a = memoryview(a)

    self.assertEqual(clmem.format, view_a.format)
    self.assertEqual(clmem.shape, view_a.shape)
    self.assertEqual(clmem.strides, view_a.strides)

    queue = cl.Queue(ctx)

    if queue.device.host_unified_memory:
        a[0, 0] = 100
        with clmem.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b[0, 0], 100)
    else:
        # TODO: should there be a test here?
        pass
def main():
    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')

    prg = cl.Program(ctx, """
        __kernel void add(__global const float *a,
                          __global const float *b,
                          __global float *c)
        {
            int gid = get_global_id(0);
            c[gid] = a[gid] + b[gid];
        }
        """).build()

    add = prg.add
    add.argtypes = cl.global_memory('f'), cl.global_memory('f'), cl.global_memory('f')
    add.argnames = 'a', 'b', 'c'
    add.global_work_size = lambda a: a.shape

    add(queue, a=cla, b=clb, c=clc)

    with clc.map(queue) as view:
        print "view is a python memoryview object", view
        arr = np.asarray(view)
        print "Answer should be zero:"
        print (arr - (a + b)).sum()
def test_broadcast_0D(self):
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])

    one = cl.from_host(ctx, c_int(1))
    a = cl.broadcast(one, [10, 10])

    self.assertEqual(a.shape, (10, 10))
    self.assertEqual(a.strides, (0, 0))

    queue = cl.Queue(ctx)

    with a.map(queue) as view:
        b = np.asarray(view)
        self.assertEqual(b.shape, (10, 10))
        self.assertEqual(b.strides, (0, 0))
def __call__(self, x, out=None, queue=None):
    # note: when queue is None, x must already be a device array carrying
    # a .queue attribute; host data requires an explicit queue argument
    if queue is None:
        queue = x.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    if out is None:
        out = cl.empty(queue.context, x.shape, x.format)

    unary_ufunc_kernel(queue, self.device_func, x, out)

    array = CLArray._view_as_this(out)
    array.__array_init__(queue)
    return array
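# Hypothetical usage sketch for the unary-ufunc __call__ above -- not from
# the original source. Assumes `sin_ufunc` is an instance of the class
# defining __call__, with device_func bound to a device-side sin().
def example_unary_ufunc():
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    x = cl.from_host(ctx, np.linspace(0, 1, 10).astype('f'))
    y = sin_ufunc(x, queue=queue)   # elementwise result on the device
    with y.map(queue) as view:
        print "sin(x):", np.asarray(view)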
def reduce(self, context, x, out=None, initial=0.0, queue=None):
    if queue is None:
        queue = x.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    # output, input, shared, group_size, initial=0.0
    size = x.size
    shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

    # round the work-group size down to the nearest power of two, capped at 512
    group_size = size // 2
    for item in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
        if group_size < item:
            group_size = item // 2
            break
    else:
        group_size = 512

    if out is None:
        out = cl.empty(queue.context, [1], x.format)

    kernel = reduce_kernel.compile(queue.context,
                                   function=self.device_func,
                                   output=cl.global_memory(out.ctype, flat=True),
                                   array=cl.global_memory(x.ctype, flat=True),
                                   shared=shared,
                                   group_size=cl.cl_uint,
                                   cly_meta=self.device_func.func_name)

    # clamp to the largest work-group size the device supports for this kernel
    max_wgsize = kernel.work_group_size(queue.device)
    group_size = min(max_wgsize, group_size)

    kernel(queue, out, out.array_info, x, x.array_info,
           shared, shared.local_info, group_size)

    # reduce_kernel(queue, self.device_func, out, x, shared, group_size)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array
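# Hypothetical usage sketch for the reduce() method above -- not from the
# original source. Assumes `add_ufunc` is an instance of the class defining
# reduce(), with device_func bound to a binary add, mirroring main() below.
def example_reduce():
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    x = cl.from_host(ctx, np.arange(8, dtype='f'))
    total = add_ufunc.reduce(ctx, x, queue=queue)   # single-element device array
    with total.map(queue) as view:
        print "reduce sum:", np.asarray(view).item()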
def test_broadcast_2D(self):
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])

    npa = np.arange(10, dtype=c_float)
    z = np.zeros([10, 1])

    ten = cl.from_host(ctx, npa)
    a = cl.broadcast(ten, [10, 10])

    self.assertEqual(a.shape, (10, 10))
    self.assertEqual(a.strides, (0, sizeof(c_float)))

    queue = cl.Queue(ctx)

    with a.map(queue) as view:
        b = np.asarray(view)
        self.assertEqual(b.shape, (10, 10))
        self.assertEqual(b.strides, (0, sizeof(c_float)))
        self.assertTrue(np.all(b == z + npa))
def blitz(queue, func, out=None):
    '''
    let's get blitzed!
    '''
    func_ast = decompile_func(func)

    # merge the function's closure variables into its globals so the
    # BlitzVisitor can resolve every free name
    func_globals = func.func_globals.copy()
    if func.func_closure:
        func_globals.update({name: cell.cell_contents
                             for name, cell in zip(func.func_code.co_freevars,
                                                   func.func_closure)})

    blitzer = BlitzVisitor(func.func_code.co_filename, func_globals)
    blitzed = ast.Expression(blitzer.visit(func_ast))

    blitzed_code = compile(blitzed, func.func_code.co_filename, 'eval')
    blitzed_func = eval(blitzed_code)

    blitz_kernel = create_n_arg_kernel(sorted(blitzer.locls.keys()))

    # upload any host arrays the visitor collected
    args = {}
    for key, var in blitzer.locls.items():
        if not isinstance(var, cl.DeviceMemoryView):
            var = cl.from_host(queue.context, var)
        args[key] = var

    shape = broadcast_shapes([var.shape for var in args.values()])

    print "shape", shape

    for key, var in args.items():
        args[key] = cl.broadcast(var, shape)

    print "out, **args", out, args

    blitz_kernel(queue, blitzed_func, out, **args)
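# Hypothetical usage sketch for blitz() above -- not from the original
# source. blitz() decompiles the lambda and pulls its free variables
# (a, b here) from the closure, uploading and broadcasting them before
# launching the generated kernel.
def example_blitz():
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    a = np.arange(10, dtype='f')
    b = np.arange(10, dtype='f')
    out = cl.empty(ctx, [10], ctype='f')
    blitz(queue, lambda: a + 2 * b, out=out)
    with out.map(queue) as view:
        print "blitz result:", np.asarray(view)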
def main():
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)

    host_init = np.arange(8, dtype=c_float) + 1
    device_input = cl.from_host(ctx, host_init)

    output = ca.reduce(queue, lambda a, b: a + b, device_input)

    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "data:", host_init
    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "host sum:", host_init.sum()

    with output.map(queue) as view:
        print "device sum:", np.asarray(view).item()

    output = ca.reduce(queue, lambda a, b: a * b, device_input, initial=1.0)

    print "host product:", host_init.prod()
    with output.map(queue) as view:
        print "device product:", np.asarray(view).item()