def test_broadcast_0D(self):
    '''
    Broadcasting a single ``c_int`` to 10x10 must report zero strides on
    both axes, on the device view and on the mapped host array alike.
    '''
    # A non-memory argument is rejected before any device work happens.
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])

    expected_shape = (10, 10)
    expected_strides = (0, 0)

    scalar = cl.from_host(ctx, c_int(1))
    broadcasted = cl.broadcast(scalar, [10, 10])

    self.assertEqual(broadcasted.shape, expected_shape)
    self.assertEqual(broadcasted.strides, expected_strides)

    # Map the broadcast view to the host and check numpy sees the same layout.
    queue = cl.Queue(ctx)
    with broadcasted.map(queue) as view:
        host = np.asarray(view)
        self.assertEqual(host.shape, expected_shape)
        self.assertEqual(host.strides, expected_strides)
def __call__(self, context, x, y, out=None, queue=None):
    '''
    Run this ufunc's device function over ``x`` and ``y`` and return the result.

    :param context: used to convert host operands (``context.asarray``), to
        allocate the result (``context.empty``), to compile the kernel, and
        as the last-resort source of a queue (``context.queue``).
    :param x: first operand; converted with ``context.asarray`` unless it is
        already a ``cl.DeviceMemoryView``.
    :param y: second operand; handled the same way as ``x``.
    :param out: optional destination. When ``None`` a buffer is allocated
        with the shape returned by ``broadcast_shape`` and ``x``'s format.
    :param queue: queue to run on. When ``None`` it is taken from ``x`` if
        it has a ``queue`` attribute, else from ``y``, else from ``context``.
    :return: ``out`` viewed through ``CLArray._view_as_this`` and initialised
        with ``__array_init__(context, queue)``.
    '''
    if queue is None:
        # The first operand that carries a queue wins (x before y).
        for operand in (x, y):
            if hasattr(operand, 'queue'):
                queue = operand.queue
                break
        else:
            queue = context.queue

    # Anything that is not already a device view goes through the context.
    if not isinstance(x, cl.DeviceMemoryView):
        x = context.asarray(x)
    if not isinstance(y, cl.DeviceMemoryView):
        y = context.asarray(y)

    # Operands living on another queue: make ours wait on a marker from
    # theirs. The order (y first, then x) matches the enqueue order.
    for operand in (y, x):
        if operand.queue != queue:
            queue.enqueue_wait_for_events(operand.queue.marker())

    new_shape = broadcast_shape(x.shape, y.shape)
    a = cl.broadcast(x, new_shape)
    b = cl.broadcast(y, new_shape)

    if out is None:
        out = context.empty(shape=new_shape, ctype=x.format, queue=queue)

    # NOTE(review): a disabled debugging call used to sit here,
    # ufunc_kernel._compile(queue.context, ..., source_only=True), with the
    # same function/a/b/out keywords -- presumably to dump the generated source.
    device_func = self.device_func
    a_type = cl.global_memory(a.format, flat=True)
    b_type = cl.global_memory(b.format, flat=True)
    out_type = cl.global_memory(out.format, flat=True)
    kernel = ufunc_kernel.compile(
        context,
        function=device_func,
        a=a_type,
        b=b_type,
        out=out_type,
        cly_meta=device_func.func_name,
    )

    # Each buffer is passed together with its array_info descriptor.
    kernel(queue, a, a.array_info, b, b.array_info, out, out.array_info)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array
def __call__(self, context, x, y, out=None, queue=None):
    '''
    Apply this ufunc's device function to ``x`` and ``y`` on the device.

    :param context: supplies ``asarray`` for host operands, ``empty`` for the
        result buffer, the kernel compilation target, and a fallback
        ``queue`` attribute.
    :param x: first operand; wrapped with ``context.asarray`` when it is not
        a ``cl.DeviceMemoryView``.
    :param y: second operand; wrapped the same way as ``x``.
    :param out: optional destination buffer. If ``None``, one is created
        with the shape from ``broadcast_shape`` and the format of ``x``.
    :param queue: queue to enqueue on. If ``None``, ``x.queue`` is used when
        present, then ``y.queue``, then ``context.queue``.
    :return: a ``CLArray`` view of ``out`` (via ``CLArray._view_as_this``)
        after ``__array_init__(context, queue)`` has been called on it.
    '''
    if queue is None:
        # Prefer a queue carried by an operand, checking x before y.
        for candidate in (x, y):
            if hasattr(candidate, 'queue'):
                queue = candidate.queue
                break
        else:
            queue = context.queue

    # Host-side operands are converted through the context, x first.
    if not isinstance(x, cl.DeviceMemoryView):
        x = context.asarray(x)
    if not isinstance(y, cl.DeviceMemoryView):
        y = context.asarray(y)

    # For an operand bound to a different queue, enqueue a wait on a marker
    # from that queue. y is handled before x, as in the enqueue order.
    for candidate in (y, x):
        if candidate.queue != queue:
            queue.enqueue_wait_for_events(candidate.queue.marker())

    new_shape = broadcast_shape(x.shape, y.shape)
    a = cl.broadcast(x, new_shape)
    b = cl.broadcast(y, new_shape)

    if out is None:
        out = context.empty(shape=new_shape, ctype=x.format, queue=queue)

    # NOTE(review): this spot previously held a commented-out
    # ufunc_kernel._compile(queue.context, ..., source_only=True) call with
    # the same function/a/b/out keywords -- presumably a source-dump aid.
    device_func = self.device_func
    a_type = cl.global_memory(a.format, flat=True)
    b_type = cl.global_memory(b.format, flat=True)
    out_type = cl.global_memory(out.format, flat=True)
    kernel = ufunc_kernel.compile(
        context,
        function=device_func,
        a=a_type,
        b=b_type,
        out=out_type,
        cly_meta=device_func.func_name,
    )

    # Buffers and their array_info descriptors are passed as pairs.
    kernel(queue, a, a.array_info, b, b.array_info, out, out.array_info)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array
def test_broadcast_2D(self):
    '''
    Broadcasting a 10-element ``c_float`` vector to 10x10 must repeat it
    along the first axis (stride 0) and keep the element stride on the
    second, and the mapped data must equal numpy's own broadcast.
    '''
    # A non-memory argument is rejected before any device work happens.
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])

    row = np.arange(10, dtype=c_float)
    zero_column = np.zeros([10, 1])

    expected_shape = (10, 10)
    expected_strides = (0, sizeof(c_float))

    device_row = cl.from_host(ctx, row)
    broadcasted = cl.broadcast(device_row, [10, 10])

    self.assertEqual(broadcasted.shape, expected_shape)
    self.assertEqual(broadcasted.strides, expected_strides)

    queue = cl.Queue(ctx)
    with broadcasted.map(queue) as view:
        host = np.asarray(view)
        self.assertEqual(host.shape, expected_shape)
        self.assertEqual(host.strides, expected_strides)
        # zeros(10, 1) + row is numpy's reference for the 10x10 broadcast.
        self.assertTrue(np.all(host == zero_column + row))
def setslice(context, arr, value):
    '''
    Launch the ``setslice`` kernel with ``arr`` and ``value`` broadcast to
    ``arr``'s shape.

    :param context: used to convert ``value`` (``context.asarray``) and to
        compile the kernel.
    :param arr: destination view; its queue is the one the kernel runs on.
    :param value: source data; converted with ``context.asarray`` unless it
        is already a ``cl.DeviceMemoryView``.
    :return: whatever the compiled kernel call returns.
    '''
    source = value
    if not isinstance(source, cl.DeviceMemoryView):
        source = context.asarray(source)

    # The source may live on another queue: make the destination's queue
    # wait on a marker from it before the kernel is enqueued.
    if source.queue != arr.queue:
        arr.queue.enqueue_wait_for_events(source.queue.marker())

    source = cl.broadcast(source, arr.shape)

    arr_type = cl.global_memory(arr.format, flat=True)
    source_type = cl.global_memory(source.format, flat=True)
    kernel = setslice_kernel.compile(
        context,
        arr=arr_type,
        value=source_type,
        cly_meta='setslice',
    )

    # Each buffer is passed together with its array_info descriptor.
    return kernel(arr.queue, arr, arr.array_info, source, source.array_info)
def main():
    '''
    Demo script: run ``a[:] + a[:] + 1`` through ``ca.blitz`` on a GPU
    context, then print a host-side numpy expression and the mapped output.
    '''
    gpu_context = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(gpu_context)

    host_values = np.arange(1.0 * 12.0, dtype=c_float)

    # `a` keeps its name: the lambda below closes over it, and blitz reads
    # the lambda's closure variable names.
    a = ca.arange(gpu_context, 12, ctype=c_float)

    out = ca.empty_like(a[:])
    destination = cl.broadcast(out, a[:].shape)

    ca.blitz(queue, lambda: a[:] + a[:] + 1, out=destination)

    # Single-argument print(...) emits the same text as the print statement.
    print(host_values[1:] + host_values[:-1])
    with out.map() as view:
        print(view)
def blitz(queue, func, out=None):
    '''
    Decompile ``func``, rewrite its expression with ``BlitzVisitor`` and run
    the result through one generated kernel over the operands it collected.

    :param queue: passed to the generated kernel; ``queue.context`` is used
        to upload operands that are not ``cl.DeviceMemoryView`` objects.
    :param func: function object to decompile. Its ``func_globals``,
        ``func_closure`` and ``func_code`` attributes are read.
    :param out: forwarded unchanged to the generated kernel.
    '''
    func_ast = decompile_func(func)

    # Namespace handed to the visitor: a copy of the function's globals,
    # overlaid with the current contents of its closure cells.
    func_globals = func.func_globals.copy()
    if func.func_closure:
        func_globals.update(zip(
            func.func_code.co_freevars,
            (cell.cell_contents for cell in func.func_closure),
        ))

    blitzer = BlitzVisitor(func.func_code.co_filename, func_globals)
    blitzed = ast.Expression(blitzer.visit(func_ast))
    blitzed_code = compile(blitzed, func.func_code.co_filename, 'eval')
    # eval() gets no explicit namespaces, so the expression is evaluated
    # against this module's globals and this frame's locals; the local names
    # above are part of what it can see.
    blitzed_func = eval(blitzed_code)

    kernel = create_n_arg_kernel(sorted(blitzer.locls.keys()))

    # Upload every operand the visitor collected that is not on the device.
    operands = {}
    for key, operand in blitzer.locls.items():
        if isinstance(operand, cl.DeviceMemoryView):
            operands[key] = operand
        else:
            operands[key] = cl.from_host(queue.context, operand)

    shape = broadcast_shapes([operand.shape for operand in operands.values()])
    # Single-argument print(...) emits the same text as the print statement.
    print("%s %s" % ("shape", shape))

    # Broadcast in place so the dict keeps its existing key order.
    for key in list(operands):
        operands[key] = cl.broadcast(operands[key], shape)

    print("%s %s %s" % ("out, **args", out, operands))
    kernel(queue, blitzed_func, out, **operands)
def blitz(queue, func, out=None):
    '''
    Turn the expression inside ``func`` into a single generated kernel call.

    ``func`` is decompiled, rewritten by ``BlitzVisitor``, compiled back and
    evaluated; the operands the visitor collected are moved to the device if
    needed, broadcast to a common shape and passed to the kernel by name.

    :param queue: passed to the generated kernel; ``queue.context`` is used
        when an operand has to be uploaded with ``cl.from_host``.
    :param func: function object to decompile. Its ``func_globals``,
        ``func_closure`` and ``func_code`` attributes are read.
    :param out: forwarded unchanged to the generated kernel.
    '''
    func_ast = decompile_func(func)

    # The visitor gets a copy of the function's globals with the current
    # closure cell contents layered on top.
    func_globals = func.func_globals.copy()
    if func.func_closure:
        func_globals.update(zip(
            func.func_code.co_freevars,
            (cell.cell_contents for cell in func.func_closure),
        ))

    blitzer = BlitzVisitor(func.func_code.co_filename, func_globals)
    blitzed = ast.Expression(blitzer.visit(func_ast))
    blitzed_code = compile(blitzed, func.func_code.co_filename, 'eval')
    # eval() without explicit namespaces uses this module's globals and this
    # frame's locals, so the local names above are visible to the expression.
    blitzed_func = eval(blitzed_code)

    kernel = create_n_arg_kernel(sorted(blitzer.locls.keys()))

    # Operands that are not device views are uploaded through the queue's
    # context; device views are used as they are.
    operands = {}
    for key, operand in blitzer.locls.items():
        if isinstance(operand, cl.DeviceMemoryView):
            operands[key] = operand
        else:
            operands[key] = cl.from_host(queue.context, operand)

    shape = broadcast_shapes([operand.shape for operand in operands.values()])
    # Single-argument print(...) emits the same text as the print statement.
    print("%s %s" % ("shape", shape))

    # Values are replaced in place so the dict keeps its existing key order.
    for key in list(operands):
        operands[key] = cl.broadcast(operands[key], shape)

    print("%s %s %s" % ("out, **args", out, operands))
    kernel(queue, blitzed_func, out, **operands)