def arange(ctx, *args, **kwargs):
    '''
    arange(ctx, [start,] stop[, step], ctype='f', queue=None) -> device array

    Return evenly spaced values within the half-open interval
    ``[start, stop)`` as a new device array, mirroring ``numpy.arange``.

    :param ctx: OpenCL context used to allocate the array.
    :param args: 1-3 positional values: ``stop`` | ``start, stop`` |
        ``start, stop, step`` (start defaults to 0.0, step to 1.0).
    :param ctype: element type of the array (keyword only, default 'f').
    :param queue: queue to run the fill kernel on; a new one is created
        from *ctx* when omitted.
    :raises TypeError: when 0 or more than 3 positional values are given.
    '''
    start = 0.0
    step = 1.0
    if len(args) == 1:
        stop = args[0]
    elif len(args) == 2:
        start, stop = args
    elif len(args) == 3:
        start, stop, step = args
    else:
        # TypeError is the conventional type for an arity error (was a bare
        # Exception); the reported count includes `ctx`, matching the
        # public call signature.
        raise TypeError("wrong number of arguments expected between 2-4 (got %i)" % (len(args) + 1))

    # number of elements needed to cover [start, stop) with `step` strides
    size = int(math.ceil((stop - start) / float(step)))

    ctype = kwargs.get('ctype', 'f')
    queue = kwargs.get('queue', None)
    if queue is None:
        queue = cl.Queue(ctx)

    arr = empty(ctx, [size], ctype=ctype, queue=queue)
    _arange(queue, arr, start, step)
    return arr
def __init__(self, context, queue=None):
    """Bind *context*, start an empty event list, and attach a queue.

    A fresh queue is created on *context* when none is supplied.
    """
    self._context = context
    self.events = []
    self.queue = cl.Queue(context) if queue is None else queue
def main(): ctx = cl.Context() a = cl.empty(ctx, [256], cly.float2) queue = cl.Queue(ctx) generate_sin(queue, a) with a.map(queue) as view: array = np.asarray(view) print array
def linspace(ctx, start, stop, num=50, ctype='f', queue=None):
    '''
    Return *num* evenly spaced samples from *start* to *stop* as a new
    device array, mirroring ``numpy.linspace``.

    :param ctx: OpenCL context used to allocate the array.
    :param start: first value of the sequence (coerced to float).
    :param stop: last value of the sequence (coerced to float; endpoint
        handling is determined by the ``_linspace`` kernel).
    :param num: number of samples to generate (default 50).
    :param ctype: element type of the array (default 'f').
    :param queue: queue to run the fill kernel on; a new one is created
        from *ctx* when omitted.
    '''
    if queue is None:
        queue = cl.Queue(ctx)
    arr = empty(ctx, [num], ctype=ctype, queue=queue)
    _linspace(queue, arr, float(start), float(stop))
    return arr
def test_broadcast_0D(self):
    """Broadcasting a scalar (0-D) buffer yields stride-0 views, device and host."""
    # broadcasting something that is not a cl memory object must fail
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])
    scalar = cl.from_host(ctx, c_int(1))
    broadcasted = cl.broadcast(scalar, [10, 10])
    self.assertEqual(broadcasted.shape, (10, 10))
    self.assertEqual(broadcasted.strides, (0, 0))
    queue = cl.Queue(ctx)
    # the mapped host view must mirror the broadcast shape and zero strides
    with broadcasted.map(queue) as view:
        host = np.asarray(view)
        self.assertEqual(host.shape, (10, 10))
        self.assertEqual(host.strides, (0, 0))
def main(): ctx = cl.Context(device_type=cl.Device.GPU) queue = cl.Queue(ctx) npa = np.arange(1.0 * 12.0, dtype=c_float) a = ca.arange(ctx, 12, ctype=c_float) out = ca.empty_like(a[:]) output = cl.broadcast(out, a[:].shape) ca.blitz(queue, lambda: a[:] + a[:] + 1, out=output) print npa[1:] + npa[:-1] with out.map() as view: print view
def initialize():
    """Create the shared GL/CL vertex buffer and fill it with the sine wave."""
    global coords_dev, n_vertices
    context = cl.gl.context()
    # GL-shared buffer: OpenGL draws what OpenCL computes
    coords_dev = cl.gl.empty_gl(context, [n_vertices], ctype=float2)
    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)
    queue = cl.Queue(context)
    # CL must acquire the GL buffer before kernels may write to it
    with cl.gl.acquire(queue, coords_dev):
        generate_sin(queue, coords_dev)
    glEnableClientState(GL_VERTEX_ARRAY)
def main(): ctx = cl.Context(device_type=cl.Device.GPU) ret = cl.empty(ctx, [16], 'l') queue = cl.Queue(ctx) print setslice.compile(ctx, a=cl.global_memory('l'), value=c_int, source_only=True) # print setslice(queue, ret[::2], c_int(6)) # print setslice(queue, ret[1::2], c_int(5)) with ret.map(queue) as foo: print np.asarray(foo)
def test_broadcast_2D(self):
    """Broadcasting a 1-D row to 2-D gives a zero leading stride and row-repeated data."""
    # broadcasting something that is not a cl memory object must fail
    with self.assertRaises(TypeError):
        cl.broadcast(None, [1])
    host_row = np.arange(10, dtype=c_float)
    zeros_col = np.zeros([10, 1])
    dev_row = cl.from_host(ctx, host_row)
    bcast = cl.broadcast(dev_row, [10, 10])
    self.assertEqual(bcast.shape, (10, 10))
    self.assertEqual(bcast.strides, (0, sizeof(c_float)))
    queue = cl.Queue(ctx)
    with bcast.map(queue) as view:
        mapped = np.asarray(view)
        self.assertEqual(mapped.shape, (10, 10))
        self.assertEqual(mapped.strides, (0, sizeof(c_float)))
        # every row of the mapped view equals the original host row
        self.assertTrue(np.all(mapped == zeros_col + host_row))
def test_from_host_no_copy(self):
    # Purpose: a zero-copy from_host must (1) pin the host array with
    # exactly one extra reference for the cl object's lifetime, (2) drop
    # that reference on destruction, and (3) expose the host buffer's
    # format/shape/strides unchanged.
    # NOTE: statement order matters -- getrefcount results depend on which
    # references are live at each call.
    a = np.array([[1, 2], [3, 4]])
    refcount = sys.getrefcount(a)
    clmem = cl.from_host(ctx, a, copy=False)
    # event = PyEvent()
    # def set_event(mem):
    #     event.set()
    # clmem.add_destructor_callback(set_event)
    # wrapping without copy must add exactly one reference to `a`
    self.assertEqual(refcount + 1, sys.getrefcount(a))
    del clmem
    gc.collect()
    # self.assertTrue(event.wait(1), 'event timed out. destructor_callback not called')
    # destroying the cl object must release the host array reference
    self.assertEqual(refcount, sys.getrefcount(a))
    clmem = cl.from_host(ctx, a, copy=False)
    # the cl memory must report the same buffer metadata as a memoryview
    view_a = memoryview(a)
    self.assertEqual(clmem.format, view_a.format)
    self.assertEqual(clmem.shape, view_a.shape)
    self.assertEqual(clmem.strides, view_a.strides)
    queue = cl.Queue(ctx)
    if queue.device.host_unified_memory:
        # with unified memory, a host-side write must be visible through
        # the mapped device view (proving no copy was made)
        a[0, 0] = 100
        with clmem.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b[0, 0], 100)
    else:
        #TODO: should there be a test here?
        pass
def __call__(self, queue_or_context, *args, **kwargs):
    '''
    Call this kernel as a function.

    :param queue_or_context: a queue or context. if this is a context a
        queue is created and finish is called before return.
    :param args: positional kernel arguments.
    :param kwargs: keyword kernel arguments; may also carry the launch
        controls ``global_work_size``, ``global_work_offset`` and
        ``local_work_size``, which are stripped before argument parsing
        but still passed to ``run_kernel``.

    :return: an OpenCL event.
    '''
    if isinstance(queue_or_context, cl.Context):
        queue = cl.Queue(queue_or_context)
    else:
        queue = queue_or_context

    # names/defaults of the wrapped python function's signature (py2 attrs)
    argnames = self.func.func_code.co_varnames[:self.func.func_code.co_argcount]
    defaults = self.func.func_defaults

    # launch-control keywords are not kernel arguments; remove them from
    # the copy used for argument parsing (original kwargs kept for run_kernel)
    kwargs_ = kwargs.copy()
    kwargs_.pop('global_work_size', None)
    kwargs_.pop('global_work_offset', None)
    kwargs_.pop('local_work_size', None)

    arglist = cl.kernel.parse_args(self.func.__name__, args, kwargs_, argnames, defaults)

    # infer OpenCL types from the concrete argument values, then compile
    # (or fetch) a kernel specialized for those types
    kwarg_types = {argnames[i]:typeof(queue.context, arglist[i]) for i in range(len(argnames))}
    cl_kernel = self.compile(queue.context, **kwarg_types)
    kernel_args = self._unpack(argnames, arglist, kwarg_types)

    event = self.run_kernel(cl_kernel, queue, kernel_args, kwargs)

    #FIXME: I don't like that this breaks encapsulation
    if isinstance(event, EventRecord):
        event.set_kernel_args(kernel_args)

    if isinstance(queue_or_context, cl.Context):
        # a temporary queue was created above; drain it before returning
        queue.finish()

    return event
def main(): size = 10 a = np.random.rand(size).astype('f') b = np.random.rand(size).astype('f') ctx = cl.Context() queue = cl.Queue(ctx) cla = cl.from_host(ctx, a, copy=True) clb = cl.from_host(ctx, b, copy=True) clc = cl.empty(ctx, [size], ctype='f') prg = cl.Program( ctx, """ __kernel void add(__global const float *a, __global const float *b, __global float *c) { int gid = get_global_id(0); c[gid] = a[gid] + b[gid]; } """).build() add = prg.add add.argtypes = cl.global_memory('f'), cl.global_memory( 'f'), cl.global_memory('f') add.argnames = 'a', 'b', 'c' add.global_work_size = lambda a: a.shape add(queue, a=cla, b=clb, c=clc) with clc.map(queue) as view: print "view is a python memoryview object", view arr = np.asarray(view) print "Answer should be zero:" print(arr - (a + b)).sum()
def main(): ctx = cl.Context(device_type=cl.Device.GPU) queue = cl.Queue(ctx) host_init = np.arange(8, dtype=c_float) + 1 device_input = cl.from_host(ctx, host_init) output = ca.reduce(queue, lambda a, b: a + b, device_input) print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- " print "data:", host_init print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- " print "host sum:", host_init.sum() with output.map(queue) as view: print "device sum:", np.asarray(view).item() output = ca.reduce(queue, lambda a, b: a * b, device_input, initial=1.0) print "host product:", host_init.prod() with output.map(queue) as view: print "device product:", np.asarray(view).item()
def initialize():
    """Compile the sine kernel once, then fill the shared GL/CL vertex buffer."""
    global generate_sin, coords_dev, n_vertices
    context = cl.gl.context()
    # lazy one-time build; the compiled kernel is cached in a module global
    if generate_sin is None:
        program = cl.Program(context, generate_sin_source).build()
        generate_sin = program.generate_sin
        generate_sin.argnames = 'a',
        generate_sin.argtypes = cl.global_memory(cl.cl_float2),
        generate_sin.global_work_size = lambda a: a.shape
    coords_dev = cl.gl.empty_gl(context, [n_vertices], ctype=cl.cl_float2)
    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)
    queue = cl.Queue(context)
    # CL must acquire the GL-shared buffer before writing to it
    with cl.gl.acquire(queue, coords_dev):
        generate_sin(queue, coords_dev)
    glEnableClientState(GL_VERTEX_ARRAY)
gid = clrt.get_global_id(0) n = clrt.get_global_size(0) r = c_float(gid) / c_float(n) # sin wave with 8 peaks y = r * c_float(16.0 * 3.1415) # x is a range from -1 to 1 a[gid].x = r * 2.0 - 1.0 # y is sin wave a[gid].y = clrt.native_sin(y) queue = cl.Queue(ctx) a = cl.empty(ctx, [200], cly.float2) event = generate_sin(queue, a) event.wait() print a with a.map(queue) as view: print np.asarray(view) #=============================================================================== # Plotting #=============================================================================== from maka import roo
def __init__(self, *args, **kwargs):
    # A context that owns a default command queue: forward construction
    # to cl.Context, then create a queue bound to this context.
    cl.Context.__init__(self, *args, **kwargs)
    self._queue = cl.Queue(self)