Ejemplo n.º 1
0
def typeof(ctx, obj):
    """Return the kernel argument type descriptor for the host value ``obj``.

    The checks run in a fixed order; the first one that matches wins:

    * ``cl.MemoryObject``  -> ``cl.global_memory`` built from its ctype/shape
    * ``cl.local_memory`` instances and plain functions -> returned unchanged
    * ``int`` / ``float``  -> ``ctypes.c_int`` / ``ctypes.c_float``
    * ``ctypes.Structure`` -> ``cl.constant_memory`` of the structure's class
    * anything ``memoryview`` accepts -> ``cl.global_memory`` from the view
    * everything else      -> ``type(obj)``
    """
    if isinstance(obj, cl.MemoryObject):
        return cl.global_memory(obj.ctype,
                                ndim=len(obj.shape),
                                shape=obj.shape,
                                context=ctx)

    if isinstance(obj, cl.local_memory) or isfunction(obj):
        return obj

    # Python scalars map directly onto their ctypes counterparts.
    for host_type, device_type in ((int, ctypes.c_int),
                                   (float, ctypes.c_float)):
        if isinstance(obj, host_type):
            return device_type

    if isinstance(obj, ctypes.Structure):
        return cl.constant_memory(type(obj), 0, (), context=ctx)

    try:
        buf = memoryview(obj)
        return cl.global_memory(buf.format,
                                ndim=len(buf.shape),
                                shape=buf.shape,
                                context=ctx)
    except TypeError:
        # A TypeError raised while building the descriptor: fall back to
        # the plain Python type.
        return type(obj)
Ejemplo n.º 2
0
def setslice(context, arr, value):
    """Assign ``value`` (broadcast to ``arr.shape``) into ``arr`` on the device.

    ``value`` is converted with ``context.asarray`` unless it is already a
    ``cl.DeviceMemoryView``.  The result of invoking the compiled kernel on
    ``arr.queue`` is returned.
    """
    source = value
    if not isinstance(source, cl.DeviceMemoryView):
        source = context.asarray(source)

    # The two arrays may live on different queues: make the destination
    # queue wait for a marker enqueued on the source queue.
    if source.queue != arr.queue:
        arr.queue.enqueue_wait_for_events(source.queue.marker())

    source = cl.broadcast(source, arr.shape)

    compiled = setslice_kernel.compile(
        context,
        arr=cl.global_memory(arr.format, flat=True),
        value=cl.global_memory(source.format, flat=True),
        cly_meta='setslice')

    return compiled(arr.queue,
                    arr, arr.array_info,
                    source, source.array_info)
Ejemplo n.º 3
0
    def create_grid(cls, nx=500, ny=500):
        """Create a ``Grid`` carrying a built ``lp2dstep`` kernel and a queue.

        A ``CLArrayContext`` is created for ``cls.DEVICE_TYPE``.  The kernel
        source has ``ny`` and the grid-spacing coefficients baked in as
        literals.  The returned grid has ``prg``, ``lp2dstep`` and ``queue``
        attributes set.
        """
        array_ctx = CLArrayContext(device_type=cls.DEVICE_TYPE)
        grid = Grid(array_ctx, nx, ny)

        dx_sq = grid.dx ** 2
        dy_sq = grid.dy ** 2
        inv_denominator = 0.5 / (dx_sq + dy_sq)

        # ``%%`` is a literal modulo in the generated source; the other
        # placeholders are filled in below.
        kernel_template = """
        __kernel void lp2dstep( __global float *u, const uint stidx )
        {          
            int i = get_global_id(0) + 1;
            int ny = %d;
        
            for ( int j = 1 + ( ( i + stidx ) %% 2 ); j<( %d-1 ); j+=2 ) {
                u[ny*j + i] = ((u[ny*(j-1) + i] + u[ny*(j+1) + i])*%g +
                                     (u[ny*j + i-1] + u[ny*j + i + 1])*%g)*%g;
            }
        }"""
        substitutions = (ny, ny, dy_sq, dx_sq, inv_denominator)

        grid.prg = cl.Program(array_ctx, kernel_template % substitutions)
        grid.prg.build()

        grid.lp2dstep = grid.prg.lp2dstep
        step_kernel = grid.lp2dstep
        step_kernel.argnames = ('u', 'stidx')
        step_kernel.argtypes = (cl.global_memory(ctype='f'), cl.cl_uint)
        # One work-item per value of ``i``; the kernel adds 1 to the id.
        step_kernel.global_work_size = [nx - 2]

        grid.queue = cl.Queue(array_ctx)

        return grid
Ejemplo n.º 4
0
    def test_call(self):
        """Call a kernel with an explicit and with a computed work size."""
        reference = np.zeros([10], dtype=[('x', np.float32), ('y', np.float32)])
        reference['x'] = np.arange(10)
        reference['y'] = np.sin(reference['x'] / 10)

        program = Program(ctx, source=source)
        program.build()

        generate_sin = program.kernel('generate_sin')
        generate_sin.argtypes = [global_memory(), ctypes.c_float]

        buf = empty(ctx, [10], ctype=cl.cl_float2)
        queue = Queue(ctx, ctx.devices[0])

        def verify_buffer():
            # Column 0 must equal x exactly, column 1 must be close to y.
            with buf.map(queue) as host:
                columns = np.asarray(host)
                self.assertTrue(np.all(reference['x'] == columns[:, 0]))
                self.assertTrue(np.allclose(reference['y'], columns[:, 1]))

        # No global work size has been supplied yet, so the call is
        # expected to raise TypeError.
        with self.assertRaises(TypeError):
            generate_sin(queue, buf, 1.0)

        generate_sin(queue, buf, 1.0, global_work_size=[buf.size])
        verify_buffer()

        # Let the kernel derive the work size from its own arguments.
        generate_sin.global_work_size = lambda a, scale: [a.size]
        generate_sin(queue, buf, 1.0)
        verify_buffer()
Ejemplo n.º 5
0
    def __call__(self, context, x, y, out=None, queue=None):
        """Apply ``self.device_func`` element-wise to ``x`` and ``y``.

        Operands that are not ``cl.DeviceMemoryView`` instances are converted
        with ``context.asarray``; both are broadcast to a common shape.  When
        ``out`` is omitted a new array with ``x``'s format is allocated.  The
        output is returned viewed as a ``CLArray``.
        """
        if queue is None:
            # Use the first operand that already carries a queue, otherwise
            # the context's queue.
            for operand in (x, y):
                if hasattr(operand, 'queue'):
                    queue = operand.queue
                    break
            else:
                queue = context.queue

        x, y = [operand if isinstance(operand, cl.DeviceMemoryView)
                else context.asarray(operand)
                for operand in (x, y)]

        # Make ``queue`` wait for markers enqueued on any other queue the
        # operands belong to.
        for operand in (y, x):
            if operand.queue != queue:
                queue.enqueue_wait_for_events(operand.queue.marker())

        result_shape = broadcast_shape(x.shape, y.shape)
        lhs = cl.broadcast(x, result_shape)
        rhs = cl.broadcast(y, result_shape)

        if out is None:
            out = context.empty(shape=result_shape, ctype=x.format,
                                queue=queue)

        compiled = ufunc_kernel.compile(
            context,
            function=self.device_func,
            a=cl.global_memory(lhs.format, flat=True),
            b=cl.global_memory(rhs.format, flat=True),
            out=cl.global_memory(out.format, flat=True),
            cly_meta=self.device_func.func_name)

        compiled(queue,
                 lhs, lhs.array_info,
                 rhs, rhs.array_info,
                 out, out.array_info)

        result = CLArray._view_as_this(out)
        result.__array_init__(context, queue)
        return result
Ejemplo n.º 6
0
    def __call__(self, context, x, y, out=None, queue=None):
        """Run the binary ``self.device_func`` over broadcast ``x`` and ``y``.

        Host operands are uploaded through ``context.asarray``.  ``out`` is
        allocated from ``context`` (using ``x.format``) when not supplied, and
        is returned re-viewed as a ``CLArray``.
        """
        if queue is None:
            # Queue lookup order: x, then y, then the context.
            if hasattr(x, 'queue'):
                queue = x.queue
            else:
                queue = y.queue if hasattr(y, 'queue') else context.queue

        def as_device(operand):
            if isinstance(operand, cl.DeviceMemoryView):
                return operand
            return context.asarray(operand)

        x = as_device(x)
        y = as_device(y)

        # Operands on a different queue: wait on a marker from that queue.
        foreign_queues = [operand.queue for operand in (y, x)
                          if operand.queue != queue]
        for other in foreign_queues:
            queue.enqueue_wait_for_events(other.marker())

        common_shape = broadcast_shape(x.shape, y.shape)
        a = cl.broadcast(x, common_shape)
        b = cl.broadcast(y, common_shape)

        if out is None:
            out = context.empty(shape=common_shape, ctype=x.format, queue=queue)

        kernel_types = dict(
            a=cl.global_memory(a.format, flat=True),
            b=cl.global_memory(b.format, flat=True),
            out=cl.global_memory(out.format, flat=True))
        kernel = ufunc_kernel.compile(context,
                                      function=self.device_func,
                                      cly_meta=self.device_func.func_name,
                                      **kernel_types)

        kernel(queue, a, a.array_info, b, b.array_info, out, out.array_info)

        wrapped = CLArray._view_as_this(out)
        wrapped.__array_init__(context, queue)
        return wrapped
Ejemplo n.º 7
0
    def reduce(self, context, x, out=None, initial=0.0, queue=None):
        """Reduce ``x`` with ``self.device_func`` and return the result.

        :param context: array context passed to the returned ``CLArray``; its
            ``queue`` is used when neither ``queue`` nor ``x`` provides one.
        :param x: a ``cl.DeviceMemoryView``, or host data that
            ``cl.from_host`` can upload.
        :param out: optional device buffer receiving the result; a
            one-element buffer with ``x``'s format is allocated when omitted.
        :param initial: currently unused by this method.
        :param queue: queue to run on; defaults to ``x.queue`` when ``x`` has
            one, otherwise ``context.queue``.
        :returns: ``out`` viewed as a ``CLArray``.
        """
        if queue is None:
            # Host inputs (lists, numpy arrays, ...) have no ``queue``
            # attribute; fall back to the context's queue instead of failing
            # before the host-to-device conversion below can run.
            queue = x.queue if hasattr(x, 'queue') else context.queue

        if not isinstance(x, cl.DeviceMemoryView):
            x = cl.from_host(queue.context, x)

        size = x.size
        # Local scratch buffer with one slot per input element.
        shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

        # Largest power of two not exceeding ``size // 2``, clamped to the
        # range [1, 512].
        half = size // 2
        group_size = 1
        while group_size < 512 and group_size * 2 <= half:
            group_size *= 2

        if out is None:
            out = cl.empty(queue.context, [1], x.format)

        kernel = reduce_kernel.compile(queue.context,
                                       function=self.device_func,
                                       output=cl.global_memory(out.ctype,
                                                               flat=True),
                                       array=cl.global_memory(x.ctype,
                                                              flat=True),
                                       shared=shared,
                                       group_size=cl.cl_uint,
                                       cly_meta=self.device_func.func_name)

        # Never ask for more work-items than the device allows for this
        # kernel.
        group_size = min(kernel.work_group_size(queue.device), group_size)

        kernel(queue, out, out.array_info, x, x.array_info, shared,
               shared.local_info, group_size)

        array = CLArray._view_as_this(out)
        array.__array_init__(context, queue)
        return array
Ejemplo n.º 8
0
    def reduce(self, context, x, out=None, initial=0.0, queue=None):
        """Collapse ``x`` to one element using ``self.device_func``.

        :param context: array context given to the returned ``CLArray``; also
            supplies the queue when ``queue`` is ``None`` and ``x`` has none.
        :param x: device memory view, or host data accepted by
            ``cl.from_host``.
        :param out: optional result buffer; when ``None`` a one-element
            buffer in ``x``'s format is created.
        :param initial: not used by this method at present.
        :param queue: command queue; defaults to ``x.queue`` if present,
            else ``context.queue``.
        :returns: ``out`` re-viewed as a ``CLArray``.
        """
        if queue is None:
            # ``x`` may still be host data at this point and then has no
            # ``queue`` attribute; use the context's queue in that case
            # rather than raising AttributeError.
            if hasattr(x, 'queue'):
                queue = x.queue
            else:
                queue = context.queue

        if not isinstance(x, cl.DeviceMemoryView):
            x = cl.from_host(queue.context, x)

        size = x.size
        # Local scratch buffer sized to the whole input.
        shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

        # Round ``size // 2`` down to a power of two, at least 1 and at
        # most 512.
        group_size = size // 2
        for item in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
            if group_size < item:
                group_size = item // 2
                break
        else:
            group_size = 512

        if out is None:
            out = cl.empty(queue.context, [1], x.format)

        kernel = reduce_kernel.compile(queue.context,
                                       function=self.device_func,
                                       output=cl.global_memory(out.ctype, flat=True),
                                       array=cl.global_memory(x.ctype, flat=True),
                                       shared=shared,
                                       group_size=cl.cl_uint,
                                       cly_meta=self.device_func.func_name)

        # Respect the per-kernel work-group limit reported for the device.
        max_wgsize = kernel.work_group_size(queue.device)
        group_size = min(max_wgsize, group_size)

        kernel(queue, out, out.array_info, x, x.array_info, shared, shared.local_info, group_size)

        array = CLArray._view_as_this(out)
        array.__array_init__(context, queue)
        return array
Ejemplo n.º 9
0
def main():

    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')

    prg = cl.Program(
        ctx, """
        __kernel void add(__global const float *a,
        __global const float *b, __global float *c)
        {
          int gid = get_global_id(0);
          c[gid] = a[gid] + b[gid];
        }
        """).build()

    add = prg.add
    add.argtypes = cl.global_memory('f'), cl.global_memory(
        'f'), cl.global_memory('f')
    add.argnames = 'a', 'b', 'c'
    add.global_work_size = lambda a: a.shape

    add(queue, a=cla, b=clb, c=clc)

    with clc.map(queue) as view:
        print "view is a python memoryview object", view

        arr = np.asarray(view)

        print "Answer should be zero:"
        print(arr - (a + b)).sum()
Ejemplo n.º 10
0
def typeof(ctx, obj):
    """Map a host-side value to the type used for it as a kernel argument.

    Device memory objects and anything supporting ``memoryview`` become
    ``cl.global_memory`` descriptors; ``cl.local_memory`` instances and
    functions are returned as they are; ``int``/``float`` become
    ``ctypes.c_int``/``ctypes.c_float``; ``ctypes.Structure`` instances
    become ``cl.constant_memory``; any other object yields ``type(obj)``.
    """
    def global_descriptor(fmt, shape):
        return cl.global_memory(fmt, ndim=len(shape), shape=shape, context=ctx)

    if isinstance(obj, cl.MemoryObject):
        return global_descriptor(obj.ctype, obj.shape)
    if isinstance(obj, cl.local_memory):
        return obj
    if isfunction(obj):
        return obj

    if isinstance(obj, int):
        return ctypes.c_int
    if isinstance(obj, float):
        return ctypes.c_float
    if isinstance(obj, ctypes.Structure):
        return cl.constant_memory(type(obj), 0, (), context=ctx)

    try:
        exported = memoryview(obj)
        return global_descriptor(exported.format, exported.shape)
    except TypeError:
        # TypeError while building the descriptor: use the Python type.
        return type(obj)
Ejemplo n.º 11
0
def main():

    ctx = cl.Context(device_type=cl.Device.GPU)

    ret = cl.empty(ctx, [16], "l")

    queue = cl.Queue(ctx)

    print setslice.compile(ctx, a=cl.global_memory("l"), value=c_int, source_only=True)

    #    print setslice(queue, ret[::2], c_int(6))
    #    print setslice(queue, ret[1::2], c_int(5))

    with ret.map(queue) as foo:
        print np.asarray(foo)
Ejemplo n.º 12
0
def main():
    
    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')
    
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    
    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')
    
    prg = cl.Program(ctx, """
        __kernel void add(__global const float *a,
        __global const float *b, __global float *c)
        {
          int gid = get_global_id(0);
          c[gid] = a[gid] + b[gid];
        }
        """).build()
    
    add = prg.add
    add.argtypes = cl.global_memory('f'), cl.global_memory('f'), cl.global_memory('f')
    add.argnames = 'a', 'b', 'c'
    add.global_work_size = lambda a: a.shape
    
    add(queue, a=cla, b=clb, c=clc)
    
    with clc.map(queue) as view:
        print "view is a python memoryview object", view
        
        arr = np.asarray(view)
        
        print "Answer should be zero:"
        print (arr - (a + b)).sum()
Ejemplo n.º 13
0
def main():

    ctx = cl.Context(device_type=cl.Device.GPU)

    ret = cl.empty(ctx, [16], 'l')

    queue = cl.Queue(ctx)

    print setslice.compile(ctx,
                           a=cl.global_memory('l'),
                           value=c_int,
                           source_only=True)

    #    print setslice(queue, ret[::2], c_int(6))
    #    print setslice(queue, ret[1::2], c_int(5))

    with ret.map(queue) as foo:
        print np.asarray(foo)
Ejemplo n.º 14
0
    def test_set_args(self):
        """Bind kernel arguments positionally, by name and through defaults."""
        program = Program(ctx, source=source)
        program.build()

        generate_sin = program.kernel('generate_sin')
        generate_sin.argtypes = [global_memory(), ctypes.c_float]

        buf = empty(ctx, [10], ctype=cl.cl_float2)
        queue = Queue(ctx, ctx.devices[0])

        def launch():
            queue.enqueue_nd_range_kernel(generate_sin, 1,
                                          global_work_size=[buf.size])

        # Positional arguments.
        generate_sin.set_args(buf, 1.0)
        launch()

        reference = np.zeros([10], dtype=[('x', np.float32), ('y', np.float32)])
        reference['x'] = np.arange(10)
        reference['y'] = np.sin(reference['x'] / 10)

        def verify():
            with buf.map(queue) as host:
                columns = np.asarray(host)
                self.assertTrue(np.all(reference['x'] == columns[:, 0]))
                self.assertTrue(np.allclose(reference['y'], columns[:, 1]))

        verify()

        # Keyword arguments, available once argnames are declared.
        generate_sin.argnames = ['a', 'scale']
        generate_sin.set_args(a=buf, scale=1.0)
        launch()
        verify()

        # ``scale`` is missing and no default exists yet.
        with self.assertRaises(TypeError):
            generate_sin.set_args(a=buf)

        # With a default registered the same call is accepted.
        generate_sin.__defaults__ = [1.0]
        generate_sin.set_args(a=buf)
        launch()
        verify()
Ejemplo n.º 15
0
def initialize():
    """Build the kernel on first use, fill ``coords_dev`` and set GL state."""
    global generate_sin, coords_dev, n_vertices

    gl_ctx = cl.gl.context()

    if generate_sin is None:
        # First call: compile the program and describe the kernel signature.
        generate_sin = cl.Program(gl_ctx, generate_sin_source).build().generate_sin
        generate_sin.argnames = ('a',)
        generate_sin.argtypes = (cl.global_memory(cl.cl_float2),)
        generate_sin.global_work_size = lambda a: a.shape

    coords_dev = cl.gl.empty_gl(gl_ctx, [n_vertices], ctype=cl.cl_float2)

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    gl_queue = cl.Queue(gl_ctx)

    # Run the kernel while the GL buffer is acquired.
    with cl.gl.acquire(gl_queue, coords_dev):
        generate_sin(gl_queue, coords_dev)

    glEnableClientState(GL_VERTEX_ARRAY)
Ejemplo n.º 16
0
def initialize():
    """Prepare the shared kernel and vertex buffer, then enable vertex arrays.

    Rebinds the module globals ``generate_sin`` (only when it is ``None``)
    and ``coords_dev``; reads ``n_vertices``.
    """
    global generate_sin, coords_dev, n_vertices

    context = cl.gl.context()

    if generate_sin is None:
        built = cl.Program(context, generate_sin_source).build()
        generate_sin = built.generate_sin

        # Single-argument kernel taking a float2 buffer named ``a``.
        generate_sin.argnames = ('a',)
        generate_sin.argtypes = (cl.global_memory(cl.cl_float2),)
        generate_sin.global_work_size = lambda a: a.shape

    coords_dev = cl.gl.empty_gl(context, [n_vertices], ctype=cl.cl_float2)

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    command_queue = cl.Queue(context)
    with cl.gl.acquire(command_queue, coords_dev):
        generate_sin(command_queue, coords_dev)

    glEnableClientState(GL_VERTEX_ARRAY)
Ejemplo n.º 17
0
import clyther as cly

import opencl as cl

import clyther.runtime as clrt


# Kernel whose global work size is the shape of its argument ``a``.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def foo(a):
    # Global ids of this work-item along dimensions 0 and 1.
    x = clrt.get_global_id(0)
    y = clrt.get_global_id(1)

    # Store a value derived from the element's own coordinates.
    a[x, y] = x + y * 100


# Create a context restricted to CPU devices and a queue on it.
ctx = cl.Context(device_type=cl.Device.CPU)

queue = cl.Queue(ctx)

# 4x4 device array with format code 'f'.
a = cl.empty(ctx, [4, 4], 'f')

# Run the kernel on ``a``.
foo(queue, a)

# Print what ``_compile`` returns with ``source_only=True``
# (presumably the generated OpenCL source).
print foo._compile(ctx, a=cl.global_memory('f'), source_only=True)

import numpy as np
# Map the device array to host memory and print it as a NumPy array.
with a.map(queue) as view:
    print np.asarray(view)
Ejemplo n.º 18
0
import clyther as cly

import opencl as cl

import clyther.runtime as clrt

# The decorator derives the global work size from the shape of ``a``.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def foo(a):
    # Indices of the current work-item in dimensions 0 and 1.
    x = clrt.get_global_id(0)
    y = clrt.get_global_id(1)
   
    # Each element receives a value computed from its own indices.
    a[x, y] = x + y * 100
     
# CPU-only context and a queue bound to it.
ctx = cl.Context(device_type=cl.Device.CPU)

queue = cl.Queue(ctx)

# Allocate a 4x4 device array with format code 'f'.
a = cl.empty(ctx, [4, 4], 'f')

# Execute the kernel over ``a``.
foo(queue, a)

# Print the result of ``_compile`` called with ``source_only=True``
# (presumably the generated OpenCL source).
print foo._compile(ctx, a=cl.global_memory('f'), source_only=True)

import numpy as np
# Map the device array into host memory and print it via NumPy.
with a.map(queue) as view:
    print np.asarray(view)
Ejemplo n.º 19
0
# A context always has to be created first; it is passed to ``compile`` below.
ctx = cl.Context()


# One-dimensional launch with one work-item per element of ``a``.
@cly.global_work_size(lambda a: [a.size])
@cly.kernel
def generate_sin(a):

    # Index of this work-item and the total number of work-items.
    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)

    # Relative position of this work-item: gid / n.
    r = cl.cl_float(gid) / cl.cl_float(n)

    # Sine wave with 8 peaks: the angle spans 16 * 3.1415 (about 16*pi).
    y = r * cl.cl_float(16.0 * 3.1415)

    # x is r rescaled by r * 2 - 1, i.e. starting at -1.
    a[gid].x = r * 2.0 - 1.0

    # y is the sine of the angle computed above.
    a[gid].y = clrt.native_sin(y)


#===============================================================================
# Compile to OpenCL code
#===============================================================================

# Print what ``compile`` returns for a float2 global buffer with
# ``source_only=True`` (presumably the generated OpenCL source).
print generate_sin.compile(ctx,
                           a=cl.global_memory(cl.cl_float2),
                           source_only=True)
Ejemplo n.º 20
0
# Map the device array to host memory and print it as a NumPy array.
with a.map(queue) as view:
    print np.asarray(view)

#===============================================================================
# From here I can keep boiling down until I get to the bare OpenCL C framework
#===============================================================================

#===============================================================================
# Plotting
#===============================================================================
from maka import roo

# ``roo.start()`` supplies the context used for the rest of the script.
ctx = roo.start()
queue = cl.Queue(ctx)

# 200-element float2 buffer allocated through ``cl.gl.empty_gl``.
a = cl.gl.empty_gl(ctx, [200], cly.float2)

# Launch the kernel and block until the returned event completes.
event = generate_sin(queue, a)
event.wait()

roo.plot(a)

roo.show()

#===============================================================================
# Compile to OpenCL code
#===============================================================================

# NOTE(review): the buffer above is float2 but the kernel is compiled here
# for format 'f' -- confirm this mismatch is intended.
print generate_sin.compile(ctx, a=cl.global_memory('f'), source_only=True)