Example 1
def asarray(ctx, other, queue=None, copy=True):
    # Accept either an existing cl.DeviceMemoryView or any host buffer that
    # cl.from_host can wrap, then re-view the device memory as a CLArray.
    if not isinstance(other, cl.DeviceMemoryView):
        other = cl.from_host(ctx, other, copy=copy)
        
    array = CLArray._view_as_this(other)
    array.__array_init__(ctx, queue)
    
    return array
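A minimal usage sketch for this helper follows; it assumes the package is imported as `cl`, that NumPy is available as `np`, and that the returned CLArray exposes the same map() interface seen in the other examples on this page (conventions taken from these snippets, not part of the example itself):

# Hypothetical call site for asarray(); names follow the snippets on this page.
ctx = cl.Context()
queue = cl.Queue(ctx)

host = np.arange(16, dtype='f')
arr = asarray(ctx, host, queue=queue)   # uploads via cl.from_host(..., copy=True)

with arr.map(queue) as view:
    print np.asarray(view)              # same values as `host`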
Example 2
    def test_from_host_no_copy(self):
        # With copy=False the device view borrows the host buffer: the NumPy
        # array's refcount rises by one and falls back once the view is freed.

        a = np.array([[1, 2], [3, 4]])

        refcount = sys.getrefcount(a)

        clmem = cl.from_host(ctx, a, copy=False)

        #        event = PyEvent()
        #        def set_event(mem):
        #            event.set()

        #        clmem.add_destructor_callback(set_event)

        self.assertEqual(refcount + 1, sys.getrefcount(a))

        del clmem
        gc.collect()

        #        self.assertTrue(event.wait(1), 'event timed out. destructor_callback not called')

        self.assertEqual(refcount, sys.getrefcount(a))

        clmem = cl.from_host(ctx, a, copy=False)

        view_a = memoryview(a)

        self.assertEqual(clmem.format, view_a.format)
        self.assertEqual(clmem.shape, view_a.shape)
        self.assertEqual(clmem.strides, view_a.strides)

        queue = cl.Queue(ctx)

        if queue.device.host_unified_memory:
            a[0, 0] = 100
            with clmem.map(queue) as view:
                b = np.asarray(view)
                self.assertEqual(b[0, 0], 100)
        else:
            #TODO: should there be a test here?
            pass
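The zero-copy contract this test exercises can be reproduced in a few lines; the sketch below assumes the package is importable as `opencl` (these snippets only ever show the `cl` alias) and that a default context can be created with cl.Context():

# Standalone illustration of the copy=False reference-counting behaviour.
import gc
import sys

import numpy as np
import opencl as cl   # assumed import; adjust to however `cl` is bound locally

ctx = cl.Context()
a = np.arange(4, dtype='f')

before = sys.getrefcount(a)
clmem = cl.from_host(ctx, a, copy=False)   # borrows the host buffer
assert sys.getrefcount(a) == before + 1

del clmem                                  # releasing the view drops the extra reference
gc.collect()
assert sys.getrefcount(a) == before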
Example 3
def main():
    
    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')
    
    ctx = cl.Context()
    queue = cl.Queue(ctx)
    
    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')
    
    prg = cl.Program(ctx, """
        __kernel void add(__global const float *a,
        __global const float *b, __global float *c)
        {
          int gid = get_global_id(0);
          c[gid] = a[gid] + b[gid];
        }
        """).build()
    
    add = prg.add
    # Describe the kernel signature so it can be invoked with keyword
    # arguments; the global work size is taken from the shape of argument `a`.
    add.argtypes = cl.global_memory('f'), cl.global_memory('f'), cl.global_memory('f')
    add.argnames = 'a', 'b', 'c'
    add.global_work_size = lambda a: a.shape
    
    add(queue, a=cla, b=clb, c=clc)
    
    with clc.map(queue) as view:
        print "view is a python memoryview object", view
        
        arr = np.asarray(view)
        
        print "Answer should be zero:"
        print (arr - (a + b)).sum()
Example 4
    def test_broadcast_0D(self):

        with self.assertRaises(TypeError):
            cl.broadcast(None, [1])

        one = cl.from_host(ctx, c_int(1))

        a = cl.broadcast(one, [10, 10])
        # Broadcasting a 0-D value yields a (10, 10) view with zero strides:
        # every element aliases the same underlying scalar.
        self.assertEqual(a.shape, (10, 10))
        self.assertEqual(a.strides, (0, 0))

        queue = cl.Queue(ctx)
        with a.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b.shape, (10, 10))
            self.assertEqual(b.strides, (0, 0))
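As an aside, the zero strides asserted above mirror what a host-side broadcast produces; numpy.broadcast_to gives the same layout:

# Host-side counterpart of the zero-stride broadcast view (illustration only).
import numpy as np

b = np.broadcast_to(np.array(1, dtype=np.int32), (10, 10))
print b.shape    # (10, 10)
print b.strides  # (0, 0) -- every element aliases the single source value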
Example 5
    def __call__(self, x, out=None, queue=None):

        # If no queue is given, x must already be a device array that carries
        # one; host inputs therefore require an explicit queue argument.
        if queue is None:
            queue = x.queue

        if not isinstance(x, cl.DeviceMemoryView):
            x = cl.from_host(queue.context, x)

        if out is None:
            out = cl.empty(queue.context, x.shape, x.format)

        unary_ufunc_kernel(queue, self.device_func, x, out)

        array = CLArray._view_as_this(out)
        array.__array_init__(queue)
        return array
Example 6
    def reduce(self, context, x, out=None, initial=0.0, queue=None):

        if queue is None:
            queue = x.queue

        if not isinstance(x, cl.DeviceMemoryView):
            x = cl.from_host(queue.context, x)

        #output, input, shared, group_size, initial=0.0
        size = x.size
        shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

        # Choose a power-of-two work-group size: the largest power of two not
        # exceeding size // 2, capped at 512.
        group_size = size // 2
        for item in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
            if group_size < item:
                group_size = item // 2
                break
        else:
            group_size = 512

        if out is None:
            out = cl.empty(queue.context, [1], x.format)

        kernel = reduce_kernel.compile(queue.context,
                                       function=self.device_func,
                                       output=cl.global_memory(out.ctype,
                                                               flat=True),
                                       array=cl.global_memory(x.ctype,
                                                              flat=True),
                                       shared=shared,
                                       group_size=cl.cl_uint,
                                       cly_meta=self.device_func.func_name)

        max_wgsize = kernel.work_group_size(queue.device)

        group_size = min(max_wgsize, group_size)

        kernel(queue, out, out.array_info, x, x.array_info, shared,
               shared.local_info, group_size)
        #        reduce_kernel(queue, self.device_func, out, x, shared, group_size)

        array = CLArray._view_as_this(out)
        array.__array_init__(context, queue)
        return array
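The work-group-size heuristic used above is easy to check in isolation; the helper below is just that loop lifted into a plain function for illustration (pure Python, no OpenCL required; pick_group_size is not a name from the module):

# Same selection logic as in reduce(): the largest power of two not exceeding
# size // 2, capped at 512.  reduce() then clamps the result once more with
# kernel.work_group_size(queue.device).
def pick_group_size(size):
    group_size = size // 2
    for item in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
        if group_size < item:
            group_size = item // 2
            break
    else:
        group_size = 512
    return group_size

assert pick_group_size(8) == 4       # 8 // 2 = 4, already a power of two
assert pick_group_size(100) == 32    # 50 -> 32
assert pick_group_size(5000) == 512  # capped at 512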
Example 7
    def test_broadcast_2D(self):

        with self.assertRaises(TypeError):
            cl.broadcast(None, [1])

        npa = np.arange(10, dtype=c_float)
        z = np.zeros([10, 1])

        ten = cl.from_host(ctx, npa)

        a = cl.broadcast(ten, [10, 10])
        self.assertEqual(a.shape, (10, 10))
        self.assertEqual(a.strides, (0, sizeof(c_float)))

        queue = cl.Queue(ctx)
        with a.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b.shape, (10, 10))
            self.assertEqual(b.strides, (0, sizeof(c_float)))
            self.assertTrue(np.all(b == z + npa))
Example 8
def blitz(queue, func, out=None):
    '''
    Decompile `func` back to an AST, rewrite it with BlitzVisitor, and launch
    the result as an OpenCL kernel over the broadcast shape of its arguments.

    lets get blitzed!
    '''
    func_ast = decompile_func(func)

    func_globals = func.func_globals.copy()

    if func.func_closure:
        func_globals.update({
            name: cell.cell_contents
            for name, cell in zip(func.func_code.co_freevars,
                                  func.func_closure)
        })

    blitzer = BlitzVisitor(func.func_code.co_filename, func_globals)

    blitzed = ast.Expression(blitzer.visit(func_ast))

    blitzed_code = compile(blitzed, func.func_code.co_filename, 'eval')
    blitzed_func = eval(blitzed_code)

    blitz_kernel = create_n_arg_kernel(sorted(blitzer.locls.keys()))

    args = {}

    for key, var in blitzer.locls.items():
        if not isinstance(var, cl.DeviceMemoryView):
            var = cl.from_host(queue.context, var)
        args[key] = var

    shape = broadcast_shapes([var.shape for var in args.values()])

    print "shape", shape

    for key, var in args.items():
        args[key] = cl.broadcast(var, shape)

    print "out, **args", out, args
    blitz_kernel(queue, blitzed_func, out, **args)
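broadcast_shapes() is a helper from the surrounding module and is not shown here; a plausible pure-Python stand-in, assuming it follows the usual NumPy broadcasting rules, would look like this (an illustration, not the module's actual implementation):

# Hypothetical stand-in for broadcast_shapes(); NumPy-style rules assumed.
def broadcast_shapes(shapes):
    ndim = max(len(s) for s in shapes)
    result = []
    for axis in range(-1, -ndim - 1, -1):
        dim = 1
        for s in shapes:
            if len(s) >= -axis and s[axis] != 1:
                if dim != 1 and s[axis] != dim:
                    raise ValueError("shapes %r are not broadcastable" % (shapes,))
                dim = s[axis]
        result.append(dim)
    return tuple(reversed(result))

assert broadcast_shapes([(10,), (10, 1)]) == (10, 10)
assert broadcast_shapes([(3, 1), (1, 4)]) == (3, 4)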
Example 9
def main():
    
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)
    
    host_init = np.arange(8, dtype=c_float) + 1
    device_input = cl.from_host(ctx, host_init)
    
    output = ca.reduce(queue, lambda a, b: a + b, device_input)
    
    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "data:", host_init
    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "host   sum:", host_init.sum()
    
    with output.map(queue) as view:
        print "device sum:", np.asarray(view).item()

    output = ca.reduce(queue, lambda a, b: a * b, device_input, initial=1.0)
    
    print "host   product:", host_init.prod()
    
    with output.map(queue) as view:
        print "device product:", np.asarray(view).item()