Ejemplo n.º 1
0
 def __new__(subtype, shape, dtype=np.float32, buffer=None, offset=0, strides=None, order=None, info=None):
     """Create the array and, on the OpenCL backend, a matching OpenCL buffer.

     ``shape``, ``dtype``, ``buffer``, ``offset``, ``strides`` and ``order``
     are forwarded positionally to ``np.ndarray.__new__``. ``info`` is
     accepted but not used in this method.

     When the module-level ``backend`` is ``"ocl"``, ``"opencl"`` or
     ``"OCL"``, the new object additionally receives:

     * ``ocl_buf`` -- a buffer created in the module-level ``context`` with
       the same number of bytes as the array;
     * ``host_dirty`` and ``ocl_dirty`` -- both initialised to ``False``.

     ``register`` is always initialised to ``None``.
     """
     obj = np.ndarray.__new__(subtype, shape, dtype, buffer, offset, strides, order)
     if backend in {"ocl", "opencl", "OCL"}:
         # Use nbytes (a plain Python int) for the allocation size.
         # np.prod(shape) * itemsize yields a NumPy scalar, and a *float*
         # for shape == () because the empty product is 1.0.
         obj.ocl_buf = cl.clCreateBuffer(context, obj.nbytes)
         obj.host_dirty = False
         obj.ocl_dirty = False
     obj.register = None
     return obj
Ejemplo n.º 2
0
    def __call__(self, im):
        """Run ``self.kernel`` on ``im.data`` and return the result as an ``Array``.

        ``im.data`` is copied into an OpenCL buffer bound to kernel argument
        0; a second buffer with the same byte size as the result array is
        bound to argument 1. The kernel is enqueued on ``self.queue`` with
        ``self.global_size``, and the second buffer is then read back into
        the result array. Every step waits on its event before proceeding.
        """
        result = zeros_like(im.data)

        # The upload is requested non-blocking, so wait on its event
        # explicitly before handing the buffer to the kernel.
        src_buf, pending = buffer_from_ndarray(self.queue, im.data, blocking=False)
        pending.wait()
        self.kernel.setarg(0, src_buf, sizeof(cl_mem))

        dst_buf = clCreateBuffer(self.context, result.nbytes)
        self.kernel.setarg(1, dst_buf, sizeof(cl_mem))

        clEnqueueNDRangeKernel(self.queue, self.kernel, self.global_size).wait()

        # Copy the kernel output back into the host-side result array.
        _, pending = buffer_to_ndarray(self.queue, dst_buf, result)
        pending.wait()

        # Drop the local references to both buffers before returning.
        del src_buf, dst_buf
        return Array(unique_name(), result)
Ejemplo n.º 3
0
 def __new__(subtype,
             shape,
             dtype=np.float32,
             buffer=None,
             offset=0,
             strides=None,
             order=None,
             info=None):
     """Build the ndarray subclass instance, plus an OpenCL buffer if needed.

     All parameters except ``info`` are passed positionally to
     ``np.ndarray.__new__``; ``info`` is accepted but unused here.

     If the module-level ``backend`` is ``"ocl"``, ``"opencl"`` or ``"OCL"``,
     the instance also gets an ``ocl_buf`` allocated in the module-level
     ``context`` (sized to the array's byte count) and the flags
     ``host_dirty`` / ``ocl_dirty``, both starting as ``False``.
     ``register`` is set to ``None`` in every case.
     """
     obj = np.ndarray.__new__(subtype, shape, dtype, buffer, offset,
                              strides, order)
     if backend in {"ocl", "opencl", "OCL"}:
         # obj.nbytes equals prod(shape) * itemsize but is always a Python
         # int; np.prod(()) is the float 1.0, which would make the requested
         # size a float for zero-dimensional arrays.
         obj.ocl_buf = cl.clCreateBuffer(context, obj.nbytes)
         obj.host_dirty = False
         obj.ocl_dirty = False
     obj.register = None
     return obj
Ejemplo n.º 4
0
    def __call__(self, im, num_powers, border):
        """Run ``self.kernel`` once, then ``self.kernel2`` once per power.

        The result is a float32 array of shape ``[num_powers] + im.shape``.
        ``im.data`` is uploaded and bound to argument 0 of ``self.kernel``;
        an output buffer sized to the result is bound to argument 1. After
        that kernel finishes, the same output buffer is bound to argument 0
        of ``self.kernel2``, which is enqueued ``num_powers`` times with the
        loop index as argument 1. Finally the output buffer is read back and
        wrapped in an ``Array``.

        ``border`` is accepted but not used in this method.
        """
        out_shape = [num_powers]
        out_shape.extend(im.shape)
        result = np.empty(out_shape, dtype=np.float32)

        # Non-blocking upload: wait on the event before using the buffer.
        src_buf, pending = buffer_from_ndarray(self.queue, im.data, blocking=False)
        pending.wait()
        self.kernel.setarg(0, src_buf, sizeof(cl_mem))

        dst_buf = clCreateBuffer(self.queue.context, result.nbytes)
        self.kernel.setarg(1, dst_buf, sizeof(cl_mem))

        clEnqueueNDRangeKernel(self.queue, self.kernel, self.global_size).wait()

        # The second kernel works on the output buffer of the first one.
        self.kernel2.setarg(0, dst_buf, sizeof(cl_mem))
        for power in range(num_powers):
            self.kernel2.setarg(1, power, sizeof(cl_int))
            # Wait for each launch to finish before enqueuing the next.
            clEnqueueNDRangeKernel(self.queue, self.kernel2, self.global_size).wait()

        _, pending = buffer_to_ndarray(self.queue, dst_buf, result)
        pending.wait()
        return Array(unique_name(), result)
Ejemplo n.º 5
0
 def allocate(self, length, dtype, name):
     """Create an OpenCL buffer for ``length`` items of ``dtype``.

     The buffer is created in ``self.queue.context`` with
     ``length * dtype.itemsize`` bytes and is tagged with a ``ptr_type``
     attribute: an instance of the ``np.ctypeslib.ndpointer`` type for
     ``dtype`` whose ``_global`` attribute is set to ``True``.

     Returns a tuple ``(buffer, SymbolRef(name, buffer))``.
     """
     ctx = self.queue.context
     size_in_bytes = length * dtype.itemsize
     buf = cl.clCreateBuffer(ctx, size_in_bytes)

     pointer = np.ctypeslib.ndpointer(dtype)()
     buf.ptr_type = pointer
     # NOTE(review): presumably marks the pointer as referring to OpenCL
     # global memory for later code generation -- confirm against consumers.
     pointer._global = True

     return buf, SymbolRef(name, buf)