def __new__(subtype, shape, dtype=np.float32, buffer=None, offset=0,
            strides=None, order=None, info=None):
    """Create the array and, on an OpenCL backend, its device buffer.

    The positional arguments are forwarded unchanged to
    ``np.ndarray.__new__``.

    Args:
        subtype: Class to instantiate (the ndarray subclass).
        shape: Shape of the new array.
        dtype: Element type; defaults to ``np.float32``.
        buffer: Optional object exposing the buffer interface.
        offset: Offset of the array data inside ``buffer``.
        strides: Optional strides of the data in memory.
        order: Memory layout order passed through to NumPy.
        info: Accepted for interface compatibility; not used here.

    Returns:
        The new instance.  ``register`` is always initialised to ``None``.
        When the module-level ``backend`` names OpenCL, the instance also
        carries ``ocl_buf`` plus ``host_dirty`` / ``ocl_dirty`` flags,
        both starting out ``False``.
    """
    obj = np.ndarray.__new__(subtype, shape, dtype, buffer, offset,
                             strides, order)
    if backend in {"ocl", "opencl", "OCL"}:
        # Size the device buffer from ``nbytes`` instead of
        # ``np.prod(shape) * itemsize``: ``np.prod(())`` is the float 1.0
        # (so 0-d arrays produced a float byte count) and the product is
        # computed in NumPy's default integer type, whereas ``nbytes`` is
        # always an exact Python int of the same value.
        obj.ocl_buf = cl.clCreateBuffer(context, obj.nbytes)
        # NOTE(review): presumably these flags track which copy (host or
        # device) holds newer data -- confirm against the code that
        # reads them.
        obj.host_dirty = False
        obj.ocl_dirty = False
    obj.register = None
    return obj
def __call__(self, im):
    """Run ``self.kernel`` over ``im.data`` and return the wrapped result.

    Args:
        im: Object whose ``data`` attribute holds the input array.

    Returns:
        An ``Array`` built from a fresh ``unique_name()`` and the host
        array that the kernel output was copied into.
    """
    # Host-side destination, shaped like the input.
    result = zeros_like(im.data)
    queue = self.queue

    # Upload the input; the transfer is requested non-blocking, so wait
    # on its event before the buffer is used.
    src_buf, upload_done = buffer_from_ndarray(queue, im.data, blocking=False)
    upload_done.wait()
    self.kernel.setarg(0, src_buf, sizeof(cl_mem))

    # Device-side destination with the same byte size as ``result``.
    dst_buf = clCreateBuffer(self.context, result.nbytes)
    self.kernel.setarg(1, dst_buf, sizeof(cl_mem))

    clEnqueueNDRangeKernel(queue, self.kernel, self.global_size).wait()

    # Copy the device output back into ``result``.
    _, download_done = buffer_to_ndarray(queue, dst_buf, result)
    download_done.wait()

    # Drop the local references to both device buffers before wrapping.
    del src_buf, dst_buf
    return Array(unique_name(), result)
def __call__(self, im, num_powers, border):
    """Run ``self.kernel`` once, then ``self.kernel2`` once per power.

    Args:
        im: Input object; ``im.shape`` and ``im.data`` are read.
        num_powers: Length of the leading output axis and the number of
            ``self.kernel2`` launches.
        border: Not used by this method.

    Returns:
        An ``Array`` built from a fresh ``unique_name()`` and a float32
        host array of shape ``(num_powers,) + im.shape``.
    """
    queue = self.queue
    result_shape = (num_powers,) + tuple(im.shape)
    result = np.empty(result_shape, dtype=np.float32)

    # Upload the input; the transfer is requested non-blocking, so wait
    # on its event before the buffer is used.
    src_buf, upload_done = buffer_from_ndarray(queue, im.data, blocking=False)
    upload_done.wait()
    self.kernel.setarg(0, src_buf, sizeof(cl_mem))

    # Device-side destination with the same byte size as ``result``.
    dst_buf = clCreateBuffer(queue.context, result.nbytes)
    self.kernel.setarg(1, dst_buf, sizeof(cl_mem))
    clEnqueueNDRangeKernel(queue, self.kernel, self.global_size).wait()

    # The second kernel works on the same device buffer; only its second
    # argument (the power index) changes between launches.
    self.kernel2.setarg(0, dst_buf, sizeof(cl_mem))
    for power_idx in range(num_powers):
        self.kernel2.setarg(1, power_idx, sizeof(cl_int))
        clEnqueueNDRangeKernel(queue, self.kernel2, self.global_size).wait()

    # Copy the device output back into ``result``.
    _, download_done = buffer_to_ndarray(queue, dst_buf, result)
    download_done.wait()
    return Array(unique_name(), result)
def allocate(self, length, dtype, name):
    """Create a device buffer for ``length`` items and a symbol for it.

    Args:
        length: Number of elements the buffer must hold.
        dtype: Element type; ``dtype.itemsize`` gives the bytes per item.
        name: Name given to the returned ``SymbolRef``.

    Returns:
        A ``(buffer, SymbolRef(name, buffer))`` pair.  The buffer carries
        a ``ptr_type`` attribute: an ``np.ctypeslib.ndpointer(dtype)``
        instance whose ``_global`` attribute is set to ``True``.
    """
    ctx = self.queue.context
    byte_count = length * dtype.itemsize
    device_buf = cl.clCreateBuffer(ctx, byte_count)

    # Attach the pointer type for this dtype and flag it as global.
    pointer_cls = np.ctypeslib.ndpointer(dtype)
    device_buf.ptr_type = pointer_cls()
    device_buf.ptr_type._global = True

    symbol = SymbolRef(name, device_buf)
    return device_buf, symbol