def send_textures(self):
    images = []
    max_width = 0
    max_height = 0
    if self.scene.textures:
        # Map texture index -> file name (scene.textures maps name -> (index, ...)),
        # then invert that mapping so textures can be loaded in index order.
        textures = {k: v[0] for k, v in self.scene.textures.items()}
        textures = {v: k for k, v in textures.items()}
        for i in sorted(textures.keys()):
            print(i)
            v = textures[i]
            if v is None:
                continue
            print(v)
            image = self.load_image(v)
            if image.shape[0] > max_height:
                max_height = image.shape[0]
            if image.shape[1] > max_width:
                max_width = image.shape[1]
            images.append(image)
    if len(images) == 0:
        images = [np.zeros((128, 128, 3))]
        max_width = 128
        max_height = 128
    print(max_width, max_height)
    # Pad every texture to the common size and to 4 channels (RGBA).
    images = [
        np.pad(image,
               ((0, max_height - image.shape[0]),
                (0, max_width - image.shape[1]),
                (0, 4 - image.shape[2])),
               "wrap")
        for image in images
    ]
    n_images = len(images)
    images = np.concatenate(images, 0)
    img_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8)
    image = cl.Image(self.context,
                     cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                     img_format,
                     hostbuf=images.flatten(),
                     is_array=True,
                     shape=(max_width, max_height, n_images),
                     pitches=(max_width * 4, max_width * max_height * 4))
    self.textures = image
def d_create_image_cubemap(self, cubemap):
    """Create an OpenCL image from the six numpy RGBA faces of a cubemap."""
    size = cubemap[0].shape[0]
    added_array = cubemap[0]
    for i in range(1, 6):
        added_array = numpy.append(added_array, cubemap[i])
    image = cl.Image(
        self.ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT),
        (size, size, 6),
        None,
        added_array,
        True)
    return image
def LoadImage(context, fileName):
    im = Image.open(fileName)
    # Make sure the image is RGBA formatted
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    # Convert to uint8 buffer (Image.tostring() was removed from Pillow; tobytes() is the current API)
    buffer = im.tobytes()
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    clImage = cl.Image(context,
                       cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                       clImageFormat,
                       im.size,
                       None,
                       buffer)
    return clImage, im.size
def do_test(buf):
    global program, context
    queue = pyopencl.CommandQueue(context)

    # input image:
    iformat = pyopencl.ImageFormat(pyopencl.channel_order.R,
                                   pyopencl.channel_type.UNSIGNED_INT8)
    #flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
    flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
    iimage = pyopencl.Image(context, flags, iformat, shape=shape, hostbuf=buf)

    # output image:
    oformat = pyopencl.ImageFormat(pyopencl.channel_order.RGBA,
                                   pyopencl.channel_type.UNORM_INT8)
    oimage = pyopencl.Image(context, mem_flags.WRITE_ONLY, oformat, shape=shape)

    program.EXAMPLE(queue, globalWorkSize, localWorkSize, iimage, oimage)
    # in a real application, we would readback the output image here
    queue.finish()
def run_render(render, debug=False):
    debug = True
    ctx = cl.create_some_context(interactive=False)
    queue = cl.CommandQueue(ctx)

    x = render.x
    y = render.y
    outbuf_np = np.empty(int(x * y * 4)).astype(np.uint8)
    # outbuf_g = cl.Buffer(ctx, mf.WRITE_ONLY, outbuf_np.nbytes)
    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    outbuf_g = cl.Image(ctx, mf.WRITE_ONLY, fmt, shape=(x, y))

    (xwidth, ywidth) = calc_width(render.x, render.y, render.zoom)
    xscale = xwidth / render.x
    yscale = ywidth / render.y
    xmin = render.center_r - xwidth / 2
    ymin = render.center_i - ywidth / 2

    if debug:
        print("xwidth: " + str(xwidth))
        print("ywidth: " + str(ywidth))
        print("xscale: " + str(xscale))
        print("yscale: " + str(yscale))
        print("xmin: " + str(xmin))
        print("ymin: " + str(ymin))
        print("(x,y): " + str((x, y)))
        print("outbuf_g.size: " + str(outbuf_g.size))
        print("outbuf_g.int_ptr: " + str(outbuf_g.int_ptr))

    with open("fractal.cl", 'r') as f:
        prog_src = f.read()

    # do not let the GPU do too much debug logging because that lags the system
    if debug and x < 500 and y < 500:
        prog_src = "#define DEBUG 1\n" + prog_src

    prog = cl.Program(ctx, prog_src).build()

    t = time()
    prog.sum(queue, (x, y), None,
             outbuf_g,
             np.float64(xmin), np.float64(ymin),
             np.float64(xscale), np.float64(yscale))
    queue.finish()
    t = time() - t
    print('time:', t, 's')
    print(1 / t, 'Hz')

    cl.enqueue_copy(queue, outbuf_np, outbuf_g, origin=(0, 0), region=(x, y))
    queue.finish()

    img = Image.fromarray(outbuf_np.reshape((y, x, 4)), 'RGBA')
    img.save(render.output)
    print('wrote', render.output)
def init_image(ctx, ary, num_channels=None, mode="r", norm_int=False):
    if not ary.flags.c_contiguous:
        raise ValueError("array must be C-contiguous")

    dtype = ary.dtype
    if num_channels is None:
        from pyopencl.array import vec
        try:
            dtype, num_channels = vec.type_to_scalar_and_count[dtype]
        except KeyError:
            # It must be a scalar type then.
            num_channels = 1

        shape = ary.shape
        strides = ary.strides
    elif num_channels == 1:
        shape = ary.shape
        strides = ary.strides
    else:
        if ary.shape[-1] != num_channels:
            raise RuntimeError("last dimension must be equal to number of channels")

        shape = ary.shape[:-1]
        strides = ary.strides[:-1]

    if mode == "r":
        mode_flags = cl.mem_flags.READ_ONLY
    elif mode == "w":
        mode_flags = cl.mem_flags.WRITE_ONLY
    else:
        raise ValueError("invalid value '%s' for 'mode'" % mode)

    img_format = {
        1: cl.channel_order.R,
        2: cl.channel_order.RG,
        3: cl.channel_order.RGB,
        4: cl.channel_order.RGBA,
    }[num_channels]

    assert ary.strides[-1] == ary.dtype.itemsize

    if norm_int:
        channel_type = cl.DTYPE_TO_CHANNEL_TYPE_NORM[dtype]
    else:
        channel_type = cl.DTYPE_TO_CHANNEL_TYPE[dtype]

    return cl.Image(ctx, mode_flags,
                    cl.ImageFormat(img_format, channel_type),
                    shape=shape[::-1])
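# Hedged usage sketch for init_image above (not part of the original snippet): it only
# shows the shape/num_channels conventions, assuming pyopencl is imported as cl, numpy
# as np, and that the device supports images. The array is rows x cols x channels; the
# resulting cl.Image is width x height.
import numpy as np
import pyopencl as cl

ctx = cl.create_some_context(interactive=False)
rgba = np.zeros((480, 640, 4), dtype=np.float32)        # 480 rows, 640 cols, 4 channels
img = init_image(ctx, rgba, num_channels=4, mode="w")   # RGBA FLOAT image, shape (640, 480)
print(img.width, img.height)                             # -> 640 480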
def vglClImageUpload(img):
    global ocl
    mf = cl.mem_flags

    # IMAGE VARS
    print("-> vglClImageUpload: Starting.")

    if (img.getVglShape().getNFrames() == 1):
        origin = (0, 0, 0)
        region = (img.getVglShape().getWidth(), img.getVglShape().getHeigth(), 1)
        shape = (img.getVglShape().getWidth(), img.getVglShape().getHeigth())

        imgFormat = cl.ImageFormat(vl.cl_channel_order(img), vl.cl_channel_type(img))
        img.oclPtr = cl.Image(ocl.context, mf.READ_ONLY, imgFormat, shape)
    elif (img.getVglShape().getNFrames() > 1):
        origin = (0, 0, 0)
        region = (img.getVglShape().getWidth(), img.getVglShape().getHeigth(), img.getVglShape().getNFrames())
        shape = (img.getVglShape().getWidth(), img.getVglShape().getHeigth(), img.getVglShape().getNFrames())

        imgFormat = cl.ImageFormat(vl.cl_channel_order(img), vl.cl_channel_type(img))
        img.oclPtr = cl.Image(ocl.context, mf.READ_ONLY, imgFormat, shape)
    else:
        print("vglClImageUpload: VglImage NFrames wrong. NFrames returns:", img.getVglShape().getNFrames())
        exit()

    # COPYING NDARRAY IMAGE TO OPENCL IMAGE OBJECT
    cl.enqueue_copy(ocl.commandQueue, img.get_oclPtr(), img.get_ipl(),
                    origin=origin, region=region, is_blocking=True)

    print("<- vglClImageUpload: Ending.\n")
def points2lab(points, shape, distance="L2"):
    dstFloatImage = cl.Image(ctx, mf.WRITE_ONLY, floatFormat, shape=shape)
    dstUintImage = cl.Image(ctx, mf.READ_WRITE, uint8Format, shape=shape)
    temp = np.zeros(shape[1::-1] + (4, ), dtype=np.float32)
    pointsBuffer = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                             hostbuf=np.array(points, dtype=np.int32).ravel())

    if distance == "L1":
        prg.drawVoronoiL1(queue, shape, None, dstUintImage,
                          np.uint32(len(points)), pointsBuffer, np.uint32(1))
    elif distance == "L2":
        prg.drawVoronoiL2(queue, shape, None, dstUintImage,
                          np.uint32(len(points)), pointsBuffer, np.uint32(1))
    elif distance == "Linf":
        prg.drawVoronoiLinf(queue, shape, None, dstUintImage,
                            np.uint32(len(points)), pointsBuffer, np.uint32(1))

    prg.rgb2lab(queue, shape, None, dstUintImage, dstFloatImage)
    cl.enqueue_copy(queue, temp, dstFloatImage, origin=(0, 0), region=shape)

    dstUintImage.release()
    dstFloatImage.release()
    return temp
def loadJPG(self, filename):
    img = matplotlib.image.imread(filename)
    self.source_width = img.shape[1]
    self.source_height = img.shape[0]

    if self.parm("width").eval() != 0:
        self.width = self.parm("width").eval()
    else:
        self.width = self.source_width

    if self.parm("height").eval() != 0:
        self.height = self.parm("height").eval()
    else:
        self.height = self.source_height

    # UNORM_INT8 expects unsigned bytes; int8 would wrap channel values above 127.
    r = numpy.array(img[:, :, 0], dtype=numpy.uint8)
    g = numpy.array(img[:, :, 1], dtype=numpy.uint8)
    b = numpy.array(img[:, :, 2], dtype=numpy.uint8)

    self.devInBufferR = cl.Image(self.engine.ctx,
                                 self.engine.mf.READ_ONLY | self.engine.mf.COPY_HOST_PTR,
                                 cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.UNORM_INT8),
                                 shape=(self.source_width, self.source_height,),
                                 pitches=(self.source_width,),
                                 hostbuf=r)
    self.devInBufferG = cl.Image(self.engine.ctx,
                                 self.engine.mf.READ_ONLY | self.engine.mf.COPY_HOST_PTR,
                                 cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.UNORM_INT8),
                                 shape=(self.source_width, self.source_height,),
                                 pitches=(self.source_width,),
                                 hostbuf=g)
    self.devInBufferB = cl.Image(self.engine.ctx,
                                 self.engine.mf.READ_ONLY | self.engine.mf.COPY_HOST_PTR,
                                 cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.UNORM_INT8),
                                 shape=(self.source_width, self.source_height,),
                                 pitches=(self.source_width,),
                                 hostbuf=b)
def get_similar_device_image_object(self, ctx, queue):
    if (self.imgDim == vc.VGL_IMAGE_2D_IMAGE()):
        shape = (self.vglshape.getWidth(), self.vglshape.getHeight())
        mf = cl.mem_flags
        imgFormat = cl.ImageFormat(self.get_toDevice_channel_order(), self.get_toDevice_dtype())
        img_copy = cl.Image(ctx, mf.WRITE_ONLY, imgFormat, shape)
    elif (self.imgDim == vc.VGL_IMAGE_3D_IMAGE()):
        shape = (self.vglshape.getWidth(), self.vglshape.getHeight(), self.vglshape.getNFrames())
        mf = cl.mem_flags
        imgFormat = cl.ImageFormat(self.get_toDevice_channel_order(), self.get_toDevice_dtype())
        img_copy = cl.Image(ctx, mf.WRITE_ONLY, imgFormat, shape)

    print("--> Orig:", self.get_device_image().width, self.get_device_image().height, self.get_device_image().depth)
    print("--> Copy:", img_copy.width, img_copy.height, img_copy.depth)

    return img_copy
def load_image(ctx, file_name):
    im = Image.open(file_name)
    # Make sure the image is RGBA formatted
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    # Convert to uint8 buffer
    buf = im.tobytes()
    cl_image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8)
    cl_image = cl.Image(
        ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        cl_image_format,
        im.size,
        None,
        buf)
    return cl_image, im.size
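# Hedged usage sketch for load_image above (not part of the original snippet): the file
# name "input.png" is a placeholder, pyopencl is assumed imported as cl and PIL.Image as
# Image. It pairs the returned read-only image with a nearest-neighbour sampler, which is
# the combination a __read_only image2d_t kernel argument typically needs.
import pyopencl as cl
from PIL import Image

ctx = cl.create_some_context(interactive=False)
src_img, (width, height) = load_image(ctx, "input.png")
sampler = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST)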
def test_image_2d(self, device, ctx_getter):
    context = ctx_getter()

    if not device.image_support:
        from py.test import skip
        skip("images not supported on %s" % device)

    prg = cl.Program(context, """
        __kernel void copy_image(
          __global float4 *dest,
          __read_only image2d_t src,
          sampler_t samp,
          int width)
        {
          int x = get_global_id(0);
          int y = get_global_id(1);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
          */
          dest[x + width*y] = read_imagef(src, samp, (float2)(x, y));
          // dest[x + width*y] = get_image_height(src);
        }
        """).build()

    a = numpy.random.rand(1024, 1024, 4).astype(numpy.float32)
    queue = cl.CommandQueue(context)
    mf = cl.mem_flags
    a_img = cl.Image(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                     cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT),
                     shape=a.shape[:2], hostbuf=a)
    a_dest = cl.Buffer(context, mf.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False,
                      cl.addressing_mode.CLAMP,
                      cl.filter_mode.NEAREST)
    prg.copy_image(queue, a.shape, None, a_dest, a_img, samp,
                   numpy.int32(a.shape[0]))

    a_result = numpy.empty_like(a)
    cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True)
    print(a_result.dtype)

    assert la.norm(a_result - a) == 0
def __init__(self, ntheta, nphi, tol=1e-7, context=None):
    '''
    Create OpenCL kernels to convert samples of a harmonic function,
    sampled at regular points on the unit sphere, into cubic b-spline
    coefficients. These coefficients can be used for rapid, GPU-based
    interpolation at arbitrary locations.
    '''
    if nphi % 2 != 0:
        raise ValueError('The number of azimuthal samples must be even')

    self.ntheta = ntheta
    self.nphi = nphi
    # This is the polar-ring grid shape
    self.grid = 2 * (ntheta - 1), nphi // 2

    # Set the desired precision of the filter coefficients
    if tol > 0:
        zp = math.sqrt(3) - 2.
        self.precision = int(math.log(tol) / math.log(abs(zp)))
    else:
        self.precision = ntheta

    # Don't let the precision exceed the number of samples!
    self.precision = min(self.precision, min(ntheta, nphi))

    # Grab the provided context or create a default
    self.context = util.grabcontext(context)

    # Build the program for the context
    t = Template(filename=self._kernel, output_encoding='ascii')
    self.prog = cl.Program(self.context,
                           t.render(ntheta=ntheta, nphi=nphi, p=self.precision)).build()

    # Create a command queue for the context
    self.queue = cl.CommandQueue(self.context)

    # Create an image that will store the spline coefficients
    # Remember to pad the columns to account for repeated boundaries
    mf = cl.mem_flags
    self.coeffs = cl.Image(self.context, mf.READ_WRITE,
                           cl.ImageFormat(cl.channel_order.RG, cl.channel_type.FLOAT),
                           [g + 3 for g in self.grid])

    # The poles will be stored so they need not be interpolated
    self.poles = 0., 0.
def getOutHostBuffer(self):
    device_buffer = self.getOutDevBuffer()
    host_buffer = numpy.empty((self.xRes(), self.yRes(), 4), dtype=numpy.float16)

    self.engine.openclQueue().finish()

    quantized_buffer = cl.Image(self.engine.openclContext(),
                                self.engine.mf.READ_WRITE,
                                cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.HALF_FLOAT),
                                shape=self.shape())

    with self.engine.openclQueue() as queue:
        evt = self.common_program.quantize_show(queue, self.shape(), None,
                                                device_buffer, quantized_buffer)
        evt.wait()

    with self.engine.openclQueue() as queue:
        evt = cl.enqueue_copy(queue, host_buffer, quantized_buffer,
                              origin=(0, 0), region=self.shape())
        evt.wait()

    return host_buffer
def _load_exr(ctx, filename):
    f = OpenEXR.InputFile(filename)
    dw = f.header()['dataWindow']
    shape = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1)
    sz = shape[0] * shape[1]
    buf = np.empty(4 * sz, np.float32)
    buf[0::4] = _read_channel(f, 'R')
    buf[1::4] = _read_channel(f, 'G')
    buf[2::4] = _read_channel(f, 'B')
    buf[3::4] = np.ones(sz, np.float32)
    return cl.Image(ctx,
                    cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                    cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT),
                    shape=shape, hostbuf=buf)
def test_int_ptr(ctx_factory):
    def do_test(obj):
        new_obj = type(obj).from_int_ptr(obj.int_ptr)
        assert obj == new_obj
        assert type(obj) is type(new_obj)

    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform
    do_test(device)
    do_test(platform)
    do_test(ctx)

    queue = cl.CommandQueue(ctx)
    do_test(queue)

    evt = cl.enqueue_marker(queue)
    do_test(evt)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg)
    do_test(prg.sum)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    do_test(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
            ]:
        smp = cl.Sampler(ctx, False,
                         cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp)

        img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        do_test(img)
def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
    """
    Allocate an OpenCL image ("texture").

    :param ctx: OpenCL context
    :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
        do not support 1D images, so 1D images are handled as 2D with one row
    :param support_1D: force the image to be 1D if the shape has only one dim
    """
    if len(shape) == 1 and not (support_1D):
        shape = (1, ) + shape
    return pyopencl.Image(
        ctx,
        pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
        pyopencl.ImageFormat(
            pyopencl.channel_order.INTENSITY,
            pyopencl.channel_type.FLOAT
        ),
        hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32))
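# Hedged usage sketch for allocate_texture above (not part of the original snippet),
# assuming pyopencl is importable and the device supports images: a 1-D shape is
# promoted to a one-row 2D INTENSITY/FLOAT image, and the actual data would still have
# to be written with a separate enqueue_copy (not shown here).
import pyopencl

ctx = pyopencl.create_some_context(interactive=False)
lut = allocate_texture(ctx, (256,))   # 1D lookup table stored as a 2D image with one row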
def frame_preprocessing(self, lower_bound, upper_bound):
    # *Load and convert source image
    frame = np.array(self.frame)

    # *Set properties
    h = frame.shape[0]
    w = frame.shape[1]
    mask = np.zeros((1, 2), cl.cltypes.float4)
    mask[0, 0] = (lower_bound)  # Lower bound
    mask[0, 1] = (upper_bound)  # Upper bound

    # *Buffers
    frame_buf = cl.image_from_array(GPUSetup.context, frame, 4)
    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    dest_buf = cl.Image(GPUSetup.context, cl.mem_flags.WRITE_ONLY, fmt, shape=(w, h))

    # *RGB to HSV
    GPUSetup.program.rgb2hsv(GPUSetup.queue, (w, h), None, frame_buf, dest_buf)
    self.hsv = np.empty_like(frame)
    cl.enqueue_copy(GPUSetup.queue, self.hsv, dest_buf, origin=(0, 0), region=(w, h))

    # *Apply mask
    frame_buf = cl.image_from_array(GPUSetup.context, self.hsv, 4)
    mask_buf = cl.Buffer(GPUSetup.context,
                         cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                         hostbuf=mask)
    GPUSetup.program.hsv_mask(GPUSetup.queue, (w, h), None, frame_buf, mask_buf, dest_buf)
    self.after_mask = np.empty_like(frame)
    cl.enqueue_copy(GPUSetup.queue, self.after_mask, dest_buf, origin=(0, 0), region=(w, h))

    return self.after_mask
def compute(self, lock, cl_context, cl_queue):
    super(COP2_HalfTone, self).compute()
    if self.hasInputs():
        self.devOutBuffer = cl.Image(cl_context, cl.mem_flags.READ_WRITE,
                                     self.image_format, shape=self.input(0).shape())

        self.width = self.xRes()
        self.height = self.yRes()

        exec_evt = self.program.filter(cl_queue, (self.width, self.height), None,
                                       self.input(0).getCookedData(),
                                       self.devOutBuffer,
                                       numpy.int32(self.input(0).xRes()),
                                       numpy.int32(self.input(0).yRes()),
                                       numpy.float32(self.parm("density").eval()),
                                       numpy.int32(self.parm("quality").eval()),
                                       numpy.int32(self.parm("mode").evalAsInt()),
                                       )
        exec_evt.wait()
    else:
        raise BaseException("No input specified !!!")
def _make_constant_cl_args(self):
    self.n_iters_cl = cl.Buffer(
        self.CTX,
        self.MF.READ_ONLY | self.MF.COPY_HOST_PTR,
        hostbuf=np.array([self.n_iters]).astype(np.int32)
    )
    self.size_cl = cl.Buffer(
        self.CTX,
        self.MF.READ_ONLY | self.MF.COPY_HOST_PTR,
        hostbuf=np.array((self.width + self.height * 1j,)).astype(np.complex64)
    )
    image_fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    self.image_cl = cl.Image(
        self.CTX,
        self.MF.WRITE_ONLY,
        image_fmt,
        shape=(self.width, self.height)
    )
def get_similar_oclPtr_object(img):
    global ocl
    mf = cl.mem_flags
    opencl_device = None

    if (isinstance(img.get_oclPtr(), cl.Image)):
        #print("get_similar_oclPtr_object: oclPtr is cl.Image.")
        imgFormat = cl.ImageFormat(vl.cl_channel_order(img), vl.cl_channel_type(img))
        opencl_device = cl.Image(ocl.context, mf.WRITE_ONLY, imgFormat, img.get_oclPtr().shape)
    elif isinstance(img.get_oclPtr(), cl.Buffer):
        #print("get_similar_oclPtr_object: oclPtr is cl.Buffer.")
        opencl_device = cl.Buffer(ocl.context, mf.WRITE_ONLY, img.get_ipl().nbytes)

    return opencl_device
def alloc_image(ctx: cl.Context, dim: tuple, flags=cl.mem_flags.READ_WRITE):
    endianness = get_endianness(ctx)
    if endianness == "both":
        raise RuntimeError(
            "Context has both little and big endian devices, which is not currently supported"
        )
    elif endianness == sys.byteorder:
        order = cl.channel_order.BGRA
    else:
        if endianness == "little":
            order = cl.channel_order.BGRA
        else:
            order = cl.channel_order.ARGB
    fmt = cl.ImageFormat(order, cl.channel_type.UNORM_INT8)
    return numpy.empty((*dim, 4), dtype=numpy.uint8), cl.Image(ctx, flags, fmt, shape=dim)
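# Hedged usage sketch for alloc_image above (not part of the original snippet), assuming
# pyopencl is imported as cl and that the module's get_endianness helper is available:
# the host array and the device image are created as a matching pair, and a kernel's
# output would be read back into the host array with enqueue_copy.
import pyopencl as cl

ctx = cl.create_some_context(interactive=False)
queue = cl.CommandQueue(ctx)
host, dev = alloc_image(ctx, (640, 480))
# ... enqueue a kernel that writes into `dev` here ...
cl.enqueue_copy(queue, host, dev, origin=(0, 0), region=(640, 480))
queue.finish()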
def compute(self):
    self.width, self.height = self.input(0).size
    self.devOutBuffer = cl.Image(self.engine.ctx, self.engine.mf.READ_WRITE,
                                 self.image_format, shape=(self.width, self.height))

    sampler = cl.Sampler(self.engine.ctx,
                         True,  # Normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.LINEAR)

    exec_evt = self.program.run_blend(self.engine.queue, self.size, None,
                                      self.input(0).getOutDevBuffer(),
                                      self.input(1).getOutDevBuffer(),
                                      self.devOutBuffer,
                                      sampler,
                                      numpy.int32(self.width),
                                      numpy.int32(self.height),
                                      numpy.float32(self.parm("factor").evalAsFloat()))
    exec_evt.wait()
def parallel_prediction_errors(self, image):
    """
    Get the MILC prediction errors for a 3D image by means of OpenCL
    accelerated computation.

    Keyword arguments:
    image -- a 3D numpy array (bitmap image)

    Return:
    a 3D numpy array of the same shape of "image", containing the prediction errors
    """
    mf = cl.mem_flags

    # Define the image format for the prediction errors
    err_format = cl.ImageFormat(channel_order=cl.channel_order.R,
                                channel_type=DataType.CL_ERR.value)

    # Define the input image from the numpy 3D array
    source_image = cl.image_from_array(self.ctx, image)

    original_shape = numpy.shape(image)
    cl_shape = list(reversed(original_shape))  # inverted shape (pyOpenCL bug?)

    # output image
    output_image = cl.Image(self.ctx, mf.WRITE_ONLY, err_format, shape=cl_shape)

    # sampler. pixels out of range have a value of '0'
    sampler = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST)

    # enqueue kernel
    self.program.image_test(self.queue, original_shape, None,
                            source_image, output_image, sampler)

    # read the resulting image into a numpy array
    output_data = numpy.empty(shape=cl_shape, dtype=DataType.ERR.value)
    cl.enqueue_read_image(self.queue, output_image, (0, 0, 0), cl_shape, output_data)

    return output_data.reshape(original_shape)
def main():
    CL_CODE = '''
    constant float R_weight = 0.6;
    constant float G_weight = 0.4;
    constant float B_weight = 0.8;
    constant float ALL_weight = 1.8;
    constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

    kernel void gray(__read_only image2d_t src_img, __write_only image2d_t dst_img) {
        int x = get_global_id(0);
        int y = get_global_id(1);
        int2 coord = (int2)(x, y);
        uint4 pixel = read_imageui(src_img, sampler, coord);
        uint g = (uint)((pixel[0] * R_weight + pixel[1] * G_weight + pixel[2] * B_weight) / ALL_weight);
        pixel = g;
        pixel[3] = 255;
        write_imageui(dst_img, coord, pixel);
    }
    '''

    plf = [(cl.context_properties.PLATFORM, cl.get_platforms()[0])]
    ctx = cl.Context(dev_type=cl.device_type.GPU, properties=plf)
    prg = cl.Program(ctx, CL_CODE).build()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags

    src_raw = np.asarray(Image.open('res/tile-z16.png').convert("RGBA"))
    src_img = cl.image_from_array(ctx, src_raw, 4)

    # A numpy image array is (rows, cols, channels), i.e. (height, width, 4).
    (h, w, _) = src_raw.shape
    image_size = (w, h)
    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    dst_img = cl.Image(ctx, mf.WRITE_ONLY, fmt, shape=image_size)
    dst_raw = np.empty_like(src_raw)

    prg.gray(queue, image_size, (1, 1), src_img, dst_img)
    cl.enqueue_copy(queue, dst_raw, dst_img, origin=(0, 0), region=image_size)

    Image.fromarray(dst_raw).show()
def convert(self, img):
    src = numpy.fromstring(img.bits().asstring(img.byteCount()), dtype=numpy.uint8)
    src.shape = h, w, _ = img.height(), img.width(), 4

    mf = cl.mem_flags
    src_buf = cl.image_from_array(self.ctx, src, 4)
    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    dest_buf = cl.Image(self.ctx, mf.WRITE_ONLY, fmt, shape=(w, h))

    self.prg.convert(self.queue, (w, h), None, src_buf, dest_buf,
                     numpy.int32(w), numpy.int32(h))

    dest = numpy.empty_like(src)
    cl.enqueue_copy(self.queue, dest, dest_buf, origin=(0, 0), region=(w, h))

    return QtGui.QImage(str(dest.data), w, h, QtGui.QImage.Format_RGB32)
def circles2rgb(circles, shape, backgroundColor, scale=1):
    scaleShape = (int(shape[0] * scale), int(shape[1] * scale))
    circlesBuffer = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=np.array(circles, dtype=np.int32).ravel())
    dstUintImage = cl.Image(ctx, mf.READ_WRITE, uint8Format, shape=scaleShape)
    temp = np.zeros(scaleShape[1::-1] + (4, ), dtype=np.uint8)

    prg.drawCircles(queue, scaleShape, None, dstUintImage, circlesBuffer,
                    np.uint8(len(circles)), np.uint8(backgroundColor), np.uint8(scale))
    cl.enqueue_copy(queue, temp, dstUintImage, origin=(0, 0), region=scaleShape).wait()

    dstUintImage.release()
    circlesBuffer.release()
    return temp
def __init__(self):
    self.dst = np.empty((N, N, 4)).astype(np.uint8)
    self.dst_buf = cl.Buffer(context, mf.WRITE_ONLY, self.dst.nbytes)
    self.inv_matrix = cl.Buffer(context, mf.READ_ONLY, 16 * 4)
    self.matrix = cl.Buffer(context, mf.READ_ONLY, 16 * 4)

    with open('kernel.cl', 'r') as f:
        self.program = cl.Program(context, f.read()).build("-cl-mad-enable")
    print(self.program.get_build_info(context.devices[0], cl.program_build_info.LOG))

    self.dstTex = glGenTextures(1)
    glBindTexture(GL_TEXTURE_2D, self.dstTex)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, N, N, 0, GL_RGBA, GL_UNSIGNED_BYTE, None)
    glBindTexture(GL_TEXTURE_2D, 0)

    print_info(self.program, cl.program_info)
    print_info(self.program.pdbTracer, cl.kernel_info)

    # Precompute a 256x256 hemisphere-direction lookup table and upload it
    # as a read-only RGBA float image.
    grid = np.array(range(256), dtype=np.float32) / 256
    x1, x2 = np.meshgrid(grid, grid)
    rad = np.sqrt(x1)
    phi = 2 * np.pi * x2
    phimap = np.dstack((np.cos(phi) * rad, np.sin(phi) * rad,
                        np.sqrt(1 - rad * rad), 0 * rad))
    self.p = phimap
    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT)
    self.phimap = cl.Image(context, mf.READ_ONLY | mf.COPY_HOST_PTR, fmt,
                           shape=phimap.shape[:2], hostbuf=np.array(phimap, order='C'))
def compute(self, lock, cl_context, cl_queue):
    super(COP2_Render, self).compute()

    self.image_width = self.parm("size1").eval()
    self.image_height = self.parm("size2").eval()

    print("render init")
    self.renderer.init(self.image_width, self.image_height, 16)
    print("rendering")
    image_array = np.flip(self.renderer.renderFrame(), (0, 1))
    print("rendering done")

    try:
        print("dev buffer")
        self.devOutBuffer = cl.Image(
            cl_context,
            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
            self.image_format,
            shape=(self.image_width, self.image_height),
            hostbuf=image_array.astype("float32"))
    except:
        raise
def __init__(self, image_width, image_height, opencl_file, gl_image=None):
    enable_gl_sharing = gl_image is not None
    init_opencl(enable_gl_sharing)

    with open(opencl_file, 'r') as f:
        source = f.read()
    self._program = cl.Program(context, source).build(
        options=['-I', f'"{os.path.dirname(opencl_file)}"'])

    if gl_image is None:
        shape = (image_height, image_width)
        fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8)
        self._result_image = cl.Image(context, mem.WRITE_ONLY, fmt, shape)
    else:
        from OpenGL.GL import GL_TEXTURE_2D
        self._result_image = cl.GLTexture(context, mem.WRITE_ONLY, GL_TEXTURE_2D,
                                          0, gl_image, dims=2)