Beispiel #1
0
    def setup_device(self, imshape):
        """Prepare every device-side resource needed for images of ``imshape``.

        Records ``imshape`` in ``self.cached_shape``, allocates the float32
        working arrays, builds the radial program, instantiates the helper
        kernels and creates the 2D image object and sampler used for the
        starburst storage.
        """
        print('Setting up with imshape = %s' % (str(imshape)))

        self.cached_shape = imshape

        # Template array: every working array below is created "like" it,
        # in the listed order.
        self.clIm = cla.Array(self.q, imshape, np.float32)
        working_arrays = (
            ('clm', cla.empty_like),
            ('clx', cla.empty_like),
            ('cly', cla.empty_like),
            ('clO', cla.zeros_like),
            ('clM', cla.zeros_like),
            ('clF', cla.empty_like),
            ('clS', cla.empty_like),
            ('clThisS', cla.empty_like),
            ('clScratch', cla.empty_like),
        )
        for attr_name, allocate in working_arrays:
            setattr(self, attr_name, allocate(self.clIm))

        self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)

        # The local-memory separable correlation is the one in use.
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        # Element-wise helper kernels.
        self.accum = ElementwiseKernel(
            self.ctx, 'float *a, float *b', 'a[i] += b[i]')
        self.norm_s = ElementwiseKernel(
            self.ctx,
            'float *s, const float nRadii',
            's[i] = -1 * s[i] / nRadii',
            'norm_s')
        self.accum_s = ElementwiseKernel(
            self.ctx,
            'float *a, float *b, const float nr',
            'a[i] -= b[i] / nr')

        # Both caches start out empty.
        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)

        # starburst storage: single-channel float image plus its sampler
        float_r_format = cl.ImageFormat(cl.channel_order.R,
                                        cl.channel_type.FLOAT)
        self.clIm2D = cl.Image(self.ctx, mf.READ_ONLY, float_r_format, imshape)

        # Sampler arguments: non-normalized coordinates, clamp-to-edge
        # addressing, linear filtering.
        use_normalized_coords = False
        self.imSampler = cl.Sampler(self.ctx,
                                    use_normalized_coords,
                                    cl.addressing_mode.CLAMP_TO_EDGE,
                                    cl.filter_mode.LINEAR)

        self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)

        # Keep direct handles on the two kernels of the radial program.
        radial_program = self.radial_prg
        self.calcF = radial_program.calcF
        self.calcOM = radial_program.calcOM
   //color.w = 1.0f;
   color.xyz=avg;
   color.w = 1.0f;
   
   write_imagef(output,coord,color);
   
}
""").build()

# Open the image file
src1 = Image.open('temp/images/f2.png')
print(src1.size)
# NOTE(review): `dist` is not used anywhere in the visible part of this script.
dist = Image.new('RGBA',(640,480),(255,255,255))

# Image format handled by OpenCL: RGBA, uint8
imageFormat = cl.ImageFormat(cl.channel_order.RGBA,cl.channel_type.UNSIGNED_INT8)

# Copy the image from the host to the device
# NOTE(review): the raw bytes are uploaded as RGBA; this assumes src1 is already
# in RGBA mode -- confirm, the mode is not checked here.
img1 = cl.Image(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,imageFormat,src1.size,None,src1.tobytes())
output = cl.Image(context=ctx,flags=mf.WRITE_ONLY,format=imageFormat,shape=src1.size)

# Define the work size based on the image size
localWorkSize = ( 8, 8 )  
globalWorkSize = ( RoundUp(localWorkSize[0], src1.size[0]),  
                    RoundUp(localWorkSize[1], src1.size[1]))
# Execute the kernel
prg.backto1980_filter(queue,globalWorkSize,localWorkSize,img1,output)


# Host-side destination: 4 bytes per pixel, plus the origin for an image read.
buffer = np.zeros(src1.size[0] * src1.size[1] * 4, np.uint8)  
origin = ( 0, 0, 0 )  
Beispiel #3
0
import pyopencl as cl
import numpy as np
import sys

# Pick the first GPU device of the first OpenCL platform and create a
# context and command queue for it.
platforms = cl.get_platforms()
platform = platforms[0]
devs = platform.get_devices(cl.device_type.GPU)
dev = devs[0]
mf = cl.mem_flags
ctx = cl.Context([dev])
queue = cl.CommandQueue(ctx, dev)

# Host data: the integers 0..23 as int32, arranged as a 3x4x2 array.
a = np.arange(24).astype(np.int32).reshape(3, 4, 2)
# Device buffer initialised with a copy of `a`.
b1 = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)
# Single-channel (R) signed 32-bit image, also initialised from `a`.
# No explicit shape is passed, so the image dimensions are derived from `a`.
fmt = cl.ImageFormat(cl.channel_order.R, cl.channel_type.SIGNED_INT32)
i1 = cl.Image(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, fmt, hostbuf=a)

prog = cl.Program(
    ctx, """
#define GL_ID (int4)(get_global_id(1), get_global_id(0), get_global_id(2), 0)

__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
    CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
__kernel void Image(
    __read_only image3d_t img)
{
    int4 id = GL_ID;
    int4 cl = read_imagei(img, sampler, id);
    printf("%d, %d, %d: %d\\n", id.x, id.y, id.z, cl.x);
}
def main():
    """Run the Gaussian image filter on the file named on the command line.

    Expects exactly two command-line arguments: the input image file and the
    output image file. Returns 1 when the arguments are wrong, when no OpenCL
    context could be created or when the device has no image support;
    returns None after a successful run.
    """
    imageObjects = [0, 0]

    # Main
    if len(sys.argv) != 3:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>")
        return 1

    # Create an OpenCL context on first available platform
    context, device = CreateContext()
    if context is None:
        print("Failed to create OpenCL context.")
        return 1

    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print("OpenCL device does not support images.")
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])

    # Create output image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               imgSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False,  # Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly; the global size is derived from the image
    # size and the 16x16 local size via RoundUp.
    localWorkSize = (16, 16)
    globalWorkSize = (RoundUp(localWorkSize[0], imgSize[0]),
                      RoundUp(localWorkSize[1], imgSize[1]))

    program.gaussian_filter(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
                            sampler,
                            numpy.int32(imgSize[0]),
                            numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host (4 bytes per pixel)
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)

    # NOTE(review): enqueue_read_image is a legacy PyOpenCL entry point;
    # confirm it exists in the installed version (enqueue_copy is the
    # current equivalent).
    cl.enqueue_read_image(commandQueue, imageObjects[1],
                          origin, region, buffer).wait()

    print("Executed program succesfully.")

    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)
Beispiel #5
0
    def convert_image_yuv(self, image):
        """Convert a 3-plane input image to packed output with the OpenCL kernel.

        Each input plane is uploaded as a single-channel UNSIGNED_INT8 image,
        the kernel writes into an UNORM_INT8 output image, and the result is
        read back into a host array of ``dst_width*dst_height*4`` bytes.

        Args:
            image: source image wrapper; must be planar (``_3_PLANES``), use
                ``self.src_format`` and be at least ``src_width`` x
                ``src_height``.

        Returns:
            A packed ``ImageWrapper`` of ``dst_width`` x ``dst_height`` in
            ``self.dst_format``.

        Raises:
            AssertionError: if the input does not meet the requirements above.
        """
        start = time.time()
        iplanes = image.get_planes()
        width = image.get_width()
        height = image.get_height()
        strides = image.get_rowstride()
        pixels = image.get_pixels()
        assert pixels, "failed to get pixels from %s" % image
        assert iplanes==ImageWrapper._3_PLANES, "we only handle planar data as input!"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        assert len(strides)==len(pixels)==3, "invalid number of planes or strides (should be 3)"
        assert width>=self.src_width and height>=self.src_height, "expected source image with dimensions of at least %sx%s but got %sx%s" % (self.src_width, self.src_height, width, height)

        #adjust work dimensions for subsampling:
        #(we process N pixels at a time in each dimension)
        divs = get_subsampling_divs(self.src_format)
        wwidth = dimdiv(self.dst_width, max(x_div for x_div, _ in divs))
        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        kernelargs = [self.queue, globalWorkSize, localWorkSize]

        iformat = pyopencl.ImageFormat(pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8)
        input_images = []
        oimage = None
        try:
            for i in range(3):
                _, y_div = divs[i]
                #the image is as wide as the plane's rowstride:
                shape = strides[i], self.src_height//y_div
                plane = pixels[i]
                if type(plane)==_memoryview:
                    plane = plane.tobytes()
                if type(plane)==str:
                    #str is copied to the device, anything else is used in place
                    flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
                else:
                    flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
                iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=plane)
                input_images.append(iimage)

            #output image:
            oformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNORM_INT8)
            oimage = pyopencl.Image(self.context, mem_flags.WRITE_ONLY | mem_flags.ALLOC_HOST_PTR, oformat, shape=(self.dst_width, self.dst_height))

            kernelargs += input_images + [numpy.int32(self.src_width), numpy.int32(self.src_height),
                           numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                           self.sampler, oimage]

            kstart = time.time()
            log("convert_image(%s) calling %s%s after upload took %.1fms",
                  image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
            self.kernel_function(*kernelargs)
            kend = time.time()
            #log the kernel's name (as in the "calling" message above), not the kernel object:
            log("%s took %.1fms", self.kernel_function_name, 1000.0*(kend-kstart))

            out_array = numpy.empty(self.dst_width*self.dst_height*4, dtype=numpy.byte)
            log("out array=%s", out_array)
            pyopencl.enqueue_copy(self.queue, out_array, oimage, origin=(0,0), region=(self.dst_width,self.dst_height))
            self.queue.finish()
            log("readback using %s took %.1fms", CHANNEL_ORDER_TO_STR.get(self.channel_order), 1000.0*(time.time()-kend))
        finally:
            #free the device images even if the kernel or the readback failed:
            for iimage in input_images:
                iimage.release()
            if oimage is not None:
                oimage.release()
        self.time += time.time()-start
        self.frames += 1
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, out_array.data, self.dst_format, 24, self.dst_width*4, planes=ImageWrapper.PACKED)
Beispiel #6
0
    def convert_image_rgb(self, image):
        """Convert a packed input image to three output planes with the OpenCL kernel.

        The packed pixels are uploaded as one UNSIGNED_INT8 image, the kernel
        writes each output plane into its own device buffer, and the three
        buffers are read back into host arrays.

        Args:
            image: source image wrapper; must be ``PACKED``, use
                ``self.src_format`` and be at least ``src_width`` x
                ``src_height``.

        Returns:
            A ``_3_PLANES`` ``ImageWrapper`` of ``dst_width`` x ``dst_height``
            in ``self.dst_format``, carrying the per-plane strides.

        Raises:
            AssertionError: if the input does not meet the requirements above.
        """
        #NOTE(review): unlike convert_image_yuv, this method does not update
        #self.time / self.frames - confirm whether that is intentional.
        start = time.time()
        iplanes = image.get_planes()
        width = image.get_width()
        height = image.get_height()
        stride = image.get_rowstride()
        pixels = image.get_pixels()
        assert pixels, "failed to get pixels from %s" % image
        assert iplanes==ImageWrapper.PACKED, "we only handle packed data as input!"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        assert width>=self.src_width and height>=self.src_height, "expected source image with dimensions of at least %sx%s but got %sx%s" % (self.src_width, self.src_height, width, height)

        #adjust work dimensions for subsampling:
        #(we process N pixels at a time in each dimension)
        divs = get_subsampling_divs(self.dst_format)
        wwidth = dimdiv(self.dst_width, max(x_div for x_div, _ in divs))
        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        #input image (4 bytes per pixel, so the width in pixels is stride//4):
        iformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNSIGNED_INT8)
        shape = (stride//4, self.src_height)
        log("convert_image() type=%s, input image format=%s, shape=%s, work size: local=%s, global=%s", type(pixels), iformat, shape, localWorkSize, globalWorkSize)
        idata = pixels
        if type(idata)==_memoryview:
            idata = idata.tobytes()
        if type(idata)==str:
            #str is not a buffer, so we have to copy the data
            flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
        else:
            flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
        iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=idata)

        out_buffers = []
        try:
            kernelargs = [self.queue, globalWorkSize, localWorkSize,
                          iimage, numpy.int32(self.src_width), numpy.int32(self.src_height),
                          numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                          self.sampler]

            #calculate plane strides and allocate output buffers:
            strides = []
            out_sizes = []
            for i in range(3):
                x_div, y_div = divs[i]
                p_stride = roundup(self.dst_width // x_div, max(2, localWorkSize[0]))
                p_height = roundup(self.dst_height // y_div, 2)
                p_size = p_stride * p_height
                out_buf = pyopencl.Buffer(self.context, mem_flags.WRITE_ONLY, p_size)
                out_buffers.append(out_buf)
                kernelargs += [out_buf, numpy.int32(p_stride)]
                strides.append(p_stride)
                out_sizes.append(p_size)

            kstart = time.time()
            log("convert_image(%s) calling %s%s after %.1fms", image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
            self.kernel_function(*kernelargs)
            kend = time.time()
            log("%s took %.1fms", self.kernel_function_name, 1000.0*(kend-kstart))

            #read back (non-blocking copies, completed by queue.finish() below):
            pixels = []
            for i in range(3):
                out_array = numpy.empty(out_sizes[i], dtype=numpy.byte)
                pixels.append(out_array.data)
                pyopencl.enqueue_copy(self.queue, out_array, out_buffers[i], is_blocking=False)
            readstart = time.time()
            log("queue read events took %.1fms (3 planes of size %s, with strides=%s)", 1000.0*(readstart-kend), out_sizes, strides)
            self.queue.finish()
            readend = time.time()
            log("wait for read events took %.1fms", 1000.0*(readend-readstart))
        finally:
            #free the input image and output buffers even if something failed:
            iimage.release()
            for out_buf in out_buffers:
                out_buf.release()
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, pixels, self.dst_format, 24, strides, planes=ImageWrapper._3_PLANES)
Beispiel #7
0
	def loadEXR(self, filename):
		"""Load an OpenEXR file and upload its channels as OpenCL images.

		Reads the R, G and B channels (and A when the header lists it) as
		half floats and stores each one as a single-channel
		INTENSITY/HALF_FLOAT image in ``self.devInBufferR/G/B/A``. When the
		file has no alpha channel, ``self.devInBufferA`` is filled with ones.

		Also sets ``self.source_width``/``self.source_height`` from the data
		window, and ``self.width``/``self.height`` from the "width"/"height"
		parameters, falling back to the source size when a parameter is 0.
		"""
		import OpenEXR
		import Imath

		pt = Imath.PixelType(Imath.PixelType.HALF)
		image = OpenEXR.InputFile(filename)
		header = image.header()
		dw = header['dataWindow']
		channels = header['channels']
		size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1)
		self.source_width = size[0]
		self.source_height = size[1]

		# A parameter value of 0 means "use the source dimension".
		requested_width = self.parm("width").eval()
		self.width = requested_width if requested_width != 0 else self.source_width

		requested_height = self.parm("height").eval()
		self.height = requested_height if requested_height != 0 else self.source_height

		flags = self.engine.mf.READ_ONLY | self.engine.mf.COPY_HOST_PTR
		shape = (self.source_width, self.source_height,)
		pitches = (self.source_width * 2,)  # 2 bytes per half-float pixel

		def read_channel(name):
			# frombuffer replaces the deprecated binary mode of
			# numpy.fromstring; the data is only read (COPY_HOST_PTR), so a
			# read-only view of the channel bytes is sufficient.
			raw = image.channel(name, pt)
			return numpy.frombuffer(raw, dtype=numpy.float16).reshape(size[1], size[0])  # Numpy arrays are (row, col)

		def upload(host_buff):
			return cl.Image(self.engine.ctx, flags, cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.HALF_FLOAT), shape=shape, pitches=pitches, hostbuf=host_buff)

		self.devInBufferR = upload(read_channel('R'))
		self.devInBufferG = upload(read_channel('G'))
		self.devInBufferB = upload(read_channel('B'))

		if channels.get('A') is not None:
			self.devInBufferA = upload(read_channel('A'))
		else:
			# No alpha in the file: use a plane of ones.
			self.devInBufferA = upload(numpy.ones(self.source_width * self.source_height, dtype = numpy.float16))
def applyMorphOp(imgIn, op, platformIdx=1):
    """Apply a morphological operation to an image using the GPU.

    Args:
        imgIn: host image array. Its transposed shape is used as the OpenCL
            image shape and the result array is allocated with
            ``np.empty_like(imgIn)``. The device images are single-channel
            UNORM_INT8, so this presumably expects a 2D uint8 gray image --
            TODO confirm against callers.
        op: operation type, passed to the kernel as an unsigned 32-bit
            integer (dilate=0, erode=1).
        platformIdx: index into ``cl.get_platforms()`` of the platform to
            use. Defaults to 1, the index this function used to hard-code.

    Returns:
        A new array shaped like ``imgIn`` holding the processed image.
    """
    # (1) setup OpenCL
    platforms = cl.get_platforms()  # a platform corresponds to a driver (e.g. AMD)
    platform = platforms[platformIdx]  # selected platform (index 1 by default)
    devices = platform.get_devices(
        cl.device_type.GPU)  # get GPU devices of selected platform
    device = devices[0]  # take first GPU
    context = cl.Context([device])  # put selected GPU into context object
    queue = cl.CommandQueue(
        context, device)  # create command queue for selected GPU and context

    # (2) get shape of input image, allocate memory for output to which result can be copied to
    shape = imgIn.T.shape
    imgOut = np.empty_like(imgIn)

    # (2) create image buffers which hold images for OpenCL
    imgInBuf = cl.Image(
        context,
        cl.mem_flags.READ_ONLY,
        cl.ImageFormat(cl.channel_order.LUMINANCE, cl.channel_type.UNORM_INT8),
        shape=shape)  # holds a gray-valued image of given shape
    imgOutBuf = cl.Image(
        context,
        cl.mem_flags.WRITE_ONLY,
        cl.ImageFormat(cl.channel_order.LUMINANCE, cl.channel_type.UNORM_INT8),
        shape=shape)  # placeholder for gray-valued image of given shape

    # (3) load and compile OpenCL program; the source file is closed as soon
    # as it has been read
    with open('Erosion_Dilation.cl') as kernelFile:
        program = cl.Program(context, kernelFile.read()).build()

    # (3) from OpenCL program, get kernel object and set arguments (input image, operation type, output image)
    kernel = cl.Kernel(program, 'morphOpKernel')  # kernel function name in the .cl source
    kernel.set_arg(0, imgInBuf)  # input image buffer
    kernel.set_arg(1, np.uint32(
        op))  # operation type passed as an integer value (dilate=0, erode=1)
    kernel.set_arg(2, imgOutBuf)  # output image buffer

    # (4) copy image to device, execute kernel, copy data back
    cl.enqueue_copy(queue,
                    imgInBuf,
                    imgIn,
                    origin=(0, 0),
                    region=shape,
                    is_blocking=False)  # copy image from CPU to GPU
    cl.enqueue_nd_range_kernel(
        queue, kernel, shape, None
    )  # execute kernel, work is distributed across shape[0]*shape[1] work-items (one work-item per pixel of the image)
    cl.enqueue_copy(
        queue,
        imgOut,
        imgOutBuf,
        origin=(0, 0),
        region=shape,
        is_blocking=True
    )  # wait until finished copying resulting image back from GPU to CPU

    return imgOut
Beispiel #9
0
	def loadImage(self, imgpath):
		"""Read an image from disk and copy its pixels into an OpenCL buffer.

		Stores the image array and its metadata on ``self`` (``img``,
		``img_dtype``, ``img_ndim``, ``img_shape`` as (width, height),
		``img_origin``, ``img_region``), derives the OpenCL channel type and
		channel order, expands 3-channel images to 4 channels with a constant
		alpha of 255, then creates the input/output device buffers and uploads
		the pixels with a blocking copy.

		Args:
			imgpath: path of the image file, passed to ``io.imread``.
		"""
		print("Opening image to be processed")
		self.mf = cl.mem_flags

		# GETTING NDARRAY IMAGE AND DATA ABOUT THE IMAGE
		self.img = io.imread(imgpath)
		self.img_dtype = self.img.dtype
		self.img_ndim = self.img.ndim
		self.img_shape = (self.img.shape[1],self.img.shape[0])
		self.img_origin = (0, 0, 0)
		self.img_region = (self.img_shape[0], self.img_shape[1], 1)

		# NOTE(review): only uint8 is mapped to a channel type; for any other
		# dtype img_dtype_cl is left untouched (unset on a first call).
		if self.img.dtype == np.uint8:
			self.img_dtype_cl = cl.channel_type.UNORM_INT8

		# GETTING THE DIMENSIONS OF THE IMAGE
		if self.img_ndim == 1:
			# WHAT TO DO IF THE IMAGE IS 1-DIMENSIONAL
			self.img_channel_order_cl = cl.channel_order.LUMINANCE
		elif self.img_ndim == 2:
			# IF THE IMAGE IS 2-DIMENSIONAL, THEN IT IS A SHADES OF GRAY IMAGE
			# AND THE IMAGE TYPE IS LUMINANCE
			self.img_channel_order_cl = cl.channel_order.LUMINANCE
			self.img_nchannels = 1
		elif self.img_ndim == 3:
			# IF THE IMAGE ARRAY IS 3-DIMENSIONAL, THEN IT HAS MORE THAN 1 COLOR CHANNEL
			nchannels = self.img.shape[2]

			if nchannels == 2:
				# THEN IT CAN BE ANY 2-CHANNEL IMAGE.
				# NOTE(review): no channel order is chosen for this case.
				self.img_nchannels = 2
			elif nchannels == 3:
				# THEN IT CAN BE ANY 3-CHANNEL IMAGE. THE 4TH CHANNEL MUST BE ADDED TO IT
				self.img_nchannels = 3

				# THE ACTUAL CHANNEL ORDER (RGB, BGR, HSV, ...) IS NOT DETECTED.
				# THIS WAS ONLY TESTED WITH RGB; AN ALPHA CHANNEL IS SIMPLY
				# APPENDED SO THE DATA IS IN A 4-CHANNEL (RGBA-LIKE) LAYOUT.

				# TURNING INTO RGBA IMAGE
				self.img_aux = np.empty((self.img.shape[0],self.img.shape[1],4), self.img_dtype)
				self.img_aux[:,:,:3] = self.img
				self.img_aux[:,:,3] = 255

				self.img = self.img_aux
				self.img_channel_order_cl = cl.channel_order.RGBA
			elif nchannels == 4:
				# A 4-CHANNEL IMAGE IS STILL A 3-DIMENSIONAL ARRAY, SO IT IS
				# HANDLED HERE; THE PIXELS ARE USED AS THEY ARE.
				self.img_nchannels = 4
				self.img_channel_order_cl = cl.channel_order.RGBA

		elif self.img_ndim == 4:
			# KEPT FROM THE ORIGINAL CODE: A 4-DIMENSIONAL ARRAY IS TREATED AS RGBA
			self.img_channel_order_cl = cl.channel_order.RGBA

		# SETTING THE OPENCL IMAGE FORMAT AND THE DEVICE BUFFERS, WITHOUT THE COPY
		self.imgFormat = cl.ImageFormat(self.img_channel_order_cl, self.img_dtype_cl)
		self.img_in_cl = cl.Buffer(self.ctx, self.mf.READ_ONLY, self.img.nbytes)
		self.img_out_cl = cl.Buffer(self.ctx, self.mf.WRITE_ONLY, self.img.nbytes)

		# COPYING NDARRAY IMAGE TO THE OPENCL INPUT BUFFER
		cl.enqueue_copy(self.queue, self.img_in_cl, self.img.tobytes(), is_blocking=True)
Beispiel #10
0
	def loadJPG(self, filename):
		"""Load an image file and upload its first three channels as OpenCL images.

		Channels 0, 1 and 2 of the decoded image are stored as separate
		single-channel INTENSITY/UNORM_INT8 images in ``self.devInBufferR``,
		``self.devInBufferG`` and ``self.devInBufferB``.

		Also sets ``self.source_width``/``self.source_height`` from the image,
		and ``self.width``/``self.height`` from the "width"/"height"
		parameters, falling back to the source size when a parameter is 0.
		"""
		img = matplotlib.image.imread(filename)

		self.source_width = img.shape[1]
		self.source_height = img.shape[0]

		# A parameter value of 0 means "use the source dimension".
		requested_width = self.parm("width").eval()
		self.width = requested_width if requested_width != 0 else self.source_width

		requested_height = self.parm("height").eval()
		self.height = requested_height if requested_height != 0 else self.source_height

		flags = self.engine.mf.READ_ONLY | self.engine.mf.COPY_HOST_PTR
		shape = (self.source_width, self.source_height,)
		pitches = (self.source_width,)  # 1 byte per pixel

		def upload(channel):
			# Copy the channel into its own contiguous plane. The image
			# format is unsigned 8-bit, so the host data is uint8 too
			# (the byte values are the same as with the former int8 cast).
			plane = numpy.array(img[:,:,channel], dtype=numpy.uint8)
			return cl.Image(self.engine.ctx, flags, cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.UNORM_INT8), shape=shape, pitches=pitches, hostbuf=plane)

		self.devInBufferR = upload(0)
		self.devInBufferG = upload(1)
		self.devInBufferB = upload(2)
def main():
    """Convert '../datas/f2.jpg' to grayscale on an OpenCL device.

    The picture is converted to RGBA if necessary, uploaded as an image
    object, processed by the ``rgbaToGrayscale`` kernel and the result is
    saved to '../temp/cl-out.png'. The time spent on the kernel launch plus
    the readback is printed.
    """
    # create context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # load image, making sure it is RGBA
    im = Image.open('../datas/f2.jpg')
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    imgSize = im.size
    host_pixels = im.tobytes()

    # Input and output image objects share the same RGBA / uint8 format
    rgba_u8 = cl.ImageFormat(cl.channel_order.RGBA,
                             cl.channel_type.UNSIGNED_INT8)
    upload_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    src_image = cl.Image(ctx, upload_flags, rgba_u8, imgSize, None,
                         host_pixels)
    dst_image = cl.Image(ctx, cl.mem_flags.WRITE_ONLY, rgba_u8, imgSize)

    # kernel source code, kept inline
    kernelSrc = """
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |   
                          CLK_ADDRESS_CLAMP_TO_EDGE |   
                          CLK_FILTER_NEAREST;  
  
__kernel void rgbaToGrayscale(__read_only image2d_t srcImg,  
                              __write_only image2d_t dstImg)  
{  
    // Converts RGBA image to gray scale intensity using the following formula:   
    // I = 0.2126 * R + 0.7152 * G + 0.0722 * B   
  
    int2 coord = (int2) (get_global_id(0), get_global_id(1));  
    int width = get_image_width(srcImg);  
    int height = get_image_height(srcImg);  
  
    if (coord.x < width && coord.y < height)  
    {  
        uint4 color = read_imageui(srcImg, sampler, coord);  
        float luminance = 0.2126f * color.x + 0.7152f * color.y + 0.0722f * color.z;  
        color.x = color.y = color.z = (uint)luminance;  
          
        // Write the output value to image  
        write_imageui(dstImg, coord, color);  
    }  
}"""

    # Create OpenCL program
    program = cl.Program(ctx, kernelSrc).build()

    # Work sizes: 16x16 work-groups, global size derived via RoundUp
    localWorkSize = (16, 16)
    global_x = RoundUp(localWorkSize[0], imgSize[0])
    global_y = RoundUp(localWorkSize[1], imgSize[1])
    globalWorkSize = (global_x, global_y)

    started = time.time()

    program.rgbaToGrayscale(queue, globalWorkSize, localWorkSize,
                            src_image, dst_image)

    # Read the output image back to the host (4 bytes per pixel)
    width, height = imgSize[0], imgSize[1]
    result = numpy.zeros(width * height * 4, numpy.uint8)
    read_event = cl.enqueue_read_image(queue, dst_image, (0, 0, 0),
                                       (width, height, 1), result)
    read_event.wait()
    print(time.time() - started)

    print("Executed program succesfully.")

    # Save the image to disk
    gsim = Image.frombytes("RGBA", imgSize, result.tobytes())
    gsim.save('../temp/cl-out.png')
Beispiel #12
0
    def __init__(self, images, labels):
        """Store the dataset and build the OpenCL program used on it.

        ``images`` is flattened to a float32 array and ``labels`` to an int32
        array; one ``Entry`` is created per label from consecutive
        ``IMG_SIZE``-long slices of the flattened images. The 3D image
        geometry (``cl_height``, ``cl_width``, ``cl_depth`` ...) is derived
        from the device limits of ``clu.Q.device``, and the kernel source is
        assembled from the macro header, the two kernels defined inline below
        and the ``invert_cl``, ``rotate_cl``, ``noise_cl``, ``scale_cl`` and
        ``corner_cl`` sources found on ``self``.
        """

        self.images = np.asarray(images, dtype=np.float32).flatten()
        self.labels = np.asarray(labels, dtype=np.int32).flatten()
        # One Entry per label: its IMG_SIZE pixels plus the label itself.
        self.entries = [Entry(self.images[IMG_SIZE * i:IMG_SIZE * (i+1)], \
            self.labels[i]) for i in range(len(self.labels))]

        """
        Populate CL information
        """
        self.cl_dev = clu.Q.device
        # Device 3D-image height limit, reduced to a multiple of IMG_ROWS.
        self.cl_height = int(self.cl_dev.get_info(cl.device_info \
            .IMAGE3D_MAX_HEIGHT) / IMG_ROWS) * IMG_ROWS
        # Device 3D-image width limit, reduced to a multiple of IMG_COLS.
        self.cl_width = int(self.cl_dev.get_info(cl.device_info \
            .IMAGE3D_MAX_WIDTH) / IMG_COLS) * IMG_COLS
        # Total number of pixel values across all images.
        self.cl_size = len(self.images)
        # Number of cl_height x cl_width slices needed to hold cl_size values.
        self.cl_depth = math.ceil(self.cl_size / (self.cl_height * \
            self.cl_width))
        # NOTE(review): the tuple is ordered (height, width, depth) - confirm
        # this matches the order expected where cl_region is consumed.
        self.cl_region = (self.cl_height, self.cl_width, self.cl_depth)
        # How many images fit side by side in one row / in one depth slice.
        self.cl_per_row = int(self.cl_width / IMG_COLS)
        self.cl_per_depth = int((self.cl_width * self.cl_height) \
            / IMG_SIZE)
        # Single-channel float image format.
        self.cl_format = cl.ImageFormat(cl.channel_order.R, \
            cl.channel_type.FLOAT)
        self.cl_length = np.int32(len(self.entries))

        """
        Build CL program
        """
        # Macros
        # (values are substituted into the #define lines with str.format)
        kernel_cl = """
#define PER_ROW {per_row:d}
#define PER_DEPTH {per_depth:d}
#define SIZE {size:d}
#define ROWS {rows:d}
#define COLS {cols:d}
#define HR {hr:f}
#define HC {hc:f}
        """.format(per_row=self.cl_per_row, per_depth=self.cl_per_depth, \
            size=IMG_SIZE, rows=IMG_ROWS, cols=IMG_COLS,\
            hr=(IMG_ROWS - 1) / 2.0, hc=(IMG_COLS - 1) / 2.0)

        # Rest header
        # (helper macros, the two samplers and the ArrayToImage / ImageToArray
        # kernels; this is a regular, non-raw string, so each backslash at the
        # end of a line joins it with the following line)
        kernel_cl += """
#define CY(ry) -(ry) + HR
#define CX(rx) (rx) - HC
#define RY(cy) HR - (cy)
#define RX(cx) (cx) + HC
// ABS is for absolute coords in the image
// 1 is for destination 0 is for source
#define GL_ID_102 (int4)( \
    get_global_id(1), \
    get_global_id(0), \
    get_global_id(2), \
    0)
#define IDX(abs) abs.z * PER_DEPTH \
    + (int)(abs.y / ROWS) * PER_ROW \
    + (int)(abs.x / COLS)
#define ARY_IDX(idx, abs) idx * SIZE \
    + (abs.y % ROWS) * COLS \
    + abs.x % COLS
#define CARTESIAN(abs) (float4)( \
    (abs.x % COLS) - HC, \
    -(abs.y % ROWS) + HR, \
    abs.z, \
    0.0f)
#define RASTER(cart) (float4)( \
    cart.x + HC, \
    HR - cart.y, \
    cart.z, \
    0.0f)
#define NO_Z_INTERPOLATION(abs) (float4)( \
    abs.x, \
    abs.y, \
    abs.z + 0.5f, /* +0.5f makes z not interpolated */ \
    0.0f)
#define SIGN_INT(i) ((i > 0) - (i < 0))

__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
    CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
__constant sampler_t linear_sampler = CLK_NORMALIZED_COORDS_FALSE |
    CLK_ADDRESS_NONE | CLK_FILTER_LINEAR;

__kernel void ArrayToImage(
    __read_only image3d_t ary,
    __write_only image3d_t img,
    const int length
)
{

    int4 ary_pos = GL_ID_102;
    int ary_idx = ary_pos.z * PER_DEPTH * SIZE + ary_pos.y * COLS \
        * PER_ROW + ary_pos.x;
    int idx = ary_idx / SIZE;

    // return if padded
    if(idx >= length) {
        return;
    }

    // Array index in current depth
    int ary_idx_in_depth = ary_idx % (PER_DEPTH * SIZE);
    // Array index in each image
    int ary_idx_in_each = ary_idx_in_depth % SIZE;

    int idx_in_depth = idx % PER_DEPTH;
    int base_y = (idx_in_depth / PER_ROW) * ROWS;
    int base_x = (idx_in_depth % PER_ROW) * COLS;

    int img_y = base_y + ary_idx_in_each / COLS;
    int img_x = base_x + ary_idx_in_each % COLS;

    int4 img_pos = (int4)(img_x, img_y, ary_pos.z, 0);

    float4 cl = read_imagef(ary, sampler, ary_pos);
    write_imagef(img, img_pos, cl);

}

__kernel void ImageToArray(
    __read_only image3d_t img,
    __write_only image3d_t ary,
    const int length
)
{

    int4 img_pos = GL_ID_102;
    int idx = img_pos.z * PER_DEPTH + (int)(img_pos.y / ROWS) * PER_ROW \
        + (int)(img_pos.x / COLS);

    if(idx >= length) return;

    int idx_in_depth = idx % PER_DEPTH;
    int ary_idx = img_pos.z * PER_DEPTH * SIZE + idx_in_depth * SIZE + \
        (img_pos.y % ROWS) * COLS + (img_pos.x % COLS);
    int ary_idx_in_depth = ary_idx % (PER_DEPTH * SIZE);
    int4 ary_pos = (int4)(
        ary_idx_in_depth % (PER_ROW * COLS),
        ary_idx_in_depth / (PER_ROW * COLS),
        img_pos.z,
        0
    );

    float4 cl = read_imagef(img, sampler, img_pos);
    write_imagef(ary, ary_pos, cl);

}
        """

        # Kernels
        # (additional kernel sources taken from attributes of self, joined by
        # newlines and appended to the header above)
        kernel_cl += "\n".join([ \
            self.invert_cl, self.rotate_cl, self.noise_cl, self.scale_cl, \
            self.corner_cl])

        # Compile everything as one program in the shared context.
        self.program = cl.Program(clu.CTX, kernel_cl).build()
Beispiel #13
0
def gpu_filter(in_put='in.jpg'):
    """Run the ``gaussian_filter`` OpenCL kernel over an image file.

    Args:
        in_put: Path of the input image; passed unchanged to
            ``GpuFilter.load_image``.

    Returns:
        The result of ``GpuFilter.image_convert`` applied to the RGBA byte
        buffer read back from the device and the image size.

    Raises:
        SystemExit: With status 1 when no OpenCL context could be created
            or the selected device has no image support.
    """
    gpu_filter_ = GpuFilter()

    # The helper returns both the context and the device to run on.
    context, device = gpu_filter_.create_context()
    if context is None:
        print("Failed to create OpenCL context.")
        # Built-in SystemExit instead of the site-provided exit(): it is
        # always available and reports the failure with a non-zero status.
        raise SystemExit(1)

    command_queue = cl.CommandQueue(context, device)

    # The kernel works on image objects, so image support is mandatory.
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print("OpenCL device does not support images.")
        raise SystemExit(1)

    # img_size is indexed as [0], [1] below -- presumably (width, height);
    # confirm against GpuFilter.load_image.
    src_image, img_size = gpu_filter_.load_image(context, in_put)

    # Output image: same size as the input, 8-bit normalised RGBA.
    cl_image_format = cl.ImageFormat(cl.channel_order.RGBA,
                                     cl.channel_type.UNORM_INT8)
    dst_image = cl.Image(context, cl.mem_flags.WRITE_ONLY,
                         cl_image_format, img_size)

    sampler = cl.Sampler(
        context,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP,
        cl.filter_mode.NEAREST)

    program = gpu_filter_.create_program(context, device, "ImageFilter2D.cl")

    # round_up presumably pads each image dimension to a multiple of the
    # work-group size -- verify against GpuFilter.round_up.
    local_work_size = (16, 16)
    global_work_size = (gpu_filter_.round_up(local_work_size[0], img_size[0]),
                        gpu_filter_.round_up(local_work_size[1], img_size[1]))
    program.gaussian_filter(command_queue, global_work_size, local_work_size,
                            src_image, dst_image, sampler,
                            numpy.int32(img_size[0]), numpy.int32(img_size[1]))

    # Read the output image back to the host: 4 bytes (RGBA) per pixel.
    cl_buffer = numpy.zeros(img_size[0] * img_size[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (img_size[0], img_size[1], 1)
    cl.enqueue_read_image(command_queue, dst_image, origin, region,
                          cl_buffer).wait()
    print("Executed program successfully.")

    return gpu_filter_.image_convert(cl_buffer, img_size)
def parallelSumRed(imgRGBA, width, height):
    """Run the ``getLaserCoord`` kernel on an RGBA frame and report the offset.

    The frame is uploaded as an 8-bit RGBA image, the kernel is launched with
    one work-item per pixel, and three int32 results plus the output image
    are read back. The results are used as ``sumX``, ``sumY`` and ``N``
    (presumably coordinate sums and a pixel count accumulated by the kernel
    -- confirm against the kernel source).

    Args:
        imgRGBA: numpy array whose raw bytes are the frame's RGBA pixels.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A tuple ``(buff, offsetX, offsetY)``. ``buff`` is a flat uint8 array
        of ``width * height * 4`` bytes read from the output image. The
        offsets are ``sumX / N`` and ``sumY / N`` relative to the frame
        centre, truncated to int; both are 0 unless ``N > 5``.
    """

    def int32_buffer(flags, value):
        # One-element device buffer initialised from a host int32.
        return cl.Buffer(ctx,
                         flags | mf.COPY_HOST_PTR,
                         hostbuf=numpy.int32(value))

    # host -> device
    width_buf = int32_buffer(mf.READ_ONLY, width)
    height_buf = int32_buffer(mf.READ_ONLY, height)
    # The result buffers are zero-initialised, which only matters if the
    # kernel reads them back while accumulating; READ_WRITE permits that.
    dest_sum_buf = int32_buffer(mf.READ_WRITE, 0)
    dest_sumY_buf = int32_buffer(mf.READ_WRITE, 0)
    dest_N_buf = int32_buffer(mf.READ_WRITE, 0)

    # Serialise the frame once; both images are initialised from it.
    # tobytes() replaces the deprecated tostring() alias.
    pixels = imgRGBA.tobytes()
    rgba8 = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8)
    # Use the caller's dimensions so the images, the launch grid and the
    # read-back region below always agree.
    image_shape = (width, height)
    clImage = cl.Image(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, rgba8,
                       image_shape, None, pixels)
    clOutImage = cl.Image(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, rgba8,
                          image_shape, None, pixels)

    sampler = cl.Sampler(
        ctx,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP_TO_EDGE,
        cl.filter_mode.NEAREST)

    # compile OpenCL code
    prg = cl.Program(ctx, kernel).build()

    # One work-item per pixel. The third positional argument of a PyOpenCL
    # kernel call is the local size; None lets the runtime choose it.
    prg.getLaserCoord(queue, image_shape, None, clImage, clOutImage, sampler,
                      width_buf, height_buf, dest_sum_buf, dest_N_buf,
                      dest_sumY_buf)

    # Host-side result arrays must be int32 to match the 4-byte device
    # buffers; a default-int array may be 8 bytes wide.
    sumX = numpy.zeros(1, numpy.int32)
    sumY = numpy.zeros(1, numpy.int32)
    N = numpy.zeros(1, numpy.int32)
    # Flat RGBA bytes of the output image.
    buff = numpy.zeros(width * height * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (width, height, 1)

    # device -> host
    cl.enqueue_copy(queue, sumX, dest_sum_buf)
    cl.enqueue_copy(queue, N, dest_N_buf)
    cl.enqueue_copy(queue, sumY, dest_sumY_buf)

    # NOTE(review): enqueue_read_image is the legacy image read call;
    # PyOpenCL documents enqueue_copy as its replacement -- confirm the
    # installed version still provides it.
    cl.enqueue_read_image(queue, clOutImage, origin, region, buff).wait()

    count = int(N[0])
    offsetX = 0
    offsetY = 0

    if count != 0:
        # float() forces true division on both Python 2 and Python 3.
        mean_x = float(sumX[0]) / count
        mean_y = float(sumY[0]) / count
        print("LASER (x,y) = (" + str(mean_x) + ", " + str(mean_y) + ")")

        # Only report an offset when more than 5 pixels contributed.
        if count > 5:
            offsetX = mean_x - width / 2.
            offsetY = mean_y - height / 2.

    return (buff, int(offsetX), int(offsetY))
    def get_costvolume(self,
                       left_img,
                       right_img,
                       dispRange=64,
                       thread_num=(16, 16, 4),
                       host_mem=False):
        """Compute the cost volume for a left/right image pair on the device.

        Both images are uploaded as float OpenCL images and ``self.get_cost``
        is launched over a ``(w, h, dispRange)`` grid, writing into a device
        buffer of ``w * h * dispRange`` float32 values.

        Args:
            left_img: numpy image, either 2-D (uploaded as single-channel
                ``R``) or 3-D with at least 3 channels (uploaded as
                ``RGBA``; 3-channel input is converted with
                ``cv2.COLOR_BGR2RGBA`` first).
            right_img: Second image; must have the same shape as
                ``left_img``.
            dispRange: Size of the third grid dimension and depth of the
                cost volume.
            thread_num: Local work size ``(x, y, z)``, or ``None`` to skip
                the divisibility check and pass no explicit local size.
            host_mem: When true, also copy the result into a host array.

        Returns:
            The device ``cost_buffer``; or, when ``host_mem`` is true, the
            tuple ``(cost_buffer, costvolume)`` where ``costvolume`` is a
            float32 array of shape ``(h, w, dispRange)``.

        Raises:
            AssertionError: If the shapes differ, a 3-D image has fewer than
                3 channels, or the grid is not divisible by ``thread_num``.
            ValueError: If the images are neither 2-D nor 3-D.
        """
        assert left_img.shape == right_img.shape, "Shape of both imgs are different"

        # Validate everything before any device object is created.
        ndim = len(left_img.shape)
        if ndim == 3:
            assert left_img.shape[2] >= 3, "Unavailable type of imgs"
        elif ndim != 2:
            # Raise instead of print + exit(): a library method must not
            # terminate the interpreter.
            raise ValueError("Unavailable type of imgs")

        h = left_img.shape[0]
        w = left_img.shape[1]
        if thread_num is not None:
            divisible = (w % thread_num[0] == 0
                         and h % thread_num[1] == 0
                         and dispRange % thread_num[2] == 0)
            assert divisible, \
                "Image size and dispRange must be multiples of thread_num"

        # image object configuration
        mf = cl.mem_flags
        if ndim == 3:
            # RGB/BGR input gets an alpha channel so it fits an RGBA image.
            if left_img.shape[2] == 3:
                left_img = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGBA)
                right_img = cv2.cvtColor(right_img, cv2.COLOR_BGR2RGBA)
            channel_order = cl.channel_order.RGBA
        else:
            # GRAY
            channel_order = cl.channel_order.R
        fmt = cl.ImageFormat(channel_order, cl.channel_type.FLOAT)

        # The image format is FLOAT, so the host data must be contiguous
        # float32; this is a no-op for input that already is.
        left_img = np.ascontiguousarray(left_img, dtype=np.float32)
        right_img = np.ascontiguousarray(right_img, dtype=np.float32)

        # numpy -> OpenCL 2-D images
        ctx = self.ctx
        left_image2D = cl.Image(ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                fmt,
                                shape=(w, h),
                                hostbuf=left_img)
        right_image2D = cl.Image(ctx,
                                 mf.READ_ONLY | mf.COPY_HOST_PTR,
                                 fmt,
                                 shape=(w, h),
                                 hostbuf=right_img)

        # (w x h x dispRange) float32 values
        float_size = np.dtype(np.float32).itemsize
        cost_buffer = cl.Buffer(ctx,
                                mf.READ_WRITE,
                                size=w * h * dispRange * float_size)

        queue = self.queue
        self.get_cost(
            queue,
            (w, h, dispRange),  # global size
            thread_num,  # local size
            left_image2D,
            right_image2D,  # input
            cost_buffer,  # output
            np.int32(w),
            np.int32(dispRange)).wait()

        if not host_mem:
            return cost_buffer

        # Allocate the host copy only when requested; the blocking copy
        # overwrites every element, so no zero-fill is needed.
        costvolume = np.empty((h, w, dispRange), dtype=np.float32)
        cl.enqueue_copy(queue, costvolume, cost_buffer, is_blocking=True)
        return cost_buffer, costvolume