Code Example #1
    def convert_image_yuv(self, image):
        start = time.time()
        iplanes = image.get_planes()
        width = image.get_width()
        height = image.get_height()
        strides = image.get_rowstride()
        pixels = image.get_pixels()
        assert iplanes==ImageWrapper._3_PLANES, "we only handle planar data as input!"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        assert len(strides)==len(pixels)==3, "invalid number of planes or strides (should be 3)"
        assert width>=self.src_width and height>=self.src_height, "expected source image with dimensions of at least %sx%s but got %sx%s" % (self.src_width, self.src_height, width, height)

        #adjust work dimensions for subsampling:
        #(we process N pixels at a time in each dimension)
        divs = get_subsampling_divs(self.src_format)
        wwidth = dimdiv(self.dst_width, max(x_div for x_div, _ in divs))
        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        kernelargs = [self.queue, globalWorkSize, localWorkSize]

        iformat = pyopencl.ImageFormat(pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8)
        input_images = []
        for i in range(3):
            _, y_div = divs[i]
            plane = pixels[i]
            if type(plane)==str:
                flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
            else:
                flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
            shape = strides[i], self.src_height/y_div
            iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=plane)
            input_images.append(iimage)

        #output image:
        oformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNORM_INT8)
        oimage = pyopencl.Image(self.context, mem_flags.WRITE_ONLY, oformat, shape=(self.dst_width, self.dst_height))

        kernelargs += input_images + [numpy.int32(self.src_width), numpy.int32(self.src_height),
                       numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                       self.sampler, oimage]

        kstart = time.time()
        log("convert_image(%s) calling %s%s after upload took %.1fms",
              image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
        self.kernel_function(*kernelargs)
        self.queue.finish()
        #free input images:
        for iimage in input_images:
            iimage.release()
        kend = time.time()
        log("%s took %.1fms", self.kernel_function, 1000.0*(kend-kstart))

        out_array = numpy.empty(self.dst_width*self.dst_height*4, dtype=numpy.byte)
        pyopencl.enqueue_read_image(self.queue, oimage, (0, 0), (self.dst_width, self.dst_height), out_array)
        self.queue.finish()
        log("readback using %s took %.1fms", CHANNEL_ORDER_TO_STR.get(self.channel_order), 1000.0*(time.time()-kend))
        self.time += time.time()-start
        self.frames += 1
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, out_array.data, self.dst_format, 24, self.dst_width*4, planes=ImageWrapper.PACKED)
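Most of the examples on this page follow the same readback pattern: allocate a host buffer of width * height * bytes-per-pixel, then enqueue a read with a (0, 0, 0) origin and a (width, height, 1) region and wait for the event. A minimal standalone sketch of just that pattern (illustration only, not taken from any of the projects listed here; newer pyopencl releases deprecate enqueue_read_image in favour of enqueue_copy):

import numpy
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

width, height = 640, 480
fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
out_image = cl.Image(ctx, cl.mem_flags.READ_WRITE, fmt, shape=(width, height))

host_buf = numpy.zeros(width * height * 4, dtype=numpy.uint8)  # 4 bytes per RGBA pixel
origin = (0, 0, 0)               # start of the region to read, in pixels
region = (width, height, 1)      # the whole 2D image (depth 1)

# The call used throughout these examples; it returns an event to wait on.
cl.enqueue_read_image(queue, out_image, origin, region, host_buf).wait()

# On newer pyopencl versions the equivalent call is:
# cl.enqueue_copy(queue, host_buf, out_image, origin=origin, region=region).wait()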
Code Example #2
File: gradient.py Project: Rejzor/Python_stuff
def gpu_gradient():
				
	if len(sys.argv) != 3:
		print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
		return 1
	
	# create context and command queue
	ctx = cl.create_some_context()
	queue = cl.CommandQueue(ctx)
	
	# load image
	im = Image.open(sys.argv[1])
	if im.mode != "RGBA":
		im = im.convert("RGBA")
	imgSize = im.size
	buffer = im.tostring() # len(buffer) = imgSize[0] * imgSize[1] * 4

	
	# Create input and output image objects
	clImageFormat = cl.ImageFormat(cl.channel_order.RGBA, 
								cl.channel_type.UNSIGNED_INT8)
	input_image = cl.Image(ctx,
								cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
								clImageFormat,
								imgSize,
								None,
								buffer)
	output_image = cl.Image(ctx,
							cl.mem_flags.WRITE_ONLY,
							clImageFormat,
							imgSize)

	# load the kernel source code
	kernelFile = open("gradient.cl", "r")
	kernelSrc = kernelFile.read()

	# Create OpenCL program
	program = cl.Program(ctx, kernelSrc).build()
	# Call the kernel directly
	globalWorkSize = ( imgSize[0],imgSize[1] ) 
	gpu_start_time = time()
	program.gradient(queue,
							globalWorkSize,
							None,
							input_image,
							output_image)
		
	# Read the output buffer back to the Host
	buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
	origin = ( 0, 0, 0 )
	region = ( imgSize[0], imgSize[1], 1 )
	
	cl.enqueue_read_image(queue, output_image,
						origin, region, buffer).wait()
	
	# Save the image to disk
	gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
	gsim.save("GPU_"+sys.argv[2])
	gpu_end_time = time()
	print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
Code Example #3
File: lab7.py Project: obask/CL
def main():
    imageObjects = [0, 0]
    # Main
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1
    # Create an OpenCL context on first available platform
    context, device = CreateContext()
    if context == None:
        print "Failed to create OpenCL context."
        return 1

    # Create a command-queue on the first device available
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])
    # Create output image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context, cl.mem_flags.WRITE_ONLY, clImageFormat,
                               imgSize)
    # Create sampler for sampling image object
    sampler = cl.Sampler(
        context,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP_TO_EDGE,
        cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    localWorkSize = (16, 16)
    globalWorkSize = (RoundUp(localWorkSize[0],
                              imgSize[0]), RoundUp(localWorkSize[1],
                                                   imgSize[1]))
    program.gaussian_filter(commandQueue, globalWorkSize, localWorkSize,
                            imageObjects[0], imageObjects[1], sampler,
                            numpy.int32(imgSize[0]), numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)

    cl.enqueue_read_image(commandQueue, imageObjects[1], origin, region,
                          buffer).wait()
    print "Executed program successfully."
    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)
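Several of the examples on this page (this one and Code Examples #9, #10, #11, #13, #17 and #21) call a RoundUp helper that is not shown. It pads the global work size up to the next multiple of the work-group size so the NDRange divides evenly; a minimal sketch of such a helper (an assumption, not the projects' original code):

def RoundUp(group_size, global_size):
    # Round global_size up to the next multiple of group_size.
    r = global_size % group_size
    if r == 0:
        return global_size
    return global_size + group_size - r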
Code Example #4
File: ocltypes.py Project: robintw/gputools
    def get(self, **kwargs):
        queue = get_device().queue
        if hasattr(self,"shape"):
            imshape = self.shape
        else:
            imshape = (self.width,)

        dshape = imshape[::-1]
        if self.format.channel_count>1:
            dshape += (self.format.channel_count,)
        out = np.empty(dshape,dtype=self.dtype)
        pyopencl.enqueue_read_image(queue,self,[0]*len(dshape),imshape,out)

        return out.reshape(dshape)
Code Example #5
File: ocltypes.py Project: robintw/gputools
    def get(self, **kwargs):
        queue = get_device().queue
        if hasattr(self, "shape"):
            imshape = self.shape
        else:
            imshape = (self.width, )

        dshape = imshape[::-1]
        if self.format.channel_count > 1:
            dshape += (self.format.channel_count, )
        out = np.empty(dshape, dtype=self.dtype)
        pyopencl.enqueue_read_image(queue, self, [0] * len(dshape), imshape,
                                    out)

        return out.reshape(dshape)
Code Example #6
File: ocltypes.py Project: gpwright/gputools
    def get(self, **kwargs):
        queue = get_device().queue
        if hasattr(self, "shape"):
            imshape = self.shape
        else:
            imshape = (self.width, )

        dshape = imshape[::-1]
        ndim = len(imshape)
        if self.num_channels > 1:
            dshape += (self.num_channels, )
            #dshape = (self.num_channels,) + dshape
        out = np.empty(dshape, dtype=self.dtype)
        cl.enqueue_read_image(queue, self, [0] * ndim, imshape, out)

        return out
Code Example #7
File: ocltypes.py Project: maweigert/gputools
    def get(self, **kwargs):
        queue = get_device().queue
        if hasattr(self,"shape"):
            imshape = self.shape
        else:
            imshape = (self.width,)


        dshape = imshape[::-1]
        ndim = len(imshape)
        if self.num_channels>1:
            dshape += (self.num_channels,)
            #dshape = (self.num_channels,) + dshape
        out = np.empty(dshape,dtype=self.dtype)
        cl.enqueue_read_image(queue,self,[0]*ndim,imshape,out)

        return out
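Code Examples #4 to #7 all reverse the image shape (dshape = imshape[::-1]) before allocating the host array. The reason is the differing conventions: a pyopencl Image shape is given as (width, height[, depth]), while the numpy array it is read into is indexed slowest-to-fastest, i.e. (depth,) rows, columns. A tiny illustration with made-up sizes (not part of the original code):

imshape = (640, 480)      # pyopencl Image shape: (width, height)
dshape = imshape[::-1]    # numpy array shape:    (height, width) == (480, 640)
# With more than one channel, the channel axis is appended last,
# e.g. (480, 640, 4) for an RGBA image.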
Code Example #8
    def parallel_prediction_errors(self, image):
        """ Get the MILC prediction errors for a 3D image by means of OpenCL accelerated computation

            Keyword arguments:
            image --  a 3D numpy array (bitmap image)

            Return:
            a 3D numpy array of the same shape as "image", containing the prediction errors
        """

        mf = cl.mem_flags
        # Define the image format for the prediction errors
        err_format = cl.ImageFormat(channel_order=cl.channel_order.R,
                                    channel_type=DataType.CL_ERR.value)

        # Define the input image from the numpy 3D array
        source_image = cl.image_from_array(self.ctx, image)

        original_shape = numpy.shape(image)
        cl_shape = list(
            reversed(original_shape))  # inverted shape (pyOpenCL bug?)

        # output image
        output_image = cl.Image(self.ctx,
                                mf.WRITE_ONLY,
                                err_format,
                                shape=cl_shape)

        # sampler. pixels out of range have a value of '0'
        sampler = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)

        # enqueue kernel
        self.program.image_test(self.queue, original_shape, None, source_image,
                                output_image, sampler)

        # read the resulting image into a numpy array
        output_data = numpy.empty(shape=cl_shape, dtype=DataType.ERR.value)
        cl.enqueue_read_image(self.queue, output_image, (0, 0, 0), cl_shape,
                              output_data)

        return output_data.reshape(original_shape)
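The DataType enum used above (DataType.CL_ERR and DataType.ERR) is not part of the snippet. A plausible sketch of such an enum, pairing the OpenCL channel type of the error image with the matching numpy dtype, is shown below; this is an assumption, and the real project may use different widths:

from enum import Enum
import numpy
import pyopencl as cl

class DataType(Enum):
    CL_ERR = cl.channel_type.SIGNED_INT16   # channel type of the prediction-error image
    ERR = numpy.int16                        # matching numpy dtype for the host array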
Code Example #9
File: project.py Project: sam0426933/pyopencl_HW
def main():
    k = 0.5
    # pan_data = scm.imread('taipei_pan.jpg')
    # mul_data = scm.imread('taipei_mul.jpg')
    mul = Image.open("test_mul.jpg")
    pan = Image.open("test_pan.jpg")

    mul_data = np.array(mul)
    pan_data = np.array(pan)

    
    r = mul_data[:, :, 0]
    g = mul_data[:, :, 1]
    b = mul_data[:, :, 2]

    ## float64 to float32
    pan_data = pan_data.astype(np.float32)
    
    r = r.astype(np.float32)
    g = g.astype(np.float32)
    b = b.astype(np.float32)

    if __debug__:
	    print("pan type: " + str(type(pan_data[0,0])))
	    print("mul type: " + str(type(mul_data[:,:,0][0,0])))
	    print("r: " + str(type(r[0,0])))
	    print("g: " + str(type(g[0,0])))
	    print("b: " + str(type(b[0,0])))
    
    time_start = time.time()

    i = (r * 0.171 + g * 0.2 + b * 0.171) / 0.632 ## float64
    if __debug__:
	    print("i: " + str(type(i[0,0])))
    kx__pan_minus_iii = k * (pan_data - i)
    if __debug__:
	    print("kx__pan_minus_iii: " + str(type(kx__pan_minus_iii[0,0])))

    # coe = pan_data / (i + kx__pan_minus_iii) ## float64

    with np.errstate(divide='ignore', invalid='ignore'):
        denominator = (i + kx__pan_minus_iii)
        coe = pan_data / denominator
        coe[denominator == 0] = 0
    if __debug__:
	    print("coe: " + str(type(coe[0,0])))
    nr        = coe * (r + kx__pan_minus_iii)
    ng     = coe * (g + kx__pan_minus_iii)
    nb       = coe * (b + kx__pan_minus_iii)
    output_img = np.empty_like(mul_data)
    if __debug__:
	    print("nr: " + str(type(nr[0,0])))

    finish_time = time.time() - time_start

    # Remove NaNs
    nr = np.nan_to_num(nr)
    ng = np.nan_to_num(ng)
    nb = np.nan_to_num(nb)
    
    # Overflow: clamp values above 255
    nr[nr > 255] = 255
    ng[ng > 255] = 255
    nb[nb > 255] = 255

    # Values below 0 are not clamped to 0 by default either (also an overflow issue)
    nr[nr < 0] = 0
    ng[ng < 0] = 0
    nb[nb < 0] = 0

    # The default conversion truncates instead of rounding to nearest even (rte)
    nr = np.round(nr)
    ng = np.round(ng)
    nb = np.round(nb)
    
    output_img[:, :, 0] = nr
    output_img[:, :, 1] = ng
    output_img[:, :, 2] = nb


    # for index in range(0, len(nr.ravel()), 1):
    #     if nr.ravel()[index] > 255:
    #         print("nr: " + str(nr.ravel()[index]) + ", output_img: " + str(output_img[:, :, 0].ravel()[index]))

    # compare = output_img[:,:,0].ravel()==nr.ravel()
    # com_index = np.where(compare == False)[0]
        
    # print("Different index length: " + str(len(com_index)))
    
    # for dif_id in com_index:
    #     if not math.isnan(nr.ravel()[dif_id]):
    #          print("id : " + str(dif_id) + "  output_img: " + str(output_img[:,:,0].ravel()[dif_id]) + "  nr: " + str(nr.ravel()[dif_id]))
    
    # scm.imsave("output.jpg", output_img)

    print ("finish time:" + str(finish_time) + " s")

    ## ===========================================================
    device = cl.get_platforms()[0].get_devices()[1]

    ctx = cl.Context([device])
    queue = cl.CommandQueue(ctx, 
                            properties=cl.command_queue_properties.PROFILING_ENABLE)


    # convert image
    if mul.mode != "RGBA":
    	mul = mul.convert("RGBA") 
    mulSize = mul.size
	
    # set mul alpha to pan
    mul.putalpha(pan)
    mul_str = mul.tostring()

    
    # image format
    mulImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
    				    cl.channel_type.UNSIGNED_INT8)

    time_start = time.time()

    # input buf (mul+pan)
    mul_buf = cl.Image(ctx,
    		       cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
    		       mulImageFormat,
    		       mulSize,
    		       None,
    		       mul_str)
    
    # create output image object
    result_buf = cl.Image(ctx,
    			  cl.mem_flags.WRITE_ONLY,
    			  mulImageFormat,
    			  mulSize)

    finish_time = time.time() - time_start

    print ("host to device finish time:" + str(finish_time) + " s")
	
    # load kernel.cl
    kernelFile = open("kernel.cl", "r")
    kernelSrc = kernelFile.read()

    # create OpenCL program
    program = cl.Program(ctx, kernelSrc).build()
    
    # program parameter
    localWorkSize = (16,  16)
    globalWorkSize = ( RoundUp(localWorkSize[0], mulSize[0]),
    		       RoundUp(localWorkSize[1], mulSize[1]) )


    # warm up
    program.calculate(queue,
    		      globalWorkSize,
    		      localWorkSize,
    		      mul_buf,
    		      result_buf)
    # execute
    exec_evt = program.calculate(queue,
    				 globalWorkSize,
    				 localWorkSize,
    				 mul_buf,
    				 result_buf)
    exec_evt.wait()
    elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)

    print("OpenCL execute success!")
    print("OpenCL finish time: %g s" % elapsed)

    
    # read  output buffer to the Host
    mul_str = np.zeros(mulSize[0] * mulSize[1] * 4, np.uint8)
    origin = ( 0, 0, 0 )
    region = ( mulSize[0], mulSize[1], 1 )

    time_start = time.time()
    

    # cl.enqueue_read_image(queue, result_buf,
    # 			  origin, region, mul_str).wait()
    exec_evt = cl.enqueue_read_image(queue, result_buf,
    				     origin, region, mul_str)

    exec_evt.wait()
    elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)
    print("OpenCL get result success!")
    print("OpenCL get result finish time: %g s" % elapsed)

    # save image
    cl_output_img = Image.fromstring("RGBA", mulSize, mul_str.tostring())
    cl_output_img.save("cl_output.jpg")


    # check result
    result_data =  np.array(cl_output_img)[:,:,[0,1,2]].ravel()
    output_img = output_img.ravel()
    compare = (output_img == result_data)
    equal = np.all(compare)
    # equal_close = np.allclose(output_img, result_data)
    
    if not equal:
        print("Results doesn't match!!")
        print(output_img)
        print("================================================================")
        print(result_data)
        print("================================================================")
        com_index = np.where(compare == False)[0]
        
        print("Different index length: " + str(len(com_index)))

        # for dif_id in com_index:
        #     print("id : " + str(dif_id) + "  ori: " + str(output_img[dif_id]) + "  rst: " + str(result_data[dif_id]))

    else:
        print("Results OK")
Code Example #10
def main():
    
    os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'

    imageObjects = [ 0, 0, 0 ]
            
    # Main
    if len(sys.argv) != 4:
        print "USAGE: " + sys.argv[0] + " <source> <palette> <output>"
        return 1
	
    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print "Failed to create OpenCL context."
        return 1
        
    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)
    
    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1
    
    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], srcSize = LoadImage(context, sys.argv[1])
    imageObjects[1], palSize = LoadImage(context, sys.argv[2])

    pixels = srcSize[0] * srcSize[1]

    if (palSize[0] * palSize[1] != pixels):
        print "Images do not contain the same number of pixels."
        return 1

#########################################
###
###
### TODO Rearrange the palette to the correct dimensions here
###
###
#########################################

    # Create output buffer and image objects
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA, 
                                   cl.channel_type.UNORM_INT8)
    bufferObject = cl.Buffer(context,
                             cl.mem_flags.READ_WRITE,
			     pixels * 4 * 4)
    imageObjects[2] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               srcSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "quad_swap.cl")
    
    # Call the kernel directly
    localWorkSize = (16,)
    globalWorkSize = ( RoundUp(localWorkSize[0], srcSize[0]*srcSize[1]/2), )

    program.quad_swap(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
			    bufferObject,
                            imageObjects[2],
                            sampler,
                            numpy.int32(srcSize[0]),
                            numpy.int32(srcSize[1]))
        
    # Read the output buffer back to the Host
    buffer = numpy.zeros(srcSize[0] * srcSize[1] * 4, numpy.uint8)
    origin = ( 0, 0, 0 )
    region = ( srcSize[0], srcSize[1], 1 )
    
    cl.enqueue_read_image(commandQueue, imageObjects[2], origin, region, buffer).wait()
    
    # Save the image to disk
    SaveImage(sys.argv[3], buffer, srcSize)
Code Example #11
def main():

    imageObjects = [0, 0]

    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1

    # create context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # load image
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    imgSize = im.size
    buffer = im.tostring()

    # Create input and output image objects
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNSIGNED_INT8)
    imageObjects[0] = cl.Image(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        clImageFormat, imgSize, None, buffer)
    imageObjects[1] = cl.Image(ctx, cl.mem_flags.WRITE_ONLY, clImageFormat,
                               imgSize)

    # load the kernel source code
    #kernelFile = open("grayscale.cl", "r")
    kernelSrc = """
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |   
                          CLK_ADDRESS_CLAMP_TO_EDGE |   
                          CLK_FILTER_NEAREST;  
  
__kernel void rgbaToGrayscale(__read_only image2d_t srcImg,  
                              __write_only image2d_t dstImg)  
{  
    // Binarizes each RGB channel against `threshold`:
    // channel values >= threshold become 255, all others become 0
  
    int2 coord = (int2) (get_global_id(0), get_global_id(1));  
    int width = get_image_width(srcImg);  
    int height = get_image_height(srcImg);  
    int threshold = 100;
  
    if (coord.x < width && coord.y < height)  
    {  
        
        uint4 color = read_imageui(srcImg, sampler, coord);  
               
        if(color.x>=threshold){  
            color.x = 255;
        }
        else{
        color.x = 0;
        }
        
        if(color.y>=threshold){  
            color.y = 255;
        }
        else{
        color.y = 0;
        }
        
        if(color.z>=threshold){  
            color.z = 255;
        }
        else{
        color.z = 0;
        }
        
        
        
        
          
        // Write the output value to image  
        write_imageui(dstImg, coord, color);  
    }  
    
}"""

    # Create OpenCL program
    program = cl.Program(ctx, kernelSrc).build()

    # Call the kernel directly
    localWorkSize = (16, 16)
    globalWorkSize = (RoundUp(localWorkSize[0],
                              imgSize[0]), RoundUp(localWorkSize[1],
                                                   imgSize[1]))

    gr = time.time()

    program.rgbaToGrayscale(queue, globalWorkSize, localWorkSize,
                            imageObjects[0], imageObjects[1])

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)

    cl.enqueue_read_image(queue, imageObjects[1], origin, region,
                          buffer).wait()
    print time.time() - gr

    print "Executed program succesfully."

    # Save the image to disk
    gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
    gsim.save(sys.argv[2])
Code Example #12
File: pycltesttex.py Project: haehn/sandbox
                     cl.channel_type.UNSIGNED_INT8),
                     shape=OutImg.shape)

prg = cl.Program(ctx, """
     const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | 
        CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP_TO_EDGE;

     __kernel void ImageDS(__read_only image2d_t sourceImage, __write_only image2d_t targetImage)
     {

        int w = get_image_width(targetImage);
        int h = get_image_height(targetImage);

        int outX = get_global_id(0);
        int outY = get_global_id(1);
        int2 posOut = {outX, outY};

        float inX = outX / (float) w;
        float inY = outY / (float) h;
        float2 posIn = (float2) (inX, inY);

        float4 pixel = read_imagef(sourceImage, sampler, posIn);
        write_imagef(targetImage, posOut, pixel);

     }
     """).build()

prg.ImageDS(queue, OutImg.shape, None, dev_Img, dev_OutImg)
cl.enqueue_read_image(queue, dev_OutImg, (0, 0), OutImg.shape, OutImg).wait()
cv2.imwrite("out.jpg", OutImg)
Code Example #13
def main():

    imageObjects = [ 0, 0 ]

    # Main
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1


    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print "Failed to create OpenCL context."
        return 1

    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])

    # Create output image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               imgSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    localWorkSize = ( 16, 16 )
    globalWorkSize = ( RoundUp(localWorkSize[0], imgSize[0]),
                       RoundUp(localWorkSize[1], imgSize[1]) )

    program.gaussian_filter(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
                            sampler,
                            numpy.int32(imgSize[0]),
                            numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = ( 0, 0, 0 )
    region = ( imgSize[0], imgSize[1], 1 )

    cl.enqueue_read_image(commandQueue, imageObjects[1],
                          origin, region, buffer).wait()

    print "Executed program succesfully."

    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)
Code Example #14
def gpu_filter(in_put='in.jpg'):
    gpu_filter_ = GpuFilter()
    image_objects = [0, 0]
    # if len(sys.argv) != 3:
    #     print "  : " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
    #     exit()

    # Create an OpenCL context on first available platform
    context, device = gpu_filter_.create_context()

    if context is None:
        print "Failed to create OpenCL context."
        exit()

    # Create a command-queue on the first device available on the context that has been created
    command_queue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        exit()

    # Load input image from file and load it into an OpenCL image object
    image_objects[0], img_size = gpu_filter_.load_image(context, in_put)
    # print image_objects[0], img_size

    # Create output image object
    cl_image_format = cl.ImageFormat(cl.channel_order.RGBA,
                                     cl.channel_type.UNORM_INT8)

    image_objects[1] = cl.Image(context, cl.mem_flags.WRITE_ONLY,
                                cl_image_format, img_size)

    # Create sampler for sampling image object
    sampler = cl.Sampler(
        context,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP,
        cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = gpu_filter_.create_program(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    local_work_size = (16, 16)
    global_work_size = (gpu_filter_.round_up(local_work_size[0], img_size[0]),
                        gpu_filter_.round_up(local_work_size[1], img_size[1]))
    program.gaussian_filter(command_queue, global_work_size, local_work_size,
                            image_objects[0], image_objects[1], sampler,
                            numpy.int32(img_size[0]), numpy.int32(img_size[1]))

    # Read the output buffer back to the Host
    cl_buffer = numpy.zeros(img_size[0] * img_size[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (img_size[0], img_size[1], 1)
    cl.enqueue_read_image(command_queue, image_objects[1], origin, region,
                          cl_buffer).wait()
    print "Executed program successfully."

    # return the image matrix
    return gpu_filter_.image_convert(cl_buffer, img_size)
Code Example #15
def copy_image_to_host(queue, output, size, dtype):
    buffer = np.zeros(size[0] * size[1] * 4, dtype=dtype)
    origin = (0, 0, 0)
    region = (size[0], size[1], 1)
    cl.enqueue_read_image(queue, output, origin, region, buffer).wait()
    return buffer
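A short usage sketch for the helper above (queue, result_img, w and h are assumed to exist already; the names are illustrative only):

# result_img: an RGBA output image of size (w, h) filled by some kernel.
pixels = copy_image_to_host(queue, result_img, (w, h), np.uint8)
rgba = pixels.reshape(h, w, 4)   # rows x columns x channels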
Code Example #16
def parallelSumRed(imgRGBA, width, height):
    global c1
    global c2
    C = 0.
    F = 259.*(C + 255.)/(255.*(259. - C))
    #print(F)

    #Create buffers
    #host -> device
    width_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.int32(width))
    height_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.int32(height))
    dest_sum_buf = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.int32(0))
    dest_sumY_buf = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.int32(0))

    dest_N_buf = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.int32(0))
    F_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.float32(F))
    
    clImage = cl.Image(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8),
                               (640, 480), None, imgRGBA.tostring() )
    clOutImage = cl.Image(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8),
                               (640, 480), None, imgRGBA.tostring() )

    sampler = cl.Sampler(ctx,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    #compile openCL code
    prg = cl.Program(ctx, kernel).build()

    #define grid size
    gridSizeX = 640
    gridSizeY = 480
    
    globalWorkSize = (gridSizeX, gridSizeY)

    #run kernel
    prg.getLaserCoord(queue, globalWorkSize, 
            clImage, clOutImage, sampler, width_buf, height_buf, dest_sum_buf, dest_N_buf, dest_sumY_buf) #can't use Intel CPU for now, need to install NVidia drivers; use AMD for now

    #set up output buffers
    sumX = numpy.empty_like(0)
    sumY = numpy.empty_like(0)
    N = numpy.empty_like(0)
    buff = numpy.zeros(width * height * 4, numpy.uint8) #output is numpy array of (640, 480, 4); need to convert to RGBA -> RGB -> BGR and then display
    origin = (0,0,0)
    region = (width, height,1)

    #device -> host
    cl.enqueue_copy(queue, sumX, dest_sum_buf) #from 3rd arg on device to 2nd arg on host
    cl.enqueue_copy(queue, N, dest_N_buf)
    cl.enqueue_copy(queue, sumY, dest_sumY_buf)
    
    cl.enqueue_read_image(queue, clOutImage, origin, region, buff).wait()
    
    #print("N = " + str(N) + "; SumX = " + str(sumX) + "; SumY = " + str(sumY))
    
    #print(buff) #remember that every fourth value is alpha = 255
    offsetX = 0
    offsetY = 0
    
    if N!=0:
        print("LASER (x,y) = (" + str(sumX/N) + ", " + str(sumY/N) + ")")

    if N>5:
        offsetX = sumX/N-320.
        offsetY = sumY/N-240.

    return (buff, int(offsetX), int(offsetY))
Code Example #17
# Open the image files
src1 = Image.open('temp/images/f1.png')
src2 = Image.open('temp/images/f2.png')
dist = Image.new('RGBA',(640,480),(255,255,255))

# Image format for OpenCL processing: RGBA, uint8
imageFormat = cl.ImageFormat(cl.channel_order.RGBA,cl.channel_type.UNSIGNED_INT8)

# Copy the images from host to device
img1 = cl.Image(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,imageFormat,src1.size,None,src1.tobytes())
img2 = cl.Image(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,imageFormat,src2.size,None,src2.tobytes())
output = cl.Image(context=ctx,flags=mf.WRITE_ONLY,format=imageFormat,shape=src1.size)

# Define the work sizes from the image size
localWorkSize = ( 8, 8 )  
globalWorkSize = ( RoundUp(localWorkSize[0], src1.size[0]),  
                    RoundUp(localWorkSize[1], src1.size[1]))
# Run the kernel
prg.image_add(queue,globalWorkSize,localWorkSize,img1,img2,output)


buffer = np.zeros(src1.size[0] * src1.size[1] * 4, np.uint8)  
origin = ( 0, 0, 0 )  
region = ( src1.size[0], src1.size[1], 1 )  
# Copy the processed image from the device back to the host
cl.enqueue_read_image(queue, output,
                        origin, region, buffer).wait()
# Save the image
dist = Image.frombytes("RGBA",src1.size, buffer.tobytes())
dist.save('temp/images/cl-output.png')
dist.show()
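The image_add kernel invoked through prg is not included in this snippet. A minimal sketch of what such a kernel could look like for UNSIGNED_INT8 RGBA images (an assumption, not the original kernel source):

kernelSrc = """
__kernel void image_add(__read_only image2d_t img1,
                        __read_only image2d_t img2,
                        __write_only image2d_t dst)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP_TO_EDGE |
                          CLK_FILTER_NEAREST;
    int2 pos = (int2)(get_global_id(0), get_global_id(1));
    if (pos.x < get_image_width(dst) && pos.y < get_image_height(dst))
    {
        // Add the two pixels and clamp to the 8-bit range before writing.
        uint4 p = read_imageui(img1, smp, pos) + read_imageui(img2, smp, pos);
        write_imageui(dst, pos, min(p, (uint4)(255)));
    }
}
"""
# prg = cl.Program(ctx, kernelSrc).build()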
Code Example #18
gray_image = cl.Image(gpu_context, memory_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)
gaussian_image = cl.Image(gpu_context, cl.mem_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)
sobel_image = cl.Image(gpu_context, cl.mem_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)
angles_image = cl.Image(gpu_context, cl.mem_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)
edges_image = cl.Image(gpu_context, cl.mem_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)
thin_edges_image = cl.Image(gpu_context, cl.mem_flags.READ_ONLY, cl_single_chanel_image_format, frame_size)

sobel_mask_x = numpy.array([-1,0,1,-2,0,2,-1,0,1], dtype=numpy.int32)
sobel_mask_y = numpy.array([-1,-2,-1,0,0,0,1,2,1], dtype=numpy.int32)
gaussian_mask = numpy.array([2,4,5,4,2,4,9,12,9,4,5,12,15,12,5,4,9,12,9,4,2,4,5,4,2], dtype=numpy.int32)
sobel_x_buffer = cl.Buffer(gpu_context, memory_flags.READ_ONLY | memory_flags.COPY_HOST_PTR, hostbuf=sobel_mask_x)
sobel_y_buffer = cl.Buffer(gpu_context, memory_flags.READ_ONLY | memory_flags.COPY_HOST_PTR, hostbuf=sobel_mask_y)
gaussian_buffer = cl.Buffer(gpu_context, memory_flags.READ_ONLY | memory_flags.COPY_HOST_PTR, hostbuf=gaussian_mask)

while True:
    frame = cv.QueryFrame(stream)
    cv.ShowImage("camera_window1", frame)
    cv.CvtColor(frame, img, cv.CV_RGB2RGBA)
    frame_string = cv.GetMat(img).tostring()
    clImage = cl.Image(gpu_context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, rgba_image_format, frame_size, None, frame_string)
    # GPU pipeline: grayscale -> Gaussian blur -> Sobel -> edge detection -> edge thinning
    event = gpu_program.convert_to_gray(command_queue, globalWorkSize, localWorkSize, clImage, gray_image, sampler, numpy.int32(frame_x), numpy.int32(frame_y)).wait()
    event2 = gpu_program.apply_gaussian_mask(command_queue, globalWorkSize, localWorkSize, gray_image, gaussian_image, gaussian_buffer, sampler, numpy.int32(frame_x), numpy.int32(frame_y)).wait()
    event2 = gpu_program.apply_sobel_mask(command_queue, globalWorkSize, localWorkSize, gaussian_image, sobel_image, angles_image, sobel_x_buffer, sobel_y_buffer, sampler, numpy.int32(frame_x), numpy.int32(frame_y)).wait()
    event2 = gpu_program.find_edges(command_queue, globalWorkSize, localWorkSize, sobel_image, edges_image, angles_image, sampler, numpy.int32(frame_x), numpy.int32(frame_y)).wait()
    event2 = gpu_program.suppress_edges(command_queue, globalWorkSize, localWorkSize, edges_image, sobel_image, angles_image, thin_edges_image, sampler, numpy.int32(frame_x), numpy.int32(frame_y)).wait()
    event3 = cl.enqueue_read_image(command_queue, thin_edges_image, origin, region, thin_edges_array).wait()
    cv.ShowImage("camera_window3", cv.fromarray(thin_edges_array.reshape(frame_y, frame_x)))
    if cv.WaitKey(10) == 27:
        break

cv.DestroyWindow("camera_window")
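The loop above also relies on setup that the snippet does not show: the camera stream, the RGBA scratch image, the image formats, the sampler, the work sizes, and the read-back buffer with its origin and region (gpu_context, command_queue and gpu_program are likewise created elsewhere). A minimal sketch of what that setup could look like, inferred from the names used above (an assumption, not the original source):

memory_flags = cl.mem_flags
frame_x, frame_y = 640, 480
frame_size = (frame_x, frame_y)
stream = cv.CaptureFromCAM(0)                           # camera capture handle
img = cv.CreateImage(frame_size, cv.IPL_DEPTH_8U, 4)    # RGBA scratch image for CvtColor
cl_single_chanel_image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.UNSIGNED_INT8)
rgba_image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
sampler = cl.Sampler(gpu_context, False, cl.addressing_mode.CLAMP_TO_EDGE, cl.filter_mode.NEAREST)
localWorkSize = (16, 16)
globalWorkSize = (frame_x, frame_y)
thin_edges_array = numpy.zeros(frame_x * frame_y, numpy.uint8)  # read-back buffer for the final image
origin = (0, 0, 0)
region = (frame_x, frame_y, 1)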
Code Example #19
			               np.int32(imgSize[0]),
			               np.int32(imgSize[1])).wait()
        for i in xrange(ntimes):
            if i%2 == 0:
                m, n = 1, 2
            else:
                m, n = 2, 1
            program.blur_filter(commandQueue, globalWorkSize, None,
                                imageObjects[m], imageObjects[n], sampler,
                                np.int32(imgSize[0]),
                                np.int32(imgSize[1])).wait()
        t13=time.clock()
        print t13 - t12, " Run Kernel..."
        t14 = time.clock()
        buf2 = np.zeros(imgSize[0] * imgSize[1] * 4, np.uint8)
        cl.enqueue_read_image(commandQueue, imageObjects[2],
                              origin, region, buf2, is_blocking=True)
        IMG_3 = buf2.reshape(imgSize[1], imgSize[0], 4)
        t15 = time.clock()
        print t15 - t14, " Read Image from GPU..."
        print "Executed program succesfully."
        t16 = time.clock()
        print t16 - t4, " Total GPU..."

    else: ## device == CPU
        t4 = time.clock()
        im = Image.open(inFile)
        img = np.array(im)
        IMG_1 = scale_img(img, 8)
        img1 = Image.fromarray(IMG_1)
        # Make sure the image is RGBA formatted
        if img1.mode != "RGBA":
Code Example #20
import os, glob
directory = os.path.dirname(filename)
files = glob.glob(directory+'/thumbnail_*')

for filename in files:
    

  # load a 512x512 image
  Img = cv2.imread(filename, cv2.CV_LOAD_IMAGE_GRAYSCALE)

  OutImg = np.empty(shape=(width/factor, height/factor), dtype=np.uint8) # create Output-Image
  # OutImg = np.empty(shape=(100,100), dtype=np.uint8) # create Output-Image

  mf = cl.mem_flags
  dev_Img = cl.Image(ctx,
                       mf.READ_ONLY | mf.USE_HOST_PTR,
                       cl.ImageFormat(cl.channel_order.R,     
                       cl.channel_type.UNSIGNED_INT8),
                       hostbuf=Img)
  dev_OutImg = cl.Image(ctx,
                       mf.WRITE_ONLY,
                       cl.ImageFormat(cl.channel_order.R,     
                       cl.channel_type.UNSIGNED_INT8),
                       shape=OutImg.shape)


  prg.ImageDS(queue, OutImg.shape, None, dev_Img, dev_OutImg)
  cl.enqueue_read_image(queue, dev_OutImg, (0, 0), OutImg.shape, OutImg).wait()
  # cv2.imwrite("/tmp/sub_cl.jpg", OutImg)
Code Example #21
def main():

    imageObjects = [ 0, 0, 0, 0 ]

    # Main
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1


    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print "Failed to create OpenCL context."
        return 1

    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1

    print("Device Global Memory Size => %s MB"%(device.global_mem_size/(1024*1024)))
    print("Device Max Memory Allocation Size => %s MB"%(device.max_mem_alloc_size/(1024*1024)))

    cl_kernels = {}
    host_kernels = {}


    for cell_type in ganglion_cells:
        for centre_type in ganglion_cells[cell_type]:

            width = ganglion_cells[cell_type][centre_type]['width']
            pos_sigma = ganglion_cells[cell_type][centre_type]['sigma']
            neg_sigma = 3.*pos_sigma

            pos_gaussian = makeGaussianKernel(width, pos_sigma).flatten().astype(numpy.float32)
            neg_gaussian = makeGaussianKernel(width, neg_sigma).flatten().astype(numpy.float32)
            diff_of_gaussian = (pos_gaussian - neg_gaussian)
            host_kernels["%s-%s"%(cell_type, centre_type)] = diff_of_gaussian
            cl_kernels["%s-%s"%(cell_type, centre_type)] = cl.Buffer(context,
                                                                     cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                                                                     hostbuf=(host_kernels["%s-%s"%(cell_type, centre_type)]))




    print(host_kernels)
    cap = cv2.VideoCapture('sts-11-landing.webm')

    ret, frame = cap.read()
    height, width, channels = frame.shape
    print("VIDEO width = %s, height = %s, channels = %s"%(width, height, channels))
    frame_buffer = numpy.zeros(width * height * channels, numpy.uint8)

    numBits = 8
    imgSize = (width, height)
    fourcc = cv.CV_FOURCC(*'XVID')
    fps = 20.0
    out = cv2.VideoWriter('output.avi',fourcc, fps, (3*width, 2*height))


    # Load input image from file and load it into
    # an OpenCL image object
    # imageObjects[0], imgSize = LoadImage(context, sys.argv[1])


    # Create ouput image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGB,
                                   cl.channel_type.UNORM_INT8)


    frame_buffer = numpy.array( frame[:,:] ).flatten().astype(numpy.uint8)#.tostring()
    print(frame_buffer)
    imageObjects[0] =  cl.Buffer(context,
                                 cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                                 #clImageFormat,
                                 #imgSize,
                                 hostbuf=(frame_buffer))

    print("CL img 0 -> (%s, %s, %s)"%(imageObjects[0].width, imageObjects[0].height, imageObjects[0].element_size))

    imageObjects[1] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               imgSize)

    print("CL img 1 -> (%s, %s, %s)"%(imageObjects[1].width, imageObjects[1].height, imageObjects[1].element_size))

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")



    commandQueue.finish()

    #print("Device => %s "%(device.global_mem_size/(1024*1024)))

    #print("first frame copy")
    vid2CL(commandQueue, width, height, frame, imageObjects[0])

    while(cap.isOpened()):

        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        composite = cv.CreateImage((width*3, height*2), numBits, channels)

        for cell_type in ganglion_cells:
            for centre_type in ganglion_cells[cell_type]:
                print("%s -> %s"%(cell_type, centre_type))

                k_width = ganglion_cells[cell_type][centre_type]['width']
                local = 16 #minPowerOf2(width)
                localWorkSize = ( local, local )
                globalWorkSize = ( RoundUp(localWorkSize[0], imgSize[0]),
                                   RoundUp(localWorkSize[1], imgSize[1]) )

                print("starting convolution")
                start_time = time.time()

                program.convolution(commandQueue, globalWorkSize, localWorkSize,
                                    imageObjects[0], imageObjects[1],
                                    cl_kernels["%s-%s"%(cell_type, centre_type)],
                                    numpy.int32(k_width),
                                    sampler,
                                    numpy.int32(imgSize[0]),
                                    numpy.int32(imgSize[1]))
                print("end of %s-%s convolution %s (sec)"%(cell_type, centre_type, time.time()-start_time))

                # Read the output buffer back to the Host
                buff = numpy.zeros(width * height * channels, numpy.uint8)
                origin = ( 0, 0, 0 )
                region = ( imgSize[0], imgSize[1], 1 )

                cl.enqueue_read_image(commandQueue, imageObjects[1], origin, region, buff).wait()

                x_steps = ganglion_cells[cell_type][centre_type]['out_x']
                y_steps = ganglion_cells[cell_type][centre_type]['out_y']
                out_origin = (x_steps*width, y_steps*height)

                buff2CV(imgSize, channels, out_origin, buff, composite)

                #SaveImage("%s-%s---%s"%(cell_type, centre_type, sys.argv[2]), buff, imgSize)


        out.write(composite)
        ret, frame = cap.read()
        print("later frame copy")
        vid2CL(commandQueue, width, height, frame, imageObjects[0])

    print "Executed program succesfully."
Code Example #22
def parallelSumRed(imgRGBA, width, height):
    global c1
    global c2
    C = 0.
    F = 259. * (C + 255.) / (255. * (259. - C))
    #print(F)

    #Create buffers
    #host -> device
    width_buf = cl.Buffer(ctx,
                          mf.READ_ONLY | mf.COPY_HOST_PTR,
                          hostbuf=numpy.int32(width))
    height_buf = cl.Buffer(ctx,
                           mf.READ_ONLY | mf.COPY_HOST_PTR,
                           hostbuf=numpy.int32(height))
    dest_sum_buf = cl.Buffer(ctx,
                             mf.WRITE_ONLY | mf.COPY_HOST_PTR,
                             hostbuf=numpy.int32(0))
    dest_sumY_buf = cl.Buffer(ctx,
                              mf.WRITE_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=numpy.int32(0))

    dest_N_buf = cl.Buffer(ctx,
                           mf.WRITE_ONLY | mf.COPY_HOST_PTR,
                           hostbuf=numpy.int32(0))
    F_buf = cl.Buffer(ctx,
                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                      hostbuf=numpy.float32(F))

    clImage = cl.Image(
        ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
        cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8),
        (640, 480), None, imgRGBA.tostring())
    clOutImage = cl.Image(
        ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR,
        cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8),
        (640, 480), None, imgRGBA.tostring())

    sampler = cl.Sampler(
        ctx,
        False,  #  Non-normalized coordinates
        cl.addressing_mode.CLAMP_TO_EDGE,
        cl.filter_mode.NEAREST)

    #compile openCL code
    prg = cl.Program(ctx, kernel).build()

    #define grid size
    gridSizeX = 640
    gridSizeY = 480

    globalWorkSize = (gridSizeX, gridSizeY)

    #run kernel
    prg.getLaserCoord(
        queue, globalWorkSize, clImage, clOutImage, sampler, width_buf,
        height_buf, dest_sum_buf, dest_N_buf, dest_sumY_buf
    )  #can't use Intel CPU for now, need to install NVidia drivers; use AMD for now

    #set up output buffers
    sumX = numpy.empty_like(0)
    sumY = numpy.empty_like(0)
    N = numpy.empty_like(0)
    buff = numpy.zeros(
        width * height * 4, numpy.uint8
    )  #output is numpy array of (640, 480, 4); need to convert to RGBA -> RGB -> BGR and then display
    origin = (0, 0, 0)
    region = (width, height, 1)

    #device -> host
    cl.enqueue_copy(queue, sumX,
                    dest_sum_buf)  #from 3rd arg on device to 2nd arg on host
    cl.enqueue_copy(queue, N, dest_N_buf)
    cl.enqueue_copy(queue, sumY, dest_sumY_buf)

    cl.enqueue_read_image(queue, clOutImage, origin, region, buff).wait()

    #print("N = " + str(N) + "; SumX = " + str(sumX) + "; SumY = " + str(sumY))

    #print(buff) #remember that every fourth value is alpha = 255
    offsetX = 0
    offsetY = 0

    if N != 0:
        print("LASER (x,y) = (" + str(sumX / N) + ", " + str(sumY / N) + ")")

    if N > 5:
        offsetX = sumX / N - 320.
        offsetY = sumY / N - 240.

    return (buff, int(offsetX), int(offsetY))
Code Example #23
File: pycl_new3.py Project: haehn/sandbox
#
img = cv2.imread(sys.argv[1], cv2.CV_LOAD_IMAGE_GRAYSCALE)
img_width, img_height = img.shape

mf = cl.mem_flags
in_image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.UNSIGNED_INT8)
in_image = cl.Image(p.context, mf.READ_ONLY | mf.USE_HOST_PTR, in_image_format, hostbuf=img)

#
# create output buffer
#
out_buffer = np.zeros(shape=(img_width/2, img_height/2), dtype=np.uint8)

#
# create output image object
#
# out_image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.UNSIGNED_INT8)
out_image = cl.Image(p.context, mf.WRITE_ONLY, in_image_format, out_buffer.shape)

#
# call kernel
#
p.program.downsample(p.queue, out_buffer.shape, None, in_image, out_image)

#
# read output
#
cl.enqueue_read_image(p.queue, out_image, (0,0), out_buffer.shape, out_buffer).wait()

# cv2.imwrite('/tmp/pycl_tex_z1.jpg', out_buffer)