コード例 #1
0
def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
    """Run the mask kernel over ``source_im`` and return the resulting buffer.

    A Cheetah template is built from ``mask_source``, filled in with the
    block dimensions, the dimensions of ``dest_im``, ``RGB`` and
    ``neighbors``, compiled with ``cuda_compile`` and launched once on a
    device copy of ``source_im``.  The device buffer is then read back into
    a ``uint8`` array shaped like ``dest_im``.

    Args:
        source_im: Host array uploaded to the GPU; the kernel receives the
            device copy as its only argument.
        dest_im: Host array whose ``shape[1]`` / ``shape[0]`` become the
            template's ``WIDTH`` / ``HEIGHT`` and whose shape is used for
            the returned buffer.
        b_size: Block dimensions passed as ``block=``; the first two
            entries become ``BLOCK_DIM_X`` / ``BLOCK_DIM_Y``.
        g_size: Grid dimensions passed as ``grid=``.
        RGB: Value forwarded to the template's ``RGB`` variable.
        neighbors: Value forwarded to the template's ``NEIGHBORS`` variable.

    Returns:
        A new ``numpy.uint8`` array shaped like ``dest_im`` holding the
        contents of the device buffer after the kernel ran.
    """
    # Create the Cheetah template and fill in the variables for the mask
    # kernel.
    mask_template = Template(mask_source)
    mask_template.BLOCK_DIM_X = b_size[0]
    mask_template.BLOCK_DIM_Y = b_size[1]
    mask_template.WIDTH = dest_im.shape[1]
    mask_template.HEIGHT = dest_im.shape[0]
    mask_template.RGB = RGB
    mask_template.NEIGHBORS = neighbors

    # Compile the CUDA kernel.
    mask_kernel = cuda_compile(mask_template, "mask_kernel")

    # Host-side receive buffer, created before touching the GPU so a failure
    # here cannot strand a device allocation.
    inner_buffer = np.array(dest_im, dtype=np.uint8)

    # Allocate device memory and upload the source image.
    d_source = cu.mem_alloc(source_im.nbytes)
    try:
        cu.memcpy_htod(d_source, source_im)

        # The kernel works in place on the uploaded buffer (it is the only
        # argument); per the original comment it filters out the interior
        # points of the mask.
        mask_kernel(d_source, block=b_size, grid=g_size)

        # NOTE(review): the read-back targets a dest_im-shaped uint8 array
        # while the device buffer was sized from source_im -- presumably
        # both images have the same shape and are uint8; confirm with the
        # callers.
        cu.memcpy_dtoh(inner_buffer, d_source)
    finally:
        # Release the device buffer now, on success and on error alike,
        # instead of waiting for garbage collection.
        d_source.free()

    return inner_buffer
コード例 #2
0
def poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, interior_buffer, n):
    """Launch the Poisson blending kernel ``n`` times and return the result.

    A Cheetah template is built from ``poisson_blending_source``, filled in
    with the block dimensions, the dimensions of ``dest_im``, ``RGB`` and
    ``neighbors``, and compiled with ``cuda_compile``.  ``source_im``,
    ``dest_im`` and ``interior_buffer`` are uploaded to the GPU, the kernel
    is launched ``n`` times on those same three device buffers, and the
    destination buffer is copied back to the host.

    Args:
        source_im: Host array uploaded as the kernel's first argument.
        dest_im: Host array uploaded as the kernel's second argument; its
            ``shape[1]`` / ``shape[0]`` become ``WIDTH`` / ``HEIGHT``.
        b_size: Block dimensions passed as ``block=``; the first two
            entries become ``BLOCK_DIM_X`` / ``BLOCK_DIM_Y``.
        g_size: Grid dimensions passed as ``grid=``.
        RGB: Value forwarded to the template's ``RGB`` variable.
        neighbors: Value forwarded to the template's ``NEIGHBORS`` variable.
        interior_buffer: Host array uploaded as the kernel's third argument
            (presumably the mask produced by the ``interior_buffer``
            function -- confirm with the caller).
        n: Number of kernel launches.

    Returns:
        A new ``numpy.uint8`` array shaped like ``dest_im`` holding the
        destination device buffer after the last launch.
    """
    # Create the Cheetah template and fill in the variables for the Poisson
    # kernel.
    template = Template(poisson_blending_source)
    template.BLOCK_DIM_X = b_size[0]
    template.BLOCK_DIM_Y = b_size[1]
    template.WIDTH = dest_im.shape[1]
    template.HEIGHT = dest_im.shape[0]
    template.RGB = RGB
    template.NEIGHBORS = neighbors

    # Compile the CUDA kernel.
    poisson_blending_kernel = cuda_compile(template, "poisson_blending_kernel")

    # Host-side receive buffer.
    # NOTE(review): the read-back below assumes dest_im is already uint8 so
    # that this array and the device buffer have the same byte size; confirm.
    out_image = np.array(dest_im, dtype=np.uint8)

    # Track every device allocation so all of them are released even when a
    # later allocation, copy or launch fails.
    device_buffers = []
    try:
        # Allocate device memory and upload each host array.
        for host_array in (source_im, dest_im, interior_buffer):
            device_buffer = cu.mem_alloc(host_array.nbytes)
            device_buffers.append(device_buffer)
            cu.memcpy_htod(device_buffer, host_array)
        d_source, d_destination, d_buffer = device_buffers

        # Call the CUDA kernel n times; each launch reuses the same buffers.
        for _ in range(n):
            poisson_blending_kernel(d_source, d_destination, d_buffer,
                                    block=b_size, grid=g_size)

        # Retrieve the final output image.
        cu.memcpy_dtoh(out_image, d_destination)
    finally:
        for device_buffer in device_buffers:
            device_buffer.free()

    return out_image
コード例 #3
0
def make_frame_processor(frame_shape, frame_filter_data):
    """Return a function that takes a frame and returns the filtered frame.

    The filter coefficients are baked into a CUDA kernel generated from the
    ``filter_source`` Cheetah template, so the kernel is compiled once here
    and reused for every frame.

    Args:
        frame_shape: ``(height, width, channels)`` of the frames to process;
            must unpack into exactly three values.
        frame_filter_data: ``(size, scale, offset, F)`` where ``F`` holds the
            coefficients, ``size`` is the shape they are reshaped to and
            ``scale`` divides every coefficient.  ``offset`` is unpacked but
            not used here.

    Returns:
        ``processor(frame)``, which runs the kernel on ``frame``, writes the
        result back into ``frame`` in place and returns it.
    """
    size, scale, _offset, F = frame_filter_data
    frame_filter = np.asarray(F, dtype=np.float32).reshape(size) / scale

    # Offsets covered by the filter, centred on the pixel.  Floor division
    # keeps them integers regardless of the interpreter's division rules.
    # NOTE(review): only size[0] is used, for both axes, so a non-square
    # filter is silently truncated by zip() below -- confirm filters are
    # always square.
    half_width = (size[0] - 1) // 2
    min_offset = -half_width
    max_offset = half_width + 1

    # Pair every (row offset, column offset) with its coefficient, in
    # row-major order, as [offset a, offset b, filter value].
    offsets = range(min_offset, max_offset)
    offset_pairs = [(a, b) for a in offsets for b in offsets]
    weights = [weight for row in frame_filter for weight in row]
    final_list = [[a, b, weight]
                  for (a, b), weight in zip(offset_pairs, weights)]

    # Block size (threads per block)
    b_size = (32, 32, 1)

    # Single parenthesized argument: prints the same text whether print is
    # a statement or a function.
    print('frame shape: ' + str(frame_shape))

    # Grid size (blocks per grid): enough blocks to cover the whole frame.
    g_size = (int(np.ceil(float(frame_shape[1]) / b_size[0])),
              int(np.ceil(float(frame_shape[0]) / b_size[1])))

    # Initialize the template and hard-code the variables into it.
    template = Template(filter_source)
    template.LIST = final_list
    template.HEIGHT, template.WIDTH, _ = frame_shape
    template.MAX_OFF = max_offset - 1
    template.MIN_OFF = min_offset

    # Compile the CUDA kernel and get a handle to it.
    module = nvcc.SourceModule(template)
    filter_kernel = module.get_function("filter_kernel")

    def processor(frame):
        """Applies the frame_filter 2D array to each channel of the image"""
        device_buffers = []
        try:
            # Allocate the input and output buffers and upload the frame to
            # both, so the output starts out as a copy of the input.
            for _ in range(2):
                device_buffer = cu.mem_alloc(frame.nbytes)
                device_buffers.append(device_buffer)
                cu.memcpy_htod(device_buffer, frame)
            d_frame_in, d_frame_out = device_buffers

            filter_kernel(d_frame_in, d_frame_out, block=b_size, grid=g_size)

            # Transfer the result from device to host, overwriting the
            # caller's frame.
            cu.memcpy_dtoh(frame, d_frame_out)
        finally:
            # Release per-frame device memory right away rather than waiting
            # for garbage collection.
            for device_buffer in device_buffers:
                device_buffer.free()
        return frame

    # Return the function
    return processor