def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
    """Run the mask kernel on the GPU and return the resulting buffer.

    The kernel source (``mask_source``) is rendered through a Cheetah
    template, compiled, and launched once on a device copy of
    ``source_im``.  The device memory is then copied back into a fresh
    ``uint8`` array built from ``dest_im``.

    Args:
        source_im: Host array uploaded to the GPU and handed to the kernel.
        dest_im: Host array whose ``shape[1]``/``shape[0]`` are passed to
            the template as ``WIDTH``/``HEIGHT`` and which seeds the
            returned array.
        b_size: CUDA block dimensions; the first two entries populate
            ``BLOCK_DIM_X`` and ``BLOCK_DIM_Y``.
        g_size: CUDA grid dimensions.
        RGB: Value forwarded to the template's ``RGB`` variable.
        neighbors: Value forwarded to the template's ``NEIGHBORS`` variable.

    Returns:
        A ``numpy.uint8`` array with the shape of ``dest_im`` holding the
        bytes read back from the device after the kernel ran.
    """
    # Render the Cheetah template for the mask kernel.
    kernel_template = Template(mask_source)
    template_vars = (
        ("BLOCK_DIM_X", b_size[0]),
        ("BLOCK_DIM_Y", b_size[1]),
        ("WIDTH", dest_im.shape[1]),
        ("HEIGHT", dest_im.shape[0]),
        ("RGB", RGB),
        ("NEIGHBORS", neighbors),
    )
    for var_name, var_value in template_vars:
        setattr(kernel_template, var_name, var_value)

    # Compile the CUDA kernel from the rendered template.
    compiled_mask = cuda_compile(kernel_template, "mask_kernel")

    # Upload the source image to the device.
    device_src = cu.mem_alloc(source_im.nbytes)
    cu.memcpy_htod(device_src, source_im)

    # Launch the kernel; it operates on the device buffer in place.
    compiled_mask(device_src, block=b_size, grid=g_size)

    # Read the device buffer back into a uint8 array shaped like dest_im.
    # NOTE(review): the host buffer is sized from dest_im while the device
    # allocation is sized from source_im -- presumably both images have the
    # same byte size; confirm against the callers.
    host_result = np.array(dest_im, dtype=np.uint8)
    cu.memcpy_dtoh(host_result, device_src)

    return host_result
def poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, interior_buffer, n):
    """Launch the Poisson blending kernel ``n`` times and return the result.

    The kernel source (``poisson_blending_source``) is rendered through a
    Cheetah template and compiled.  ``source_im``, ``dest_im`` and
    ``interior_buffer`` are uploaded to the GPU, the kernel is launched
    ``n`` times on those three device buffers, and the destination buffer
    is copied back to the host.

    Args:
        source_im: Host array uploaded as the kernel's first argument.
        dest_im: Host array uploaded as the kernel's second argument; its
            ``shape[1]``/``shape[0]`` become the template's
            ``WIDTH``/``HEIGHT`` and it seeds the returned array.
        b_size: CUDA block dimensions; the first two entries populate
            ``BLOCK_DIM_X`` and ``BLOCK_DIM_Y``.
        g_size: CUDA grid dimensions.
        RGB: Value forwarded to the template's ``RGB`` variable.
        neighbors: Value forwarded to the template's ``NEIGHBORS`` variable.
        interior_buffer: Host array uploaded as the kernel's third
            argument.  Note that inside this function the name shadows
            the module-level ``interior_buffer`` function.
        n: Number of kernel launches.

    Returns:
        A ``numpy.uint8`` array with the shape of ``dest_im`` holding the
        destination device buffer after the final launch.
    """
    # Render the Cheetah template for the Poisson kernel.
    kernel_template = Template(poisson_blending_source)
    template_vars = (
        ("BLOCK_DIM_X", b_size[0]),
        ("BLOCK_DIM_Y", b_size[1]),
        ("WIDTH", dest_im.shape[1]),
        ("HEIGHT", dest_im.shape[0]),
        ("RGB", RGB),
        ("NEIGHBORS", neighbors),
    )
    for var_name, var_value in template_vars:
        setattr(kernel_template, var_name, var_value)

    # Compile the CUDA kernel from the rendered template.
    blend_kernel = cuda_compile(kernel_template, "poisson_blending_kernel")

    # Allocate one device buffer per host array, then upload each of them.
    host_arrays = (source_im, dest_im, interior_buffer)
    device_buffers = [cu.mem_alloc(host.nbytes) for host in host_arrays]
    for device_mem, host in zip(device_buffers, host_arrays):
        cu.memcpy_htod(device_mem, host)
    d_source, d_destination, d_buffer = device_buffers

    # Launch the blending kernel n times on the same device buffers.
    for _ in range(n):
        blend_kernel(d_source, d_destination, d_buffer, block=b_size, grid=g_size)

    # Download the destination buffer into a uint8 array shaped like dest_im.
    out_image = np.array(dest_im, dtype=np.uint8)
    cu.memcpy_dtoh(out_image, d_destination)
    return out_image