def tv2(data, weight, Niter=50):
    """Chambolle's total-variation (TV) regularized denoising.

    weight should be around 2+1.5*noise_sigma

    Parameters
    ----------
    data : ndarray
        Image to denoise; it is uploaded as float32.
    weight : float
        Passed (as float32) to the ``grad_step`` kernel.
    Niter : int
        Number of ``div_step`` / ``grad_step`` iterations.

    Returns
    -------
    The content of the output image as read back by ``dev.readImage``
    (requested as float32).
    """
    # NOTE(review): ``prog`` and ``data_im`` are created but never used below,
    # while ``dev``, ``proc``, ``cl`` and ``inImg`` are not defined inside this
    # function.  ``dev``/``proc``/``cl`` may exist at module level, but
    # ``inImg`` looks like a leftover of an older device API -- TODO confirm
    # and port the remainder of this function.
    prog = OCLProgram(abspath("kernels/tv2.cl"))

    # Fixed: this used to read ``astype(np, float32, copy=False)``, which
    # passes the numpy module itself as the dtype argument.
    data_im = OCLImage.from_array(data.astype(np.float32, copy=False))

    # Two RGBA float images, selected below by the parity of the iteration.
    pImgs = [
        dev.createImage(
            data.shape[::-1],
            mem_flags=cl.mem_flags.READ_WRITE,
            dtype=np.float32,
            channel_order=cl.channel_order.RGBA,
        )
        for i in range(2)
    ]

    outImg = dev.createImage(data.shape[::-1],
                             dtype=np.float32,
                             mem_flags=cl.mem_flags.READ_WRITE)

    dev.writeImage(inImg, data.astype(np.float32))
    dev.writeImage(pImgs[0], np.zeros((4,) + data.shape, dtype=np.float32))
    dev.writeImage(pImgs[1], np.zeros((4,) + data.shape, dtype=np.float32))

    for i in range(Niter):
        proc.runKernel("div_step", inImg.shape, None,
                       inImg, pImgs[i % 2], outImg)
        proc.runKernel("grad_step", inImg.shape, None,
                       outImg, pImgs[i % 2], pImgs[1 - i % 2],
                       np.float32(weight))

    return dev.readImage(outImg, dtype=np.float32)
def bilateral2(data, fSize, sigma_p, sigma_x = 10.):
    """Run the 2D bilateral filter kernel on ``data`` and return the result.

    float32 and uint16 inputs are dispatched to dedicated kernels
    (``bilat2_float`` / ``bilat2_short``).  Any other dtype is cast to float32
    first and an info message is logged.

    Parameters
    ----------
    data : ndarray
        Input image.
    fSize : int
        Forwarded to the kernel as int32.
    sigma_p, sigma_x : float
        Forwarded to the kernel as float32 (``sigma_x`` first, then
        ``sigma_p``); presumably the intensity and spatial widths of the
        filter -- confirm against kernels/bilateral2.cl.

    Returns
    -------
    ndarray
        Host copy of the output buffer, allocated like (the possibly cast)
        ``data``.
    """
    kernel_by_dtype = {np.float32:"bilat2_float", np.uint16:"bilat2_short"}
    supported = kernel_by_dtype.keys()

    if data.dtype.type not in supported:
        logger.info("data type %s not supported yet (%s), casting to float:"%(data.dtype.type,supported))
        data = data.astype(np.float32)

    kernel_name = kernel_by_dtype[data.dtype.type]

    src_img = OCLImage.from_array(data)
    out_buf = OCLArray.empty_like(data)

    program = OCLProgram(abspath("kernels/bilateral2.cl"))
    extent0 = np.int32(src_img.shape[0])
    extent1 = np.int32(src_img.shape[1])
    program.run_kernel(kernel_name,
                       src_img.shape, None,
                       src_img, out_buf.data,
                       extent0, extent1,
                       np.int32(fSize), np.float32(sigma_x), np.float32(sigma_p))

    return out_buf.get()
def tv2(data, weight, Niter=50):
    """Chambolle's total-variation (TV) regularized denoising.

    weight should be around 2+1.5*noise_sigma

    Parameters
    ----------
    data : ndarray
        Image to denoise; it is uploaded as float32.
    weight : float
        Passed (as float32) to the ``grad_step`` kernel.
    Niter : int
        Number of ``div_step`` / ``grad_step`` iterations.

    Returns
    -------
    The content of the output image as read back by ``dev.readImage``
    (requested as float32).
    """
    # NOTE(review): ``prog`` and ``data_im`` are never used afterwards, and
    # ``dev``, ``proc``, ``cl`` and ``inImg`` are not bound in this function.
    # The first three may be module-level names; ``inImg`` appears to be a
    # remnant of an earlier device API -- TODO confirm and port.
    prog = OCLProgram(abspath("kernels/tv2.cl"))

    # Fixed: the dtype argument was written ``np, float32`` (module + bare
    # name) instead of ``np.float32``.
    data_im = OCLImage.from_array(data.astype(np.float32, copy=False))

    image_shape = data.shape[::-1]

    # Two RGBA float images, selected below by the parity of the iteration.
    pImgs = [
        dev.createImage(image_shape,
                        mem_flags=cl.mem_flags.READ_WRITE,
                        dtype=np.float32,
                        channel_order=cl.channel_order.RGBA)
        for i in range(2)
    ]

    outImg = dev.createImage(image_shape,
                             dtype=np.float32,
                             mem_flags=cl.mem_flags.READ_WRITE)

    dev.writeImage(inImg, data.astype(np.float32))
    dev.writeImage(pImgs[0], np.zeros((4, ) + data.shape, dtype=np.float32))
    dev.writeImage(pImgs[1], np.zeros((4, ) + data.shape, dtype=np.float32))

    for i in range(Niter):
        proc.runKernel("div_step", inImg.shape, None,
                       inImg, pImgs[i % 2], outImg)
        proc.runKernel("grad_step", inImg.shape, None,
                       outImg, pImgs[i % 2], pImgs[1 - i % 2],
                       np.float32(weight))

    return dev.readImage(outImg, dtype=np.float32)
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Non-local-means style denoising of a 3D volume on the GPU.

    for noise level of sigma_0, choose sigma = 1.5*sigma_0

    Parameters
    ----------
    data : ndarray
        Input volume (three axes are indexed below); converted to float32.
    sigma : float
        Forwarded to the ``computePlus`` / ``computeMinus`` kernels.
    size_filter, size_search : int
        Compiled into the program as ``FS`` and ``BS``; ``size_search`` also
        bounds the shift loops below.

    Returns
    -------
    ndarray
        Element-wise ratio of the accumulated values and accumulated weights.
    """
    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images (``distImg`` used to be allocated twice in a row; the
    # first allocation was immediately discarded)
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    # zero-initialised accumulators filled by the compute kernels
    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    shape_args = (np.int32(img.shape[0]),
                  np.int32(img.shape[1]),
                  np.int32(img.shape[2]))

    # dx only runs over 0..size_search while dy/dz cover both signs;
    # presumably ``computeMinus`` supplies the mirrored shift -- confirm
    # against kernels/nlm3.cl.
    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                shift_args = (np.int32(dx),np.int32(dy),np.int32(dz))

                prog.run_kernel("dist",img.shape,None,
                                img,tmpImg,*shift_args)
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,distImg,np.int32(4))

                compute_args = ((img,distImg,accBuf.data,weightBuf.data)
                                + shape_args + shift_args
                                + (np.float32(sigma),))

                prog.run_kernel("computePlus",img.shape,None,*compute_args)

                # skipped only for the zero shift
                if any([dx,dy,dz]):
                    prog.run_kernel("computeMinus",img.shape,None,*compute_args)

    acc = accBuf.get()
    weights = weightBuf.get()

    return acc/weights
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Non-local-means style denoising of a 3D volume on the GPU.

    for noise level of sigma_0, choose sigma = 1.5*sigma_0

    Parameters
    ----------
    data : ndarray
        Input volume (three axes are indexed below); converted to float32.
    sigma : float
        Forwarded to the ``computePlus`` / ``computeMinus`` kernels.
    size_filter, size_search : int
        Compiled into the program as ``FS`` and ``BS``; ``size_search`` also
        bounds the shift loops below.

    Returns
    -------
    ndarray
        Element-wise ratio of the accumulated values and accumulated weights.
    """
    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    # Cast once up front so that the image and the scratch images created
    # from ``data`` are float32, matching the float32 accumulators below
    # (a no-op for float32 input because of copy=False).
    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images (``distImg`` used to be allocated twice in a row; the
    # first allocation was immediately discarded)
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    # zero-initialised accumulators filled by the compute kernels
    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    # dx only runs over 0..size_search while dy/dz cover both signs;
    # presumably ``computeMinus`` supplies the mirrored shift -- confirm
    # against kernels/nlm3.cl.
    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                prog.run_kernel("dist",img.shape,None,
                                img,tmpImg,
                                np.int32(dx),np.int32(dy),np.int32(dz))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,distImg,np.int32(4))

                prog.run_kernel("computePlus",img.shape,None,
                                img,distImg,accBuf.data,weightBuf.data,
                                np.int32(img.shape[0]),
                                np.int32(img.shape[1]),
                                np.int32(img.shape[2]),
                                np.int32(dx),np.int32(dy),np.int32(dz),
                                np.float32(sigma))

                # skipped only for the zero shift
                if any([dx,dy,dz]):
                    prog.run_kernel("computeMinus",img.shape,None,
                                    img,distImg,accBuf.data,weightBuf.data,
                                    np.int32(img.shape[0]),
                                    np.int32(img.shape[1]),
                                    np.int32(img.shape[2]),
                                    np.int32(dx),np.int32(dy),np.int32(dz),
                                    np.float32(sigma))

    acc = accBuf.get()
    weights = weightBuf.get()

    return acc/weights
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g = None):
    """Two-pass separable convolution on buffers that already live on the GPU.

    ``conv_sep2_x`` is run with ``hx_g`` into a temporary buffer, then
    ``conv_sep2_y`` is run with ``hy_g`` from that buffer into ``res_g``.

    Parameters
    ----------
    data_g, hx_g, hy_g : OCLArray
        Input and the two 1D kernels; all are checked to be float32.
    res_g : OCLArray, optional
        Output buffer; allocated like ``data_g`` when omitted.

    Returns
    -------
    OCLArray
        ``res_g`` (still on the device).
    """
    assert_bufs_type(np.float32,data_g,hx_g,hy_g)

    program = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = np.int32(hy_g.shape[0])
    len_x = np.int32(hx_g.shape[0])

    # intermediate result of the first pass
    scratch_g = OCLArray.empty_like(data_g)

    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]

    program.run_kernel("conv_sep2_x",global_size,None,
                       data_g.data,hx_g.data,scratch_g.data,len_x)
    program.run_kernel("conv_sep2_y",global_size,None,
                       scratch_g.data,hy_g.data,res_g.data,len_y)

    return res_g
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None, dev=None):
    """Two-pass separable convolution on buffers that already live on the GPU.

    The first pass (``conv_sep2_x`` with ``hx_g``) writes into a temporary
    buffer; the second pass (``conv_sep2_y`` with ``hy_g``) writes into
    ``res_g``.

    Parameters
    ----------
    data_g, hx_g, hy_g : OCLArray
        Input and the two 1D kernels; all are checked to be float32.
    res_g : OCLArray, optional
        Output buffer; allocated like ``data_g`` when omitted.
    dev : optional
        Accepted but not used by this function.

    Returns
    -------
    OCLArray
        ``res_g`` (still on the device).
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    program = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = np.int32(hy_g.shape[0])
    len_x = np.int32(hx_g.shape[0])

    # intermediate result of the first pass
    scratch_g = OCLArray.empty_like(data_g)

    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]

    passes = (
        ("conv_sep2_x", data_g, hx_g, scratch_g, len_x),
        ("conv_sep2_y", scratch_g, hy_g, res_g, len_y),
    )
    for kernel_name, src_g, h_g, dst_g, h_len in passes:
        program.run_kernel(kernel_name, global_size, None,
                           src_g.data, h_g.data, dst_g.data, h_len)

    return res_g
def _convolve_buf(data_g, h_g , res_g = None):
    """Convolve a device buffer with a kernel buffer (buffer variant).

    The OpenCL kernel is picked by the number of axes of ``data_g``
    (``convolve<ndim>d_buf``) and receives the extents of ``h_g`` as trailing
    int32 arguments.

    Parameters
    ----------
    data_g, h_g : OCLArray
        Input and convolution kernel; both are checked to be float32.
    res_g : OCLArray, optional
        Output buffer; a float32 buffer of ``data_g``'s shape is allocated
        when omitted.

    Returns
    -------
    OCLArray
        ``res_g`` (still on the device).
    """
    assert_bufs_type(np.float32,data_g,h_g)

    program = OCLProgram(abspath("kernels/convolve.cl"))

    if res_g is None:
        res_g = OCLArray.empty(data_g.shape,dtype=np.float32)

    h_extents = tuple(np.int32(extent) for extent in h_g.shape)
    ndim = len(data_g.shape)

    program.run_kernel("convolve%sd_buf"%(ndim),
                       data_g.shape[::-1],None,
                       data_g.data,h_g.data,res_g.data,
                       *h_extents)

    return res_g
def affine(data, mat = np.identity(4), mode ="linear"):
    """Affine-transform ``data`` with the matrix ``mat`` on the GPU.

    Parameters
    ----------
    data : ndarray
        Input array, uploaded as an OpenCL image.
    mat : ndarray
        Transformation matrix (default: 4x4 identity).  Its inverse, cast to
        float32, is what is handed to the kernel.
    mode : str
        "linear" or "nearest"; selects the build option of the program.

    Returns
    -------
    ndarray
        float32 array with the shape of ``data``.

    Raises
    ------
    KeyError
        If ``mode`` is not one of the known modes.
    """
    build_flags = {"linear":"","nearest":"-D USENEAREST"}
    known_modes = build_flags.keys()

    if mode not in known_modes:
        raise KeyError("mode = '%s' not defined ,valid: %s"%(mode, known_modes))

    src_img = OCLImage.from_array(data)
    out_g = OCLArray.empty(data.shape,np.float32)

    inverse = np.linalg.inv(mat)
    inverse_g = OCLArray.from_array(inverse.astype(np.float32,copy=False))

    program = OCLProgram(abspath("kernels/transformations.cl"),
                         build_options=[build_flags[mode]])

    program.run_kernel("affine",
                       data.shape[::-1],None,
                       src_img,out_g.data,inverse_g.data)

    return out_g.get()
def affine(data, mat = np.identity(4), interp = "linear"):
    """Affine-transform ``data`` with the matrix ``mat`` on the GPU.

    Parameters
    ----------
    data : ndarray
        Input array, uploaded as an OpenCL image.
    mat : ndarray
        Transformation matrix (default: 4x4 identity).  Its inverse, cast to
        float32, is what is handed to the kernel.
    interp : str
        "linear" or "nearest"; selects the build option of the program.

    Returns
    -------
    ndarray
        float32 array with the shape of ``data``.

    Raises
    ------
    KeyError
        If ``interp`` is not one of the known interpolation modes.
    """
    build_flags = {"linear":"","nearest":"-D USENEAREST"}
    known_modes = build_flags.keys()

    if interp not in known_modes:
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,known_modes))

    src_img = OCLImage.from_array(data)
    out_g = OCLArray.empty(data.shape,np.float32)

    inverse = np.linalg.inv(mat)
    inverse_g = OCLArray.from_array(inverse.astype(np.float32,copy=False))

    program = OCLProgram(abspath("kernels/transformations.cl"),
                         build_options=[build_flags[interp]])

    program.run_kernel("affine",
                       data.shape[::-1],None,
                       src_img,out_g.data,inverse_g.data)

    return out_g.get()
def bilateral2(data, fSize, sigma_p, sigma_x=10.):
    """Run the 2D bilateral filter kernel on ``data`` and return the result.

    Inputs of dtype float32 or uint16 use their own kernel
    (``bilat2_float`` / ``bilat2_short``); every other dtype is converted to
    float32 beforehand, which is reported through ``logger.info``.

    Parameters
    ----------
    data : ndarray
        Input image.
    fSize : int
        Forwarded to the kernel as int32.
    sigma_p, sigma_x : float
        Forwarded to the kernel as float32 (``sigma_x`` first, then
        ``sigma_p``); presumably the intensity and spatial widths of the
        filter -- confirm against kernels/bilateral2.cl.

    Returns
    -------
    ndarray
        Host copy of the output buffer, allocated like (the possibly cast)
        ``data``.
    """
    kernel_by_dtype = {np.float32: "bilat2_float", np.uint16: "bilat2_short"}
    supported = kernel_by_dtype.keys()

    if data.dtype.type not in supported:
        logger.info("data type %s not supported yet (%s), casting to float:" % (data.dtype.type, supported))
        data = data.astype(np.float32)

    kernel_name = kernel_by_dtype[data.dtype.type]

    src_img = OCLImage.from_array(data)
    out_buf = OCLArray.empty_like(data)

    program = OCLProgram(abspath("kernels/bilateral2.cl"))
    kernel_args = (
        src_img, out_buf.data,
        np.int32(src_img.shape[0]), np.int32(src_img.shape[1]),
        np.int32(fSize), np.float32(sigma_x), np.float32(sigma_p),
    )
    program.run_kernel(kernel_name, src_img.shape, None, *kernel_args)

    return out_buf.get()
def _fftshift_single(d_g, res_g, ax = 0):
    """Basic fftshift of an OCLArray along a single axis.

    With

        shape(d_g) = [N_0, N_1, ..., N, ..., N_{k-1}, N_k] = [N1, N, N2]

    (``N`` being the extent of axis ``ax``, ``N1`` the product of all extents
    before it and ``N2`` the product of all extents after it), each element of
    the flat buffer can be addressed by

        index = i + N2*j + N2*N*k

    where

        i = 0 .. N2-1
        j = 0 .. N-1
        k = 0 .. N1-1

    and the swap of elements is performed on the index ``j``.

    Parameters
    ----------
    d_g : OCLArray
        Source buffer of dtype float32 or complex64.
    res_g : OCLArray
        Destination buffer handed to the kernel.
    ax : int
        Axis along which to shift.

    Returns
    -------
    OCLArray
        ``res_g``.

    Raises
    ------
    KeyError
        If the dtype of ``d_g`` is neither float32 nor complex64.
    """
    dtype_kernel_name = {np.float32:"fftshift_1_f",
                         np.complex64:"fftshift_1_c"
                         }

    N = d_g.shape[ax]
    # np.prod of an empty shape slice would yield a float, hence the
    # explicit special cases for the first and last axis
    N1 = 1 if ax==0 else np.prod(d_g.shape[:ax])
    N2 = 1 if ax == len(d_g.shape)-1 else np.prod(d_g.shape[ax+1:])

    dtype = d_g.dtype.type

    prog = OCLProgram(abspath("kernels/fftshift.cl"))
    # ``//`` keeps the global work size integral even when true division is
    # in effect (Python 3 or ``from __future__ import division``)
    prog.run_kernel(dtype_kernel_name[dtype],(N2,N//2,N1),None,
                    d_g.data, res_g.data,
                    np.int32(N),
                    np.int32(N2))

    return res_g
def scale(data, scale = (1.,1.,1.), interp = "linear"):
    """Return an interpolated, scaled version of ``data``.

    Parameters
    ----------
    data : ndarray
        Input volume; its shape is multiplied element-wise with the three
        scale factors, so it is expected to have three axes.
    scale : tuple, list, ndarray or scalar
        ``(scale_z, scale_y, scale_x)``, or a single number applied to all
        three axes.
    interp : str
        "linear" | "nearest"

    Returns
    -------
    ndarray
        float32 array whose shape is ``data.shape * scale`` truncated to
        integers.

    Raises
    ------
    KeyError
        If ``interp`` is not a known interpolation mode.
    ValueError
        If ``scale`` is a sequence that does not have exactly three entries.
    """
    bop = {"linear":[],"nearest":["-D","USENEAREST"]}

    if not interp in bop.keys():
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,bop.keys()))

    if not isinstance(scale,(tuple, list, np.ndarray)):
        scale = (scale,)*3

    if len(scale) != 3:
        # wrap in a 1-tuple: formatting directly with a tuple of the wrong
        # length raised TypeError instead of the intended ValueError
        raise ValueError("scale = %s misformed"%(scale,))

    d_im = OCLImage.from_array(data)

    nshape = np.array(data.shape)*np.array(scale)
    # builtin ``int`` instead of the ``np.int`` alias, which newer NumPy
    # releases no longer provide
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape,np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=bop[interp])

    prog.run_kernel("scale",
                    res_g.shape[::-1],None,
                    d_im,res_g.data)

    return res_g.get()
def _convolve3_old(data,h, dev = None):
    """Convolve 3d ``data`` with the kernel ``h`` on the GPU.

    Boundary conditions are clamping to edge (per the original note).
    ``h`` is converted to float32.

    Parameters
    ----------
    data : ndarray
        Input volume of dtype float32 or uint16.
    h : ndarray
        Convolution kernel.
    dev : optional
        Device; when None the one returned by ``get_device()`` is used.

    Returns
    -------
    ndarray
        float32 array with the shape of ``data``.

    Raises
    ------
    ValueError
        If no device is available.
    TypeError
        If the dtype of ``data`` is not supported.
    """
    if dev is None:
        dev = get_device()
    if dev is None:
        raise ValueError("no OpenCLDevice found...")

    build_flag_by_dtype = {np.float32:"",
                           np.uint16:"-D SHORTTYPE"}
    supported = build_flag_by_dtype.keys()
    dtype = data.dtype.type

    if dtype not in supported:
        raise TypeError("data type %s not supported yet, please convert to:"%dtype,supported)

    program = OCLProgram(abspath("kernels/convolve3.cl"),
                         build_options = build_flag_by_dtype[dtype])

    h_g = OCLArray.from_array(h.astype(np.float32))
    src_img = OCLImage.from_array(data)
    out_g = OCLArray.empty(data.shape,dtype=np.float32)

    # extents of the data followed by the extents of the kernel
    extents = [np.int32(extent) for extent in data.shape+h.shape]

    program.run_kernel("convolve3d",src_img.shape,None,
                       src_img,h_g.data,out_g.data,
                       *extents)

    return out_g.get()
def scale(data, scale = (1.,1.,1.), interp = "linear"):
    """Return an interpolated, scaled version of ``data``.

    Parameters
    ----------
    data : ndarray
        Input volume; its shape is multiplied element-wise with the three
        scale factors, so it is expected to have three axes.
    scale : tuple, list, ndarray or scalar
        ``(scale_z, scale_y, scale_x)``, or a single number applied to all
        three axes.
    interp : str
        "linear" | "nearest"

    Returns
    -------
    ndarray
        float32 array whose shape is ``data.shape * scale`` truncated to
        integers.

    Raises
    ------
    KeyError
        If ``interp`` is not a known interpolation mode.
    ValueError
        If ``scale`` is a sequence that does not have exactly three entries.
    """
    bop = {"linear":"","nearest":"-D USENEAREST"}

    if not interp in bop.keys():
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,bop.keys()))

    if not isinstance(scale,(tuple, list, np.ndarray)):
        scale = (scale,)*3

    if len(scale) != 3:
        # wrap in a 1-tuple: formatting directly with a tuple of the wrong
        # length raised TypeError instead of the intended ValueError
        raise ValueError("scale = %s misformed"%(scale,))

    d_im = OCLImage.from_array(data)

    nshape = np.array(data.shape)*np.array(scale)
    # builtin ``int`` instead of the ``np.int`` alias, which newer NumPy
    # releases no longer provide
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape,np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=[bop[interp]])

    prog.run_kernel("scale",
                    res_g.shape[::-1],None,
                    d_im,res_g.data)

    return res_g.get()
def convolve_spatial2(im, hs, mode = "constant", plan = None, return_plan = False):
    """Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    The input image ``im`` is subdivided into (Gy,Gx) blocks and ``hs[j,i]``
    is the psf at the center of each block (i,j).

    As of now each image dimension has to be divisible by the grid dim, i.e.

        Nx % Gx == 0
        Ny % Gy == 0

    Parameters
    ----------
    im : ndarray
        2d input image.
    hs : ndarray
        4d grid of psfs.
    mode : str
        "constant" - assumed values to be zero
        "wrap" - periodic boundary condition
    plan : optional
        FFT plan to reuse; one is created for the patch shape when omitted.
    return_plan : bool
        If true, return ``(result, plan)`` instead of just the result.

    Raises
    ------
    ValueError
        If ``im`` is not 2d or ``hs`` is not 4d.
    NotImplementedError
        If the image shape is not divisible by the grid shape.
    KeyError
        If ``mode`` is unknown.
    """
    if im.ndim !=2 or hs.ndim !=4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n%g==0 for n,g in zip(im.shape,hs.shape[:2])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy = %s shape mismatch"%(str(hs.shape[:2])))

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]

    # the size of each block within the grid (``//`` keeps these integral
    # even when true division is in effect)
    Nblock_y, Nblock_x = Ny//Gy, Nx//Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(3*Nblock_x), _next_power_of_2(3*Nblock_y)

    hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y,Npatch_x))

    patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # origins of the blocks along each axis
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)

    patch_size = Npatch_x*Npatch_y

    # copy one padded patch per grid cell into ``patches_g``; the last
    # argument is the flat offset of patch (j,i) inside that buffer
    for i,_x0 in enumerate(x0s):
        for j,_y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None,
                            im_g,
                            np.int32(_x0+Nblock_x//2-Npatch_x//2),
                            np.int32(_y0+Nblock_y//2-Npatch_y//2),
                            patches_g.data,
                            np.int32(i*patch_size+j*Gx*patch_size))

    # convolution
    fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g,inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_x*Npatch_y*Gx*Gy,),None,
                    patches_g.data, h_g.data)
    fft(patches_g,inplace=True, inverse = True, batch = Gx*Gy, plan = plan)

    # accumulate
    # zero-initialised (as in the 3d variant) so that the result never
    # contains uninitialised memory, whether the kernel assigns or adds
    res_g = OCLArray.zeros(im.shape,np.float32)

    for i in range(Gx+1):
        for j in range(Gy+1):
            prog.run_kernel("interpolate2",(Nblock_x,Nblock_y),None,
                            patches_g.data,res_g.data,
                            np.int32(i),np.int32(j),
                            np.int32(Gx),np.int32(Gy),
                            np.int32(Npatch_x),np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
def _convolve_spatial2(im, hs, mode = "constant", grid_dim = None, pad_factor = 2, plan = None, return_plan = False):
    """Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    The input image ``im`` is subdivided into (Gy,Gx) blocks and ``hs[j,i]``
    is the psf at the center of each block (i,j).

    As of now each image dimension has to be divisible by the grid dim, i.e.

        Nx % Gx == 0
        Ny % Gy == 0

    Parameters
    ----------
    im : ndarray
        2d input image.
    hs : ndarray
        Grid of psfs.  When ``grid_dim`` is given, ``hs`` is instead uploaded
        as one float32 array from which ``fill_psf_grid2`` cuts one block per
        grid cell; presumably it then has the shape of ``im`` (the kernel is
        handed ``Nx``) -- confirm against the caller.
    mode : str
        "constant" - assumed values to be zero
        "wrap" - periodic boundary condition
    grid_dim : tuple, optional
        ``(Gy, Gx)``; when omitted the grid is taken from ``hs.shape[:2]``.
    pad_factor : number
        The patch size is the next power of two of ``pad_factor`` times the
        block size.
    plan : optional
        FFT plan to reuse; one is created for the patch shape when omitted.
    return_plan : bool
        If true, return ``(result, plan)`` instead of just the result.
    """
    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs

    # the size of each block within the grid (``//`` keeps these integral
    # even when true division is in effect)
    Nblock_y, Nblock_x = Ny//Gy, Nx//Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(pad_factor*Nblock_x), _next_power_of_2(pad_factor*Nblock_y)

    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y,Npatch_x))

    # origins of the blocks along each axis
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)

    patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64)

    patch_size = Npatch_x*Npatch_y

    # prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros((Gy,Gx,Npatch_y,Npatch_x),np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for i in range(Gx):
            for j in range(Gy):
                prog.run_kernel("fill_psf_grid2",
                                (Nblock_x,Nblock_y),None,
                                tmp_g.data,
                                np.int32(Nx),
                                np.int32(i*Nblock_x),
                                np.int32(j*Nblock_y),
                                h_g.data,
                                np.int32(Npatch_x),
                                np.int32(Npatch_y),
                                np.int32(-Nblock_x//2+Npatch_x//2),
                                np.int32(-Nblock_y//2+Npatch_y//2),
                                np.int32(i*patch_size+j*Gx*patch_size)
                                )
    else:
        hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    # prepare image
    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # copy one padded patch per grid cell into ``patches_g``; the last
    # argument is the flat offset of patch (j,i) inside that buffer
    for i,_x0 in enumerate(x0s):
        for j,_y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None,
                            im_g,
                            np.int32(_x0+Nblock_x//2-Npatch_x//2),
                            np.int32(_y0+Nblock_y//2-Npatch_y//2),
                            patches_g.data,
                            np.int32(i*patch_size+j*Gx*patch_size))

    # convolution
    fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g,inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_x*Npatch_y*Gx*Gy,),None,
                    patches_g.data, h_g.data)
    fft(patches_g,inplace=True, inverse = True, batch = Gx*Gy, plan = plan)

    # accumulate
    # zero-initialised (as in the 3d variant) so that the result never
    # contains uninitialised memory, whether the kernel assigns or adds
    res_g = OCLArray.zeros(im.shape,np.float32)

    for j in range(Gy+1):
        for i in range(Gx+1):
            prog.run_kernel("interpolate2",(Nblock_x,Nblock_y),None,
                            patches_g.data,res_g.data,
                            np.int32(i),np.int32(j),
                            np.int32(Gx),np.int32(Gy),
                            np.int32(Npatch_x),np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
def convolve_spatial3(im, hs, mode = "constant", plan = None, return_plan = False, pad_factor = 2):
    """Spatially varying convolution of a 3d image with a 3d grid of psfs.

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    The input image ``im`` is subdivided into (Gz,Gy,Gx) blocks and
    ``hs[k,j,i]`` is the psf at the center of each block (i,j,k).

    As of now each image dimension has to be divisible by the grid dim, i.e.

        Nx % Gx == 0
        Ny % Gy == 0
        Nz % Gz == 0

    Parameters
    ----------
    im : ndarray
        3d input image.
    hs : ndarray
        6d grid of psfs.
    mode : str
        "constant" - assumed values to be zero
        "wrap" - periodic boundary condition
    plan : optional
        FFT plan to reuse; one is created for the patch shape when omitted.
    return_plan : bool
        If true, return ``(result, plan)`` instead of just the result.
    pad_factor : number
        The patch size is the next power of two of ``pad_factor`` times the
        block size.

    Raises
    ------
    ValueError
        If ``im`` is not 3d or ``hs`` is not 6d.
    NotImplementedError
        If the image shape is not divisible by the grid shape.
    KeyError
        If ``mode`` is unknown.
    """
    if im.ndim !=3 or hs.ndim !=6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n%g==0 for n,g in zip(im.shape,hs.shape[:3])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy = %s !"%(str(hs.shape[:3])))

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])

    # the size of each block within the grid (``//`` keeps these integral
    # even when true division is in effect)
    Nblocks = [n//g for n,g in zip(Ns,Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # origins of the blocks along each axis (z, y, x)
    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]

    patch_size = np.prod(Npatchs)
    n_patches = np.prod(Gs)

    # this loops over all i,j,k; the last kernel argument is the flat offset
    # of patch (k,j,i) inside ``patches_g``
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                        im_g,
                        np.int32(_x0+Nblocks[2]//2-Npatchs[2]//2),
                        np.int32(_y0+Nblocks[1]//2-Npatchs[1]//2),
                        np.int32(_z0+Nblocks[0]//2-Npatchs[0]//2),
                        patches_g.data,
                        np.int32(i*patch_size+
                                 j*Gs[2]*patch_size+
                                 k*Gs[2]*Gs[1]*patch_size))

    # convolution
    fft(patches_g,inplace=True, batch = n_patches, plan = plan)
    fft(h_g,inplace=True, batch = n_patches, plan = plan)
    prog.run_kernel("mult_inplace",(patch_size*n_patches,),None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse = True, batch = n_patches, plan = plan)

    # accumulate
    res_g = OCLArray.zeros(im.shape,np.float32)

    for k, j, i in product(*[range(g+1) for g in Gs]):
        prog.run_kernel("interpolate3",Nblocks[::-1],None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i),np.int32(j),np.int32(k),
                        np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]),
                        np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
def _convolve_spatial3(im, hs, mode = "constant", grid_dim = None, plan = None, return_plan = False, pad_factor = 2):
    """Spatially varying convolution of a 3d image with a 3d grid of psfs.

    Parameters
    ----------
    im : ndarray
        3d input image.
    hs : ndarray
        Either a 6d array whose first three axes are the grid, or -- when
        ``grid_dim`` is given -- a 3d array with the same shape as ``im``
        from which ``fill_psf_grid3`` cuts one block per grid cell.
    mode : str
        "constant" or "wrap"; selects the image addressing mode the program
        is built with.
    grid_dim : tuple, optional
        Grid shape to use together with a 3d ``hs``.
    plan : optional
        FFT plan to reuse; one is created for the patch shape when omitted.
    return_plan : bool
        If true, return ``(result, plan)`` instead of just the result.
    pad_factor : number
        The patch size is the next power of two of ``pad_factor`` times the
        block size.

    Raises
    ------
    ValueError
        If ``im`` is not 3d, if ``hs`` has the wrong number of dimensions, or
        if ``grid_dim`` is set and ``hs.shape != im.shape``.
    NotImplementedError
        If the image shape is not divisible by the grid shape.
    KeyError
        If ``mode`` is unknown.
    """
    if im.ndim !=3:
        raise ValueError("wrong dimensions of input!")

    if not (hs.ndim==6 or (hs.ndim==3 and grid_dim)):
        raise ValueError("wrong dimensions of psf grid!")

    if grid_dim:
        if hs.shape != im.shape:
            raise ValueError("if grid_dim is set, then im.shape = hs.shape !")
        Gs = tuple(grid_dim)
    else:
        # hs.ndim == 6 is already guaranteed by the check above
        Gs = hs.shape[:3]

    if not np.all([n%g==0 for n,g in zip(im.shape,Gs)]):
        # report the grid that was actually used (this used to print
        # hs.shape[:2], which is not the grid when grid_dim is set and drops
        # one axis otherwise)
        raise NotImplementedError("shape of image has to be divisible by Gx Gy = %s shape mismatch"%(str(Gs)))

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = im.shape

    # the size of each block within the grid (``//`` keeps these integral
    # even when true division is in effect)
    Nblocks = [n//g for n,g in zip(Ns,Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    prog = OCLProgram(abspath("kernels/conv_spatial3.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    # origins of the blocks along each axis (z, y, x)
    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    patch_size = np.prod(Npatchs)
    n_patches = np.prod(Gs)

    # prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros(Gs+Npatchs,np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
            prog.run_kernel("fill_psf_grid3",
                            Nblocks[::-1],None,
                            tmp_g.data,
                            np.int32(im.shape[2]),
                            np.int32(im.shape[1]),
                            np.int32(i*Nblocks[2]),
                            np.int32(j*Nblocks[1]),
                            np.int32(k*Nblocks[0]),
                            h_g.data,
                            np.int32(Npatchs[2]),
                            np.int32(Npatchs[1]),
                            np.int32(Npatchs[0]),
                            np.int32(-Nblocks[2]//2+Npatchs[2]//2),
                            np.int32(-Nblocks[1]//2+Npatchs[1]//2),
                            np.int32(-Nblocks[0]//2+Npatchs[0]//2),
                            np.int32(i*patch_size+
                                     j*Gs[2]*patch_size+
                                     k*Gs[2]*Gs[1]*patch_size))
    else:
        hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # this loops over all i,j,k; the last kernel argument is the flat offset
    # of patch (k,j,i) inside ``patches_g``
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                        im_g,
                        np.int32(_x0+Nblocks[2]//2-Npatchs[2]//2),
                        np.int32(_y0+Nblocks[1]//2-Npatchs[1]//2),
                        np.int32(_z0+Nblocks[0]//2-Npatchs[0]//2),
                        patches_g.data,
                        np.int32(i*patch_size+
                                 j*Gs[2]*patch_size+
                                 k*Gs[2]*Gs[1]*patch_size))

    # convolution
    fft(patches_g,inplace=True, batch = n_patches, plan = plan)
    fft(h_g,inplace=True, batch = n_patches, plan = plan)
    prog.run_kernel("mult_inplace",(patch_size*n_patches,),None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse = True, batch = n_patches, plan = plan)

    # accumulate
    res_g = OCLArray.zeros(im.shape,np.float32)

    for k, j, i in product(*[range(g+1) for g in Gs]):
        prog.run_kernel("interpolate3",Nblocks[::-1],None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i),np.int32(j),np.int32(k),
                        np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]),
                        np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res