def setup(self, size, units, lam=0.5, n0=1.0, use_fresnel_approx=False):
    """Set up the internal variables, GPU buffers and kernels.

    Delegates the basic setup to ``Bpm3d_Base.setup`` and afterwards uploads
    the propagator and the weight arrays to the GPU and builds the
    reduction kernel.

    :param size: the size of the geometry in pixels (Nx,Ny,Nz)
    :param units: the physical units of each voxel in microns (dx,dy,dz)
    :param lam: the wavelength of light in microns
    :param n0: the refractive index of the surrounding media
    :param use_fresnel_approx: if True, uses fresnel approximation for
        propagator
    """
    Bpm3d_Base.setup(self, size, units, lam=lam, n0=n0,
                     use_fresnel_approx=use_fresnel_approx)

    # setting up the gpu buffers and kernels
    self.program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    # Nz sizes the per-slice buffers below; previously only Nx and Ny were
    # unpacked, so the first use of Nz raised a NameError.
    Nx, Ny, Nz = self.size[:3]

    # NOTE(review): the plan is requested with an empty shape and is neither
    # stored nor used in this method -- presumably fft_plan((Ny, Nx)) was
    # intended; confirm against the propagation code before changing it.
    plan = fft_plan(())

    self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

    self.scatter_weights_g = OCLArray.from_array(
        self.scatter_weights.astype(np.float32))
    self.gfactor_weights_g = OCLArray.from_array(
        self.gfactor_weights.astype(np.float32))

    # one float32 entry per z slice
    self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
    self.gfactor_g = OCLArray.zeros(Nz, "float32")

    # weighted sum of |field - plain|^2, where `plain` is only subtracted
    # from the element with index 0
    self.reduce_kernel = OCLReductionKernel(
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
        arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain",
    )
def create_dn_buffer(size, units,points, dn_inner = .0, rad_inner = 0, dn_outer = .1, rad_outer = .4):
    """Create a float32 GPU buffer of shape (Nz,Ny,Nx) filled by the
    "fill_dn" kernel from a list of points.

    size:   (Nx, Ny, Nz), the extent of the buffer in pixels
    units:  (dx, dy, dz), forwarded to the kernel as float32
    points: array-like with one row per point; column 2 is used as the
            z coordinate for sorting
    dn_inner, rad_inner, dn_outer, rad_outer: forwarded to the kernel as
            float32 (presumably the index differences and radii of an
            inner/outer sphere -- confirm against the kernel source)

    Returns the filled OCLArray (the data stays on the device).
    """
    Nx, Ny, Nz = size
    dx, dy, dz = units

    program = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))
    dn_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)

    # order the points by their z coordinate before uploading them
    pts = np.array(points)
    z_order = np.argsort(pts[:, 2])
    pts = pts[z_order, :]
    n_points = pts.shape[0]

    points_g = OCLArray.from_array(pts.flatten().astype(np.float32))

    scalar_args = (
        np.int32(n_points),
        np.float32(dx), np.float32(dy), np.float32(dz),
        np.float32(dn_inner), np.float32(rad_inner),
        np.float32(dn_outer), np.float32(rad_outer),
    )
    program.run_kernel("fill_dn", (Nx, Ny, Nz), None,
                       dn_g.data, points_g.data, *scalar_args)
    return dn_g
def test_3d():
    """Time ``convolve_sep3`` on numpy input and on GPU buffers.

    A 128^3 volume with a single non-zero voxel is convolved with three
    1d kernels, first passing numpy arrays and then OCLArrays; the mean
    time per call is printed for both variants.

    Returns the tuple (result of the numpy call, result of the GPU call
    fetched to the host).
    """
    from time import time

    Niter = 10

    data = np.zeros((128,)*3, np.float32)
    data[30, 30, 30] = 1.

    hx = 1./5*np.ones(5)
    hy = 1./13*np.ones(13)
    # NOTE(review): 11 taps scaled by 1/13, so this kernel does not sum to
    # one -- left untouched because it changes the returned values.
    hz = 1./13*np.ones(11)

    t = time()
    for _ in range(Niter):
        out = convolve_sep3(data, hx, hy, hz)
    # single-argument print works as a statement (py2) and a function (py3)
    print("time: %.3f ms" % (1000.*(time()-t)/Niter))

    data_g = OCLArray.from_array(data.astype(np.float32))
    hx_g = OCLArray.from_array(hx.astype(np.float32))
    hy_g = OCLArray.from_array(hy.astype(np.float32))
    hz_g = OCLArray.from_array(hz.astype(np.float32))

    t = time()
    for _ in range(Niter):
        out_g = convolve_sep3(data_g, hx_g, hy_g, hz_g)
    # fetch once so the timing includes the completion of the queued work
    out_g.get()
    print("time: %.3f ms" % (1000.*(time()-t)/Niter))

    return out, out_g.get()
def _fft_convolve_numpy(data, h, plan = None, kernel_is_fft = False, kernel_is_fftshifted = False):
    """FFT based convolution of two numpy arrays on the GPU.

    data and h must have the same shape. Both are uploaded as complex64,
    convolved via ``_fft_convolve_gpu`` and the absolute value of the
    result is returned as a numpy array.

    plan, kernel_is_fft:  forwarded to ``_fft_convolve_gpu``
    kernel_is_fftshifted: if False, h is passed through np.fft.fftshift
                          before the upload

    Raises ValueError if the shapes differ.
    """
    # the returned device is not used here; the call is kept as it was
    get_device()

    if data.shape != h.shape:
        raise ValueError("data and kernel must have same size! %s vs %s "%(str(data.shape),str(h.shape)))

    kernel = h if kernel_is_fftshifted else np.fft.fftshift(h)

    data_g = OCLArray.from_array(data.astype(np.complex64))
    kernel_g = OCLArray.from_array(kernel.astype(np.complex64))
    out_g = OCLArray.empty_like(data_g)

    _fft_convolve_gpu(data_g, kernel_g, res_g=out_g,
                      plan=plan, kernel_is_fft=kernel_is_fft)

    return abs(out_g.get())
def _deconv_rl_np(data, h, Niter = 10, ):
    """Numpy front end of ``_deconv_rl_gpu_conv``.

    data and h are uploaded as float32 buffers, ``_deconv_rl_gpu_conv`` is
    run for Niter iterations and the result is fetched back to the host.
    """
    data_g, kernel_g = [
        OCLArray.from_array(arr.astype(np.float32, copy = False))
        for arr in (data, h)
    ]
    return _deconv_rl_gpu_conv(data_g, kernel_g, Niter).get()
def _deconv_rl_gpu_conv(data_g, h_g, Niter = 10):
    """Iterative deconvolution on GPU buffers, using ``convolve``.

    data_g: OCLArray holding the measured data
    h_g:    OCLArray holding the kernel
    Niter:  number of iterations

    Returns the float32 OCLArray with the current estimate.
    """
    shape = data_g.shape

    # the estimate starts out as a copy of the data
    estimate_g = OCLArray.empty(shape, np.float32)
    estimate_g.copy_buffer(data_g)

    ratio_g = OCLArray.empty(shape, np.float32)
    correction_g = OCLArray.empty(shape, np.float32)

    # fix this: the flipped kernel is built via a round trip through host
    # memory and only the first two axes are reversed
    flipped = h_g.get()[::-1, ::-1]
    hflip_g = OCLArray.from_array(flipped.copy())

    for _ in range(Niter):
        convolve(estimate_g, h_g, res_g=ratio_g)
        # presumably leaves data/ratio in ratio_g -- confirm in the helper
        _divide_inplace(data_g, ratio_g)
        convolve(ratio_g, hflip_g, res_g=correction_g)
        _multiply_inplace(estimate_g, correction_g)

    return estimate_g
def _convolve_sep2_numpy(data,hx,hy):
    """Numpy front end of ``_convolve_sep2_gpu``.

    hx, hy and data are uploaded as float32 buffers; the result of the GPU
    routine is fetched back and returned as a numpy array.
    """
    def _to_gpu(arr):
        return OCLArray.from_array(arr.astype(np.float32))

    hx_g = _to_gpu(hx)
    hy_g = _to_gpu(hy)
    data_g = _to_gpu(data)

    res_g = _convolve_sep2_gpu(data_g, hx_g, hy_g)
    return res_g.get()
def gpu_kuwahara(data, N=5):
    """Convolve an image with the Kuwahara filter on the GPU.

    data: 2d array-like image, uploaded as float32
    N:    filter size forwarded to the kernel, has to be odd

    Returns the filtered image as a float32 numpy array of shape
    (data.shape[0], data.shape[1]).

    Raises ValueError if N is even.
    """
    if N % 2 == 0:
        raise ValueError("Data has to be a (2n+1)x(2n+1) array.")

    # create the device buffers
    data_g = OCLArray.from_array(data.astype(float32))
    res_g = OCLArray.empty((data.shape[0], data.shape[1]), float32)

    prog = OCLProgram("./OpenCL/gpu_kernels/gpu_kuwahara.cl")

    # start kernel on the gpu
    prog.run_kernel("kuwahara",          # the name of the kernel in the cl file
                    data_g.shape[::-1],  # global size, the number of threads e.g. (128,128,)
                    None,                # local size, just leave it to None
                    data_g.data, res_g.data,
                    int32(N))

    # hand the result back like gpu_mean / gpu_structure do; the collapsed
    # original reads as if this return had been commented out
    return res_g.get()
def test_bessel(n,x):
    """Run the "bessel_fill" kernel for the values in x.

    x is uploaded as float32, the kernel is started with one work item per
    element and n is passed as an int32 argument. Returns the result
    buffer fetched to the host.
    """
    x_f32 = x.astype(float32)

    x_g = OCLArray.from_array(x_f32)
    out_g = OCLArray.empty_like(x_f32)

    program = OCLProgram(absPath("kernels/bessel.cl"))
    program.run_kernel("bessel_fill", x_g.shape, None,
                       x_g.data, out_g.data, int32(n))

    return out_g.get()
def _convolve_np(data, h):
    """Numpy front end of ``_convolve_buf``.

    data and h are uploaded as float32 buffers (no host copy is made when
    they already are float32) and the result is fetched back to the host.
    """
    def _to_gpu(arr):
        return OCLArray.from_array(arr.astype(np.float32, copy = False))

    data_g = _to_gpu(data)
    kernel_g = _to_gpu(h)

    res_g = _convolve_buf(data_g, kernel_g)
    return res_g.get()
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Non-local-means style filter of a 3d volume on the GPU.

    for noise level of sigma_0, choose sigma = 1.5*sigma_0

    data:        3d array, cast to float32
    sigma:       forwarded to the compute kernels as float32
    size_filter: passed to the OpenCL build as -D FS
    size_search: passed to the OpenCL build as -D BS; also the half width
                 of the offset range that is scanned

    Returns the accumulated values divided by the accumulated weights as a
    numpy array.
    """
    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images; distImg used to be allocated twice, wasting one
    # device allocation
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape, np.float32)
    weightBuf = OCLArray.zeros(data.shape, np.float32)

    # loop invariant kernel arguments
    shape = img.shape
    shape_args = (np.int32(shape[0]), np.int32(shape[1]), np.int32(shape[2]))
    sigma_arg = np.float32(sigma)

    # only non-negative dx is scanned; the mirrored offsets are handled by
    # the "computeMinus" kernel below
    for dx in range(size_search+1):
        for dy in range(-size_search, size_search+1):
            for dz in range(-size_search, size_search+1):
                offset_args = (np.int32(dx), np.int32(dy), np.int32(dz))

                prog.run_kernel("dist", shape, None,
                                img, tmpImg, *offset_args)

                # three chained "convolve" passes: tmpImg -> tmpImg2 ->
                # tmpImg -> distImg
                prog.run_kernel("convolve", shape, None,
                                tmpImg, tmpImg2, np.int32(1))
                prog.run_kernel("convolve", shape, None,
                                tmpImg2, tmpImg, np.int32(2))
                prog.run_kernel("convolve", shape, None,
                                tmpImg, distImg, np.int32(4))

                compute_args = ((img, distImg, accBuf.data, weightBuf.data)
                                + shape_args + offset_args + (sigma_arg,))

                prog.run_kernel("computePlus", shape, None, *compute_args)

                # skipped for the zero offset
                if any([dx, dy, dz]):
                    prog.run_kernel("computeMinus", shape, None, *compute_args)

    acc = accBuf.get()
    weights = weightBuf.get()

    return acc/weights
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
    axes: int, list or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        if True, the result is returned as OCLArray, else as numpy array

    Returns
    -------
        if return_buffer, returns the result as (well :) OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for OCLArrays of unsupported dtype or axes of odd length
    ValueError
        if arr_obj is neither a numpy array nor an OCLArray
    """
    if axes is None:
        axes = tuple(range(arr_obj.ndim))
    elif np.isscalar(axes):
        # a single axis, as accepted by np.fft.fftshift
        axes = (axes,)
    else:
        # materialise: axes is traversed twice below
        axes = tuple(axes)

    if isinstance(arr_obj, OCLArray):
        if not arr_obj.dtype.type in DTYPE_KERNEL_NAMES.keys():
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if not np.all([arr_obj.shape[a]%2==0 for a in axes]):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    if not axes:
        # nothing to shift: the result is a plain copy of the input;
        # without this the freshly allocated buffer came back unfilled
        if res_g is not arr_obj:
            res_g.copy_buffer(arr_obj)

    # iterate over all axes
    # FIXME: this is still rather inefficient
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        # from the second axis on, the shift reads from and writes to res_g
        in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()
def _deconv_rl_np_fft(data, h, Niter = 10, h_is_fftshifted = False): """ deconvolves data with given psf (kernel) h data and h have to be same shape via lucy richardson deconvolution """ if data.shape != h.shape: raise ValueError("data and h have to be same shape") if not h_is_fftshifted: h = np.fft.fftshift(h) hflip = h[::-1,::-1] #set up some gpu buffers y_g = OCLArray.from_array(data.astype(np.complex64)) u_g = OCLArray.from_array(data.astype(np.complex64)) tmp_g = OCLArray.empty(data.shape,np.complex64) hf_g = OCLArray.from_array(h.astype(np.complex64)) hflip_f_g = OCLArray.from_array(hflip.astype(np.complex64)) # hflipped_g = OCLArray.from_array(h.astype(np.complex64)) plan = fft_plan(data.shape) #transform psf fft(hf_g,inplace = True) fft(hflip_f_g,inplace = True) for i in range(Niter): print i fft_convolve(u_g, hf_g, res_g = tmp_g, kernel_is_fft = True) _complex_divide_inplace(y_g,tmp_g) fft_convolve(tmp_g,hflip_f_g, inplace = True, kernel_is_fft = True) _complex_multiply_inplace(u_g,tmp_g) return np.abs(u_g.get())
def transfer(data):
    """Copy data buffer -> image -> buffer on the GPU and return the result.

    data: numpy array of dtype float32 or complex64

    Returns the content of the second buffer as numpy array.

    Raises NotImplementedError for any other dtype (previously this
    surfaced as an unbound local variable after the upload).
    """
    dtype = data.dtype.type
    # validate first, before anything is allocated on the device
    if dtype != np.float32 and dtype != np.complex64:
        raise NotImplementedError(
            "only works for float32 or complex64 (got %s)" % data.dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    # NOTE(review): [::1] is a no-op slice -- confirm [::-1] was not meant
    if dtype == np.float32:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32)
    else:
        # complex values are stored as two float32 channels
        im = OCLImage.empty(data.shape[::1],dtype = np.float32, num_channels=2)

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
def resample_buf(data, new_shape):
    """Resample data to new_shape by going through an OpenCL image.

    data:      numpy array of dtype float32 or complex64
    new_shape: shape of the result buffer

    The data is copied into an image and read back resampled into a buffer
    of shape new_shape, which is returned as numpy array.

    Raises NotImplementedError for any other dtype (previously this
    surfaced as an unbound local variable after the upload).
    """
    dtype = data.dtype.type
    # validate first, before anything is allocated on the device
    if dtype != np.float32 and dtype != np.complex64:
        raise NotImplementedError(
            "only works for float32 or complex64 (got %s)" % data.dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty(new_shape,data.dtype)

    # NOTE(review): [::1] is a no-op slice -- confirm [::-1] was not meant
    if dtype == np.float32:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32)
    else:
        # complex values are stored as two float32 channels
        im = OCLImage.empty(data.shape[::1],dtype = np.float32, num_channels=2)

    im.copy_buffer(d1_g)
    d2_g.copy_image_resampled(im)

    return d2_g.get()
def focus_field_cylindrical(shape,units,lam = .5,NA = .3, n0=1., n_integration_steps = 100):
    """computes focus field of cylindrical lens with given NA

    see:
    Colin J. R. Sheppard,
    Cylindrical lenses—focusing and imaging: a review
    Appl. Opt. 52, 538-545 (2013)

    shape: (Nx, Ny, Nz)
    units: (dx, dy, dz)
    lam:   wavelength, passed to the kernel as lam/n0
    NA:    numerical aperture
    n0:    refractive index of the medium
    n_integration_steps: passed to the OpenCL build as INT_STEPS

    The kernel is evaluated on a (Nz,Ny) grid and the result is repeated
    Nx times along a new last axis.

    return u,ex,ey,ez with u being the intensity
    """
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options = str("-I %s -D INT_STEPS=%s"%(absPath("."),n_integration_steps)))

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA/n0)

    u_g = OCLArray.empty((Nz,Ny),np.float32)
    ex_g = OCLArray.empty((Nz,Ny),np.complex64)
    ey_g = OCLArray.empty((Nz,Ny),np.complex64)
    ez_g = OCLArray.empty((Nz,Ny),np.complex64)

    t = time.time()

    # the four float arguments after the buffers are the symmetric y and z
    # extents of the grid
    p.run_kernel("psf_cylindrical",u_g.shape[::-1],None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(-dy*(Ny-1)/2.),np.float32(dy*(Ny-1)/2.),
                 np.float32(-dz*(Nz-1)/2.),np.float32(dz*(Nz-1)/2.),
                 np.float32(lam/n0),
                 np.float32(alpha))

    u = np.array(np.repeat(u_g.get()[...,np.newaxis],Nx,axis=-1))
    ex = np.array(np.repeat(ex_g.get()[...,np.newaxis],Nx,axis=-1))
    ey = np.array(np.repeat(ey_g.get()[...,np.newaxis],Nx,axis=-1))
    ez = np.array(np.repeat(ez_g.get()[...,np.newaxis],Nx,axis=-1))

    # one pre-formatted argument keeps the output identical on py2 and py3
    print("time in secs: %s" % (time.time()-t))

    return u, ex, ey, ez
def test_parseval():
    """Track the summed squared magnitude of an array under repeated ffts.

    On the GPU the forward fft is applied in place Nz times; on the host
    a forward followed by an inverse numpy fft is applied Nz times. After
    every step the sum of |.|^2 is recorded.

    Returns (s1, s2), the recorded sums of the GPU and the numpy run.
    """
    from time import time

    Nx = 512
    Nz = 10

    d = np.random.uniform(-1,1,(Nx,Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    s1, s2 = [],[]

    t = time()
    for i in range(Nz):
        # parenthesised single-argument prints are valid on py2 and py3
        print(i)
        # only the forward transform is applied here
        fft(d_g, inplace=True)
        s1.append(np.sum(np.abs(d_g.get())**2))

    print(time()-t)

    for i in range(Nz):
        print(i)
        d = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(d).astype(np.complex64)
        s2.append(np.sum(np.abs(d)**2))

    return s1, s2
def bilateral3(data, size_filter, sigma_p, sigma_x = 10.):
    """bilateral filter

    data:        array to be filtered; anything but float32 is cast to
                 float32 (an info message is logged)
    size_filter: passed to the kernel as int32
    sigma_p:     passed to the kernel as float32
    sigma_x:     passed to the kernel as float32

    Returns the filtered data as numpy array.
    """
    dtype = data.dtype.type
    dtypes_kernels = {np.float32:"bilat3_float",}

    if not dtype in dtypes_kernels.keys():
        logger.info("data type %s not supported yet (%s), casting to float:"%(dtype,dtypes_kernels.keys()))
        data = data.astype(np.float32)
        dtype = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    # debug output; parenthesised so it is valid on py2 and py3
    print(img.shape)

    prog.run_kernel(dtypes_kernels[dtype],
                    img.shape,None,
                    img,res.data,
                    np.int32(img.shape[0]),np.int32(img.shape[1]),
                    np.int32(size_filter),np.float32(sigma_x),np.float32(sigma_p))

    return res.get()
def _fft_convolve_gpu(data_g, h_g, res_g = None, plan = None, inplace = False, kernel_is_fft = False):
    """FFT based convolution of two complex64 GPU buffers.

    data_g, h_g:   OCLArrays of identical shape
    res_g:         optional output buffer; ignored if inplace is True
    plan:          fft plan, created for data_g.shape if None
    inplace:       if True, data_g is overwritten with the result
    kernel_is_fft: if True, h_g is used as is; otherwise a transformed
                   copy of h_g is used and h_g is left untouched

    Returns the buffer holding the result.

    Raises ValueError if the shapes differ.
    """
    multiply_into_first = OCLElementwiseKernel(
        "cfloat_t *a, cfloat_t * b",
        "a[i] = cfloat_mul(b[i],a[i])","mult")

    # the returned device is not used here; the call is kept as it was
    get_device()

    assert_bufs_type(np.complex64, data_g, h_g)

    if data_g.shape != h_g.shape:
        raise ValueError("data and kernel must have same size! %s vs %s "%(str(data_g.shape),str(h_g.shape)))

    if plan is None:
        plan = fft_plan(data_g.shape)

    if inplace:
        res_g = data_g
    else:
        if res_g is None:
            res_g = OCLArray.empty(data_g.shape, data_g.dtype)
        # work on a copy of the data in the output buffer
        res_g.copy_buffer(data_g)

    if kernel_is_fft:
        kern_g = h_g
    else:
        kern_g = OCLArray.empty(h_g.shape, h_g.dtype)
        kern_g.copy_buffer(h_g)
        fft(kern_g, inplace=True, plan=plan)

    fft(res_g, inplace=True, plan=plan)

    #multiply in fourier domain
    multiply_into_first(res_g, kern_g)

    fft(res_g, inplace=True, inverse=True, plan=plan)

    return res_g
def _ocl_fft_gpu(plan, ocl_arr,res_arr = None, inverse = False, batch = 1):
    """Execute the fft plan on a complex64 buffer, writing to res_arr.

    If res_arr is None a new complex64 buffer of the input's shape is
    allocated. inverse and batch are forwarded to ``plan.execute``.
    Returns the output buffer.
    """
    assert_bufs_type(np.complex64, ocl_arr)

    out_arr = res_arr
    if out_arr is None:
        out_arr = OCLArray.empty(ocl_arr.shape, np.complex64)

    plan.execute(ocl_arr.data, out_arr.data, inverse=inverse, batch=batch)

    return out_arr
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g = None):
    """Separable 2d convolution on float32 GPU buffers.

    The "conv_sep2_x" kernel is run with hx_g into a temporary buffer,
    then "conv_sep2_y" is run with hy_g into res_g (allocated like data_g
    if None). Returns res_g.
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = hy_g.shape[0]
    len_x = hx_g.shape[0]

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]

    prog.run_kernel("conv_sep2_x", global_size, None,
                    data_g.data, hx_g.data, tmp_g.data, np.int32(len_x))
    prog.run_kernel("conv_sep2_y", global_size, None,
                    tmp_g.data, hy_g.data, res_g.data, np.int32(len_y))

    return res_g
def test_2d():
    """Run ``convolve_sep2`` once on numpy input and once on GPU buffers.

    A 100x100 image with a single non-zero pixel is convolved with two 1d
    box kernels. Returns (result of the numpy call, result of the GPU
    call fetched to the host).
    """
    import time

    data = np.zeros((100,)*2, np.float32)
    data[50, 50] = 1.

    hx = 1./5*np.ones(5)
    hy = 1./13*np.ones(13)

    out = convolve_sep2(data, hx, hy)

    data_g, hx_g, hy_g = [
        OCLArray.from_array(arr.astype(np.float32))
        for arr in (data, hx, hy)
    ]
    out_g = convolve_sep2(data_g, hx_g, hy_g)

    return out, out_g.get()
def _deconv_rl_gpu_fft(data_g, h_g, Niter = 10): """ using fft_convolve """ if data_g.shape != h_g.shape: raise ValueError("data and h have to be same shape") #set up some gpu buffers u_g = OCLArray.empty(data_g.shape,np.complex64) u_g.copy_buffer(data_g) tmp_g = OCLArray.empty(data_g.shape,np.complex64) #fix this hflip_g = OCLArray.from_array((h_g.get()[::-1,::-1]).copy()) plan = fft_plan(data_g.shape) #transform psf fft(h_g,inplace = True) fft(hflip_g,inplace = True) for i in range(Niter): print i fft_convolve(u_g, h_g, res_g = tmp_g, kernel_is_fft = True) _complex_divide_inplace(data_g,tmp_g) fft_convolve(tmp_g,hflip_g, inplace = True, kernel_is_fft = True) _complex_multiply_inplace(u_g,tmp_g) return u_g
def get_gpu(N = 256, niter=100, sig = 1.):
    """Measure the error of repeated forward/inverse fft round trips.

    A seeded random (N,N) complex64 array is transformed forth and back
    niter times on the GPU. After every round trip the maximal absolute
    deviation from the initial array, relative to its maximal absolute
    value, is recorded.

    Returns the recorded values as numpy array of length niter.
    """
    np.random.seed(0)
    reference = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    start = (1.*reference.copy()).astype(np.complex64)

    scratch_g = OCLArray.empty_like(start)
    work_g = OCLArray.from_array(start)

    plan = fft_plan((N, N), fast_math = False)

    errors = []
    for _ in range(niter):
        fft(work_g, res_g = scratch_g, plan = plan)
        fft(scratch_g, res_g = work_g, inverse = True, plan = plan)

        deviation = np.amax(np.abs(reference - work_g.get()))
        errors.append(deviation/np.amax(np.abs(reference)))

    return np.array(errors)
def _ocl_fft_numpy(plan, arr,inverse = False, batch = 1, fast_math = True):
    """FFT of a numpy array via the GPU.

    arr is uploaded as complex64 (an info message is logged if that needs
    a conversion), transformed in place with the given plan and fetched
    back. fast_math is accepted but not used in this function.
    """
    needs_cast = arr.dtype != np.complex64
    if needs_cast:
        logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    arr_g = OCLArray.from_array(arr.astype(np.complex64, copy=False))
    _ocl_fft_gpu_inplace(plan, arr_g, inverse=inverse, batch=batch)

    return arr_g.get()
def time_gpu(dshape, niter=100, fast_math=False):
    """Print the mean time of an in-place GPU fft of the given shape.

    dshape:    shape of the complex64 test buffer
    niter:     number of ffts that are queued
    fast_math: forwarded to fft_plan

    The queue is drained before the timer starts and after the last fft.
    """
    d_g = OCLArray.empty(dshape, np.complex64)
    get_device().queue.finish()

    plan = fft_plan(dshape, fast_math=fast_math)

    t = time()
    # range instead of the python-2-only xrange
    for _ in range(niter):
        fft(d_g, inplace=True, plan=plan)
    get_device().queue.finish()
    t = (time()-t)/niter

    # parenthesised single-argument print is valid on py2 and py3
    print("GPU (fast_math = %s)\t%s\t\t%.2f ms"%(fast_math, dshape, 1000.*t))
def gpu_structure(data):
    """Convolve an image with a structure filter on the GPU.

    data is uploaded as float32; the "structure" kernel writes into a
    float32 buffer of shape (data.shape[0], data.shape[1], 2), which is
    returned as numpy array.
    """
    rows, cols = data.shape[0], data.shape[1]

    # device buffers for the input and the two-component output
    in_g = OCLArray.from_array(data.astype(float32))
    out_g = OCLArray.empty((rows, cols, 2), float32)

    program = OCLProgram("./OpenCL/gpu_kernels/gpu_structure.cl")

    # one work item per element of the input, no explicit local size
    global_size = in_g.shape[::-1]
    program.run_kernel("structure", global_size, None,
                       in_g.data, out_g.data)

    return out_g.get()
def gpu_mean(data, Nx=10,Ny=10):
    """Convolve an image with a mean filter on the GPU.

    data is uploaded as float32; Nx and Ny are passed to the "mean" kernel
    as int32. Returns the float32 result as numpy array of data's shape.
    """
    # device buffers for the input and the output
    in_g = OCLArray.from_array(data.astype(float32))
    out_g = OCLArray.empty(data.shape, float32)

    program = OCLProgram("./OpenCL/gpu_kernels/gpu_mean.cl")

    # one work item per element of the input, no explicit local size
    global_size = in_g.shape[::-1]
    program.run_kernel("mean", global_size, None,
                       in_g.data, out_g.data,
                       int32(Nx), int32(Ny))

    return out_g.get()
def _ocl_fft_numpy(arr,inverse = False, plan = None):
    """FFT of a numpy array via the GPU.

    If no plan is given, one is created for arr.shape on the queue of the
    current device. arr is uploaded as complex64 (an info message is
    logged if that needs a conversion), transformed in place and fetched
    back.
    """
    if plan is None:
        plan = Plan(arr.shape, queue = get_device().queue)

    needs_cast = arr.dtype != np.complex64
    if needs_cast:
        logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    arr_g = OCLArray.from_array(arr.astype(np.complex64, copy=False))
    _ocl_fft_gpu_inplace(arr_g, inverse = inverse, plan = plan)

    return arr_g.get()
def time_multi(N, nargs, niter =100):
    """Time a multi-output reduction kernel with nargs inputs.

    N:     length of every input array (all ones, float32)
    nargs: number of inputs and outputs; the i-th map expression is
           i*x_i[j]
    niter: number of kernel invocations that are timed

    Prints the results and the mean time per invocation and returns that
    time in seconds.
    """
    # range instead of the python-2-only xrange
    map_exprs=["%s*x%s[i]"%(i,i) for i in range(nargs)]
    arguments = ",".join("__global float *x%s"%i for i in range(nargs))

    k = OCLReductionKernel2(np.float32, neutral="0", reduce_expr="a+b",
                            map_exprs=map_exprs,
                            arguments=arguments)

    ins = [OCLArray.from_array(np.ones(N,np.float32)) for _ in range(len(map_exprs))]
    outs = [OCLArray.empty(1,np.float32) for _ in range(len(map_exprs))]

    from time import time
    t = time()
    for _ in range(niter):
        k(*ins, outs = outs)
    # drain the queue so the timing covers the queued work
    get_device().queue.finish()
    t = (time()-t)/niter

    # pre-formatted single arguments keep the output identical on py2/py3
    print("multi reduction: result = %s" % ([float(out.get()) for out in outs],))
    print("multi reduction:\t\t%.2f ms"%(1000*t))
    return t
"interpolation = '%s' not defined ,valid: %s" % (interpolation, list(interpolation_defines.keys()))) if not mode in mode_defines: raise KeyError("mode = '%s' not defined ,valid: %s" % (mode, list(mode_defines.keys()))) # reorder matrix, such that x,y,z -> z,y,x (as the kernel is assuming that) if output_shape is None: output_shape = data.shape if isinstance(data, OCLImage): d_im = data else: d_im = OCLImage.from_array(data.astype(np.float32, copy=False)) if res_g is None: res_g = OCLArray.empty(output_shape, np.float32) mat_inv_g = OCLArray.from_array(mat.astype(np.float32, copy=False)) prog = OCLProgram(abspath("kernels/affine.cl") , build_options=interpolation_defines[interpolation] + mode_defines[mode]) prog.run_kernel("affine3", output_shape[::-1], None, d_im, res_g.data, mat_inv_g.data) if isinstance(data, OCLImage): return res_g else: return res_g.get()
def _propagate_core(self,
                    u0=None,
                    dn_ind_start=0,
                    dn_ind_end=1,
                    dn_ind_offset=0,
                    return_comp="field",
                    return_shape="full",
                    free_prop=False,
                    dn_mean_method="none",
                    **kwargs):
    """
    the core propagation method, the refractive index dn is
    assumed to be already residing in gpu memory
    if u0 is None, assumes that the initial field to be residing in
    self._buf_plane

    The field in self._buf_plane is advanced plane by plane: every step is
    an fft, a multiplication with the propagator buffer self._buf_H and an
    inverse fft, followed (unless free_prop) by a multiplication with the
    refractive index term. Nz = dn_ind_end - dn_ind_start planes are
    covered, i.e. Nz - 1 outer steps with self.simul_z sub steps each.

    u0 and **kwargs are accepted but not used by the active code.

    kwargs:
        return_comp in ["field", "intens"]
        return_shape in ["last", "full"]
        free_prop = False | True
        dn_mean_method = "none", "global", "local"

    returns the full (Nz, Ny, Nx) stack if return_shape is "full",
    otherwise the content of self._buf_plane, as numpy array

    raises ValueError for an unknown return_comp or return_shape

    NOTE(review): the block structure below was reconstructed from
    whitespace-collapsed text; the nesting of the loop body should be
    checked against the original file.
    """

    print("mean method: ", dn_mean_method)
    # without a refractive index distribution only free propagation is possible
    free_prop = free_prop or (self.dn is None)

    if return_comp == "field":
        res_type = Bpm3d._complex_type
    elif return_comp == "intens":
        res_type = Bpm3d._real_type
    else:
        raise ValueError(return_comp)

    if not return_shape in ["last", "full"]:
        raise ValueError()

    Nx, Ny, _ = self.shape
    Nz = dn_ind_end - dn_ind_start

    assert dn_ind_start >= 0

    # if not u0 is None:
    #     print "huhu"
    #     self._buf_plane.write_array(u0.astype(np.complex64,copy=False))

    # the result stack only exists for return_shape == "full"; every later
    # use of u is guarded by the same condition
    if return_shape == "full":
        u = OCLArray.empty((Nz, Ny, Nx), dtype=res_type)

    # copy the first plane
    if return_shape == "full":
        if self._is_subsampled:
            self._img_xy.copy_buffer(self._buf_plane)
            self._copy_down_img(self._img_xy, u, 0)
        else:
            self._copy_down_buf(self._buf_plane, u, 0)

    # mean refractive index offset the propagator was last built for
    dn0 = 0

    if dn_mean_method == "local" and not self.dn is None and not free_prop:
        # one-element buffers that receive the reduction results below
        self.intens_sum_g = OCLArray.from_array(
            np.ones(1, dtype=Bpm3d._real_type))
        self.intens_dn_sum_g = OCLArray.from_array(
            (self.dn_mean[dn_ind_start + dn_ind_offset] *
             np.ones(1)).astype(dtype=Bpm3d._real_type))
        #self._fill_propagator_buf(self.n0, self.intens_dn_sum_g, self.intens_sum_g)

    self._fill_propagator(self.n0)

    for i in range(Nz - 1):
        for j in range(self.simul_z):

            # propagate by one sub step in fourier space
            fft(self._buf_plane, inplace=True, plan=self._plan)
            self._mult_complex(self._buf_plane, self._buf_H)
            fft(self._buf_plane, inplace=True, inverse=True, plan=self._plan)

            if not free_prop:
                #FIXME here we make a slight error for the first time point, as we
                #FIXME set dn0 first and then compute the new propagator
                if dn_mean_method == "local":
                    self._mult_dn_local(
                        self._buf_plane,
                        (i + dn_ind_start + (j + 1.) / self.simul_z),
                        self.intens_sum_g, self.intens_dn_sum_g,
                        self.intens_g, self.intens_dn_g)
                else:
                    self._mult_dn(self._buf_plane,
                                  (i + dn_ind_start + (j + 1.) / self.simul_z),
                                  dn0)

        # update the propagator for the next plane
        if not self.dn is None and not free_prop:
            if dn_mean_method == "local":
                self._kernel_reduction(
                    self.intens_g,
                    self.intens_dn_g,
                    outs=[self.intens_sum_g, self.intens_dn_sum_g])

                self._fill_propagator_buf(self.n0, self.intens_dn_sum_g,
                                          self.intens_sum_g)

                #print(self.intens_dn_sum_g.get(), self.n0)
                #print("mean dn: ",self.intens_dn_sum_g.get()/self.intens_sum_g.get())

            elif dn_mean_method == "global":
                # rebuild the propagator only when the mean changed
                if self.dn_mean[i + dn_ind_start + dn_ind_offset] != dn0:
                    dn0 = self.dn_mean[i + dn_ind_start + dn_ind_offset]
                    self._fill_propagator(self.n0 + dn0)

        # store the current plane at flat offset (i + 1) * (Nx * Ny)
        if return_shape == "full":
            if self._is_subsampled and self.simul_xy != self.shape[:2]:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
            else:
                self._copy_down_buf(self._buf_plane, u, (i + 1) * (Nx * Ny))

    if return_shape == "full":
        return u.get()
    else:
        return self._buf_plane.get()
# Ad-hoc timing of the single-output reduction kernel.
# NOTE(review): this run of statements was reconstructed from
# whitespace-collapsed text; in particular the extent of the loop body
# below should be checked against the original file.

# plain sum over x
k1 = OCLReductionKernel(np.float32,
                        neutral="0",
                        reduce_expr="a+b",
                        map_expr="x[i]",
                        arguments="__global float *x")

# two sums in one kernel: sum(y*x) and sum(x); not called below
k2 = OCLMultiReductionKernel(
    np.float32,
    neutral="0",
    reduce_expr="a+b",
    map_exprs=["y[i]*x[i]", "x[i]"],
    arguments="__global float *x, __global float *y")

N = 512
a = OCLArray.from_array(np.ones((N, N), np.float32))
b = OCLArray.from_array(2. * np.ones((N, N), np.float32))

# zero-dimensional output buffers; not used below
o1 = OCLArray.empty((), np.float32)
o2 = OCLArray.empty((), np.float32)

from time import time
t = time()
for _ in range(400):
    k1(a)
    k1(b)

# fetch the results to the host before the elapsed time is printed
k1(a).get()
k1(b).get()
print(time() - t)
# out = convolve(data,h) out = convolve(data[0,...],h[0,...]) out = convolve(data[0,0,...],h[0,0,...]) if __name__ == '__main__': # test_convolve() N = 100 ndim = 3 d = np.zeros([N+3*i for i,n in enumerate(range(ndim))],np.float32) h = np.ones((11,)*ndim,np.float32) ind = [np.random.randint(0,n,int(np.prod(d.shape)**(1./d.ndim))/10) for n in d.shape] d[tuple(ind)] = 1. h *= 1./np.sum(h) out1 = convolve(d,h) d_g = OCLArray.from_array(d) h_g = OCLArray.from_array(h) res_g = convolve(d_g,h_g) out2 = res_g.get()
def map_coordinates(data, coordinates, interpolation="linear",
                    mode='constant'):
    """
    Map data to new coordinates by interpolation.
    The array of coordinates is used to find, for each point in the output,
    the corresponding coordinates in the input.

    should correspond to scipy.ndimage.map_coordinates

    Parameters
    ----------
    data: ndarray
        2d or 3d input array; its dtype has to be one of the keys of
        cl_buffer_datatype_dict
    coordinates: array-like of shape (data.ndim, m)
        the positions at which data is sampled; they are handed to the
        kernel as float32, so fractional positions are kept
    interpolation: str
        "linear" or "nearest"
    mode: str
        "constant", "wrap" or "edge"

    Returns
    -------
    ndarray of length m and of the dtype of data

    Raises
    ------
    ValueError
        if data is not a 2d/3d ndarray or coordinates has the wrong
        leading dimension
    KeyError
        for an unknown interpolation, mode or an unsupported dtype
    """

    if not (isinstance(data, np.ndarray) and data.ndim in (2, 3)):
        raise ValueError("input data has to be a 2d or 3d array!")

    # keep the coordinates as floats: casting them to int32 (as before)
    # dropped the fractional part before the sampler could interpolate
    coordinates = np.asarray(coordinates, np.float32)
    if not (coordinates.shape[0] == data.ndim):
        raise ValueError("coordinate has to be of shape (data.ndim,m) ")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }

    mode_defines = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"]
    }

    if not interpolation in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    if not mode in mode_defines:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(mode_defines.keys())))

    if not data.dtype.type in cl_buffer_datatype_dict:
        raise KeyError(
            "dtype %s not supported yet (%s)" %
            (data.dtype.type, tuple(cl_buffer_datatype_dict.keys())))

    dtype_defines = [
        "-D",
        "DTYPE=%s" % cl_buffer_datatype_dict[data.dtype.type]
    ]

    d_im = OCLImage.from_array(data)
    coordinates_g = OCLArray.from_array(
        coordinates.astype(np.float32, copy=False))
    res_g = OCLArray.empty(coordinates.shape[1], data.dtype)

    prog = OCLProgram(abspath("kernels/map_coordinates.cl"),
                      build_options=interpolation_defines[interpolation] +
                      mode_defines[mode] + dtype_defines)

    # one work item per requested position
    kernel = "map_coordinates{ndim}".format(ndim=data.ndim)

    prog.run_kernel(kernel, (coordinates.shape[-1], ), None, d_im, res_g.data,
                    coordinates_g.data)

    return res_g.get()
# return data_g, tmp_g convolve(tmp_g, hflip_g, res_g=tmp2_g) _multiply_inplace(u_g, tmp2_g) return u_g if __name__ == '__main__': from scipy.misc import lena d = np.pad(lena(), ((50, ) * 2, ) * 2, mode="constant") h = np.ones((11, ) * 2) / 121. # hpad = np.pad(h,((251,250),(251,250)),mode="constant") y = convolve(d, h) y += 0.02 * np.max(d) * np.random.uniform(0, 1, d.shape) print "start" # u = deconv_rl(y,h, 1) out = [ r.get() for r in _deconv_rl_gpu_conv(OCLArray.from_array(y.astype( np.float32)), OCLArray.from_array(h.astype(np.float32)), 1) ]
def _max_filter_numpy(data, size=5):
    """Numpy front end of ``_max_filter_gpu``.

    data is uploaded as float32, filtered with the given size and the
    result is fetched back to the host.
    """
    uploaded = OCLArray.from_array(data.astype(np.float32))
    filtered = _max_filter_gpu(uploaded, size=size)
    return filtered.get()
def focus_field_cylindrical(shape=(128, 128, 128),
                            units=(0.1, 0.1, 0.1),
                            lam=.5,
                            NA=.3,
                            n0=1.,
                            return_all_fields=False,
                            n_integration_steps=100):
    """calculates the focus field for a perfect, aberration free cylindrical lens after
    x polarized illumination via the vectorial debye diffraction integral (see [2]_).
    The pupil function is given by the numerical aperture NA

    Parameters
    ----------
    shape: Nx,Ny,Nz
        the shape of the geometry
    units: dx,dy,dz
        the pixel sizes in microns
    lam: float
        the wavelength of light used in microns
    NA: float
        the numerical aperture of the lens
    n0: float
        the refractive index of the medium
    return_all_fields: boolean
        if True, returns u,ex,ey,ez where ex/ey/ez are the complex
        vectorial field components
    n_integration_steps: int
        number of integration steps to perform

    Returns
    -------
    u: ndarray
        the intensity of the focus field
    (u,ex,ey,ez): list(ndarray)
        the intensity of the focus field and the complex field components
        (if return_all_fields is True)

    Example
    -------

    >>> u, ex, ey, ez = focus_field_cylindrical((128,128,128), (0.1,0.1,.1), lam=.5, NA = .4, return_all_fields=True)

    References
    ----------

    .. [2] Colin J. R. Sheppard: Cylindrical lenses—focusing and imaging: a review, Appl. Opt. 52, 538-545 (2013)

    """
    program = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                         build_options=[
                             "-I",
                             absPath("kernels"), "-D",
                             "INT_STEPS=%s" % n_integration_steps
                         ])

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA / n0)

    # the field is evaluated on a (Nz, Ny) grid only
    plane_shape = (Nz, Ny)
    u_g = OCLArray.empty(plane_shape, np.float32)
    ex_g = OCLArray.empty(plane_shape, np.complex64)
    ey_g = OCLArray.empty(plane_shape, np.complex64)
    ez_g = OCLArray.empty(plane_shape, np.complex64)

    def _extrude(buf_g):
        # repeat the plane Nx times along a new last axis
        return np.array(np.repeat(buf_g.get()[..., np.newaxis], Nx, axis=-1))

    t = time.time()

    # extents of the grid along y and z, with index N // 2 sitting at zero
    y_min = np.float32(-dy * (Ny // 2))
    y_max = np.float32((Ny - 1 - Ny // 2) * dy)
    z_min = np.float32(-dz * (Nz // 2))
    z_max = np.float32((Nz - 1 - Nz // 2) * dz)

    program.run_kernel("psf_cylindrical", u_g.shape[::-1], None,
                       ex_g.data, ey_g.data, ez_g.data, u_g.data,
                       y_min, y_max, z_min, z_max,
                       np.float32(lam / n0), np.float32(alpha))

    u = _extrude(u_g)
    ex = _extrude(ex_g)
    ey = _extrude(ey_g)
    ez = _extrude(ez_g)

    print("time in secs:", time.time() - t)

    if not return_all_fields:
        return u
    return u, ex, ey, ez
def geometric_transform(data,
                        mapping="c0,c1",
                        output_shape=None,
                        mode='constant',
                        interpolation="linear"):
    """
    Apply an arbitrary geometric transform.
    The given mapping is used to find, for each point in the output,
    the corresponding coordinates in the input.

    Parameters
    ----------
    data: ndarray
        2d or 3d input array; its dtype has to be one of the keys of
        cl_buffer_datatype_dict
    mapping: str
        comma separated expressions, one per input axis. The expressions
        are reversed in order and rendered into the kernel template as
        FUNC2 (2d data) or FUNC3 (3d data). The template defaults are
        "c1,c0" and "c2,c1,c0", which suggests that c0, c1, (c2) name the
        output coordinates (confirm against the kernel template).
    output_shape: tuple or None
        shape of the result, defaults to data.shape
    mode: str
        "constant", "wrap" or "edge"
    interpolation: str
        "linear" or "nearest"

    Returns
    -------
    ndarray of shape output_shape and of the dtype of data

    Raises
    ------
    ValueError
        if data is not a 2d or 3d ndarray
    KeyError
        for an unknown interpolation, mode or an unsupported dtype
    """

    if not (isinstance(data, np.ndarray) and data.ndim in (2, 3)):
        raise ValueError("input data has to be a 2d or 3d array!")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }

    mode_defines = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"]
    }

    if not interpolation in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    if not mode in mode_defines:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(mode_defines.keys())))

    if not data.dtype.type in cl_buffer_datatype_dict:
        raise KeyError(
            "dtype %s not supported yet (%s)" %
            (data.dtype.type, tuple(cl_buffer_datatype_dict.keys())))

    dtype_defines = [
        "-D",
        "DTYPE={type}".format(type=cl_buffer_datatype_dict[data.dtype.type])
    ]

    image_functions = {
        np.float32: "read_imagef",
        np.uint8: "read_imageui",
        np.uint16: "read_imageui",
        np.int32: "read_imagei"
    }

    # same exception type as before, but with a readable message
    if not data.dtype.type in image_functions:
        raise KeyError(
            "dtype %s not supported yet (%s)" %
            (data.dtype.type, tuple(image_functions.keys())))

    image_read_defines = [
        "-D",
        "READ_IMAGE=%s" % image_functions[data.dtype.type]
    ]

    # the default output_shape=None used to crash in tuple(None)
    if output_shape is None:
        output_shape = data.shape
    output_shape = tuple(output_shape)

    with open(abspath("kernels/geometric_transform.cl"), "r") as f:
        tpl = Template(f.read())

    # identity mappings for both kernels; the entry matching the data
    # dimension is replaced by the user supplied one in reversed order
    mappings = {"FUNC2": "c1,c0", "FUNC3": "c2,c1,c0"}
    mappings["FUNC%d" % data.ndim] = ",".join(reversed(mapping.split(",")))

    rendered = tpl.render(**mappings)

    d_im = OCLImage.from_array(data)
    res_g = OCLArray.empty(output_shape, data.dtype)

    prog = OCLProgram(src_str=rendered,
                      build_options=interpolation_defines[interpolation] +
                      mode_defines[mode] + dtype_defines +
                      image_read_defines)

    kernel = "geometric_transform{ndim}".format(ndim=data.ndim)

    prog.run_kernel(kernel, output_shape[::-1], None, d_im, res_g.data)

    return res_g.get()
_ocl_fft_gpu_inplace(plan, ocl_arr, inverse=inverse) return ocl_arr.get() def _ocl_fft_gpu_inplace(plan, ocl_arr, inverse=False): assert_bufs_type(np.complex64, ocl_arr) plan(ocl_arr, ocl_arr, inverse=inverse) def _ocl_fft_gpu(plan, ocl_arr, res_arr=None, inverse=False): assert_bufs_type(np.complex64, ocl_arr) if res_arr is None: res_arr = OCLArray.empty_like(ocl_arr) plan(ocl_arr, res_arr, inverse=inverse) return res_arr if __name__ == '__main__': d = np.random.uniform(0, 1, (64, ) * 2).astype(np.complex64) b = OCLArray.from_array(d) plan = fft_plan(d.shape) d2 = fft(d, plan=plan) fft(b, inplace=True, plan=plan)
def _convolve_spatial2(im, hs, mode="constant", grid_dim=None, pad_factor=2, plan=None, return_plan=False):
    """
    Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx,Hy,Hx), or, if grid_dim is given, a single 2d array
    from which the psf of every block is cut out on the device.

    The image is subdivided into (Gy,Gx) blocks, hs[j,i] being the psf at
    the center of block (i,j). Every image dimension is expected to be
    divisible by the grid dimension (Nx % Gx == 0, Ny % Gy == 0); this is
    not checked here.

    mode can be:
        "constant" - assumed values to be zero
        "wrap"     - periodic boundary condition

    Returns the convolved float32 array of shape im.shape, or the tuple
    (result, plan) if return_plan is True.
    """
    Gs = tuple(grid_dim) if grid_dim else hs.shape[:2]

    address_modes = {"constant": "CLK_ADDRESS_CLAMP",
                     "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs

    # extent of one grid block
    Nblock_y = Ny // Gy
    Nblock_x = Nx // Gx

    # extent of the overlapping, padded patch around each block
    Npatch_x = _next_power_of_2(pad_factor * Nblock_x)
    Npatch_y = _next_power_of_2(pad_factor * Nblock_y)
    stack_shape = (Gy, Gx, Npatch_y, Npatch_x)
    patch_len = Npatch_x * Npatch_y

    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % address_modes[mode]])

    if plan is None:
        plan = fft_plan(stack_shape, axes=(-2, -1))

    block_x0 = Nblock_x * np.arange(Gx)
    block_y0 = Nblock_y * np.arange(Gy)

    patches_g = OCLArray.empty(stack_shape, np.complex64)

    # --- psfs -------------------------------------------------------------
    if grid_dim:
        h_g = OCLArray.zeros(stack_shape, np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy=False))
        for i in range(len(block_x0)):
            for j in range(len(block_y0)):
                prog.run_kernel(
                    "fill_psf_grid2", (Nblock_x, Nblock_y), None,
                    tmp_g.data,
                    np.int32(Nx),
                    np.int32(i * Nblock_x),
                    np.int32(j * Nblock_y),
                    h_g.data,
                    np.int32(Npatch_x),
                    np.int32(Npatch_y),
                    np.int32(-Nblock_x // 2 + Npatch_x // 2),
                    np.int32(-Nblock_y // 2 + Npatch_y // 2),
                    np.int32(i * patch_len + j * Gx * patch_len))
    else:
        padded = pad_to_shape(hs, stack_shape)
        hs = np.fft.fftshift(padded, axes=(2, 3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    # --- image patches ----------------------------------------------------
    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    for i, left in enumerate(block_x0):
        for j, top in enumerate(block_y0):
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None,
                im_g,
                np.int32(left + Nblock_x // 2 - Npatch_x // 2),
                np.int32(top + Nblock_y // 2 - Npatch_y // 2),
                patches_g.data,
                np.int32(i * patch_len + j * Gx * patch_len))

    # --- convolution: multiply the spectra of patches and psfs -------------
    fft(patches_g, inplace=True, plan=plan)
    fft(h_g, inplace=True, plan=plan)
    prog.run_kernel("mult_inplace", (patch_len * Gx * Gy, ), None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse=True, plan=plan)

    logger.debug("Nblock_x: {}, Npatch_x: {}".format(Nblock_x, Npatch_x))

    # --- accumulate the patches into the result ----------------------------
    res_g = OCLArray.empty(im.shape, np.float32)

    for j in range(Gy + 1):
        for i in range(Gx + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y), None,
                            patches_g.data, res_g.data,
                            np.int32(i), np.int32(j),
                            np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    return (res, plan) if return_plan else res
# dtype = d_g.dtype.type # # if not isinstance(d_g, OCLArray): # raise ValueError("only works on OCLArrays") # # if not dtype in dtype_kernel_name.keys(): # raise NotImplementedError("only works for float32 or complex64") # # if not np.all([n%2==0 for n in d_g.shape]): # raise NotImplementedError("only works on even length arryas") # # prog = OCLProgram(abspath("kernels/fftshift.cl")) # prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None, # d_g.data, d_g.data, # np.int32(Nx), np.int32(Ny)) # return d_g if __name__ == '__main__': Nx, Ny, Nz = (256,)*3 d = np.linspace(0,1,Nx*Ny*Nz).reshape(Nz, Ny,Nx).astype(np.float32) d[Nz//2-30:Nz//2+30,Ny//2-20:Ny//2+20,Nx//2-20:Nx//2+20] = 2. d_g = OCLArray.from_array(d) out_g = OCLArray.empty_like(d) out = fftshift(d, axes= (0,1,2))
def scale(data, scale=(1., 1., 1.), interpolation="linear"):
    """
    Return an interpolated, scaled version of data.

    The output shape is scaled too (it is computed by ``_scale_shape``).

    Parameters
    ----------
    data : ndarray
        3d input array of dtype uint8, uint16 or float32
    scale : float, tuple
        scaling factor along each axis (x,y,z); a scalar is applied to all
        three axes
    interpolation : str
        either "nearest" or "linear"

    Returns
    -------
    ndarray
        scaled output of the same dtype as data

    Raises
    ------
    ValueError
        if data is not a 3d ndarray, its dtype is not supported, or scale
        does not have exactly three entries
    KeyError
        if interpolation is not one of the supported modes
    """
    if not (isinstance(data, np.ndarray) and data.ndim == 3):
        raise ValueError("input data has to be a 3d array!")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"],
    }

    if interpolation not in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    # NOTE(review): uint16 is compiled with the signed TYPENAME=short;
    # "ushort" looks like the matching type -- confirm against scale.cl
    options_types = {
        np.uint8: ["-D", "TYPENAME=uchar", "-D", "READ_IMAGE=read_imageui"],
        np.uint16: ["-D", "TYPENAME=short", "-D", "READ_IMAGE=read_imageui"],
        np.float32: ["-D", "TYPENAME=float", "-D", "READ_IMAGE=read_imagef"],
    }

    dtype = data.dtype.type

    if dtype not in options_types:
        raise ValueError("type %s not supported! Available: %s" %
                         (dtype, str(list(options_types.keys()))))

    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = (scale, ) * 3

    if len(scale) != 3:
        # wrap in a 1-tuple: formatting directly with a tuple of wrong
        # length raised TypeError instead of the intended ValueError
        raise ValueError("scale = %s misformed" % (scale, ))

    d_im = OCLImage.from_array(data)

    nshape = _scale_shape(data.shape, scale)

    res_g = OCLArray.empty(nshape, dtype)

    prog = OCLProgram(abspath("kernels/scale.cl"),
                      build_options=interpolation_defines[interpolation] +
                      options_types[dtype])

    # global size is the reversed (x,y,z) output shape
    prog.run_kernel("scale", res_g.shape[::-1], None, d_im, res_g.data)

    return res_g.get()
def _propagate(self, u0=None, offset=0, return_comp="field", return_shape="full", free_prop=False, slow_mean=False, **kwargs):
    """
    Propagate an input field plane by plane through the volume.

    Parameters
    ----------
    u0 : ndarray, optional
        the initial field; if None, self.u0_plane() is used. It is cast to
        complex64 before being uploaded.
    offset : int
        index of the z plane to start from, has to satisfy
        0 <= offset < Nz - 1 (checked with an assert)
    return_comp : str
        "field" or "intens"; selects the dtype of the result buffer
        (Bpm3d._complex_type or Bpm3d._real_type)
    return_shape : str
        "full" returns all planes as an array of shape (Nz-offset, Ny, Nx),
        "last" returns only the content of the plane buffer after the
        final step
    free_prop : bool
        if True (or if self.dn is None) the refractive index distribution
        is not applied
    slow_mean : bool
        if True, the mean refractive offset is re-estimated in every step
        from the current field; only implemented for return_shape="last"
    kwargs :
        accepted but not used

    Raises
    ------
    ValueError
        for an unknown return_comp or return_shape
    NotImplementedError
        if slow_mean is combined with return_shape="full"
    """
    free_prop = free_prop or (self.dn is None)
    if return_comp == "field":
        res_type = Bpm3d._complex_type
    elif return_comp == "intens":
        res_type = Bpm3d._real_type
    else:
        raise ValueError(return_comp)

    if not return_shape in ["last", "full"]:
        raise ValueError()

    if u0 is None:
        u0 = self.u0_plane()

    u0 = u0.astype(np.complex64, copy=False)

    Nx, Ny, Nz = self.shape

    assert offset >= 0 and offset < (Nz - 1)

    # the full result stack is only allocated when it is actually returned
    if return_shape == "full":
        u = OCLArray.empty((Nz - offset, Ny, Nx), dtype=res_type)

    self._buf_plane.write_array(u0)

    # copy the first plane
    if return_shape == "full":
        if self._is_subsampled:
            self._img_xy.copy_buffer(self._buf_plane)
            self._copy_down_img(self._img_xy, u, 0)
        else:
            self._copy_down_buf(self._buf_plane, u, 0)

    # dn0 is the refractive offset the propagator was last filled with
    dn0 = 0

    for i in range(Nz - 1 - offset):
        if not self.dn is None and not free_prop:
            if slow_mean:
                if return_shape == "full":
                    raise NotImplementedError()
                else:
                    # fetch the current plane and use it to weight |dn|
                    # NOTE(review): self.dn is indexed with i here, whereas
                    # the branch below uses i + offset -- confirm intended
                    tmp = OCLArray.empty((1, Ny, Nx), dtype=res_type)
                    if self._is_subsampled:
                        self._img_xy.copy_buffer(self._buf_plane)
                        self._copy_down_img(self._img_xy, tmp, 0)
                    else:
                        self._copy_down_buf(self._buf_plane, tmp, 0)

                    dn0 = np.sum(
                        np.abs(self.dn[i]) * tmp.get()) / np.sum(np.abs(self.dn[i]) + 1.e-10)

                    self._fill_propagator(self.n0 + dn0)
            else:
                # only refill the propagator when the plane mean changes
                if self.dn_mean[i + offset] != dn0:
                    dn0 = self.dn_mean[i + offset]
                    self._fill_propagator(self.n0 + dn0)

        # self.simul_z sub-steps per plane: multiply by the propagator
        # self._buf_H in Fourier space, then apply dn at the fractional
        # z position of the sub-step
        for j in range(self.simul_z):
            fft(self._buf_plane, inplace=True, plan=self._plan)
            self._mult_complex(self._buf_plane, self._buf_H)
            fft(self._buf_plane, inplace=True, inverse=True, plan=self._plan)
            if not free_prop:
                self._mult_dn(self._buf_plane, (i + offset + (j + 1.) / self.simul_z), dn0)

        # store the plane at its offset (in elements) within the stack
        if return_shape == "full":
            if self._is_subsampled and self.simul_xy != self.shape[:2]:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
            else:
                self._copy_down_buf(self._buf_plane, u, (i + 1) * (Nx * Ny))

    # NOTE(review): for return_shape="last" the plane buffer is returned as
    # is, independent of return_comp -- confirm this is intended
    if return_shape == "full":
        return u.get()
    else:
        return self._buf_plane.get()
def _gaussian_np(data, sigma, normalize=True, truncate = 4.0):
    """
    Gaussian filter for numpy input.

    The array is uploaded as float32, filtered on the device by
    _gaussian_buf (sigma, truncate and normalize are passed through) and
    the result is fetched back to the host.
    """
    host_f32 = data.astype(np.float32, copy=False)
    device_in = OCLArray.from_array(host_f32)
    device_out = _gaussian_buf(device_in, sigma, truncate=truncate, normalize=normalize)
    return device_out.get()
def buffer_create_write(data):
    """
    Round-trip check: allocate an OCLArray with the shape and dtype of
    data, upload data via write_array and assert that reading the buffer
    back gives the input again.
    """
    device_buf = OCLArray.empty(data.shape, data.dtype)
    device_buf.write_array(data)
    read_back = device_buf.get()
    assert np.allclose(data, read_back)
def convolve_spatial3(im, hs, mode="constant", plan=None, return_plan=False, pad_factor=2):
    """
    Spatially varying convolution of a 3d image with a 3d grid of psfs.

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    The input image im is subdivided into (Gx,Gy,Gz) blocks,
    hs[k,j,i] is the psf at the center of each block (i,j,k).

    As of now each image dimension has to be divisible by the grid dim, i.e.
        Nx % Gx == 0
        Ny % Gy == 0
        Nz % Gz == 0

    mode can be:
        "constant" - assumed values to be zero
        "wrap"     - periodic boundary condition

    Parameters
    ----------
    im : ndarray
        3d input image
    hs : ndarray
        6d array holding the grid of psfs
    mode : str
        boundary mode, "constant" or "wrap"
    plan : optional
        fft plan to reuse; created via fft_plan for the patch shape if None
    return_plan : bool
        if True, the plan is returned together with the result
    pad_factor : number
        the patch around each block is the next power of two of
        pad_factor times the block size

    Returns
    -------
    res or (res, plan)
        the convolved float32 array of shape im.shape

    Raises
    ------
    ValueError
        if im is not 3d or hs is not 6d
    NotImplementedError
        if the image shape is not divisible by the grid shape
    """
    import logging

    if im.ndim != 3 or hs.ndim != 6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:3])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy = %s !" %
            (str(hs.shape[:3])))

    log = logging.getLogger(__name__)

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])

    # the size of each block within the grid; floor division keeps the
    # sizes integral on Python 3 (they are used as shapes and kernel sizes)
    Nblocks = tuple(n // g for n, g in zip(Ns, Gs))

    # the size of the overlapping patches with safety padding
    Npatchs = tuple(next_power_of_2(pad_factor * nb) for nb in Nblocks)

    log.debug("hs.shape: %s, Nblocks: %s, Npatchs: %s",
              hs.shape, Nblocks, Npatchs)

    hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start coordinates of the blocks along z, y, x
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    # number of elements of a single patch / number of patches
    n_patch = int(np.prod(Npatchs))
    n_grid = int(np.prod(Gs))

    # this loops over all i,j,k
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel(
            "fill_patch3", Npatchs[::-1], None,
            im_g,
            np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
            np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
            np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2),
            patches_g.data,
            np.int32(i * n_patch + j * Gs[2] * n_patch +
                     k * Gs[2] * Gs[1] * n_patch))

    log.debug("patches: %s, psfs: %s", patches_g.shape, h_g.shape)

    # convolution: multiply the spectra of patches and psfs
    fft(patches_g, inplace=True, batch=n_grid, plan=plan)
    fft(h_g, inplace=True, batch=n_grid, plan=plan)
    prog.run_kernel("mult_inplace", (n_patch * n_grid, ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=n_grid, plan=plan)

    # accumulate
    res_g = OCLArray.zeros(im.shape, np.float32)

    for k, j, i in product(*[range(g + 1) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None,
                        patches_g.data, res_g.data,
                        np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    return res
else: return results else: stage_inf = self.stage_2_inf #args = tuple(results)+stage1_args args = (results[0], ) + stage1_args if __name__ == '__main__': from gputools import OCLArray from gputools import OCLReductionKernel k1 = OCLReductionKernel(np.float32, neutral="0", reduce_expr="a+b", map_expr="x[i]", arguments="__global float *x") k2 = TestKernel(np.float32, neutral="0", reduce_expr="a+b", map_exprs=["x[i]"], arguments="__global float *x") a = OCLArray.from_array(np.ones((256, 256), np.float32)) a = OCLArray.from_array(np.ones(256, np.float32)) #res = OCLArray.empty(256,np.float32) print(k1(a)) print(k2(a))
def convolve_spatial2(im, hs, mode="constant", plan=None, return_plan=False):
    """
    Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    The input image im is subdivided into (Gy,Gx) blocks,
    hs[j,i] is the psf at the center of each block (i,j).

    As of now each image dimension has to be divisible by the grid dim, i.e.
        Nx % Gx == 0
        Ny % Gy == 0

    mode can be:
        "constant" - assumed values to be zero
        "wrap"     - periodic boundary condition

    Parameters
    ----------
    im : ndarray
        2d input image
    hs : ndarray
        4d array holding the grid of psfs
    mode : str
        boundary mode, "constant" or "wrap"
    plan : optional
        fft plan to reuse; created via fft_plan for the patch shape if None
    return_plan : bool
        if True, the plan is returned together with the result

    Returns
    -------
    res or (res, plan)
        the convolved float32 array of shape im.shape

    Raises
    ------
    ValueError
        if im is not 2d or hs is not 4d
    NotImplementedError
        if the image shape is not divisible by the grid shape
    """
    import logging

    if im.ndim != 2 or hs.ndim != 4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:2])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy = %s shape mismatch"
            % (str(hs.shape[:2])))

    log = logging.getLogger(__name__)

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]

    # the size of each block within the grid; floor division keeps the
    # sizes integral on Python 3 (they are used as shapes and kernel sizes)
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x = next_power_of_2(3 * Nblock_x)
    Npatch_y = next_power_of_2(3 * Nblock_y)

    log.debug("Nblock_x: %s, Npatch_x: %s", Nblock_x, Npatch_x)

    hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)),
                         axes=(2, 3))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y, Npatch_x))

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start coordinates of the blocks
    x0s = Nblock_x * np.arange(Gx)
    y0s = Nblock_y * np.arange(Gy)

    log.debug("x0s: %s", x0s)

    # number of elements of a single patch
    n_patch = Npatch_x * Npatch_y

    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None,
                im_g,
                np.int32(_x0 + Nblock_x // 2 - Npatch_x // 2),
                np.int32(_y0 + Nblock_y // 2 - Npatch_y // 2),
                patches_g.data,
                np.int32(i * n_patch + j * Gx * n_patch))

    # convolution: multiply the spectra of patches and psfs
    fft(patches_g, inplace=True, batch=Gx * Gy, plan=plan)
    fft(h_g, inplace=True, batch=Gx * Gy, plan=plan)
    prog.run_kernel("mult_inplace", (n_patch * Gx * Gy, ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=Gx * Gy, plan=plan)

    # accumulate
    res_g = OCLArray.empty(im.shape, np.float32)

    for i in range(Gx + 1):
        for j in range(Gy + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y), None,
                            patches_g.data, res_g.data,
                            np.int32(i), np.int32(j),
                            np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    return res
def affine(data, mat=np.identity(4), mode="constant", interpolation="linear"):
    """
    Affine transform of a 3d array.

    mat is the inverse coordinate transform matrix (similar to
    ndimage.affine_transform); it is uploaded as float32 and handed to the
    "affine3" kernel unchanged.

    Parameters
    ----------
    data : ndarray
        3d array to be transformed (processed as float32)
    mat : ndarray
        3x3 or 4x4 inverse coordinate transform matrix
    mode : string
        boundary mode, one of the following:
        'constant'
            pads with zeros
        'edge'
            pads with edge values
        'wrap'
            pads with the repeated version of the input
    interpolation : string
        interpolation mode, one of the following
        'linear'
        'nearest'

    Returns
    -------
    res : ndarray
        transformed float32 array (same shape as input)
    """
    warnings.warn(
        "gputools.transform.affine: API change as of gputools>= 0.2.8: the inverse of the matrix is now used as in scipy.ndimage.affine_transform"
    )

    is_volume = isinstance(data, np.ndarray) and data.ndim == 3
    if not is_volume:
        raise ValueError("input data has to be a 3d array!")

    # sampler build flags, keyed by the user facing option names
    filter_flags = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"],
    }
    address_flags = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"],
    }

    if interpolation not in filter_flags:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(filter_flags.keys())))
    if mode not in address_flags:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(address_flags.keys())))

    volume_f32 = data.astype(np.float32, copy=False)
    d_im = OCLImage.from_array(volume_f32)
    res_g = OCLArray.empty(data.shape, np.float32)
    mat_inv_g = OCLArray.from_array(mat.astype(np.float32, copy=False))

    build_options = filter_flags[interpolation] + address_flags[mode]
    prog = OCLProgram(abspath("kernels/affine.cl"), build_options=build_options)

    # the global size is the reversed (x,y,z) shape of the volume
    prog.run_kernel("affine3", data.shape[::-1], None,
                    d_im, res_g.data, mat_inv_g.data)

    return res_g.get()
def focus_field_cylindrical_plane(shape=(128, 128), units=(.1, .1), z=0., lam=.5, NA=.3, n0=1., ex_g=None, n_integration_steps=200):
    """calculates the complex 2d input field at position -z for a perfect,
    aberration free cylindrical lens after x polarized illumination via the
    vectorial debye diffraction integral.

    The field is evaluated by the "psf_cylindrical_plane" kernel, which is
    compiled with INT_STEPS set to n_integration_steps.

    Parameters
    ----------
    shape: Nx,Ny
        the 2d shape of the geometry
    units: dx,dy
        the pixel sizes in microns
    z: float
        defocus position in microns, such that the beam would focus at z
        e.g. an input field with z = 10. would have its focus spot after 10 microns
    lam: float
        the wavelength of light used in microns
    NA: float
        the numerical aperture of the lens
    n0: float
        the refractive index of the medium
    ex_g: OCLArray, optional
        device buffer of shape (Ny,Nx) that receives the result in place
    n_integration_steps: int
        number of integration steps to perform

    Returns
    -------
    ex: ndarray
        the complex field, or None if ex_g was given (the result is then
        left in that buffer)

    Example
    -------
    >>> # the input pattern of a beam that will focus after 4 microns
    >>> ex = focus_field_cylindrical_plane((256,256), (0.1,0.1), z = 4., lam=.5, NA = .4)

    See Also
    --------
    biobeam.focus_field_cylindrical : the 3d function
    """
    build_options = [
        "-I", absPath("kernels"),
        "-D", "INT_STEPS=%s" % n_integration_steps,
    ]
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options=build_options)

    Nx, Ny = shape
    dx, dy = units

    # half opening angle of the lens
    alpha = np.arcsin(NA / n0)

    caller_owns_buffer = ex_g is not None
    if caller_owns_buffer:
        assert ex_g.shape[::-1] == shape
    else:
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)

    # physical extent along y, centered on pixel Ny // 2
    y_min = np.float32(-dy * (Ny // 2))
    y_max = np.float32((Ny - 1 - Ny // 2) * dy)

    p.run_kernel("psf_cylindrical_plane", (Nx, Ny), None,
                 ex_g.data,
                 y_min, y_max,
                 np.float32(-z),
                 np.float32(lam / n0),
                 np.float32(alpha))

    return None if caller_owns_buffer else ex_g.get()
def buffer_from_array(data):
    """
    Round-trip check: create an OCLArray directly from data and assert
    that reading it back gives the input again.
    """
    device_buf = OCLArray.from_array(data)
    read_back = device_buf.get()
    assert np.allclose(data, read_back)