ctx = autoinit.context

if __name__ == '__main__':
    # Ad-hoc setup for a test volume: a cubic grid whose upper half (along z)
    # carries a refractive index offset of 0.1.
    api = cluda.cuda_api()
    thr = api.Thread.create()

    size = (256, 256, 256)
    units = (.1, ) * 3
    lam = .5
    u0 = None
    n0 = 1.

    # size is given as (Nx, Ny, Nz); the array itself is laid out (Nz, Ny, Nx)
    dn = np.zeros(size[::-1], np.complex64)

    clock = StopWatch()
    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # floor division keeps the slice index an integer on Python 2 and 3 alike
    dn[Nz // 2:, ...] = 0.1

    #setting up the propagator
    k0 = 2. * np.pi / lam
    kxs = 2. * np.pi * np.fft.fftfreq(Nx, dx)
    kys = 2. * np.pi * np.fft.fftfreq(Ny, dy)
    # with indexing="ij" the first argument runs along axis 0, so the
    # y-frequencies go first to obtain (Ny, Nx) grids that match dn's planes
    KY, KX = np.meshgrid(kys, kxs, indexing="ij")
def bpm_3d_inverse(u, units, lam=.5, use_fresnel_approx=False):
    """
    Propagates every z-plane of a complex field by one step dz on the GPU.

    This is meant to become the inverse beam propagation step (recovering
    the refractive index from a given field), but the final inversion is
    not implemented yet: the returned volume currently holds the
    propagated field, not the refractive index.

    u - the complex field distribution, indexed as (Nz,Ny,Nx)
    units - the unit lengths of each dimension in microns (dx,dy,dz)
    lam - the wavelength
    use_fresnel_approx - if True, replace the square root in the
        propagator by its first-order expansion

    returns
    a complex64 array of shape (Nz,Ny,Nx); plane 0 is zero and plane i+1
    is plane i of u after multiplication with the propagator in Fourier
    space
    """
    clock = StopWatch()

    clock.tic("setup")

    Nz, Ny, Nx = u.shape
    dx, dy, dz = units

    #setting up the propagator
    # centred frequency axes in cycles per pixel; fftshift below moves them
    # into FFT order
    kxs = np.arange(-Nx / 2., Nx / 2.) / Nx
    kys = np.arange(-Ny / 2., Ny / 2.) / Ny

    # with indexing="ij" the first argument runs along axis 0, so the
    # y-frequencies go first: KY and KX then have shape (Ny, Nx) like the
    # plane buffers, and KX pairs with dx, KY with dy
    KY, KX = np.meshgrid(kys, kxs, indexing="ij")

    # the 0.j forces complex arithmetic, so frequencies beyond 1/lam give an
    # imaginary H0 instead of NaN
    H0 = np.sqrt(0.j + (1. / lam)**2 - KX**2 / dx**2 - KY**2 / dy**2)

    if use_fresnel_approx:
        H0 = 1. / lam * (0.j + 1. - .5 * lam**2 *
                         (KX**2 / dx**2 + KY**2 / dy**2))

    # zero out any entries where H0 could not be evaluated
    outsideInds = np.isnan(H0)

    H = np.exp(2.j * np.pi * dz * H0)
    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    H = np.fft.fftshift(H).astype(np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny, Nx))
    plane_g = OCLArray.empty((Ny, Nx), np.complex64)

    h_g = OCLArray.from_array(H)
    u_g = OCLArray.from_array(u.astype(np.complex64))

    # start from zeros: plane 0 is never written by the loop below and would
    # otherwise come back as uninitialised device memory
    dn_g = OCLArray.from_array(np.zeros((Nz, Ny, Nx), np.complex64))

    clock.toc("setup")

    clock.tic("run")

    plane_size = Nx * Ny

    for i in range(Nz - 1):
        # presumably copies plane i of u_g into plane_g (the last argument
        # is the flat offset of that plane) -- confirm against the kernel
        program.run_kernel("copy_complex", (plane_size, ), None,
                           u_g.data, plane_g.data,
                           np.int32(i * plane_size))

        #calculate the propagated plane
        ocl_fft(plane_g, inplace=True, plan=plan)

        # presumably an elementwise multiplication with the propagator
        program.run_kernel("mult", (plane_size, ), None,
                           plane_g.data, h_g.data)

        ocl_fft(plane_g, inplace=True, inverse=True, plan=plan)

        dn_g[i + 1, ...] = plane_g

        # TODO: the inversion itself is still missing. The disabled draft
        # copied plane i+1 of u into a second plane buffer and ran a
        # "divide_dn_complex" kernel with (that plane, plane_g, dn_g,
        # float32(2*pi/lam*dz), int32((i+1)*Nx*Ny)).

    clock.toc("run")

    # single-argument form prints the same on Python 2 and Python 3
    print(clock)

    return dn_g.get()