Ejemplo n.º 1
0
# Context object exposed by the `autoinit` module (imported outside this excerpt).
ctx = autoinit.context

if __name__ == '__main__':

    # Create a compute thread on the CUDA backend provided by `cluda`.
    api = cluda.cuda_api()
    thr = api.Thread.create()

    # Simulation parameters.
    size = (256, 256, 256)  # unpacked below as (Nx, Ny, Nz)
    units = (.1, ) * 3  # unpacked below as (dx, dy, dz)
    lam = .5  # wavelength; presumably in the same length unit as `units`
    u0 = None
    n0 = 1.
    # Complex volume stored in reversed axis order, i.e. indexed as (Nz, Ny, Nx).
    dn = np.zeros(size[::-1], np.complex64)

    clock = StopWatch()

    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # Fill the second half of the volume along z with 0.1. Floor division
    # keeps the slice index an integer on Python 3 as well as Python 2.
    dn[Nz // 2:, ...] = 0.1

    #setting up the propagator
    k0 = 2. * np.pi / lam

    # Angular spatial frequencies (2*pi * cycles per unit length), FFT order.
    kxs = 2. * np.pi * np.fft.fftfreq(Nx, dx)
    kys = 2. * np.pi * np.fft.fftfreq(Ny, dy)

    # With "ij" indexing the first argument varies along axis 0, so kys has to
    # come first for KY/KX to be (Ny, Nx) grids that match the plane layout.
    KY, KX = np.meshgrid(kys, kxs, indexing="ij")
Ejemplo n.º 2
0
def bpm_3d_inverse(u, units, lam=.5, use_fresnel_approx=False):
    """
    Propagate every z-plane of a complex field by one step dz on the GPU.

    NOTE(review): the name and the original docstring promise the refractive
    index distribution dn, but the step that would derive it from the given
    field and the propagated planes is not implemented (see the TODO in the
    loop). As written, plane i + 1 of the result is plane i of u after one
    application of the propagator H.

    u        -    the complex field distribution, indexed as (Nz, Ny, Nx)
    units    -    the unit lengths (dx, dy, dz) of each dimension in microns
    lam      -    the wavelength
    use_fresnel_approx
             -    if True, replace the square-root form of the propagator
                  phase by its first-order (paraxial) expansion

    returns
    the content of the (Nz, Ny, Nx) complex64 device buffer as fetched by
    its get() method; plane 0 is all zeros, planes 1..Nz-1 hold the
    propagated planes

    """
    clock = StopWatch()

    clock.tic("setup")
    Nz, Ny, Nx = u.shape
    dx, dy, dz = units

    #setting up the propagator
    # spatial frequencies in cycles per pixel with the zero frequency centred
    kxs = np.arange(-Nx / 2., Nx / 2.) / Nx
    kys = np.arange(-Ny / 2., Ny / 2.) / Ny

    # With "ij" indexing the first argument varies along axis 0, so kys has to
    # come first for KY/KX to have the (Ny, Nx) layout of the field planes.
    KY, KX = np.meshgrid(kys, kxs, indexing="ij")

    if use_fresnel_approx:
        H0 = 1. / lam * (0.j + 1. - .5 * lam**2 *
                         (KX**2 / dx**2 + KY**2 / dy**2))
    else:
        H0 = np.sqrt(0.j + (1. / lam)**2 - KX**2 / dx**2 - KY**2 / dy**2)

    # H0 is complex, so frequencies beyond 1 / lam give an imaginary value
    # rather than NaN; the mask only catches NaNs coming from the inputs.
    outsideInds = np.isnan(H0)
    H = np.exp(2.j * np.pi * dz * H0)
    H[outsideInds] = 0.

    # move the zero frequency from the centre to index 0, as the FFT expects
    H = np.fft.fftshift(H).astype(np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny, Nx))
    # Scratch plane; starts out zeroed so it can also clear plane 0 below.
    plane_g = OCLArray.from_array(np.zeros((Ny, Nx), np.complex64))

    h_g = OCLArray.from_array(H)
    u_g = OCLArray.from_array(u.astype(np.complex64))

    dn_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.complex64)
    # The loop only writes planes 1..Nz-1; clear plane 0 so the result never
    # contains uninitialised device memory.
    dn_g[0, ...] = plane_g

    clock.toc("setup")
    clock.tic("run")

    for i in range(Nz - 1):
        # presumably copies plane i of u_g (element offset i * Nx * Ny) into
        # plane_g -- kernel source is not part of this file
        program.run_kernel("copy_complex", (Nx * Ny, ), None, u_g.data,
                           plane_g.data, np.int32(i * Nx * Ny))

        #calculate the propagated plane
        ocl_fft(plane_g, inplace=True, plan=plan)

        # presumably an in-place elementwise product of the spectrum with H
        program.run_kernel("mult", (Nx * Ny, ), None, plane_g.data, h_g.data)

        ocl_fft(plane_g, inplace=True, inverse=True, plan=plan)

        dn_g[i + 1, ...] = plane_g

        # TODO: derive dn from plane i + 1 of u and the propagated plane. An
        # earlier draft called a "divide_dn_complex" kernel here with the
        # factor k0 * dz, where k0 = 2 * pi / lam.

    clock.toc("run")

    # Parenthesised form is valid on both Python 2 and Python 3.
    print(clock)
    return dn_g.get()