Ejemplo n.º 1
0
    def setup(self, size, units, lam=0.5, n0=1.0, use_fresnel_approx=False):
        """
            Set up the internal variables (propagator, GPU buffers, kernels).

            :param size:  the size of the geometry in pixels (Nx,Ny,Nz)
            :param units: the physical units of each voxel in microns (dx,dy,dz)
            :param lam: the wavelength of light in microns
            :param n0:  the refractive index of the surrounding media
            :param use_fresnel_approx:  if True, uses fresnel approximation for propagator

            The base class setup is run first; this method then reads
            self.size, self._H, self.scatter_weights and self.gfactor_weights
            and mirrors them into GPU buffers.
        """
        Bpm3d_Base.setup(self, size, units, lam=lam, n0=n0, use_fresnel_approx=use_fresnel_approx)

        # setting up the gpu buffers and kernels
        self.program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        # Nz is required for the per-slice buffers allocated below; the
        # previous code only unpacked Nx, Ny and then used an undefined Nz.
        Nx, Ny, Nz = self.size[:3]

        # NOTE(review): the plan is created with an empty shape and never
        # used afterwards -- confirm whether this should be a (Ny, Nx) plan
        # stored on self.
        plan = fft_plan(())
        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        self.scatter_weights_g = OCLArray.from_array(self.scatter_weights.astype(np.float32))
        self.gfactor_weights_g = OCLArray.from_array(self.gfactor_weights.astype(np.float32))

        # one float32 accumulator entry per z slice
        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

        # weighted sum of |field - plain|^2, where `plain` is subtracted
        # from element 0 only (the (i==0) factor in the map expression)
        self.reduce_kernel = OCLReductionKernel(
            np.float32,
            neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain",
        )
Ejemplo n.º 2
0
def create_dn_buffer(size, units, points,
                     dn_inner=.0, rad_inner=0,
                     dn_outer=.1, rad_outer=.4):
    """Fill a GPU float32 buffer of shape (Nz,Ny,Nx) via the ``fill_dn`` kernel.

    :param size:  grid size (Nx,Ny,Nz)
    :param units: voxel spacing (dx,dy,dz)
    :param points: 2d array-like, one point per row; rows are sorted by
                   their third column before being uploaded
    :param dn_inner, rad_inner, dn_outer, rad_outer: forwarded unchanged
                   (as float32) to the kernel; presumably inner/outer
                   refractive index offsets and radii -- confirm in
                   kernels/bpm_3d_spheres.cl
    :return: the filled OCLArray
    """
    n_x, n_y, n_z = size
    d_x, d_y, d_z = units

    kernels = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))

    dn_g = OCLArray.empty((n_z, n_y, n_x), dtype=np.float32)

    # order the points by their z coordinate (third column)
    pts = np.array(points)
    z_order = np.argsort(pts[:, 2])
    pts = pts[z_order, :]
    n_points = pts.shape[0]

    points_g = OCLArray.from_array(pts.flatten().astype(np.float32))

    # all trailing scalar kernel arguments are passed as float32
    scalar_args = [np.float32(value) for value in
                   (d_x, d_y, d_z, dn_inner, rad_inner, dn_outer, rad_outer)]

    kernels.run_kernel("fill_dn", (n_x, n_y, n_z), None,
                       dn_g.data, points_g.data, np.int32(n_points),
                       *scalar_args)

    return dn_g
Ejemplo n.º 3
0
def test_3d():
    """Time ``convolve_sep3`` on a 128^3 volume for numpy and OCLArray input.

    A single voxel is set to 1 and convolved with three normalised box
    kernels of length 5, 13 and 11. The average time per call is printed
    for the numpy path and for the GPU-buffer path.

    Returns the numpy-path result and the GPU-path result (as numpy array).
    """
    from time import time
    Niter = 10

    data = np.zeros((128,)*3, np.float32)

    data[30, 30, 30] = 1.
    hx = 1./5*np.ones(5)
    hy = 1./13*np.ones(13)
    # normalise by the actual kernel length (was 1./13 for 11 taps)
    hz = 1./11*np.ones(11)

    t = time()
    for _ in range(Niter):
        out = convolve_sep3(data, hx, hy, hz)
    # parenthesised single-argument print works on Python 2 and 3
    print("time: %.3f ms" % (1000.*(time()-t)/Niter))

    data_g = OCLArray.from_array(data.astype(np.float32))
    hx_g = OCLArray.from_array(hx.astype(np.float32))
    hy_g = OCLArray.from_array(hy.astype(np.float32))
    hz_g = OCLArray.from_array(hz.astype(np.float32))

    t = time()
    for _ in range(Niter):
        out_g = convolve_sep3(data_g, hx_g, hy_g, hz_g)

    # fetch the result before stopping the clock so that the transfer
    # (and any pending GPU work) is included in the timing
    out_host = out_g.get()
    print("time: %.3f ms" % (1000.*(time()-t)/Niter))

    return out, out_host
Ejemplo n.º 4
0
def _fft_convolve_numpy(data, h, plan=None,
                        kernel_is_fft=False,
                        kernel_is_fftshifted=False):
    """FFT based convolution of two equally shaped numpy arrays on the GPU.

    Both inputs are uploaded as complex64. Unless ``kernel_is_fftshifted``
    is set, ``h`` is passed through ``np.fft.fftshift`` first. The absolute
    value of the complex result is returned as a numpy array.

    Raises ValueError if ``data`` and ``h`` differ in shape.
    """
    # kept from the original: the returned device handle itself is not used
    get_device()

    if data.shape != h.shape:
        raise ValueError("data and kernel must have same size! %s vs %s " % (data.shape, h.shape))

    data_g = OCLArray.from_array(data.astype(np.complex64))

    kernel = h if kernel_is_fftshifted else np.fft.fftshift(h)
    h_g = OCLArray.from_array(kernel.astype(np.complex64))

    res_g = OCLArray.empty_like(data_g)

    _fft_convolve_gpu(data_g, h_g,
                      res_g=res_g,
                      plan=plan,
                      kernel_is_fft=kernel_is_fft)

    magnitude = np.abs(res_g.get())

    # drop the references to the GPU buffers before returning
    del data_g, h_g, res_g

    return magnitude
Ejemplo n.º 5
0
def _deconv_rl_np(data, h, Niter=10):
    """Numpy front end for ``_deconv_rl_gpu_conv``.

    Uploads ``data`` and ``h`` as float32 GPU buffers, runs ``Niter``
    iterations and returns the result as a numpy array.
    """
    data_g, kernel_g = [
        OCLArray.from_array(arr.astype(np.float32, copy=False))
        for arr in (data, h)
    ]
    return _deconv_rl_gpu_conv(data_g, kernel_g, Niter).get()
Ejemplo n.º 6
0
def _deconv_rl_gpu_conv(data_g, h_g, Niter = 10):
    """Iterative deconvolution on GPU buffers using ``convolve``.

    Parameters
    ----------
    data_g: OCLArray
        the measured data; also used as the initial estimate
    h_g: OCLArray
        the kernel (psf)
    Niter: int
        number of iterations

    Returns
    -------
        the estimate as a float32 OCLArray with the shape of data_g

    Each iteration convolves the estimate with h, combines it with the
    data via ``_divide_inplace`` (presumably tmp_g <- data_g / tmp_g --
    confirm against the helper), convolves that with the flipped kernel
    and multiplies the estimate by the result.
    """

    #set up some gpu buffers
    u_g = OCLArray.empty(data_g.shape,np.float32)

    u_g.copy_buffer(data_g)

    tmp_g = OCLArray.empty(data_g.shape,np.float32)
    tmp2_g = OCLArray.empty(data_g.shape,np.float32)

    # The mirrored kernel has to be reversed along *every* axis; the old
    # `[::-1,::-1]` only reversed two axes (wrong for 3d, error for 1d).
    # TODO: the flip still makes a round trip through host memory
    h = h_g.get()
    flip_all_axes = (slice(None, None, -1),) * h.ndim
    hflip_g = OCLArray.from_array(h[flip_all_axes].copy())

    for _ in range(Niter):
        convolve(u_g, h_g,
                 res_g = tmp_g)

        _divide_inplace(data_g,tmp_g)

        convolve(tmp_g, hflip_g,
                 res_g = tmp2_g)
        _multiply_inplace(u_g,tmp2_g)

    return u_g
Ejemplo n.º 7
0
def _convolve_sep2_numpy(data, hx, hy):
    """Numpy front end for ``_convolve_sep2_gpu``.

    Converts both kernels and the data to float32, uploads them, and
    returns the result of the GPU routine as a numpy array.
    """
    def _upload(arr):
        return OCLArray.from_array(arr.astype(np.float32))

    kernel_x_g = _upload(hx)
    kernel_y_g = _upload(hy)
    image_g = _upload(data)

    result_g = _convolve_sep2_gpu(image_g, kernel_x_g, kernel_y_g)
    return result_g.get()
Ejemplo n.º 8
0
def gpu_kuwahara(data, N=5):
    """Run the ``kuwahara`` OpenCL kernel on an image and return the result.

    ``data`` is uploaded as float32; the output buffer has shape
    (data.shape[0], data.shape[1]). ``N`` is handed to the kernel as int32
    and must be odd, otherwise a ValueError is raised.
    """
    if N % 2 == 0:
        raise ValueError("Data has to be a (2n+1)x(2n+1) array.")

    src_g = OCLArray.from_array(data.astype(float32))

    n_rows, n_cols = data.shape[0], data.shape[1]
    dst_g = OCLArray.empty((n_rows, n_cols), float32)

    kuwahara_prog = OCLProgram("./OpenCL/gpu_kernels/gpu_kuwahara.cl")

    # global size is the reversed buffer shape, local size is left to None
    global_size = src_g.shape[::-1]
    kuwahara_prog.run_kernel("kuwahara", global_size, None,
                             src_g.data, dst_g.data,
                             int32(N))

    return dst_g.get()
Ejemplo n.º 9
0
def test_bessel(n, x):
    """Evaluate the ``bessel_fill`` kernel for every element of ``x``.

    ``x`` is converted to float32 and uploaded; ``n`` is passed to the
    kernel as int32. Returns the filled result buffer as a numpy array.
    """
    x32 = x.astype(float32)
    x_g = OCLArray.from_array(x32)
    out_g = OCLArray.empty_like(x32)

    bessel_prog = OCLProgram(absPath("kernels/bessel.cl"))
    bessel_prog.run_kernel("bessel_fill", x_g.shape, None,
                           x_g.data, out_g.data, int32(n))

    return out_g.get()
Ejemplo n.º 10
0
def _convolve_np(data, h):
    """Numpy front end for ``_convolve_buf``.

    Uploads ``data`` and ``h`` as float32 (without copying when they
    already are float32) and returns the convolution as a numpy array.
    """
    data_g, kernel_g = [
        OCLArray.from_array(arr.astype(np.float32, copy=False))
        for arr in (data, h)
    ]
    result_g = _convolve_buf(data_g, kernel_g)
    return result_g.get()
Ejemplo n.º 11
0
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Filter a 3d volume with the kernels in kernels/nlm3.cl.

    for noise level of sigma_0, choose sigma = 1.5*sigma_0

    Parameters
    ----------
    data: ndarray
        input volume, converted to float32
    sigma: float
        passed as float32 to the computePlus/computeMinus kernels
    size_filter: int
        compiled into the program as FS
    size_search: int
        compiled into the program as BS; also the half width of the
        offset range that is looped over

    Returns
    -------
        accumulated values divided by accumulated weights (numpy array)
    """

    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images (distImg used to be allocated twice)
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    # loop-invariant kernel arguments
    shape = img.shape
    dims = (np.int32(shape[0]), np.int32(shape[1]), np.int32(shape[2]))
    sigma32 = np.float32(sigma)

    # dx only covers the non-negative half of the range; computeMinus is
    # launched with the same offset for every non-zero offset
    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                offset = (np.int32(dx),np.int32(dy),np.int32(dz))

                prog.run_kernel("dist",shape,None,
                                img,tmpImg,*offset)

                # three "convolve" passes with flags 1, 2 and 4
                prog.run_kernel("convolve",shape,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",shape,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",shape,None,
                                tmpImg,distImg,np.int32(4))

                accumulate_args = ((img,distImg,accBuf.data,weightBuf.data)
                                   + dims + offset + (sigma32,))

                prog.run_kernel("computePlus",shape,None,*accumulate_args)

                if any([dx,dy,dz]):
                    prog.run_kernel("computeMinus",shape,None,*accumulate_args)

    acc  = accBuf.get()
    weights  = weightBuf.get()

    return acc/weights
Ejemplo n.º 12
0
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
    axes: int, list or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the return type, see below

    Returns
    -------
        if return_buffer, returns the result as OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for unsupported OCLArray dtypes or axes of odd length
    ValueError
        if arr_obj is neither a numpy array nor an OCLArray
    """

    if axes is None:
        axes = range(arr_obj.ndim)
    elif np.isscalar(axes):
        # np.fft.fftshift accepts a single int as well
        axes = (axes,)
    axes = tuple(axes)

    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if not all(arr_obj.shape[a]%2==0 for a in axes):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    if not axes:
        # nothing to shift: the result is the input itself; without this
        # copy res_g would be returned uninitialised
        res_g.copy_buffer(arr_obj)

    # iterate over all axes
    # FIXME: this is still rather inefficient
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        # later axes operate on the partially shifted result
        in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()
Ejemplo n.º 13
0
def _deconv_rl_np_fft(data, h, Niter = 10, 
                h_is_fftshifted = False):
    """ deconvolves data with given psf (kernel) h

    data and h have to be same shape

    
    via lucy richardson deconvolution
    """

    if data.shape != h.shape:
        raise ValueError("data and h have to be same shape")

    if not h_is_fftshifted:
        h = np.fft.fftshift(h)


    hflip = h[::-1,::-1]
        
    #set up some gpu buffers
    y_g = OCLArray.from_array(data.astype(np.complex64))
    u_g = OCLArray.from_array(data.astype(np.complex64))
    
    tmp_g = OCLArray.empty(data.shape,np.complex64)

    hf_g = OCLArray.from_array(h.astype(np.complex64))
    hflip_f_g = OCLArray.from_array(hflip.astype(np.complex64))

    # hflipped_g = OCLArray.from_array(h.astype(np.complex64))
    
    plan = fft_plan(data.shape)

    #transform psf
    fft(hf_g,inplace = True)
    fft(hflip_f_g,inplace = True)

    for i in range(Niter):
        print i
        fft_convolve(u_g, hf_g,
                     res_g = tmp_g,
                     kernel_is_fft = True)

        _complex_divide_inplace(y_g,tmp_g)

        fft_convolve(tmp_g,hflip_f_g,
                     inplace = True,
                     kernel_is_fft = True)

        _complex_multiply_inplace(u_g,tmp_g)
        

    return np.abs(u_g.get())
Ejemplo n.º 14
0
def transfer(data):
    """Copy ``data`` buffer -> image -> buffer on the GPU and return it.

    Supported dtypes are float32 (single channel image) and complex64
    (two channel float32 image).

    Raises TypeError for any other dtype; previously this surfaced as an
    unbound local variable after the GPU buffers had been allocated.
    """
    dtype = data.dtype.type
    if dtype not in (np.float32, np.complex64):
        raise TypeError("only float32 or complex64 supported (got %s)" % dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    # NOTE(review): the original sliced the shape with [::1] (a no-op);
    # confirm that a reversed shape ([::-1]) was not intended.
    if dtype == np.float32:
        im = OCLImage.empty(data.shape, dtype = np.float32)
    else:
        im = OCLImage.empty(data.shape, dtype = np.float32, num_channels=2)

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
Ejemplo n.º 15
0
def resample_buf(data, new_shape):
    """Resample ``data`` to ``new_shape`` via a GPU image.

    The data is uploaded to a buffer, copied into an image and read back
    into a buffer of shape ``new_shape`` with ``copy_image_resampled``.
    Supported dtypes are float32 (single channel image) and complex64
    (two channel float32 image).

    Raises TypeError for any other dtype; previously this surfaced as an
    unbound local variable after the GPU buffers had been allocated.
    """
    dtype = data.dtype.type
    if dtype not in (np.float32, np.complex64):
        raise TypeError("only float32 or complex64 supported (got %s)" % dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty(new_shape,data.dtype)

    # NOTE(review): the original sliced the shape with [::1] (a no-op);
    # confirm that a reversed shape ([::-1]) was not intended.
    if dtype == np.float32:
        im = OCLImage.empty(data.shape, dtype = np.float32)
    else:
        im = OCLImage.empty(data.shape, dtype = np.float32, num_channels=2)

    im.copy_buffer(d1_g)
    d2_g.copy_image_resampled(im)

    return d2_g.get()
Ejemplo n.º 16
0
def focus_field_cylindrical(shape,units,lam = .5,NA = .3, n0=1.,
                            n_integration_steps = 100):
    """computes focus field of cylindrical lens with given NA

    see:
    Colin J. R. Sheppard,
    Cylindrical lenses—focusing and imaging: a review

    Appl. Opt. 52, 538-545 (2013)

    Parameters
    ----------
    shape: (Nx,Ny,Nz)
    units: (dx,dy,dz)
    lam: wavelength; the kernel receives lam/n0
    NA: numerical aperture; the kernel receives alpha = arcsin(NA/n0)
    n0: refractive index
    n_integration_steps: compiled into the kernel as INT_STEPS

    The kernel is evaluated on a (Nz,Ny) grid only; the result is then
    repeated Nx times along a new last axis.

    return u,ex,ey,ez   with u being the intensity
    """

    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),build_options = str("-I %s -D INT_STEPS=%s"%(absPath("."),n_integration_steps)))

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA/n0)

    u_g = OCLArray.empty((Nz,Ny),np.float32)
    ex_g = OCLArray.empty((Nz,Ny),np.complex64)
    ey_g = OCLArray.empty((Nz,Ny),np.complex64)
    ez_g = OCLArray.empty((Nz,Ny),np.complex64)

    t = time.time()

    # the y and z extents are centred around zero
    p.run_kernel("psf_cylindrical",u_g.shape[::-1],None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(-dy*(Ny-1)/2.),np.float32(dy*(Ny-1)/2.),
                 np.float32(-dz*(Nz-1)/2.),np.float32(dz*(Nz-1)/2.),
                 np.float32(lam/n0),
                 np.float32(alpha))

    def _extrude(buf_g):
        # np.repeat already returns a fresh array, no extra copy needed
        return np.repeat(buf_g.get()[...,np.newaxis],Nx,axis=-1)

    u = _extrude(u_g)
    ex = _extrude(ex_g)
    ey = _extrude(ey_g)
    ez = _extrude(ez_g)

    # single-argument print: same output on Python 2 and 3
    print("time in secs: %s" % (time.time()-t))

    return u, ex, ey, ez
Ejemplo n.º 17
0
def test_parseval():
    """Compare the energy after GPU ffts with a numpy fft/ifft round trip.

    Returns (s1, s2): s1 holds the squared norm of the GPU buffer after
    Nz in-place forward ffts, s2 the squared norm after each of Nz numpy
    fftn/ifftn round trips.
    """

    from time import time
    Nx = 512
    Nz  = 10
    d = np.random.uniform(-1,1,(Nx,Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    s1, s2 = [],[]
    t = time()
    for i in range(Nz):
        # parenthesised print works on Python 2 and 3
        print(i)
        fft(d_g, inplace=True)

    # NOTE(review): this append sits outside the loop, so s1 has a single
    # entry while s2 has Nz entries -- confirm that this is intended.
    s1.append(np.sum(np.abs(d_g.get())**2))

    print(time()-t)

    for i in range(Nz):
        print(i)
        d = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(d).astype(np.complex64)
        s2.append(np.sum(np.abs(d)**2))

    return s1, s2
Ejemplo n.º 18
0
def bilateral3(data, size_filter, sigma_p, sigma_x = 10.):
    """bilateral filter

    Parameters
    ----------
    data: ndarray
        input; anything but float32 is cast to float32 (with a log message)
    size_filter: int
        passed to the kernel as int32
    sigma_p, sigma_x: float
        passed to the kernel as float32 (in the order sigma_x, sigma_p)

    Returns
    -------
        the filtered data as numpy array
    """

    dtype = data.dtype.type
    dtypes_kernels = {np.float32:"bilat3_float",}

    if dtype not in dtypes_kernels:
        logger.info("data type %s not supported yet (%s), casting to float:"%(dtype,dtypes_kernels.keys()))
        data = data.astype(np.float32)
        dtype = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    # parenthesised print works on Python 2 and 3
    print(img.shape)

    # NOTE(review): only the first two image dimensions are handed to the
    # kernel -- confirm against kernels/bilateral3.cl
    prog.run_kernel(dtypes_kernels[dtype],
                    img.shape,None,
                    img,res.data,
                    np.int32(img.shape[0]),np.int32(img.shape[1]),
                    np.int32(size_filter),np.float32(sigma_x),np.float32(sigma_p))

    return res.get()
Ejemplo n.º 19
0
def _fft_convolve_gpu(data_g, h_g, res_g=None,
                      plan=None, inplace=False,
                      kernel_is_fft=False):
    """FFT based convolution of two complex64 GPU buffers of equal shape.

    The result is written to ``data_g`` if ``inplace`` is set, otherwise
    to ``res_g`` (allocated when None) after ``data_g`` was copied into it.
    If ``kernel_is_fft`` is set, ``h_g`` is used as is; otherwise a copy of
    it is transformed first. Returns the result buffer.

    Raises ValueError if the shapes of ``data_g`` and ``h_g`` differ.
    """
    multiply_into_first = OCLElementwiseKernel(
        "cfloat_t *a, cfloat_t * b",
        "a[i] = cfloat_mul(b[i],a[i])", "mult")

    # kept from the original: the returned device handle itself is not used
    get_device()

    assert_bufs_type(np.complex64, data_g, h_g)

    if data_g.shape != h_g.shape:
        raise ValueError("data and kernel must have same size! %s vs %s " % (data_g.shape, h_g.shape))

    if plan is None:
        plan = fft_plan(data_g.shape)

    # choose the output buffer
    if inplace:
        out_g = data_g
    else:
        out_g = OCLArray.empty(data_g.shape, data_g.dtype) if res_g is None else res_g
        out_g.copy_buffer(data_g)

    # fourier transform of the kernel
    if kernel_is_fft:
        kern_g = h_g
    else:
        kern_g = OCLArray.empty(h_g.shape, h_g.dtype)
        kern_g.copy_buffer(h_g)
        fft(kern_g, inplace=True, plan=plan)

    # forward transform, multiply in fourier domain, transform back
    fft(out_g, inplace=True, plan=plan)
    multiply_into_first(out_g, kern_g)
    fft(out_g, inplace=True, inverse=True, plan=plan)

    return out_g
Ejemplo n.º 20
0
def _ocl_fft_gpu(plan, ocl_arr, res_arr=None, inverse=False, batch=1):
    """Execute ``plan`` on a complex64 GPU buffer, out of place.

    The output goes to ``res_arr``; a new complex64 buffer with the shape
    of ``ocl_arr`` is allocated when ``res_arr`` is None. Returns the
    output buffer.
    """
    assert_bufs_type(np.complex64, ocl_arr)

    out_arr = res_arr
    if out_arr is None:
        out_arr = OCLArray.empty(ocl_arr.shape, np.complex64)

    plan.execute(ocl_arr.data, out_arr.data, inverse=inverse, batch=batch)
    return out_arr
Ejemplo n.º 21
0
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None):
    """Separable 2d convolution of float32 GPU buffers.

    Runs ``conv_sep2_x`` from ``data_g`` into a temporary buffer and then
    ``conv_sep2_y`` from the temporary into ``res_g`` (allocated when
    None). Each kernel also receives the length of its 1d filter as int32.
    Returns ``res_g``.
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    sep_prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = hy_g.shape[0]
    len_x = hx_g.shape[0]

    scratch_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]
    passes = (
        ("conv_sep2_x", data_g, hx_g, scratch_g, len_x),
        ("conv_sep2_y", scratch_g, hy_g, res_g, len_y),
    )
    for kernel_name, src_g, filt_g, dst_g, filt_len in passes:
        sep_prog.run_kernel(kernel_name, global_size, None,
                            src_g.data, filt_g.data, dst_g.data,
                            np.int32(filt_len))

    return res_g
Ejemplo n.º 22
0
def test_2d():
    """Run ``convolve_sep2`` on a 100x100 impulse image, numpy and GPU path.

    Returns the result for numpy inputs and the result for OCLArray
    inputs (fetched back as numpy array).
    """
    import time

    image = np.zeros((100,)*2, np.float32)
    image[50, 50] = 1.

    # normalised box kernels of length 5 and 13
    kernel_x = 1./5*np.ones(5)
    kernel_y = 1./13*np.ones(13)

    out = convolve_sep2(image, kernel_x, kernel_y)

    image_g, kernel_x_g, kernel_y_g = [
        OCLArray.from_array(arr.astype(np.float32))
        for arr in (image, kernel_x, kernel_y)
    ]

    out_g = convolve_sep2(image_g, kernel_x_g, kernel_y_g)

    return out, out_g.get()
Ejemplo n.º 23
0
def _deconv_rl_gpu_fft(data_g, h_g, Niter = 10):
    """ 
    using fft_convolve

    """


    if data_g.shape != h_g.shape:
        raise ValueError("data and h have to be same shape")

        
    #set up some gpu buffers
    u_g = OCLArray.empty(data_g.shape,np.complex64)

    u_g.copy_buffer(data_g)
    
    tmp_g = OCLArray.empty(data_g.shape,np.complex64)

    #fix this
    hflip_g = OCLArray.from_array((h_g.get()[::-1,::-1]).copy())

    plan = fft_plan(data_g.shape)

    #transform psf
    fft(h_g,inplace = True)
    fft(hflip_g,inplace = True)

    for i in range(Niter):
        print i
        fft_convolve(u_g, h_g,
                     res_g = tmp_g,
                     kernel_is_fft = True)


        _complex_divide_inplace(data_g,tmp_g)

        
        fft_convolve(tmp_g,hflip_g,
                     inplace = True,
                     kernel_is_fft = True)

        _complex_multiply_inplace(u_g,tmp_g)

    return u_g
Ejemplo n.º 24
0
def get_gpu(N=256, niter=100, sig=1.):
    """Track the error of repeated GPU fft / inverse fft round trips.

    A seeded (seed 0) normal random N x N complex64 array is transformed
    forth and back ``niter`` times. After every round trip the maximal
    absolute deviation from the initial array, relative to its maximal
    absolute value, is recorded. Returns these values as a numpy array.
    """
    np.random.seed(0)
    reference = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    start = (1.*reference.copy()).astype(np.complex64)

    spectrum_g = OCLArray.empty_like(start)
    field_g = OCLArray.from_array(start)
    plan = fft_plan((N, N), fast_math=False)

    def _round_trip_error():
        # forward into spectrum_g, inverse back into field_g
        fft(field_g, res_g=spectrum_g, plan=plan)
        fft(spectrum_g, res_g=field_g, inverse=True, plan=plan)
        deviation = np.amax(np.abs(reference - field_g.get()))
        return deviation/np.amax(np.abs(reference))

    return np.array([_round_trip_error() for _ in range(niter)])
Ejemplo n.º 25
0
def _ocl_fft_numpy(plan, arr, inverse=False, batch=1, fast_math=True):
    """FFT of a numpy array on the GPU using the given plan.

    The array is uploaded as complex64 (a log message is emitted when that
    requires a conversion), transformed in place on the device and fetched
    back. ``fast_math`` is accepted but not used here.
    """
    needs_conversion = arr.dtype != np.complex64
    if needs_conversion:
        logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    buf_g = OCLArray.from_array(arr.astype(np.complex64, copy=False))
    _ocl_fft_gpu_inplace(plan, buf_g, inverse=inverse, batch=batch)
    return buf_g.get()
Ejemplo n.º 26
0
def time_gpu(dshape, niter=100, fast_math=False):
    """Print the average time of an in-place GPU fft on a complex64 buffer.

    :param dshape: shape of the (uninitialised) buffer to transform
    :param niter: number of ffts to average over
    :param fast_math: forwarded to fft_plan

    The queue is finished before the clock starts and before it stops.
    """
    d_g = OCLArray.empty(dshape, np.complex64)
    get_device().queue.finish()
    plan = fft_plan(dshape, fast_math=fast_math)
    t = time()
    # range/print() instead of the Python-2-only xrange/print statement
    for _ in range(niter):
        fft(d_g, inplace=True, plan=plan)
    get_device().queue.finish()
    t = (time()-t)/niter
    print("GPU (fast_math = %s)\t%s\t\t%.2f ms"%(fast_math, dshape, 1000.*t))
Ejemplo n.º 27
0
def gpu_structure(data):
    """Run the ``structure`` OpenCL kernel on an image and return the result.

    ``data`` is uploaded as float32; the output buffer has shape
    (data.shape[0], data.shape[1], 2) and is returned as numpy array.
    """
    src_g = OCLArray.from_array(data.astype(float32))

    out_shape = (data.shape[0], data.shape[1], 2)
    dst_g = OCLArray.empty(out_shape, float32)

    structure_prog = OCLProgram("./OpenCL/gpu_kernels/gpu_structure.cl")

    # global size is the reversed input shape, local size is left to None
    global_size = src_g.shape[::-1]
    structure_prog.run_kernel("structure", global_size, None,
                              src_g.data, dst_g.data)

    return dst_g.get()
Ejemplo n.º 28
0
def gpu_mean(data, Nx=10, Ny=10):
    """Run the ``mean`` OpenCL kernel on an image and return the result.

    ``data`` is uploaded as float32; ``Nx`` and ``Ny`` are handed to the
    kernel as int32. The output has the shape of ``data``.
    """
    src_g = OCLArray.from_array(data.astype(float32))
    dst_g = OCLArray.empty(data.shape, float32)

    mean_prog = OCLProgram("./OpenCL/gpu_kernels/gpu_mean.cl")

    # global size is the reversed input shape, local size is left to None
    global_size = src_g.shape[::-1]
    window = (int32(Nx), int32(Ny))
    mean_prog.run_kernel("mean", global_size, None,
                         src_g.data, dst_g.data,
                         *window)

    return dst_g.get()
Ejemplo n.º 29
0
def _ocl_fft_numpy(arr, inverse=False, plan=None):
    """FFT of a numpy array on the GPU.

    A plan for ``arr.shape`` on the current device queue is created when
    none is given. The array is uploaded as complex64 (a log message is
    emitted when that requires a conversion), transformed in place on the
    device and fetched back.
    """
    if plan is None:
        plan = Plan(arr.shape, queue=get_device().queue)

    needs_conversion = arr.dtype != np.complex64
    if needs_conversion:
        logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    buf_g = OCLArray.from_array(arr.astype(np.complex64, copy=False))
    _ocl_fft_gpu_inplace(buf_g, inverse=inverse, plan=plan)
    return buf_g.get()
Ejemplo n.º 30
0
    def time_multi(N, nargs, niter =100):
        """Time a multi-output sum reduction over ``nargs`` float buffers.

        Input k is a buffer of N ones and is mapped with ``k*xk[i]``; each
        map expression gets its own one-element output buffer. Prints the
        results and the average time per call, and returns that time in
        seconds.
        """
        # range/print() instead of the Python-2-only xrange/print statement
        map_exprs=["%s*x%s[i]"%(i,i) for i in range(nargs)]
        arguments = ",".join("__global float *x%s"%i for i in range(nargs))

        k = OCLReductionKernel2(np.float32,
                            neutral="0", reduce_expr="a+b",
                            map_exprs=map_exprs,
                            arguments=arguments)

        ins = [OCLArray.from_array(np.ones(N,np.float32)) for _ in map_exprs]
        outs = [OCLArray.empty(1,np.float32) for _ in map_exprs]

        from time import time
        t = time()
        for _ in range(niter):
            k(*ins, outs = outs)
        # wait for the queue before stopping the clock
        get_device().queue.finish()
        t = (time()-t)/niter
        print("multi reduction: result = %s" % ([float(out.get()) for out in outs],))
        print("multi reduction:\t\t%.2f ms"%(1000*t))
        return t
Ejemplo n.º 31
0
            "interpolation = '%s' not defined ,valid: %s" % (interpolation, list(interpolation_defines.keys())))

    if not mode in mode_defines:
        raise KeyError("mode = '%s' not defined ,valid: %s" % (mode, list(mode_defines.keys())))

    # reorder matrix, such that x,y,z -> z,y,x (as the kernel is assuming that)

   if output_shape is None:
        output_shape = data.shape

    if isinstance(data, OCLImage):
        d_im = data
    else:
        d_im = OCLImage.from_array(data.astype(np.float32, copy=False))
    if res_g is None:
        res_g = OCLArray.empty(output_shape, np.float32)
        
    mat_inv_g = OCLArray.from_array(mat.astype(np.float32, copy=False))

    prog = OCLProgram(abspath("kernels/affine.cl")
                      , build_options=interpolation_defines[interpolation] +
                                      mode_defines[mode])

    prog.run_kernel("affine3",
                    output_shape[::-1], None,
                    d_im, res_g.data, mat_inv_g.data)

    if isinstance(data, OCLImage):
        return res_g
    else:
        return res_g.get()
Ejemplo n.º 32
0
    def _propagate_core(self,
                        u0=None,
                        dn_ind_start=0,
                        dn_ind_end=1,
                        dn_ind_offset=0,
                        return_comp="field",
                        return_shape="full",
                        free_prop=False,
                        dn_mean_method="none",
                        **kwargs):
        """
        the core propagation method, the refractive index dn is
        assumed to be already residing in gpu memory
        if u0 is None, assumes that the initial field
        to be residing in self._buf_plane

        NOTE(review): u0 is currently not referenced by the active code
        (the upload below is commented out), so the field is always taken
        from self._buf_plane -- confirm that callers fill it beforehand.

        parameters:
            dn_ind_start, dn_ind_end:
                Nz = dn_ind_end - dn_ind_start planes are produced,
                dn_ind_start has to be >= 0
            dn_ind_offset:
                additional offset used when indexing self.dn_mean
            return_comp in ["field", "intens"]
                selects the dtype of the result stack
                (Bpm3d._complex_type or Bpm3d._real_type)
            return_shape in ["last", "full"]
                "full" returns the stack of all planes,
                "last" only the content of self._buf_plane at the end
            free_prop = False | True
                if True (or if self.dn is None) the multiplication with dn
                is skipped
            dn_mean_method = "none", "global", "local"
            kwargs:
                accepted but not referenced in this method

        raises ValueError for an unknown return_comp or return_shape
        """

        # debug output of the selected mean method
        print("mean method: ", dn_mean_method)

        # without a dn there is nothing to multiply with
        free_prop = free_prop or (self.dn is None)

        if return_comp == "field":
            res_type = Bpm3d._complex_type
        elif return_comp == "intens":
            res_type = Bpm3d._real_type
        else:
            raise ValueError(return_comp)

        if not return_shape in ["last", "full"]:
            raise ValueError()

        Nx, Ny, _ = self.shape
        Nz = dn_ind_end - dn_ind_start

        assert dn_ind_start >= 0

        # if not u0 is None:
        #     print "huhu"
        #     self._buf_plane.write_array(u0.astype(np.complex64,copy=False))

        # the result stack is only needed (and allocated) for "full"
        if return_shape == "full":
            u = OCLArray.empty((Nz, Ny, Nx), dtype=res_type)

        # copy the first plane
        if return_shape == "full":
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # dn0 is the mean dn the current propagator was built with
        # (only updated by the "global" method below)
        dn0 = 0

        # "local": initialise the running sums with 1 and with dn_mean at
        # the first plane
        if dn_mean_method == "local" and not self.dn is None and not free_prop:
            self.intens_sum_g = OCLArray.from_array(
                np.ones(1, dtype=Bpm3d._real_type))
            self.intens_dn_sum_g = OCLArray.from_array(
                (self.dn_mean[dn_ind_start + dn_ind_offset] *
                 np.ones(1)).astype(dtype=Bpm3d._real_type))
            #self._fill_propagator_buf(self.n0, self.intens_dn_sum_g, self.intens_sum_g)

        self._fill_propagator(self.n0)

        # the first plane is already stored, so Nz - 1 steps remain
        for i in range(Nz - 1):

            # self.simul_z sub-steps per output plane
            for j in range(self.simul_z):

                # fft -> multiply with the propagator -> inverse fft
                fft(self._buf_plane, inplace=True, plan=self._plan)

                self._mult_complex(self._buf_plane, self._buf_H)

                fft(self._buf_plane,
                    inplace=True,
                    inverse=True,
                    plan=self._plan)

                if not free_prop:
                    #FIXME here we make  a slight error for the first time point, as we
                    #FIXME set dn0 first and the compute the new propagator
                    # the second argument is a fractional plane index
                    if dn_mean_method == "local":
                        self._mult_dn_local(
                            self._buf_plane,
                            (i + dn_ind_start + (j + 1.) / self.simul_z),
                            self.intens_sum_g, self.intens_dn_sum_g,
                            self.intens_g, self.intens_dn_g)

                    else:
                        self._mult_dn(self._buf_plane,
                                      (i + dn_ind_start +
                                       (j + 1.) / self.simul_z), dn0)

            # update the propagator for the next plane, if requested
            if not self.dn is None and not free_prop:
                if dn_mean_method == "local":
                    # reduce the per-pixel buffers into the two running sums
                    self._kernel_reduction(
                        self.intens_g,
                        self.intens_dn_g,
                        outs=[self.intens_sum_g, self.intens_dn_sum_g])

                    self._fill_propagator_buf(self.n0, self.intens_dn_sum_g,
                                              self.intens_sum_g)

                    #print(self.intens_dn_sum_g.get(), self.n0)
                    #print("mean dn: ",self.intens_dn_sum_g.get()/self.intens_sum_g.get())

                elif dn_mean_method == "global":
                    # only rebuild the propagator when the mean changed
                    if self.dn_mean[i + dn_ind_start + dn_ind_offset] != dn0:
                        dn0 = self.dn_mean[i + dn_ind_start + dn_ind_offset]
                        self._fill_propagator(self.n0 + dn0)

            # store plane i + 1 at element offset (i + 1) * Nx * Ny
            # NOTE(review): this condition additionally checks simul_xy,
            # unlike the copy of the first plane above -- confirm
            if return_shape == "full":
                if self._is_subsampled and self.simul_xy != self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u,
                                        (i + 1) * (Nx * Ny))

        if return_shape == "full":
            return u.get()
        else:
            return self._buf_plane.get()
0
    k1 = OCLReductionKernel(np.float32,
                            neutral="0",
                            reduce_expr="a+b",
                            map_expr="x[i]",
                            arguments="__global float *x")

    k2 = OCLMultiReductionKernel(
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_exprs=["y[i]*x[i]", "x[i]"],
        arguments="__global float *x, __global float *y")

    N = 512
    a = OCLArray.from_array(np.ones((N, N), np.float32))
    b = OCLArray.from_array(2. * np.ones((N, N), np.float32))

    o1 = OCLArray.empty((), np.float32)
    o2 = OCLArray.empty((), np.float32)

    from time import time
    t = time()
    for _ in range(400):
        k1(a)
        k1(b)

    k1(a).get()
    k1(b).get()
    print(time() - t)
Ejemplo n.º 34
0
    # out = convolve(data,h)
    out = convolve(data[0,...],h[0,...])
    out = convolve(data[0,0,...],h[0,0,...])
    


if __name__ == '__main__':

    # test_convolve()

    N = 100
    ndim = 3

    # axis k has length N + 3*k, i.e. all axes differ in size
    d = np.zeros([N+3*i for i in range(ndim)],np.float32)
    h = np.ones((11,)*ndim,np.float32)

    # number of random points per axis; integer division keeps this an int
    # on Python 3 as well (randint rejects a float size)
    n_points = int(np.prod(d.shape)**(1./d.ndim))//10
    ind = [np.random.randint(0,n,n_points) for n in d.shape]
    d[tuple(ind)] = 1.
    # normalise the box kernel
    h *= 1./np.sum(h)

    # numpy input path
    out1 = convolve(d,h)

    # GPU buffer input path
    d_g = OCLArray.from_array(d)
    h_g = OCLArray.from_array(h)

    res_g = convolve(d_g,h_g)

    out2 = res_g.get()
Ejemplo n.º 35
0
def map_coordinates(data,
                    coordinates,
                    interpolation="linear",
                    mode='constant'):
    """
    Map data to new coordinates by interpolation.

    The array of coordinates is used to find, for each point in the output,
    the corresponding coordinates in the input.

    Intended to correspond to scipy.ndimage.map_coordinates.

    Parameters
    ----------
    data: ndarray
        2d or 3d input array; its dtype has to be a key of
        cl_buffer_datatype_dict
    coordinates: array_like
        sampling positions of shape (data.ndim, m); fractional positions
        are kept and passed to the kernel as float32
    interpolation: str
        either "linear" or "nearest"
    mode: str
        boundary mode, one of "constant", "wrap", "edge"

    Returns
    -------
    res: ndarray
        1d array of length m with the dtype of data

    Raises
    ------
    ValueError
        if data is not a 2d/3d ndarray or coordinates is not of shape
        (data.ndim, m)
    KeyError
        if interpolation, mode or the dtype of data is not supported
    """
    if not (isinstance(data, np.ndarray) and data.ndim in (2, 3)):
        raise ValueError("input data has to be a 2d or 3d array!")

    # Keep the coordinates as floats: casting to an integer type would drop
    # the fractional part and make the interpolation mode meaningless.
    coordinates = np.asarray(coordinates, np.float32)
    if not (coordinates.ndim == 2 and coordinates.shape[0] == data.ndim):
        raise ValueError("coordinate has to be of shape (data.ndim,m) ")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }

    mode_defines = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"]
    }

    if interpolation not in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    if mode not in mode_defines:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(mode_defines.keys())))

    if data.dtype.type not in cl_buffer_datatype_dict:
        raise KeyError(
            "dtype %s not supported yet (%s)" %
            (data.dtype.type, tuple(cl_buffer_datatype_dict.keys())))

    dtype_defines = [
        "-D", "DTYPE=%s" % cl_buffer_datatype_dict[data.dtype.type]
    ]

    d_im = OCLImage.from_array(data)
    coordinates_g = OCLArray.from_array(coordinates)
    res_g = OCLArray.empty(coordinates.shape[1], data.dtype)

    # sampler behaviour and output type are baked in at compile time
    prog = OCLProgram(abspath("kernels/map_coordinates.cl"),
                      build_options=interpolation_defines[interpolation] +
                      mode_defines[mode] + dtype_defines)

    kernel = "map_coordinates{ndim}".format(ndim=data.ndim)

    # one work item per requested coordinate
    prog.run_kernel(kernel, (coordinates.shape[-1], ), None, d_im, res_g.data,
                    coordinates_g.data)

    return res_g.get()
Ejemplo n.º 36
0
        # return data_g, tmp_g

        convolve(tmp_g, hflip_g, res_g=tmp2_g)
        _multiply_inplace(u_g, tmp2_g)

    return u_g


if __name__ == '__main__':
    # Manual test: blur a padded test image, add noise and run the GPU
    # Richardson-Lucy helper on it.

    # NOTE(review): scipy.misc.lena is not available in recent SciPy
    # releases - confirm which test image should be used instead.
    from scipy.misc import lena

    d = np.pad(lena(), ((50, ) * 2, ) * 2, mode="constant")

    # normalised 11x11 box kernel
    h = np.ones((11, ) * 2) / 121.
    # hpad = np.pad(h,((251,250),(251,250)),mode="constant")

    y = convolve(d, h)

    # additive uniform noise, 2% of the image maximum
    y += 0.02 * np.max(d) * np.random.uniform(0, 1, d.shape)

    # function-call form works on Python 2 and Python 3 alike
    print("start")

    # u = deconv_rl(y,h, 1)

    out = [
        r.get()
        for r in _deconv_rl_gpu_conv(OCLArray.from_array(y.astype(
            np.float32)), OCLArray.from_array(h.astype(np.float32)), 1)
    ]
Ejemplo n.º 37
0
def _max_filter_numpy(data, size=5):
    """Upload data as float32, run _max_filter_gpu on it and fetch the result.

    :param data: array providing ``astype`` (converted to float32 before upload)
    :param size: forwarded unchanged to _max_filter_gpu
    :return: whatever ``.get()`` of the filtered device array yields
    """
    as_float32 = data.astype(np.float32)
    filtered_g = _max_filter_gpu(OCLArray.from_array(as_float32), size=size)
    return filtered_g.get()
Ejemplo n.º 38
0
def focus_field_cylindrical(shape=(128, 128, 128),
                            units=(0.1, 0.1, 0.1),
                            lam=.5,
                            NA=.3,
                            n0=1.,
                            return_all_fields=False,
                            n_integration_steps=100):
    """calculates the focus field for a perfect, aberration free cylindrical lens after
    x polarized illumination via the vectorial debye diffraction integral (see [2]_).
    The pupil function is given by the numerical aperture NA

    The kernel is evaluated on a (Nz, Ny) grid only; the result is then
    repeated Nx times along a new trailing axis.

    Parameters
    ----------

    shape: Nx,Ny,Nz
        the shape of the geometry
    units: dx,dy,dz
        the pixel sizes in microns (dx is not used in the computation)
    lam: float
        the wavelength of light used in microns
    NA: float
        the numerical aperture of the lens
    n0: float
        the refractive index of the medium
    return_all_fields: boolean
        if True, returns u,ex,ey,ez where ex/ey/ez are the complex field components
    n_integration_steps: int
        number of integration steps to perform (passed to the kernel build
        as INT_STEPS)

    Returns
    -------
    u: ndarray
        the intensity of the focus field
    (u,ex,ey,ez): tuple(ndarray)
        the intensity of the focus field and the complex field components (if return_all_fields is True)

    Notes
    -----
    The elapsed time is printed to stdout on every call.

    Example
    -------

    >>> u, ex, ey, ez = focus_field_cylindrical((128,128,128), (0.1,0.1,.1), lam=.5, NA = .4, return_all_fields=True)

    References
    ----------

    .. [2] Colin J. R. Sheppard: Cylindrical lenses—focusing and imaging: a review, Appl. Opt. 52, 538-545 (2013)


    """

    kernel_file = absPath("kernels/psf_cylindrical.cl")
    build_options = [
        "-I",
        absPath("kernels"), "-D",
        "INT_STEPS=%s" % n_integration_steps
    ]
    p = OCLProgram(kernel_file, build_options=build_options)

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    # half opening angle derived from the numerical aperture
    alpha = np.arcsin(NA / n0)

    plane_shape = (Nz, Ny)
    u_g = OCLArray.empty(plane_shape, np.float32)
    ex_g, ey_g, ez_g = (OCLArray.empty(plane_shape, np.complex64)
                        for _ in range(3))

    t = time.time()

    # coordinates of the first and last sample along y and z
    y_lo = np.float32(-dy * (Ny // 2))
    y_hi = np.float32((Ny - 1 - Ny // 2) * dy)
    z_lo = np.float32(-dz * (Nz // 2))
    z_hi = np.float32((Nz - 1 - Nz // 2) * dz)

    p.run_kernel("psf_cylindrical", u_g.shape[::-1], None, ex_g.data,
                 ey_g.data, ez_g.data, u_g.data, y_lo, y_hi, z_lo, z_hi,
                 np.float32(lam / n0), np.float32(alpha))

    def _extrude(buf_g):
        # fetch from the device and replicate along a new last axis
        return np.array(np.repeat(buf_g.get()[..., np.newaxis], Nx, axis=-1))

    u, ex, ey, ez = [_extrude(buf_g) for buf_g in (u_g, ex_g, ey_g, ez_g)]

    print("time in secs:", time.time() - t)

    if return_all_fields:
        return u, ex, ey, ez
    return u
Ejemplo n.º 39
0
def geometric_transform(data,
                        mapping="c0,c1",
                        output_shape=None,
                        mode='constant',
                        interpolation="linear"):
    """
    Apply an arbitrary geometric transform.

    The given mapping is used to find, for each point in the output, the
    corresponding coordinates in the input. The value of the input at those
    coordinates is read through an image sampler configured by
    ``interpolation`` and ``mode``.

    Parameters
    ----------
    data: ndarray
        2d or 3d input array; its dtype has to be a key of
        cl_buffer_datatype_dict and one of float32, uint8, uint16, int32
    mapping: str
        comma separated expressions, one per input axis, that are inserted
        into the kernel template (in reversed order). The identity is
        "c0,c1" for 2d and "c0,c1,c2" for 3d data; note that the default
        only provides two components.
    output_shape: tuple, optional
        shape of the result; defaults to the shape of data
    mode: str
        boundary mode, one of "constant", "wrap", "edge"
    interpolation: str
        either "linear" or "nearest"

    Returns
    -------
    res: ndarray
        transformed array of shape output_shape and the dtype of data

    Raises
    ------
    ValueError
        if data is not a 2d or 3d ndarray
    KeyError
        if interpolation, mode or the dtype of data is not supported
    """

    if not (isinstance(data, np.ndarray) and data.ndim in (2, 3)):
        raise ValueError("input data has to be a 2d or 3d array!")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }

    mode_defines = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"]
    }

    if interpolation not in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    if mode not in mode_defines:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(mode_defines.keys())))

    if data.dtype.type not in cl_buffer_datatype_dict:
        raise KeyError(
            "dtype %s not supported yet (%s)" %
            (data.dtype.type, tuple(cl_buffer_datatype_dict.keys())))

    dtype_defines = [
        "-D",
        "DTYPE={type}".format(type=cl_buffer_datatype_dict[data.dtype.type])
    ]

    image_functions = {
        np.float32: "read_imagef",
        np.uint8: "read_imageui",
        np.uint16: "read_imageui",
        np.int32: "read_imagei"
    }

    image_read_defines = [
        "-D", "READ_IMAGE=%s" % image_functions[data.dtype.type]
    ]

    # Without an explicit output shape the result has the shape of the
    # input (the former tuple(None) raised a TypeError here).
    if output_shape is None:
        output_shape = data.shape
    output_shape = tuple(output_shape)

    with open(abspath("kernels/geometric_transform.cl"), "r") as f:
        tpl = Template(f.read())

    # identity mappings for both kernels; only the one matching data.ndim
    # is replaced by the user supplied mapping (components reversed)
    mappings = {"FUNC2": "c1,c0", "FUNC3": "c2,c1,c0"}

    mappings["FUNC%d" % data.ndim] = ",".join(reversed(mapping.split(",")))

    rendered = tpl.render(**mappings)

    d_im = OCLImage.from_array(data)
    res_g = OCLArray.empty(output_shape, data.dtype)

    prog = OCLProgram(src_str=rendered,
                      build_options=interpolation_defines[interpolation] +
                      mode_defines[mode] + dtype_defines + image_read_defines)

    kernel = "geometric_transform{ndim}".format(ndim=data.ndim)

    # one work item per output element, fastest axis first
    prog.run_kernel(kernel, output_shape[::-1], None, d_im, res_g.data)

    return res_g.get()
Ejemplo n.º 40
0
    _ocl_fft_gpu_inplace(plan, ocl_arr, inverse=inverse)

    return ocl_arr.get()


def _ocl_fft_gpu_inplace(plan, ocl_arr, inverse=False):
    """Call ``plan`` with ``ocl_arr`` as both input and output.

    ``ocl_arr`` is first checked with assert_bufs_type against complex64.
    Nothing is returned.
    """
    assert_bufs_type(np.complex64, ocl_arr)
    src = dst = ocl_arr
    plan(src, dst, inverse=inverse)


def _ocl_fft_gpu(plan, ocl_arr, res_arr=None, inverse=False):
    """Call ``plan`` with ``ocl_arr`` as input and a separate output array.

    ``ocl_arr`` is first checked with assert_bufs_type against complex64.
    If ``res_arr`` is None a new array is created with
    OCLArray.empty_like(ocl_arr). The output array is returned.
    """
    assert_bufs_type(np.complex64, ocl_arr)

    target = OCLArray.empty_like(ocl_arr) if res_arr is None else res_arr
    plan(ocl_arr, target, inverse=inverse)

    return target


if __name__ == '__main__':
    # Manual check: run the same plan once on a numpy array and once
    # in place on a device array.
    d = np.random.uniform(0, 1, (64, 64)).astype(np.complex64)
    b = OCLArray.from_array(d)

    plan = fft_plan(d.shape)

    # host array in, result assigned to d2
    d2 = fft(d, plan=plan)

    # device array, transformed in place
    fft(b, inplace=True, plan=plan)
Ejemplo n.º 41
0
def _convolve_spatial2(im,
                       hs,
                       mode="constant",
                       grid_dim=None,
                       pad_factor=2,
                       plan=None,
                       return_plan=False):
    """
    spatial varying convolution of an 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0


    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    further arguments:

    grid_dim - if given, it is used as (Gy,Gx) instead of hs.shape[:2]; hs is
        then uploaded as a single float32 array and the padded psf patches
        are filled on the device by the "fill_psf_grid2" kernel
        (NOTE(review): presumably hs then has the image shape (Ny,Nx) - confirm)
    pad_factor - every patch has the size of the next power of two of
        pad_factor * block size
    plan - fft plan to reuse; if None a plan for the shape
        (Gy,Gx,Npatch_y,Npatch_x) along the last two axes is created
    return_plan - if True, (res, plan) is returned instead of res

    no validation of the arguments is done here; an unknown mode raises a
    KeyError from the mode lookup
    """

    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]

    # translation of the boundary mode into the address mode define of the kernel
    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs

    # the size of each block within the grid
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(
        pad_factor * Nblock_x), _next_power_of_2(pad_factor * Nblock_y)

    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Gy, Gx, Npatch_y, Npatch_x), axes=(-2, -1))

    # start offsets of the blocks along x and y
    x0s = Nblock_x * np.arange(Gx)
    y0s = Nblock_y * np.arange(Gy)

    # one complex patch per grid cell
    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    # prepare psfs
    if grid_dim:
        # psfs are filled on the device, one kernel call per grid cell;
        # only the loop indices i,j are used here, not the offsets _x0/_y0
        h_g = OCLArray.zeros((Gy, Gx, Npatch_y, Npatch_x), np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy=False))
        for i, _x0 in enumerate(x0s):
            for j, _y0 in enumerate(y0s):
                prog.run_kernel(
                    "fill_psf_grid2", (Nblock_x, Nblock_y), None, tmp_g.data,
                    np.int32(Nx),
                    np.int32(i * Nblock_x), np.int32(j * Nblock_y), h_g.data,
                    np.int32(Npatch_x), np.int32(Npatch_y),
                    np.int32(-Nblock_x // 2 + Npatch_x // 2),
                    np.int32(-Nblock_y // 2 + Npatch_y // 2),
                    np.int32(i * Npatch_x * Npatch_y +
                             j * Gx * Npatch_x * Npatch_y))
    else:
        # psfs are padded to the patch size and shifted on the host
        hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)),
                             axes=(2, 3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    # prepare image
    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # fill every patch from the image; the patch is centered on its block and
    # the last argument is the flat offset of patch (j,i) inside patches_g
    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None, im_g,
                np.int32(_x0 + Nblock_x // 2 - Npatch_x // 2),
                np.int32(_y0 + Nblock_y // 2 - Npatch_y // 2), patches_g.data,
                np.int32(i * Npatch_x * Npatch_y +
                         j * Gx * Npatch_x * Npatch_y))

    #return np.abs(patches_g.get())
    # convolution: forward fft of patches and psfs, elementwise
    # multiplication, inverse fft of the patches
    fft(patches_g, inplace=True, plan=plan)
    fft(h_g, inplace=True, plan=plan)
    prog.run_kernel("mult_inplace", (Npatch_x * Npatch_y * Gx * Gy, ), None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse=True, plan=plan)

    logger.debug("Nblock_x: {}, Npatch_x: {}".format(Nblock_x, Npatch_x))
    #return np.abs(patches_g.get())
    # accumulate
    # NOTE(review): res_g is allocated uninitialised; this assumes that the
    # "interpolate2" calls below write every element - confirm against the kernel
    res_g = OCLArray.empty(im.shape, np.float32)

    # (Gy+1) x (Gx+1) kernel calls, each with a block sized global range
    for j in range(Gy + 1):
        for i in range(Gx + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y),
                            None, patches_g.data, res_g.data, np.int32(i),
                            np.int32(j), np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 42
0
#     dtype = d_g.dtype.type
#
#     if not isinstance(d_g, OCLArray):
#         raise ValueError("only works on  OCLArrays")
#
#     if not dtype in dtype_kernel_name.keys():
#         raise NotImplementedError("only works for float32 or complex64")
#
#     if not np.all([n%2==0 for n in d_g.shape]):
#         raise NotImplementedError("only works on even length arryas")
#
#     prog = OCLProgram(abspath("kernels/fftshift.cl"))
#     prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None,
#                     d_g.data, d_g.data,
#                     np.int32(Nx), np.int32(Ny))

    # return d_g

if __name__ == '__main__':
    # Manual check: fftshift of a ramp volume with a constant box in the middle.

    Nx, Ny, Nz = (256, ) * 3

    d = np.linspace(0, 1, Nx * Ny * Nz)
    d = d.reshape(Nz, Ny, Nx).astype(np.float32)

    # box of 60 x 40 x 40 (z,y,x) voxels around the center
    d[Nz // 2 - 30:Nz // 2 + 30,
      Ny // 2 - 20:Ny // 2 + 20,
      Nx // 2 - 20:Nx // 2 + 20] = 2.

    d_g = OCLArray.from_array(d)
    out_g = OCLArray.empty_like(d)

    out = fftshift(d, axes=(0, 1, 2))
Ejemplo n.º 43
0
def scale(data, scale=(1., 1., 1.), interpolation="linear"):
    """
    returns a interpolated, scaled version of data

    the output shape is scaled too.

    Parameters
    ----------
    data: ndarray
        3d input array of dtype uint8, uint16 or float32
    scale: float, tuple
        scaling factor along each axis (x,y,z); a scalar is used for all
        three axes
    interpolation: str
        either "nearest" or "linear"

    Returns
    -------
        scaled output with the dtype of data and the shape given by
        _scale_shape(data.shape, scale)

    Raises
    ------
    ValueError
        if data is not a 3d ndarray, its dtype is not supported or scale
        does not have exactly three entries
    KeyError
        if interpolation is not supported
    """

    if not (isinstance(data, np.ndarray) and data.ndim == 3):
        raise ValueError("input data has to be a 3d array!")

    interpolation_defines = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }

    if interpolation not in interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(interpolation_defines.keys())))

    # NOTE(review): uint16 is mapped to the signed type name "short" -
    # confirm against the kernel whether "ushort" is intended.
    options_types = {
        np.uint8: ["-D", "TYPENAME=uchar", "-D", "READ_IMAGE=read_imageui"],
        np.uint16: ["-D", "TYPENAME=short", "-D", "READ_IMAGE=read_imageui"],
        np.float32: ["-D", "TYPENAME=float", "-D", "READ_IMAGE=read_imagef"],
    }

    dtype = data.dtype.type

    if dtype not in options_types:
        raise ValueError("type %s not supported! Available: %s" %
                         (dtype, str(list(options_types.keys()))))

    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = (scale, ) * 3

    if len(scale) != 3:
        # Wrap in a 1-tuple: formatting a tuple of the wrong length directly
        # with a single %s raises a TypeError instead of this ValueError.
        raise ValueError("scale = %s misformed" % (scale, ))

    d_im = OCLImage.from_array(data)

    nshape = _scale_shape(data.shape, scale)

    res_g = OCLArray.empty(nshape, dtype)

    prog = OCLProgram(abspath("kernels/scale.cl"),
                      build_options=interpolation_defines[interpolation] +
                      options_types[dtype])

    # one work item per output voxel, fastest axis first
    prog.run_kernel("scale", res_g.shape[::-1], None, d_im, res_g.data)

    return res_g.get()
Ejemplo n.º 44
0
    def _propagate(self,
                   u0=None,
                   offset=0,
                   return_comp="field",
                   return_shape="full",
                   free_prop=False,
                   slow_mean=False,
                   **kwargs):
        """
        Propagate an input plane step by step through the volume.

        :param u0: input plane, cast to complex64; if None, self.u0_plane() is used
        :param offset: z index to start from, has to satisfy 0 <= offset < Nz-1
        :param return_comp: "field" or "intens", selects the dtype of the result
            buffer (Bpm3d._complex_type or Bpm3d._real_type)
        :param return_shape: "full" returns all Nz-offset planes, "last" returns
            the content of the working plane buffer after the last step
        :param free_prop: if True (or if self.dn is None) the refractive index
            step (_mult_dn) and the propagator updates are skipped
        :param slow_mean: if True, the mean refractive index offset of every
            plane is recomputed from self.dn and the current plane; only
            implemented for return_shape == "last"
        :param kwargs: accepted but not used in this method

        :raises ValueError: for an unknown return_comp or return_shape
        :raises NotImplementedError: for slow_mean together with return_shape == "full"

        kwargs:
            return_comp in ["field", "intens"]
            return_shape in ["last", "full"]
            free_prop = False | True
        """

        free_prop = free_prop or (self.dn is None)

        if return_comp == "field":
            res_type = Bpm3d._complex_type
        elif return_comp == "intens":
            res_type = Bpm3d._real_type
        else:
            raise ValueError(return_comp)

        if not return_shape in ["last", "full"]:
            raise ValueError()

        if u0 is None:
            u0 = self.u0_plane()

        u0 = u0.astype(np.complex64, copy=False)

        Nx, Ny, Nz = self.shape

        assert offset >= 0 and offset < (Nz - 1)

        # result volume is only allocated when all planes are requested
        if return_shape == "full":
            u = OCLArray.empty((Nz - offset, Ny, Nx), dtype=res_type)

        # the working plane buffer holds the current field
        self._buf_plane.write_array(u0)

        # copy the first plane
        # NOTE(review): this branch tests only self._is_subsampled, while the
        # copy at the end of the loop additionally tests
        # self.simul_xy != self.shape[:2] - confirm that this is intended
        if return_shape == "full":
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # refractive index offset the propagator is currently filled for
        dn0 = 0

        for i in range(Nz - 1 - offset):
            if not self.dn is None and not free_prop:
                if slow_mean:
                    if return_shape == "full":
                        raise NotImplementedError()
                    else:
                        # fetch the current plane and use it as weight for the
                        # mean of |dn| of this slice
                        # NOTE(review): self.dn is indexed with i here, while
                        # dn_mean below is indexed with i + offset - confirm
                        tmp = OCLArray.empty((1, Ny, Nx), dtype=res_type)
                        if self._is_subsampled:
                            self._img_xy.copy_buffer(self._buf_plane)
                            self._copy_down_img(self._img_xy, tmp, 0)
                        else:
                            self._copy_down_buf(self._buf_plane, tmp, 0)

                        dn0 = np.sum(
                            np.abs(self.dn[i]) *
                            tmp.get()) / np.sum(np.abs(self.dn[i]) + 1.e-10)

                        self._fill_propagator(self.n0 + dn0)
                else:
                    # refill the propagator only if the precomputed mean changed
                    if self.dn_mean[i + offset] != dn0:
                        dn0 = self.dn_mean[i + offset]
                        self._fill_propagator(self.n0 + dn0)

            # self.simul_z sub steps per plane
            for j in range(self.simul_z):

                # fft, multiplication with the propagator buffer, inverse fft
                fft(self._buf_plane, inplace=True, plan=self._plan)
                self._mult_complex(self._buf_plane, self._buf_H)
                fft(self._buf_plane,
                    inplace=True,
                    inverse=True,
                    plan=self._plan)
                if not free_prop:
                    # fractional z position of the sub step and the current dn0
                    self._mult_dn(self._buf_plane,
                                  (i + offset + (j + 1.) / self.simul_z), dn0)

            # store the plane at element offset (i+1)*Nx*Ny of the result
            if return_shape == "full":
                if self._is_subsampled and self.simul_xy != self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u,
                                        (i + 1) * (Nx * Ny))

        # NOTE(review): for return_shape == "last" the plane buffer is returned
        # as is, res_type is not applied on this path - confirm
        if return_shape == "full":
            return u.get()
        else:
            return self._buf_plane.get()
Ejemplo n.º 45
0
def _gaussian_np(data, sigma,  normalize=True, truncate = 4.0):
    """Upload data as float32, run _gaussian_buf on it and fetch the result.

    sigma, truncate and normalize are forwarded unchanged to _gaussian_buf.
    """
    as_float32 = data.astype(np.float32, copy=False)
    d_g = OCLArray.from_array(as_float32)

    filtered_g = _gaussian_buf(d_g,
                               sigma,
                               truncate=truncate,
                               normalize=normalize)
    return filtered_g.get()
Ejemplo n.º 46
0
def buffer_create_write(data):
    """Write data into a fresh OCLArray and assert that reading it back matches.

    The array is created with the shape and dtype of data; the comparison
    uses np.allclose.
    """
    device_buf = OCLArray.empty(data.shape, data.dtype)
    device_buf.write_array(data)
    round_trip = device_buf.get()
    assert np.allclose(data, round_trip)
def convolve_spatial3(im,
                      hs,
                      mode="constant",
                      plan=None,
                      return_plan=False,
                      pad_factor=2):
    """
    spatial varying convolution of an 3d image with a 3d grid of psfs

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    the input image im is subdivided into (Gx,Gy,Gz) blocks
    hs[k,j,i] is the psf at the center of each block (i,j,k)

    as of now each image dimension has to be divisble by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0
    Nz % Gz == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    further arguments:

    plan - fft plan to reuse; if None a plan for the patch shape is created
    return_plan - if True, (res, plan) is returned instead of res
    pad_factor - every patch has the size of the next power of two of
        pad_factor * block size

    raises a ValueError if im is not 3d or hs is not 6d and a
    NotImplementedError if the image shape is not divisible by the grid shape
    """
    if im.ndim != 3 or hs.ndim != 6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:3])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy  = %s !" %
            (str(hs.shape[:3])))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])

    # the size of each block within the grid
    # (floor division: the sizes are used as kernel global sizes and have to
    # stay integers on Python 3, divisibility was checked above)
    Nblocks = [n // g for n, g in zip(Ns, Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple(next_power_of_2(pad_factor * nb) for nb in Nblocks)

    print(hs.shape)
    # pad every psf to the patch size and shift it along the psf axes
    hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start offsets of the blocks along z, y, x
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    print(Nblocks)
    # this loops over all i,j,k
    # every patch is centered on its block; the last argument is the flat
    # offset of patch (k,j,i) inside patches_g
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel(
            "fill_patch3", Npatchs[::-1], None, im_g,
            np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
            np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
            np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2), patches_g.data,
            np.int32(i * np.prod(Npatchs) + j * Gs[2] * np.prod(Npatchs) +
                     k * Gs[2] * Gs[1] * np.prod(Npatchs)))

    print(patches_g.shape, h_g.shape)

    # convolution: forward fft of patches and psfs, elementwise
    # multiplication, inverse fft of the patches
    fft(patches_g, inplace=True, batch=np.prod(Gs), plan=plan)
    fft(h_g, inplace=True, batch=np.prod(Gs), plan=plan)
    prog.run_kernel("mult_inplace", (np.prod(Npatchs) * np.prod(Gs), ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=np.prod(Gs), plan=plan)

    #return patches_g.get()
    #accumulate
    res_g = OCLArray.zeros(im.shape, np.float32)

    # (Gz+1) x (Gy+1) x (Gx+1) kernel calls with a block sized global range
    for k, j, i in product(*[list(range(g + 1)) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None, patches_g.data,
                        res_g.data, np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 48
0
                else:
                    return results
            else:
                stage_inf = self.stage_2_inf
                #args = tuple(results)+stage1_args
                args = (results[0], ) + stage1_args


if __name__ == '__main__':
    # Manual comparison of the library reduction kernel with TestKernel on
    # an array of ones.
    from gputools import OCLArray, OCLReductionKernel

    k1 = OCLReductionKernel(
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_expr="x[i]",
        arguments="__global float *x",
    )

    k2 = TestKernel(
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_exprs=["x[i]"],
        arguments="__global float *x",
    )

    # the 2d array is replaced right away, only the 1d array is reduced
    a = OCLArray.from_array(np.ones((256, 256), np.float32))
    a = OCLArray.from_array(np.ones(256, np.float32))
    #res = OCLArray.empty(256,np.float32)

    for kernel in (k1, k2):
        print(kernel(a))
def convolve_spatial2(im, hs, mode="constant", plan=None, return_plan=False):
    """
    spatial varying convolution of an 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisble by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    further arguments:

    plan - fft plan to reuse; if None a plan for the patch shape is created
    return_plan - if True, (res, plan) is returned instead of res

    raises a ValueError if im is not 2d or hs is not 4d and a
    NotImplementedError if the image shape is not divisible by the grid shape
    """

    if im.ndim != 2 or hs.ndim != 4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:2])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy  = %s shape mismatch"
            % (str(hs.shape[:2])))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]

    # the size of each block within the grid
    # (floor division: the sizes are used as kernel global sizes and have to
    # stay integers on Python 3, divisibility was checked above)
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = next_power_of_2(3 * Nblock_x), next_power_of_2(
        3 * Nblock_y)
    #Npatch_x, Npatch_y = _next_power_of_2(2*Nblock_x), _next_power_of_2(2*Nblock_y)

    print(Nblock_x, Npatch_x)

    # pad every psf to the patch size and shift it along the psf axes
    hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)),
                         axes=(2, 3))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y, Npatch_x))

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start offsets of the blocks along x and y
    x0s = Nblock_x * np.arange(Gx)
    y0s = Nblock_y * np.arange(Gy)

    print(x0s)

    # every patch is centered on its block; the last argument is the flat
    # offset of patch (j,i) inside patches_g
    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None, im_g,
                np.int32(_x0 + Nblock_x // 2 - Npatch_x // 2),
                np.int32(_y0 + Nblock_y // 2 - Npatch_y // 2), patches_g.data,
                np.int32(i * Npatch_x * Npatch_y +
                         j * Gx * Npatch_x * Npatch_y))

    # convolution: forward fft of patches and psfs, elementwise
    # multiplication, inverse fft of the patches
    fft(patches_g, inplace=True, batch=Gx * Gy, plan=plan)
    fft(h_g, inplace=True, batch=Gx * Gy, plan=plan)
    prog.run_kernel("mult_inplace", (Npatch_x * Npatch_y * Gx * Gy, ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=Gx * Gy, plan=plan)

    #return patches_g.get()

    #accumulate
    res_g = OCLArray.empty(im.shape, np.float32)

    # (Gx+1) x (Gy+1) kernel calls, each with a block sized global range
    for i in range(Gx + 1):
        for j in range(Gy + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y),
                            None, patches_g.data, res_g.data, np.int32(i),
                            np.int32(j), np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 50
0
def affine(data, mat=np.identity(4), mode="constant", interpolation="linear"):
    """
    Affine transform of a 3d array on the GPU.

    mat is the inverse coordinate transform matrix (similar to
    ndimage.affine_transform); it is uploaded as float32 without further
    changes and handed to the "affine3" kernel.

    A warning about the API change in gputools >= 0.2.8 is issued on
    every call.

    Parameters
    ----------
    data, ndarray
        3d array to be transformed (processed as float32)
    mat, ndarray
        inverse coordinate transform matrix, the default is the 4x4 identity
        (NOTE(review): 3x3 matrices are documented upstream but not handled
        separately here - confirm against the kernel)
    mode: string
        boundary mode, one of the following:
        'constant'
            pads with zeros
        'edge'
            pads with edge values
        'wrap'
            pads with the repeated version of the input
    interpolation, string
        interpolation mode, one of the following
        'linear'
        'nearest'

    Returns
    -------
    res: ndarray
        transformed float32 array (same shape as input)

    Raises
    ------
    ValueError
        if data is not a 3d ndarray
    KeyError
        if interpolation or mode is not supported
    """
    warnings.warn(
        "gputools.transform.affine: API change as of gputools>= 0.2.8: the inverse of the matrix is now used as in scipy.ndimage.affine_transform"
    )

    is_volume = isinstance(data, np.ndarray) and data.ndim == 3
    if not is_volume:
        raise ValueError("input data has to be a 3d array!")

    # compile time defines selecting the sampler filter / address mode
    sampler_filters = {
        "linear": ["-D", "SAMPLER_FILTER=CLK_FILTER_LINEAR"],
        "nearest": ["-D", "SAMPLER_FILTER=CLK_FILTER_NEAREST"]
    }
    sampler_addresses = {
        "constant": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP"],
        "wrap": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_REPEAT"],
        "edge": ["-D", "SAMPLER_ADDRESS=CLK_ADDRESS_CLAMP_TO_EDGE"]
    }

    if interpolation not in sampler_filters:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(sampler_filters.keys())))

    if mode not in sampler_addresses:
        raise KeyError("mode = '%s' not defined ,valid: %s" %
                       (mode, list(sampler_addresses.keys())))

    build_options = sampler_filters[interpolation] + sampler_addresses[mode]

    d_im = OCLImage.from_array(data.astype(np.float32, copy=False))
    res_g = OCLArray.empty(data.shape, np.float32)
    mat_inv_g = OCLArray.from_array(mat.astype(np.float32, copy=False))

    prog = OCLProgram(abspath("kernels/affine.cl"),
                      build_options=build_options)

    # one work item per output voxel, fastest axis first
    prog.run_kernel("affine3", data.shape[::-1], None, d_im, res_g.data,
                    mat_inv_g.data)

    return res_g.get()
Ejemplo n.º 51
0
def focus_field_cylindrical_plane(shape=(128, 128),
                                  units=(.1, .1),
                                  z=0.,
                                  lam=.5,
                                  NA=.3,
                                  n0=1.,
                                  ex_g=None,
                                  n_integration_steps=200):
    """calculates the complex 2d input field at position -z for a perfect,
    aberration free cylindrical lens after x polarized illumination via the
    vectorial debye diffraction integral.

    Parameters
    ----------
    shape: Nx,Ny
        the 2d shape of the geometry (any length-2 sequence)
    units: dx,dy
        the pixel sizes in microns (only dy enters the kernel call)
    z:  float
        defocus position in microns, such that the beam would focus at z
        e.g. an input field with z = 10. would have its focus spot after
        10 microns
    lam: float
        the wavelength of light used in microns
    NA: float
        the numerical aperture of the lens
    n0: float
        the refractive index of the medium
    ex_g: OCLArray, optional
        preallocated device buffer of shape (Ny, Nx) the kernel writes into;
        presumably complex64 like the internally allocated buffer (not
        checked here). If given, the result stays on the device and nothing
        is returned.
    n_integration_steps: int
        number of integration steps to perform

    Returns
    -------
    ex: ndarray or None
        the complex field of shape (Ny, Nx) if ex_g was not given,
        otherwise None (the field is left in ex_g)

    Raises
    ------
    ValueError
        if the shape of ex_g does not match (Ny, Nx)

    Example
    -------

    >>> # the input field of a cylindrical lens that will focus after 4 microns
    >>> ex = focus_field_cylindrical_plane((256,256), (0.1,0.1), z = 4., lam=.5, NA = .4)

    See Also
    --------
    biobeam.focus_field_cylindrical : the 3d function


    """

    Nx, Ny = shape
    _, dy = units

    # Validate a caller supplied buffer up front (before any kernel is built)
    # and with an explicit exception: an assert would vanish under
    # "python -O" and let a wrongly sized buffer reach the kernel. Comparing
    # tuples also makes a list valued `shape` work.
    if ex_g is not None and tuple(ex_g.shape[::-1]) != (Nx, Ny):
        raise ValueError("ex_g has shape %s but (Ny, Nx) = %s is required" %
                         (tuple(ex_g.shape), (Ny, Nx)))

    # the number of integration steps is baked into the kernel at build time
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options=[
                       "-I",
                       absPath("kernels"), "-D",
                       "INT_STEPS=%s" % n_integration_steps
                   ])

    alpha = np.arcsin(NA / n0)

    use_buffer = ex_g is not None
    if not use_buffer:
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)

    # kernel arguments: output buffer, lower/upper y extent, negated defocus,
    # lam/n0 and alpha = arcsin(NA/n0)
    p.run_kernel("psf_cylindrical_plane", (Nx, Ny), None, ex_g.data,
                 np.float32(-dy * (Ny // 2)),
                 np.float32((Ny - 1 - Ny // 2) * dy), np.float32(-z),
                 np.float32(lam / n0), np.float32(alpha))

    if use_buffer:
        # result stays on the device in the caller's buffer
        return None
    return ex_g.get()
Ejemplo n.º 52
0
def buffer_from_array(data):
    buf = OCLArray.from_array(data)
    assert np.allclose(data,buf.get())