コード例 #1
0
ファイル: tv2.py プロジェクト: maweigert/gputools
def tv2(data, weight, Niter=50):
    """
    Chambolle's TV regularized denoising.

    Runs ``Niter`` rounds of the "div_step" and "grad_step" kernels,
    alternating between two RGBA float32 images, and reads the output
    image back.

    Parameters
    ----------
    data : array
        input data, converted to float32 before upload
    weight : float
        regularization weight, should be around 2 + 1.5*noise_sigma
    Niter : int
        number of div/grad iterations

    Returns
    -------
    the result of ``dev.readImage(outImg, dtype=np.float32)``

    NOTE(review): ``inImg``, ``dev`` and ``proc`` are not defined inside this
    function, and ``prog`` / ``data_im`` are created but never used. This
    looks like a half-finished port between two device APIs -- confirm where
    those names are supposed to come from.
    """

    prog = OCLProgram(abspath("kernels/tv2.cl"))

    # the dtype is the single attribute np.float32
    data_im = OCLImage.from_array(data.astype(np.float32, copy=False))

    # two RGBA images that are swapped every iteration
    pImgs = [
        dev.createImage(
            data.shape[::-1], mem_flags=cl.mem_flags.READ_WRITE, dtype=np.float32, channel_order=cl.channel_order.RGBA
        )
        for i in range(2)
    ]

    outImg = dev.createImage(data.shape[::-1], dtype=np.float32, mem_flags=cl.mem_flags.READ_WRITE)

    dev.writeImage(inImg, data.astype(np.float32))
    dev.writeImage(pImgs[0], np.zeros((4,) + data.shape, dtype=np.float32))
    dev.writeImage(pImgs[1], np.zeros((4,) + data.shape, dtype=np.float32))

    for i in range(Niter):
        # pImgs[i % 2] is read, pImgs[1 - i % 2] is written by grad_step
        proc.runKernel("div_step", inImg.shape, None, inImg, pImgs[i % 2], outImg)
        proc.runKernel("grad_step", inImg.shape, None, outImg, pImgs[i % 2], pImgs[1 - i % 2], np.float32(weight))
    return dev.readImage(outImg, dtype=np.float32)
コード例 #2
0
ファイル: bilateral2.py プロジェクト: maweigert/gputools
def bilateral2(data, fSize, sigma_p, sigma_x = 10.):
    """Apply the bilateral filter kernel from ``kernels/bilateral2.cl``.

    float32 and uint16 input each have a dedicated kernel; any other dtype
    is logged and cast to float32 first.

    Parameters
    ----------
    data : ndarray
        input image (only the first two image dimensions are handed to the
        kernel)
    fSize : int
        filter size, passed to the kernel as int32
    sigma_p : float
        passed to the kernel as its last float32 argument
    sigma_x : float
        passed to the kernel right before ``sigma_p``

    Returns
    -------
    the filtered result fetched from the device
    """
    kernel_by_dtype = {np.float32:"bilat2_float",
                       np.uint16:"bilat2_short"}

    if data.dtype.type not in kernel_by_dtype:
        message = "data type %s not supported yet (%s), casting to float:"%(data.dtype.type,kernel_by_dtype.keys())
        logger.info(message)
        data = data.astype(np.float32)

    kernel_name = kernel_by_dtype[data.dtype.type]

    src_im = OCLImage.from_array(data)
    out_g = OCLArray.empty_like(data)

    program = OCLProgram(abspath("kernels/bilateral2.cl"))

    size_args = (np.int32(src_im.shape[0]), np.int32(src_im.shape[1]))
    filter_args = (np.int32(fSize), np.float32(sigma_x), np.float32(sigma_p))

    program.run_kernel(kernel_name, src_im.shape, None,
                       src_im, out_g.data,
                       *(size_args + filter_args))

    return out_g.get()
コード例 #3
0
ファイル: tv2.py プロジェクト: gpwright/gputools
def tv2(data, weight, Niter=50):
    """
    Chambolle's TV regularized denoising.

    Each of the ``Niter`` iterations launches the "div_step" kernel followed
    by the "grad_step" kernel; the two RGBA float32 images in ``pImgs`` swap
    their read/write roles every iteration. The output image is read back at
    the end.

    Parameters
    ----------
    data : array
        input data, converted to float32 before upload
    weight : float
        regularization weight, should be around 2 + 1.5*noise_sigma
    Niter : int
        number of iterations

    Returns
    -------
    the result of ``dev.readImage(outImg, dtype=np.float32)``

    NOTE(review): ``inImg``, ``dev`` and ``proc`` are not defined inside this
    function, while ``prog`` and ``data_im`` are created but unused. This
    looks like an unfinished port -- confirm the intended source of those
    names before relying on this function.
    """

    prog = OCLProgram(abspath("kernels/tv2.cl"))

    # dtype must be np.float32 (one attribute), not the two arguments np, float32
    data_im = OCLImage.from_array(data.astype(np.float32, copy=False))

    pImgs = [
        dev.createImage(data.shape[::-1],
                        mem_flags=cl.mem_flags.READ_WRITE,
                        dtype=np.float32,
                        channel_order=cl.channel_order.RGBA) for i in range(2)
    ]

    outImg = dev.createImage(data.shape[::-1],
                             dtype=np.float32,
                             mem_flags=cl.mem_flags.READ_WRITE)

    dev.writeImage(inImg, data.astype(np.float32))
    dev.writeImage(pImgs[0], np.zeros((4, ) + data.shape, dtype=np.float32))
    dev.writeImage(pImgs[1], np.zeros((4, ) + data.shape, dtype=np.float32))

    for i in range(Niter):
        # read from pImgs[i % 2], write the update into pImgs[1 - i % 2]
        proc.runKernel("div_step", inImg.shape, None, inImg, pImgs[i % 2],
                       outImg)
        proc.runKernel("grad_step", inImg.shape, None, outImg, pImgs[i % 2],
                       pImgs[1 - i % 2], np.float32(weight))
    return dev.readImage(outImg, dtype=np.float32)
コード例 #4
0
ファイル: nlm3.py プロジェクト: maweigert/gputools
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Denoise 3d data with the kernels of ``kernels/nlm3.cl``.

    For a noise level of sigma_0, choose sigma = 1.5*sigma_0.

    For every search offset (dx, dy, dz) with dx in [0, size_search] and
    dy, dz in [-size_search, size_search] a distance image is computed
    ("dist"), passed through three "convolve" launches and accumulated with
    "computePlus"; for every nonzero offset "computeMinus" is run as well.

    Parameters
    ----------
    data : ndarray
        3d input, converted to float32
    sigma : float
        passed to the accumulation kernels as float32
    size_filter : int
        compiled into the program as ``FS``
    size_search : int
        search radius, also compiled into the program as ``BS``

    Returns
    -------
    ndarray
        accumulated values divided element-wise by the accumulated weights
    """

    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images used by the dist/convolve chain
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    # arguments that do not change between offsets
    shape_args = tuple(np.int32(img.shape[k]) for k in range(3))
    sigma_arg = (np.float32(sigma),)

    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                offset = (np.int32(dx),np.int32(dy),np.int32(dz))

                prog.run_kernel("dist",img.shape,None,
                                img,tmpImg,*offset)

                # three passes with the int32 arguments 1, 2 and 4
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,distImg,np.int32(4))

                acc_args = ((img,distImg,accBuf.data,weightBuf.data)
                            + shape_args + offset + sigma_arg)

                prog.run_kernel("computePlus",img.shape,None,*acc_args)

                # the zero offset is accumulated only once
                if dx or dy or dz:
                    prog.run_kernel("computeMinus",img.shape,None,*acc_args)

    acc  = accBuf.get()
    weights  = weightBuf.get()

    return acc/weights
コード例 #5
0
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Denoise 3d data with the kernels of ``kernels/nlm3.cl``.

    For a noise level of sigma_0, choose sigma = 1.5*sigma_0.

    All offsets (dx, dy, dz) with dx in [0, size_search] and dy, dz in
    [-size_search, size_search] are visited. Per offset the "dist" kernel
    and three "convolve" launches produce a distance image that
    "computePlus" (and, for nonzero offsets, "computeMinus") accumulates
    into a value buffer and a weight buffer.

    Parameters
    ----------
    data : ndarray
        3d input, uploaded as given (no dtype conversion is done here)
    sigma : float
        passed to the accumulation kernels as float32
    size_filter : int
        compiled into the program as ``FS``
    size_search : int
        search radius, also compiled into the program as ``BS``

    Returns
    -------
    ndarray
        accumulated values divided element-wise by the accumulated weights
    """

    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    img = OCLImage.from_array(data)

    # scratch images used by the dist/convolve chain
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    # loop-invariant kernel arguments
    shape_args = tuple(np.int32(img.shape[k]) for k in range(3))
    sigma_arg = (np.float32(sigma),)

    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                offset = (np.int32(dx),np.int32(dy),np.int32(dz))

                prog.run_kernel("dist",img.shape,None,
                                img,tmpImg,*offset)

                # three passes with the int32 arguments 1, 2 and 4
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",img.shape,None,
                                tmpImg,distImg,np.int32(4))

                acc_args = ((img,distImg,accBuf.data,weightBuf.data)
                            + shape_args + offset + sigma_arg)

                prog.run_kernel("computePlus",img.shape,None,*acc_args)

                # the zero offset is accumulated only once
                if dx or dy or dz:
                    prog.run_kernel("computeMinus",img.shape,None,*acc_args)

    acc  = accBuf.get()
    weights  = weightBuf.get()

    return acc/weights
コード例 #6
0
ファイル: convolve_sep.py プロジェクト: maweigert/gputools
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g = None):
    """Separable 2d convolution of device buffers.

    Runs "conv_sep2_x" with ``hx_g`` into a temporary buffer and then
    "conv_sep2_y" with ``hy_g`` into ``res_g``. All three inputs have to be
    float32 buffers (checked by ``assert_bufs_type``).

    If ``res_g`` is None a new buffer shaped like ``data_g`` is allocated.
    Returns ``res_g``.
    """
    assert_bufs_type(np.float32,data_g,hx_g,hy_g)

    program = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = hy_g.shape[0]
    len_x = hx_g.shape[0]

    scratch_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # (kernel name, source, filter, destination, filter length)
    passes = (("conv_sep2_x", data_g, hx_g, scratch_g, len_x),
              ("conv_sep2_y", scratch_g, hy_g, res_g, len_y))

    for kernel, src_g, filt_g, dst_g, length in passes:
        program.run_kernel(kernel,data_g.shape[::-1],None,
                           src_g.data,filt_g.data,dst_g.data,np.int32(length))

    return res_g
コード例 #7
0
ファイル: convolve_sep.py プロジェクト: gpwright/gputools
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None, dev=None):
    """Separable 2d convolution of device buffers.

    First "conv_sep2_x" filters ``data_g`` with ``hx_g`` into a temporary
    buffer, then "conv_sep2_y" filters that buffer with ``hy_g`` into
    ``res_g``. The inputs must be float32 buffers (``assert_bufs_type``).

    ``res_g`` is allocated like ``data_g`` when it is None. ``dev`` is
    accepted but not used by this function. Returns ``res_g``.
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    program = OCLProgram(abspath("kernels/convolve_sep.cl"))

    n_taps_y = hy_g.shape[0]
    n_taps_x = hx_g.shape[0]

    intermediate_g = OCLArray.empty_like(data_g)

    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # horizontal pass: data_g -> intermediate_g
    program.run_kernel("conv_sep2_x",
                       data_g.shape[::-1],
                       None,
                       data_g.data,
                       hx_g.data,
                       intermediate_g.data,
                       np.int32(n_taps_x))

    # vertical pass: intermediate_g -> res_g
    program.run_kernel("conv_sep2_y",
                       data_g.shape[::-1],
                       None,
                       intermediate_g.data,
                       hy_g.data,
                       res_g.data,
                       np.int32(n_taps_y))

    return res_g
コード例 #8
0
def _convolve_buf(data_g, h_g , res_g = None):
    """
    Convolution on device buffers (buffer variant).

    Picks the kernel "convolve<ndim>d_buf" from ``kernels/convolve.cl``
    according to the number of dimensions of ``data_g`` and passes the
    extents of ``h_g`` as trailing int32 arguments. Both inputs must be
    float32 buffers.

    A float32 result buffer shaped like ``data_g`` is allocated when
    ``res_g`` is None. Returns ``res_g``.
    """
    assert_bufs_type(np.float32,data_g,h_g)

    program = OCLProgram(abspath("kernels/convolve.cl"))

    if res_g is None:
        res_g = OCLArray.empty(data_g.shape,dtype=np.float32)

    filter_extents = list(map(np.int32, h_g.shape))

    n_dims = len(data_g.shape)
    kernel_name = "convolve%sd_buf"%(n_dims)

    kernel_args = [data_g.data,h_g.data,res_g.data] + filter_extents
    program.run_kernel(kernel_name,data_g.shape[::-1],None,*kernel_args)

    return res_g
コード例 #9
0
def affine(data, mat = np.identity(4), mode ="linear"):
    """Affine transform of data with the matrix mat.

    The inverse of ``mat`` is uploaded as float32 and handed to the
    "affine" kernel of ``kernels/transformations.cl``.

    Parameters
    ----------
    data : ndarray
        input data
    mat : ndarray
        transformation matrix (default: 4x4 identity)
    mode : str
        "linear" or "nearest"; "nearest" builds the program with
        ``-D USENEAREST``

    Returns
    -------
    float32 result with the shape of ``data``

    Raises
    ------
    KeyError
        if ``mode`` is not one of the known modes
    """

    build_flag = {"linear":"","nearest":"-D USENEAREST"}

    if mode not in build_flag.keys():
        raise KeyError("mode = '%s' not defined ,valid: %s"%(mode, build_flag.keys()))

    src_im = OCLImage.from_array(data)
    out_g = OCLArray.empty(data.shape,np.float32)

    inverse = np.linalg.inv(mat).astype(np.float32,copy=False)
    inverse_g = OCLArray.from_array(inverse)

    program = OCLProgram(abspath("kernels/transformations.cl"),
                         build_options=[build_flag[mode]])

    program.run_kernel("affine", data.shape[::-1], None,
                       src_im, out_g.data, inverse_g.data)

    return out_g.get()
コード例 #10
0
def affine(data, mat = np.identity(4), interp = "linear"):
    """Affine transform of data with the matrix mat.

    ``mat`` is inverted, converted to float32 and passed to the "affine"
    kernel of ``kernels/transformations.cl``.

    Parameters
    ----------
    data : ndarray
        input data
    mat : ndarray
        transformation matrix (default: 4x4 identity)
    interp : str
        "linear" or "nearest"; "nearest" builds the program with
        ``-D USENEAREST``

    Returns
    -------
    float32 result with the shape of ``data``

    Raises
    ------
    KeyError
        if ``interp`` is not a known interpolation mode
    """

    options_by_interp = {"linear":"","nearest":"-D USENEAREST"}

    if interp not in options_by_interp.keys():
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,options_by_interp.keys()))

    image_in = OCLImage.from_array(data)
    result_g = OCLArray.empty(data.shape,np.float32)

    mat_inv = np.linalg.inv(mat)
    mat_inv_g = OCLArray.from_array(mat_inv.astype(np.float32,copy=False))

    program = OCLProgram(abspath("kernels/transformations.cl"),
                         build_options=[options_by_interp[interp]])

    global_size = data.shape[::-1]
    program.run_kernel("affine", global_size, None,
                       image_in, result_g.data, mat_inv_g.data)

    return result_g.get()
コード例 #11
0
ファイル: bilateral2.py プロジェクト: gpwright/gputools
def bilateral2(data, fSize, sigma_p, sigma_x=10.):
    """Run the bilateral filter kernel of ``kernels/bilateral2.cl`` on data.

    A dedicated kernel exists for float32 ("bilat2_float") and uint16
    ("bilat2_short") input; every other dtype is reported via the logger
    and cast to float32.

    The kernel receives the first two image dimensions, ``fSize`` (int32),
    ``sigma_x`` and ``sigma_p`` (float32, in that order). The result is
    fetched from the device and returned.
    """
    kernels = {np.float32: "bilat2_float", np.uint16: "bilat2_short"}

    input_type = data.dtype.type
    if input_type not in kernels:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (input_type, kernels.keys()))
        data = data.astype(np.float32)
        input_type = data.dtype.type

    image = OCLImage.from_array(data)
    result = OCLArray.empty_like(data)

    program = OCLProgram(abspath("kernels/bilateral2.cl"))

    arguments = [
        image,
        result.data,
        np.int32(image.shape[0]),
        np.int32(image.shape[1]),
        np.int32(fSize),
        np.float32(sigma_x),
        np.float32(sigma_p),
    ]
    program.run_kernel(kernels[input_type], image.shape, None, *arguments)

    return result.get()
コード例 #12
0
ファイル: fftshift.py プロジェクト: maweigert/gputools
def _fftshift_single(d_g, res_g, ax = 0):
    """
    basic fftshift of an OCLArray


    shape(d_g) =  [N_0,N_1...., N, .... N_{k-1, N_k]
    = [N1, N, N2]

    the we can address each element in the flat buffer by

     index = i + N2*j + N2*N*k

    where   i = 1 .. N2
            j = 1 .. N
            k = 1 .. N1

    and the swap of elements is performed on the index j
    """

    dtype_kernel_name = {np.float32:"fftshift_1_f",
                   np.complex64:"fftshift_1_c"
                   }

    N = d_g.shape[ax]
    N1 = 1 if ax==0 else np.prod(d_g.shape[:ax])
    N2 = 1 if ax == len(d_g.shape)-1 else np.prod(d_g.shape[ax+1:])

    dtype = d_g.dtype.type

    prog = OCLProgram(abspath("kernels/fftshift.cl"))
    prog.run_kernel(dtype_kernel_name[dtype],(N2,N/2,N1),None,
                    d_g.data, res_g.data,
                    np.int32(N),
                    np.int32(N2))


    return res_g
コード例 #13
0
ファイル: scale.py プロジェクト: maweigert/gputools
def scale(data, scale = (1.,1.,1.), interp = "linear"):
    """returns an interpolated, scaled version of data

    Parameters
    ----------
    data : ndarray
        3d input data
    scale : float or sequence of 3 floats
        scale = (scale_z,scale_y,scale_x)
        or
        scale = scale_all
    interp : str
        "linear" | "nearest"

    Returns
    -------
    float32 array whose shape is ``data.shape * scale`` truncated to integers

    Raises
    ------
    KeyError
        if ``interp`` is not a known interpolation mode
    ValueError
        if ``scale`` is a sequence whose length is not 3
    """

    bop = {"linear":[],"nearest":["-D","USENEAREST"]}

    if interp not in bop:
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,bop.keys()))

    if not isinstance(scale,(tuple, list, np.ndarray)):
        scale = (scale,)*3

    if len(scale) != 3:
        # wrap in a 1-tuple: a bare tuple would be unpacked by the % operator
        raise ValueError("scale = %s misformed"%(scale,))

    d_im = OCLImage.from_array(data)

    # the builtin int replaces the removed alias np.int
    nshape = np.array(data.shape)*np.array(scale)
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape,np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=bop[interp])

    # one work item per output element
    prog.run_kernel("scale",
                    res_g.shape[::-1],None,
                    d_im,res_g.data)

    return res_g.get()
コード例 #14
0
def _convolve3_old(data,h, dev = None):
    """convolves 3d data with kernel h on the GPU Device dev
    boundary conditions are clamping to edge.
    h is converted to float32

    if dev == None the default one is used
    """

    if dev is None:
        dev = get_device()

    if dev is None:
        raise ValueError("no OpenCLDevice found...")

    dtype = data.dtype.type

    dtypes_options = {np.float32:"",
                      np.uint16:"-D SHORTTYPE"}

    if not dtype in dtypes_options.keys():
        raise TypeError("data type %s not supported yet, please convert to:"%dtype,dtypes_options.keys())

    prog = OCLProgram(abspath("kernels/convolve3.cl"),
                      build_options = dtypes_options[dtype])

    
    hbuf = OCLArray.from_array(h.astype(np.float32))
    img = OCLImage.from_array(data)
    res = OCLArray.empty(data.shape,dtype=np.float32)

    Ns = [np.int32(n) for n in data.shape+h.shape]

    prog.run_kernel("convolve3d",img.shape,None,
                    img,hbuf.data,res.data,
                    *Ns)

    return res.get()
コード例 #15
0
ファイル: scale.py プロジェクト: gpwright/gputools
def scale(data, scale = (1.,1.,1.), interp = "linear"):
    """returns an interpolated, scaled version of data

    Parameters
    ----------
    data : ndarray
        3d input data
    scale : float or sequence of 3 floats
        scale = (scale_z,scale_y,scale_x)
        or
        scale = scale_all
    interp : str
        "linear" | "nearest"

    Returns
    -------
    float32 array whose shape is ``data.shape * scale`` truncated to integers

    Raises
    ------
    KeyError
        if ``interp`` is not a known interpolation mode
    ValueError
        if ``scale`` is a sequence whose length is not 3
    """

    bop = {"linear":"","nearest":"-D USENEAREST"}

    if interp not in bop:
        raise KeyError("interp = '%s' not defined ,valid: %s"%(interp,bop.keys()))

    if not isinstance(scale,(tuple, list, np.ndarray)):
        scale = (scale,)*3

    if len(scale) != 3:
        # wrap in a 1-tuple: a bare tuple would be unpacked by the % operator
        raise ValueError("scale = %s misformed"%(scale,))

    d_im = OCLImage.from_array(data)

    # the builtin int replaces the removed alias np.int
    nshape = np.array(data.shape)*np.array(scale)
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape,np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=[bop[interp]])

    # one work item per output element
    prog.run_kernel("scale",
                    res_g.shape[::-1],None,
                    d_im,res_g.data)

    return res_g.get()
コード例 #16
0
def convolve_spatial2(im, hs,
                      mode = "constant",
                      plan = None,
                      return_plan = False):
    """
    spatial varying convolution of a 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan is an optional fft plan for patches of shape (Npatch_y,Npatch_x);
    one is created if it is None. If return_plan is True, the tuple
    (result, plan) is returned instead of the result alone.

    raises ValueError for inputs of wrong dimensionality and
    NotImplementedError if the image shape is not divisible by the grid.
    """

    if im.ndim !=2 or hs.ndim !=4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n%g==0 for n,g in zip(im.shape,hs.shape[:2])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s shape mismatch"%(str(hs.shape[:2])))


    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]


    # the size of each block within the grid (exact, divisibility was checked)
    Nblock_y, Nblock_x = Ny//Gy, Nx//Gx


    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(3*Nblock_x), _next_power_of_2(3*Nblock_y)

    # number of elements of a single patch in the flat patch buffer
    Npatch_total = Npatch_x*Npatch_y

    hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3))


    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y,Npatch_x))


    patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # start coordinates of the blocks
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)

    # cut out one patch per block; the patch shares its center with the block
    for i,_x0 in enumerate(x0s):
        for j,_y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None,
                    im_g,
                    np.int32(_x0+Nblock_x//2-Npatch_x//2),
                    np.int32(_y0+Nblock_y//2-Npatch_y//2),
                    patches_g.data,
                    np.int32(i*Npatch_total+j*Gx*Npatch_total))

    # convolution: multiply the transformed patches with the transformed psfs
    fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g,inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_total*Gx*Gy,),None,
                    patches_g.data, h_g.data)

    fft(patches_g,inplace=True, inverse = True, batch = Gx*Gy, plan = plan)

    # accumulate
    res_g = OCLArray.empty(im.shape,np.float32)

    for i in range(Gx+1):
        for j in range(Gy+1):
            prog.run_kernel("interpolate2",(Nblock_x,Nblock_y),None,
                            patches_g.data,res_g.data,
                            np.int32(i),np.int32(j),
                            np.int32(Gx),np.int32(Gy),
                            np.int32(Npatch_x),np.int32(Npatch_y))


    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
コード例 #17
0
def _convolve_spatial2(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      pad_factor = 2,
                      plan = None,
                      return_plan = False):
    """
    spatial varying convolution of a 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    if grid_dim = (Gy,Gx) is given, hs is uploaded as a single float32
    buffer and the psf of every block is copied out of it on the device
    by the "fill_psf_grid2" kernel
    (NOTE(review): hs presumably has the shape of im in that case -- confirm)

    pad_factor sets the patch size to the next power of two of
    pad_factor*block size

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan is an optional fft plan for patches of shape (Npatch_y,Npatch_x);
    if return_plan is True, the tuple (result, plan) is returned.
    """

    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]


    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs


    # the size of each block within the grid (integer division)
    Nblock_y, Nblock_x = Ny//Gy, Nx//Gx


    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(pad_factor*Nblock_x), _next_power_of_2(pad_factor*Nblock_y)

    # number of elements of a single patch in the flat patch buffer
    Npatch_total = Npatch_x*Npatch_y


    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y,Npatch_x))

    # start coordinates of the blocks
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)


    patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64)

    # prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros((Gy,Gx,Npatch_y,Npatch_x),np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for i,_x0 in enumerate(x0s):
            for j,_y0 in enumerate(y0s):
                prog.run_kernel("fill_psf_grid2",
                                (Nblock_x,Nblock_y),None,
                        tmp_g.data,
                        np.int32(Nx),
                        np.int32(i*Nblock_x),
                        np.int32(j*Nblock_y),
                        h_g.data,
                        np.int32(Npatch_x),
                        np.int32(Npatch_y),
                        np.int32(-Nblock_x//2+Npatch_x//2),
                        np.int32(-Nblock_y//2+Npatch_y//2),
                        np.int32(i*Npatch_total+j*Gx*Npatch_total)
                            )
    else:
        hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))


    # prepare image
    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # cut out one patch per block; the patch shares its center with the block
    for i,_x0 in enumerate(x0s):
        for j,_y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None,
                    im_g,
                    np.int32(_x0+Nblock_x//2-Npatch_x//2),
                    np.int32(_y0+Nblock_y//2-Npatch_y//2),
                    patches_g.data,
                    np.int32(i*Npatch_total+j*Gx*Npatch_total))


    # convolution: multiply the transformed patches with the transformed psfs
    fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g,inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_total*Gx*Gy,),None,
                    patches_g.data, h_g.data)
    fft(patches_g,inplace=True, inverse = True, batch = Gx*Gy, plan = plan)


    # accumulate
    res_g = OCLArray.empty(im.shape,np.float32)

    for j in range(Gy+1):
        for i in range(Gx+1):
            prog.run_kernel("interpolate2",(Nblock_x,Nblock_y),None,
                            patches_g.data,res_g.data,
                            np.int32(i),np.int32(j),
                            np.int32(Gx),np.int32(Gy),
                            np.int32(Npatch_x),np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
コード例 #18
0
def convolve_spatial3(im, hs,
                      mode = "constant",
                      plan = None,
                      return_plan = False,
                      pad_factor = 2):
    """
    spatial varying convolution of a 3d image with a 3d grid of psfs

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    the input image im is subdivided into (Gx,Gy,Gz) blocks
    hs[k,j,i] is the psf at the center of each block (i,j,k)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0
    Nz % Gz == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    pad_factor sets the patch size to the next power of two of
    pad_factor*block size

    plan is an optional fft plan for the patch shape; if return_plan is
    True, the tuple (result, plan) is returned.

    raises ValueError for inputs of wrong dimensionality and
    NotImplementedError if the image shape is not divisible by the grid.
    """
    if im.ndim !=3 or hs.ndim !=6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n%g==0 for n,g in zip(im.shape,hs.shape[:3])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s !"%(str(hs.shape[:3])))


    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])


    # the size of each block within the grid (exact, divisibility was checked)
    Nblocks = [n//g for n,g  in zip(Ns,Gs)]


    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    # number of elements of a single patch / number of patches
    Npatch_total = np.prod(Npatchs)
    Ngrid_total = np.prod(Gs)

    hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))


    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # start coordinates of the blocks along z, y, x
    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]

    # this loops over all i,j,k
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                im_g,
                    np.int32(_x0+Nblocks[2]//2-Npatchs[2]//2),
                    np.int32(_y0+Nblocks[1]//2-Npatchs[1]//2),
                    np.int32(_z0+Nblocks[0]//2-Npatchs[0]//2),
                    patches_g.data,
                    np.int32(i*Npatch_total+
                             j*Gs[2]*Npatch_total+
                             k*Gs[2]*Gs[1]*Npatch_total))


    # convolution: multiply the transformed patches with the transformed psfs
    fft(patches_g,inplace=True, batch = Ngrid_total, plan = plan)
    fft(h_g,inplace=True, batch = Ngrid_total, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_total*Ngrid_total,),None,
                    patches_g.data, h_g.data)

    fft(patches_g,
        inplace=True,
        inverse = True,
        batch = Ngrid_total,
        plan = plan)

    # accumulate
    res_g = OCLArray.zeros(im.shape,np.float32)

    for k, j, i in product(*[range(g+1) for g in Gs]):
        prog.run_kernel("interpolate3",Nblocks[::-1],None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i),np.int32(j),np.int32(k),
                        np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]),
                        np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0]))


    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
コード例 #19
0
def _convolve_spatial3(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      plan = None,
                      return_plan = False,
                      pad_factor = 2):



    if im.ndim !=3:
        raise ValueError("wrong dimensions of input!")

    if not (hs.ndim==6 or (hs.ndim==3 and grid_dim)):
        raise ValueError("wrong dimensions of psf grid!")

    if grid_dim:
        if hs.shape != im.shape:
            raise ValueError("if grid_dim is set, then im.shape = hs.shape !")
        Gs = tuple(grid_dim)
    else:
        if not hs.ndim==6:
            raise ValueError("wrong dimensions of psf grid! (Gy,Gx,Ny,Nx)")
        Gs = hs.shape[:3]

    if not np.all([n%g==0 for n,g in zip(im.shape,Gs)]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s shape mismatch"%(str(hs.shape[:2])))



    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = im.shape


    # the size of each block within the grid
    Nblocks = [n/g for n,g  in zip(Ns,Gs)]


    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    prog = OCLProgram(abspath("kernels/conv_spatial3.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)


    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    #prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros(Gs+Npatchs,np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
            prog.run_kernel("fill_psf_grid3",
                        Nblocks[::-1],None,
                        tmp_g.data,
                        np.int32(im.shape[2]),
                        np.int32(im.shape[1]),
                        np.int32(i*Nblocks[2]),
                        np.int32(j*Nblocks[1]),
                        np.int32(k*Nblocks[0]),
                        h_g.data,
                        np.int32(Npatchs[2]),
                        np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]),
                        np.int32(-Nblocks[2]/2+Npatchs[2]/2),
                        np.int32(-Nblocks[1]/2+Npatchs[1]/2),
                        np.int32(-Nblocks[0]/2+Npatchs[0]/2),
                        np.int32(i*np.prod(Npatchs)+
                         j*Gs[2]*np.prod(Npatchs)+
                         k*Gs[2]*Gs[1]*np.prod(Npatchs)))

    else:
        hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))
        h_g = OCLArray.from_array(hs.astype(np.complex64))


    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # this loops over all i,j,k
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                im_g,
                    np.int32(_x0+Nblocks[2]/2-Npatchs[2]/2),
                    np.int32(_y0+Nblocks[1]/2-Npatchs[1]/2),
                    np.int32(_z0+Nblocks[0]/2-Npatchs[0]/2),
                    patches_g.data,
                    np.int32(i*np.prod(Npatchs)+
                             j*Gs[2]*np.prod(Npatchs)+
                             k*Gs[2]*Gs[1]*np.prod(Npatchs)))


    # convolution
    fft(patches_g,inplace=True, batch = np.prod(Gs), plan = plan)
    fft(h_g,inplace=True, batch = np.prod(Gs), plan = plan)
    prog.run_kernel("mult_inplace",(np.prod(Npatchs)*np.prod(Gs),),None,
                    patches_g.data, h_g.data)

    fft(patches_g,
        inplace=True,
        inverse = True,
        batch = np.prod(Gs),
        plan = plan)

    #return patches_g.get()
    #accumulate
    res_g = OCLArray.zeros(im.shape,np.float32)

    for k, j, i in product(*[range(g+1) for g in Gs]):
        prog.run_kernel("interpolate3",Nblocks[::-1],None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i),np.int32(j),np.int32(k),
                        np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]),
                        np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0]))


    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res