def _filt(data_g, size=(3, 3, 3), res_g=None):
        """Run the templated separable filter on ``data_g`` in three passes.

        The OpenCL source is rendered from
        ``kernels/generic_separable_filter.cl`` and the kernels
        ``filter_3_x``, ``filter_3_y`` and ``filter_3_z`` are run one after
        another.  ``FUNC`` and ``DEFAULT`` are not defined in this block
        (presumably provided by an enclosing scope -- TODO confirm).

        Parameters
        ----------
        data_g:
            input device array; ``data_g.dtype.type`` has to be a key of
            ``cl_buffer_datatype_dict``
        size:
            sequence whose last three entries are used as the Z, Y and X
            filter extents (``size[-3]``, ``size[-2]``, ``size[-1]``)
        res_g:
            optional output array, created via ``OCLArray.empty_like(data_g)``
            if None

        Returns
        -------
            ``res_g``, written by the last (z) pass

        Raises
        ------
        ValueError
            if the dtype of ``data_g`` is not in ``cl_buffer_datatype_dict``
        """
        elem_type = data_g.dtype.type
        if elem_type not in cl_buffer_datatype_dict:
            raise ValueError("dtype %s not supported" % elem_type)

        DTYPE = cl_buffer_datatype_dict[elem_type]

        template_path = abspath("kernels/generic_separable_filter.cl")
        with open(template_path, "r") as src_file:
            kernel_template = Template(src_file.read())

        template_vars = dict(FSIZE_X=size[-1],
                             FSIZE_Y=size[-2],
                             FSIZE_Z=size[-3],
                             FUNC=FUNC,
                             DEFAULT=DEFAULT,
                             DTYPE=DTYPE)
        prog = OCLProgram(src_str=kernel_template.render(**template_vars),
                          build_options=["-cl-unsafe-math-optimizations"])

        # scratch buffer for the intermediate (y) pass
        tmp_g = OCLArray.empty_like(data_g)
        if res_g is None:
            res_g = OCLArray.empty_like(data_g)

        # the work size is the array shape in reversed order
        global_size = data_g.shape[::-1]

        # ping-pong between res_g and tmp_g so the last pass ends in res_g
        passes = (("filter_3_x", data_g, res_g),
                  ("filter_3_y", res_g, tmp_g),
                  ("filter_3_z", tmp_g, res_g))
        for kernel_name, src_g, dst_g in passes:
            prog.run_kernel(kernel_name, global_size, None,
                            src_g.data, dst_g.data)

        return res_g
Esempio n. 2
0
    def _filt(data_g, size=(3, 3, 3), res_g=None):
        """Run the templated reduce filter on a float32 device array.

        Renders ``kernels/generic_reduce_filter.cl`` and runs the kernels
        ``filter_3_x``, ``filter_3_y`` and ``filter_3_z`` in that order.
        ``FUNC`` and ``DEFAULT`` are not defined in this block (presumably
        provided by the enclosing scope -- TODO confirm).

        Parameters
        ----------
        data_g:
            input device array, checked with
            ``assert_bufs_type(np.float32, ...)``
        size:
            sequence whose last three entries are used as the Z, Y and X
            filter extents
        res_g:
            optional output array, created via ``OCLArray.empty_like(data_g)``
            if None

        Returns
        -------
            ``res_g``, written by the last (z) pass
        """
        assert_bufs_type(np.float32, data_g)

        template_path = abspath("kernels/generic_reduce_filter.cl")
        with open(template_path, "r") as src_file:
            kernel_template = Template(src_file.read())

        template_vars = dict(FSIZE_X=size[-1],
                             FSIZE_Y=size[-2],
                             FSIZE_Z=size[-3],
                             FUNC=FUNC,
                             DEFAULT=DEFAULT)
        prog = OCLProgram(src_str=kernel_template.render(**template_vars))

        # scratch buffer for the intermediate (y) pass
        scratch_g = OCLArray.empty_like(data_g)
        if res_g is None:
            res_g = OCLArray.empty_like(data_g)

        # the work size is the array shape in reversed order
        global_size = data_g.shape[::-1]

        # x: data -> res, y: res -> scratch, z: scratch -> res
        prog.run_kernel("filter_3_x", global_size, None,
                        data_g.data, res_g.data)
        prog.run_kernel("filter_3_y", global_size, None,
                        res_g.data, scratch_g.data)
        prog.run_kernel("filter_3_z", global_size, None,
                        scratch_g.data, res_g.data)
        return res_g
Esempio n. 3
0
def bilateral3(data, size_filter, sigma_p, sigma_x = 10.):
    """bilateral filter

    Runs the kernel from ``kernels/bilateral3.cl`` on ``data``.

    Parameters
    ----------
    data: numpy array
        input; anything that is not float32 is cast to float32 first
    size_filter:
        passed to the kernel as int32
    sigma_p:
        passed to the kernel as float32 (last kernel argument)
    sigma_x:
        passed to the kernel as float32 (kernel argument before sigma_p)

    Returns
    -------
        the content of the result buffer, as returned by ``res.get()``
    """

    dtype = data.dtype.type
    dtypes_kernels = {np.float32:"bilat3_float",}

    if dtype not in dtypes_kernels:
        # list(...) so the message shows the supported types rather than a
        # dict_keys view
        logger.info("data type %s not supported yet (%s), casting to float:"%(dtype,list(dtypes_kernels)))
        data = data.astype(np.float32)
        dtype = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    # diagnostic only: goes to the logger instead of a Python 2 ``print``
    # statement, which does not even parse on Python 3
    logger.debug("in bilateral3, image shape: %s", img.shape)

    # only the first two entries of img.shape are handed to the kernel
    prog.run_kernel(dtypes_kernels[dtype],
                    img.shape,None,
                    img,res.data,
                    np.int32(img.shape[0]),np.int32(img.shape[1]),
                    np.int32(size_filter),np.float32(sigma_x),np.float32(sigma_p))

    return res.get()
Esempio n. 4
0
def _fft_convolve_numpy(data, h, plan = None,
                        kernel_is_fft = False,
                        kernel_is_fftshifted = False):
    """ fft based convolution of two numpy arrays on the gpu

    data and h must have the same shape

    Parameters
    ----------
    data, h: numpy arrays
        both are uploaded as complex64
    plan:
        forwarded unchanged to _fft_convolve_gpu
    kernel_is_fft:
        forwarded unchanged to _fft_convolve_gpu
    kernel_is_fftshifted:
        if False, h is run through np.fft.fftshift before the upload

    Returns
    -------
        the absolute value of the convolution result as numpy array

    Raises
    ------
    ValueError
        if data and h differ in shape
    """

    dev = get_device()

    if data.shape != h.shape:
        raise ValueError("data and kernel must have same size! %s vs %s " % (data.shape, h.shape))

    data_g = OCLArray.from_array(data.astype(np.complex64))

    kernel = h if kernel_is_fftshifted else np.fft.fftshift(h)
    h_g = OCLArray.from_array(kernel.astype(np.complex64))

    res_g = OCLArray.empty_like(data_g)

    _fft_convolve_gpu(data_g, h_g,
                      res_g = res_g,
                      plan = plan,
                      kernel_is_fft = kernel_is_fft)

    res = abs(res_g.get())

    # drop the references to the device arrays before returning
    del data_g, h_g, res_g

    return res
Esempio n. 5
0
def _filter_max_2_gpu(data_g, size=10, res_g=None):
    """Run the ``max_2_x`` and ``max_2_y`` kernels over a float32 device array.

    Parameters
    ----------
    data_g:
        input device array, checked with ``assert_bufs_type(np.float32, ...)``
    size: scalar or sequence
        a sequence provides the x extent as ``size[-1]`` and the y extent as
        ``size[-2]``; a scalar is used for both
    res_g:
        optional output array, created via ``OCLArray.empty_like(data_g)``
        if None

    Returns
    -------
        ``res_g``, written by the second (y) pass
    """
    assert_bufs_type(np.float32, data_g)

    # the default (10) is a plain number, which cannot be indexed below;
    # expand scalars to one extent per pass
    if np.isscalar(size):
        size = (size,) * 2

    prog = OCLProgram(abspath("kernels/minmax_filter.cl"))

    # intermediate buffer between the x and the y pass
    tmp_g = OCLArray.empty_like(data_g)

    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    prog.run_kernel("max_2_x", data_g.shape[::-1], None, data_g.data,
                    tmp_g.data, np.int32(size[-1]))
    prog.run_kernel("max_2_y", data_g.shape[::-1], None, tmp_g.data,
                    res_g.data, np.int32(size[-2]))

    return res_g
Esempio n. 6
0
def bilateral3(data, size_filter, sigma_p, sigma_x=10.):
    """Run the ``bilat3_float`` kernel from ``kernels/bilateral3.cl`` on data.

    Parameters
    ----------
    data: numpy array
        input; anything that is not float32 is cast to float32 first
    size_filter:
        passed to the kernel as int32
    sigma_p:
        passed to the kernel as float32 (last kernel argument)
    sigma_x:
        passed to the kernel as float32 (kernel argument before sigma_p)

    Returns
    -------
        the content of the result buffer, as returned by ``res.get()``
    """
    kernel_by_dtype = {np.float32: "bilat3_float"}

    if data.dtype.type not in kernel_by_dtype:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (data.dtype.type, list(kernel_by_dtype.keys())))
        data = data.astype(np.float32)

    kernel_name = kernel_by_dtype[data.dtype.type]

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    logger.debug("in bilateral3, image shape: {}".format(img.shape))

    # only the first two entries of img.shape are handed to the kernel
    kernel_args = (img,
                   res.data,
                   np.int32(img.shape[0]),
                   np.int32(img.shape[1]),
                   np.int32(size_filter),
                   np.float32(sigma_x),
                   np.float32(sigma_p))
    prog.run_kernel(kernel_name, img.shape, None, *kernel_args)

    return res.get()
Esempio n. 7
0
def _ocl_fft_gpu(plan, ocl_arr, res_arr=None, inverse=False):
    """Call ``plan`` on a complex64 device array and return the output array.

    Parameters
    ----------
    plan:
        callable invoked as ``plan(ocl_arr, out, inverse=inverse)``
    ocl_arr:
        input array, checked with ``assert_bufs_type(np.complex64, ...)``
    res_arr:
        optional output array, created via ``OCLArray.empty_like(ocl_arr)``
        if None
    inverse:
        forwarded unchanged to ``plan``

    Returns
    -------
        the output array (``res_arr`` if it was given)
    """
    assert_bufs_type(np.complex64, ocl_arr)

    out_arr = OCLArray.empty_like(ocl_arr) if res_arr is None else res_arr
    plan(ocl_arr, out_arr, inverse=inverse)

    return out_arr
Esempio n. 8
0
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g = None):
    """Run the ``conv_sep2_x`` and ``conv_sep2_y`` kernels on device arrays.

    Parameters
    ----------
    data_g, hx_g, hy_g:
        float32 device arrays (checked with ``assert_bufs_type``); hx_g is
        used by the x pass, hy_g by the y pass
    res_g:
        optional output array, created via ``OCLArray.empty_like(data_g)``
        if None

    Returns
    -------
        ``res_g``, written by the second (y) pass
    """

    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    # kernel lengths, handed to the kernels as int32
    len_y = hy_g.shape[0]
    len_x = hx_g.shape[0]

    # intermediate buffer between the x and the y pass
    scratch_g = OCLArray.empty_like(data_g)

    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # the work size is the array shape in reversed order
    global_size = data_g.shape[::-1]

    prog.run_kernel("conv_sep2_x", global_size, None,
                    data_g.data, hx_g.data, scratch_g.data, np.int32(len_x))
    prog.run_kernel("conv_sep2_y", global_size, None,
                    scratch_g.data, hy_g.data, res_g.data, np.int32(len_y))

    return res_g
Esempio n. 9
0
def test_bessel(n,x):
    """Run the ``bessel_fill`` kernel for ``n`` on ``x`` and return the result.

    ``x`` is converted to float32 before the upload, ``n`` is passed to the
    kernel as int32 and the result buffer is read back with ``get()``.
    """
    x32 = x.astype(float32)

    x_g = OCLArray.from_array(x32)
    res_g = OCLArray.empty_like(x32)

    prog = OCLProgram(absPath("kernels/bessel.cl"))
    prog.run_kernel("bessel_fill", x_g.shape, None,
                    x_g.data, res_g.data, int32(n))

    return res_g.get()
Esempio n. 10
0
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None):
    """Run ``conv_sep2_x`` followed by ``conv_sep2_y`` on float32 device arrays.

    Parameters
    ----------
    data_g, hx_g, hy_g:
        float32 device arrays (checked with ``assert_bufs_type``); hx_g is
        used by the x pass, hy_g by the y pass
    res_g:
        optional output array, created via ``OCLArray.empty_like(data_g)``
        if None

    Returns
    -------
        ``res_g``, written by the second (y) pass
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    # kernel lengths, handed to the kernels as int32
    n_y, n_x = hy_g.shape[0], hx_g.shape[0]

    # intermediate buffer between the two passes
    tmp_g = OCLArray.empty_like(data_g)
    out_g = OCLArray.empty_like(data_g) if res_g is None else res_g

    # (kernel name, input, filter, output, filter length) per pass
    passes = (("conv_sep2_x", data_g, hx_g, tmp_g, n_x),
              ("conv_sep2_y", tmp_g, hy_g, out_g, n_y))
    for kernel_name, src_g, h_g, dst_g, length in passes:
        prog.run_kernel(kernel_name, data_g.shape[::-1], None, src_g.data,
                        h_g.data, dst_g.data, np.int32(length))

    return out_g
Esempio n. 11
0
def test_bessel(n, x):
    """Fill a buffer with the ``bessel_fill`` kernel and return it.

    ``x`` is converted to float32 before the upload, ``n`` is passed to the
    kernel as int32 and the result buffer is read back with ``get()``.
    """
    host_x = x.astype(float32)
    in_g = OCLArray.from_array(host_x)
    out_g = OCLArray.empty_like(host_x)

    kernel_args = (in_g.data, out_g.data, int32(n))
    program = OCLProgram(absPath("kernels/bessel.cl"))
    program.run_kernel("bessel_fill", in_g.shape, None, *kernel_args)

    return out_g.get()
Esempio n. 12
0
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
        (numpy arrays are converted to complex64 if complex, else to float32)
    axes: int, iterable of ints or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the type of the returned object, see below

    Returns
    -------
        if return_buffer, returns the result as OCLArray
        else returns the result as numpy array

    Raises
    ------
    ValueError
        if arr_obj is neither an OCLArray nor a numpy array
    NotImplementedError
        if an OCLArray has a dtype that is not in DTYPE_KERNEL_NAMES, or if
        one of the selected axes has odd length

    """

    # check/convert the input first, so that unsupported types get the
    # ValueError below instead of an AttributeError from ``.ndim``
    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if axes is None:
        axes = range(arr_obj.ndim)
    elif np.isscalar(axes):
        # np.fft.fftshift accepts a single int as well
        axes = (axes,)
    # axes is walked twice below; a tuple also keeps one-shot iterables intact
    axes = tuple(axes)

    if not all(arr_obj.shape[a]%2==0 for a in axes):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    # NOTE(review): with an empty ``axes`` the loop below never runs and
    # res_g is returned without arr_obj having been written into it.

    # iterate over all axes
    # FIXME: this is still rather inefficient
    # (after the first axis the shift reads from and writes to res_g)
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        in_g = res_g

    if return_buffer:
        return res_g
    return res_g.get()
Esempio n. 13
0
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
        (numpy arrays are converted to complex64 if complex, else to float32)
    axes: int, iterable of ints or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the type of the returned object, see below

    Returns
    -------
        if return_buffer, returns the result as OCLArray
        else returns the result as numpy array

    Raises
    ------
    ValueError
        if arr_obj is neither an OCLArray nor a numpy array
    NotImplementedError
        if an OCLArray has a dtype that is not in DTYPE_KERNEL_NAMES, or if
        one of the selected axes has odd length

    """

    # the type check comes before anything touches ``arr_obj.ndim`` so that
    # unsupported inputs fail with the ValueError meant for them
    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        target_dtype = np.complex64 if np.iscomplexobj(arr_obj) else np.float32
        arr_obj = OCLArray.from_array(arr_obj.astype(target_dtype, copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if axes is None:
        axes = tuple(range(arr_obj.ndim))
    elif np.isscalar(axes):
        # a single int is a valid ``axes`` for np.fft.fftshift, too
        axes = (axes,)
    else:
        # materialize: axes is iterated twice (parity check and shift loop)
        axes = tuple(axes)

    if any(arr_obj.shape[a]%2 != 0 for a in axes):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    # NOTE(review): for an empty ``axes`` no shift is executed, i.e. res_g
    # is returned without arr_obj having been written into it.

    # iterate over all axes
    # FIXME: this is still rather inefficient
    # (from the second axis on, res_g is both input and output)
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        in_g = res_g

    return res_g if return_buffer else res_g.get()
Esempio n. 14
0
    def _filt(data_g, size=(3, 3, 3), cval = 0, res_g=None):
        """Run the templated ``median_3`` kernel on ``data_g``.

        The OpenCL source is rendered from ``kernels/median_filter.cl``.

        Parameters
        ----------
        data_g:
            input device array; ``data_g.dtype.type`` has to be a key of
            ``cl_buffer_datatype_dict``
        size:
            sequence indexed as ``size[0]``, ``size[1]``, ``size[2]`` for the
            Z, Y and X filter extents
        cval:
            handed to the template as ``CVAL``
        res_g:
            optional output array, created via ``OCLArray.empty_like(data_g)``
            if None

        Returns
        -------
            ``res_g``, written by the kernel

        Raises
        ------
        ValueError
            if the dtype of ``data_g`` is not in ``cl_buffer_datatype_dict``
        """
        if data_g.dtype.type not in cl_buffer_datatype_dict:
            raise ValueError("dtype %s not supported" % data_g.dtype.type)

        DTYPE = cl_buffer_datatype_dict[data_g.dtype.type]

        with open(abspath("kernels/median_filter.cl"), "r") as f:
            tpl = Template(f.read())

        rendered = tpl.render(DTYPE = DTYPE,FSIZE_X=size[2], FSIZE_Y=size[1], FSIZE_Z=size[0],CVAL = cval)

        prog = OCLProgram(src_str=rendered)

        # a single kernel writes straight into res_g, so no intermediate
        # buffer is allocated here
        if res_g is None:
            res_g = OCLArray.empty_like(data_g)

        prog.run_kernel("median_3", data_g.shape[::-1], None, data_g.data, res_g.data)
        return res_g
Esempio n. 15
0
def transfer(data):
    """transfers data

    Uploads ``data`` into an OCLArray, copies that buffer into an OCLImage,
    copies the image into a second OCLArray and returns the content of the
    latter.

    Parameters
    ----------
    data: numpy array
        float32 (image without ``num_channels`` argument) or complex64
        (float32 image created with ``num_channels=2``)

    Returns
    -------
        the content of the second OCLArray, as returned by ``get()``

    Raises
    ------
    ValueError
        if data is neither float32 nor complex64
    """

    dtype = data.dtype.type
    if dtype not in (np.float32, np.complex64):
        # without this check ``im`` would stay unbound further down and the
        # call would die with an UnboundLocalError
        raise ValueError("dtype %s not supported (only float32 and complex64)" % dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    # NOTE(review): ``[::1]`` leaves the shape unchanged -- confirm that no
    # reversal (``[::-1]``) was intended
    if dtype == np.float32:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32)
    else:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32, num_channels=2)

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
Esempio n. 16
0
def transfer(data):
    """Round-trip ``data`` through an OCLArray, an OCLImage and back.

    The array is uploaded into an OCLArray, that buffer is copied into an
    OCLImage, the image is copied into a second OCLArray, and the content of
    the second array is returned.

    Parameters
    ----------
    data: numpy array
        float32 (image without ``num_channels`` argument) or complex64
        (float32 image created with ``num_channels=2``)

    Returns
    -------
        the content of the second OCLArray, as returned by ``get()``

    Raises
    ------
    ValueError
        if data is neither float32 nor complex64
    """
    # extra keyword arguments of OCLImage.empty per supported dtype
    image_kwargs_by_dtype = {
        np.float32: {},
        np.complex64: {"num_channels": 2},
    }

    dtype = data.dtype.type
    if dtype not in image_kwargs_by_dtype:
        # fail early and explicitly; otherwise ``im`` would never be bound
        # and the call would end in an UnboundLocalError
        raise ValueError(
            "dtype %s not supported (only float32 and complex64)" % dtype)

    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    # NOTE(review): ``[::1]`` leaves the shape unchanged -- confirm that no
    # reversal (``[::-1]``) was intended
    im = OCLImage.empty(data.shape[::1], dtype=np.float32,
                        **image_kwargs_by_dtype[dtype])

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
Esempio n. 17
0
def get_gpu(N = 256, niter=100, sig = 1.):
    """Track the relative deviation over repeated fft / inverse fft calls.

    A seeded (``np.random.seed(0)``) normal random N x N array is uploaded as
    complex64.  In each of the ``niter`` iterations ``fft`` is called on it,
    followed by ``fft(..., inverse = True)`` back into the same buffer, and
    the maximal absolute difference to the initial array, divided by the
    maximal absolute value of the initial array, is recorded.

    Returns
    -------
        numpy array with one relative deviation per iteration
    """
    np.random.seed(0)
    a = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    b = a.astype(np.complex64)

    c_g = OCLArray.empty_like(b)
    b_g = OCLArray.from_array(b)
    p = fft_plan((N, N), fast_math = False)

    # normalization constant, does not change between iterations
    a_max = np.amax(np.abs(a))

    rels = []
    for _ in range(niter):
        fft(b_g, res_g = c_g, plan = p)
        fft(c_g, res_g = b_g, inverse = True, plan = p)

        rels.append(np.amax(np.abs(a - b_g.get())) / a_max)

    return np.array(rels)
Esempio n. 18
0
def get_gpu(N=256, niter=100, sig=1.):
    """Record the relative deviation after each fft / inverse fft pair.

    Starting from a seeded (``np.random.seed(0)``) normal random N x N
    complex64 array, every iteration calls ``fft`` and then
    ``fft(..., inverse=True)`` back into the same device buffer and stores
    ``max|a - b| / max|a|``, where ``a`` is the initial array and ``b`` the
    current buffer content.

    Returns
    -------
        numpy array of length ``niter`` with the relative deviations
    """
    np.random.seed(0)
    reference = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    start = (1. * reference.copy()).astype(np.complex64)

    spectrum_g = OCLArray.empty_like(start)
    signal_g = OCLArray.from_array(start)
    plan = fft_plan((N, N), fast_math=False)

    def _relative_error():
        # deviation of the current buffer content from the initial array
        deviation = np.amax(np.abs(reference - signal_g.get()))
        return deviation / np.amax(np.abs(reference))

    errors = []
    for _ in range(niter):
        fft(signal_g, res_g=spectrum_g, plan=plan)
        fft(spectrum_g, res_g=signal_g, inverse=True, plan=plan)
        errors.append(_relative_error())

    return np.array(errors)
Esempio n. 19
0
def bench(description,
          dshape,
          dtype,
          func_cpu,
          func_gpu,
          func_gpu_notransfer=None,
          niter=2):
    """Time ``func_cpu`` against ``func_gpu`` on random data and print a row.

    Parameters
    ----------
    description:
        label printed in the first column
    dshape, dtype:
        shape and dtype of the random integer (0..99) input array
    func_cpu, func_gpu:
        callables taking the numpy input array
    func_gpu_notransfer:
        optional callable taking ``(x_g, tmp_g)``, i.e. the input as OCLArray
        and a second OCLArray created via ``empty_like``; the device queue is
        finished before and after the timed loop
    niter:
        number of timed calls per function

    Returns
    -------
        ``(t_cpu, t_gpu, t_gpu_notransfer)``: mean time per call as measured
        with ``time()``; the last entry is None when no
        ``func_gpu_notransfer`` was given
    """
    x = np.random.randint(0, 100, dshape).astype(dtype)

    def _mean_runtime(func):
        # one untimed call first, then average over niter calls
        func(x)
        started = time()
        for _ in range(niter):
            func(x)
        return (time() - started) / niter

    t_cpu = _mean_runtime(func_cpu)
    t_gpu = _mean_runtime(func_gpu)

    t_gpu_notransfer = None
    if func_gpu_notransfer is not None:
        x_g = OCLArray.from_array(x)
        tmp_g = OCLArray.empty_like(x)

        # untimed first call, finished before the clock starts
        func_gpu_notransfer(x_g, tmp_g)
        get_device().queue.finish()

        started = time()
        for _ in range(niter):
            func_gpu_notransfer(x_g, tmp_g)
        get_device().queue.finish()
        t_gpu_notransfer = (time() - started) / niter

    if t_gpu_notransfer is not None:
        notransfer_column = "%d ms" % (1000 * t_gpu_notransfer)
    else:
        notransfer_column = "-"

    print("%s| %s %s | %d ms | %d ms | %s" %
          (description, dshape, type_name_dict[dtype], 1000 * t_cpu,
           1000 * t_gpu, notransfer_column))

    return t_cpu, t_gpu, t_gpu_notransfer
Esempio n. 20
0
def bilateral2(data, fSize, sigma_p, sigma_x=10.):
    """Run the dtype specific kernel from ``kernels/bilateral2.cl`` on data.

    Parameters
    ----------
    data: numpy array
        float32 (kernel ``bilat2_float``) or uint16 (kernel ``bilat2_short``);
        any other dtype is cast to float32 first
    fSize:
        passed to the kernel as int32
    sigma_p:
        passed to the kernel as float32 (last kernel argument)
    sigma_x:
        passed to the kernel as float32 (kernel argument before sigma_p)

    Returns
    -------
        the content of the result buffer, as returned by ``res.get()``
    """
    kernel_by_dtype = {np.float32: "bilat2_float", np.uint16: "bilat2_short"}

    if data.dtype.type not in kernel_by_dtype:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (data.dtype.type, kernel_by_dtype.keys()))
        data = data.astype(np.float32)

    kernel_name = kernel_by_dtype[data.dtype.type]

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral2.cl"))

    kernel_args = (img,
                   res.data,
                   np.int32(img.shape[0]),
                   np.int32(img.shape[1]),
                   np.int32(fSize),
                   np.float32(sigma_x),
                   np.float32(sigma_p))
    prog.run_kernel(kernel_name, img.shape, None, *kernel_args)

    return res.get()
Esempio n. 21
0
#     dtype = d_g.dtype.type
#
#     if not isinstance(d_g, OCLArray):
#         raise ValueError("only works on  OCLArrays")
#
#     if not dtype in dtype_kernel_name.keys():
#         raise NotImplementedError("only works for float32 or complex64")
#
#     if not np.all([n%2==0 for n in d_g.shape]):
#         raise NotImplementedError("only works on even length arrays")
#
#     prog = OCLProgram(abspath("kernels/fftshift.cl"))
#     prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None,
#                     d_g.data, d_g.data,
#                     np.int32(Nx), np.int32(Ny))

    # return d_g

if __name__ == '__main__':

    # manual check: linear ramp volume with a constant box around the center,
    # shifted along all three axes
    Nx, Ny, Nz = (256,)*3
    d = np.linspace(0,1,Nx*Ny*Nz).reshape(Nz, Ny,Nx).astype(np.float32)

    # floor division: slice bounds have to be integers, ``/`` yields floats
    # on Python 3 and makes the indexing fail
    d[Nz//2-30:Nz//2+30,Ny//2-20:Ny//2+20,Nx//2-20:Nx//2+20] = 2.

    d_g = OCLArray.from_array(d)
    out_g = OCLArray.empty_like(d)


    out = fftshift(d, axes= (0,1,2))
Esempio n. 22
0
#     dtype = d_g.dtype.type
#
#     if not isinstance(d_g, OCLArray):
#         raise ValueError("only works on  OCLArrays")
#
#     if not dtype in dtype_kernel_name.keys():
#         raise NotImplementedError("only works for float32 or complex64")
#
#     if not np.all([n%2==0 for n in d_g.shape]):
#         raise NotImplementedError("only works on even length arrays")
#
#     prog = OCLProgram(abspath("kernels/fftshift.cl"))
#     prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None,
#                     d_g.data, d_g.data,
#                     np.int32(Nx), np.int32(Ny))

    # return d_g

if __name__ == '__main__':

    # manual check: linear ramp volume with a constant box around the center,
    # shifted along all three axes
    Nx, Ny, Nz = (256,)*3
    d = np.linspace(0, 1, Nx*Ny*Nz).reshape(Nz, Ny, Nx).astype(np.float32)

    # box of 60 x 40 x 40 (z, y, x) voxels centered in the volume
    cz, cy, cx = Nz//2, Ny//2, Nx//2
    d[cz-30:cz+30, cy-20:cy+20, cx-20:cx+20] = 2.

    d_g = OCLArray.from_array(d)
    out_g = OCLArray.empty_like(d)

    out = fftshift(d, axes=(0, 1, 2))