Ejemplo n.º 1
0
def test_parseval():
    """Time repeated in-place GPU FFTs and collect energy sums for comparison.

    A random 512x512 complex64 field is uploaded and ``fft`` is applied to it
    in place 10 times (forward only; the inverse call is disabled).  The energy
    ``sum(|x|**2)`` of the device buffer is read back once, after the loop, so
    the first returned list has a single entry.  The elapsed wall time, which
    includes that read-back, is printed.

    The host copy is then sent through 10 ``np.fft.fftn``/``np.fft.ifftn``
    round trips, recording the energy after each one.

    Returns:
        tuple: ``(gpu_energies, cpu_energies)`` as Python lists.
    """
    from time import time

    side = 512
    n_rounds = 10
    host = np.random.uniform(-1, 1, (side, side)).astype(np.complex64)
    device = OCLArray.from_array(host.astype(np.complex64))

    started = time()
    for _ in range(n_rounds):
        fft(device, inplace=True)

    # single read-back after all transforms; still inside the timed region
    gpu_energies = [np.sum(np.abs(device.get())**2)]

    print(time() - started)

    cpu_energies = []
    for _ in range(n_rounds):
        host = np.fft.fftn(host).astype(np.complex64)
        host = np.fft.ifftn(host).astype(np.complex64)
        cpu_energies.append(np.sum(np.abs(host)**2))

    return gpu_energies, cpu_energies
Ejemplo n.º 2
0
def test_parseval():
    """Time repeated in-place GPU FFTs and collect energy sums for comparison.

    A random 512x512 complex64 field is uploaded and ``fft`` is applied to it
    in place ``Nz`` times (forward only; the inverse call is disabled).  The
    energy ``sum(|x|**2)`` of the device buffer is read back once after the
    loop, and the elapsed wall time (including that read-back) is printed.

    The host copy then goes through ``Nz`` ``np.fft.fftn``/``np.fft.ifftn``
    round trips, recording the energy after each.

    The loop index is printed as a progress indicator.  ``print`` is called as
    a function with a single argument so the code runs unchanged on Python 2
    and Python 3.

    Returns:
        tuple: ``(s1, s2)`` -- GPU energies (one entry) and CPU energies
        (``Nz`` entries).
    """
    from time import time

    Nx = 512
    Nz = 10
    d = np.random.uniform(-1, 1, (Nx, Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    s1, s2 = [], []
    t = time()
    for i in range(Nz):
        print(i)
        fft(d_g, inplace=True)

    # read back once, after all transforms; still inside the timed region
    s1.append(np.sum(np.abs(d_g.get())**2))

    print(time() - t)

    for i in range(Nz):
        print(i)
        d = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(d).astype(np.complex64)
        s2.append(np.sum(np.abs(d)**2))

    return s1, s2
Ejemplo n.º 3
0
def time_gpu(dshape, niter=100, fast_math=False):
    """Benchmark the in-place GPU ``fft`` for arrays of shape ``dshape``.

    An uninitialised complex64 device buffer is allocated, a plan is built
    once with ``fft_plan`` and ``fft`` is run ``niter`` times with that plan.
    The device queue is finished before the clock starts and again before it
    stops, so the measurement covers the queued transforms.

    Args:
        dshape: shape of the array to transform.
        niter: number of transforms to average over (must be non-zero).
        fast_math: forwarded to ``fft_plan``.

    Returns:
        float: mean wall time per transform in seconds.  Earlier revisions of
        this helper returned ``None``; the value is also printed in ms.
    """
    d_g = OCLArray.empty(dshape, np.complex64)
    get_device().queue.finish()
    plan = fft_plan(dshape, fast_math=fast_math)
    t = time()
    # range/print() instead of xrange/print-statement: valid on Python 2 and 3
    for _ in range(niter):
        fft(d_g, inplace=True, plan=plan)
    get_device().queue.finish()
    t = (time() - t) / niter
    print("GPU (fast_math = %s)\t%s\t\t%.2f ms" % (fast_math, dshape, 1000. * t))
    return t
Ejemplo n.º 4
0
def time_gpu(dshape, niter=100, fast_math=False):
    """Measure the mean wall time of one in-place GPU ``fft`` call.

    Args:
        dshape: shape of the complex64 buffer that is transformed.
        niter: how many transforms are queued and averaged over.
        fast_math: passed on to ``fft_plan``.

    Returns:
        float: seconds per transform (also printed in milliseconds).
    """
    buffer_g = OCLArray.empty(dshape, np.complex64)

    # drain pending work so it is not attributed to the benchmark
    get_device().queue.finish()
    prepared_plan = fft_plan(dshape, fast_math=fast_math)

    started = time()
    for _ in range(niter):
        fft(buffer_g, inplace=True, plan=prepared_plan)
    # wait for the queued transforms before stopping the clock
    get_device().queue.finish()
    per_call = (time() - started) / niter

    report = "GPU (fast_math = %s)\t%s\t\t%.2f ms" % (
        fast_math, dshape, 1000. * per_call)
    print(report)
    return per_call
Ejemplo n.º 5
0
def _deconv_rl_np_fft(data, h, Niter=10, h_is_fftshifted=False):
    """Deconvolve ``data`` with the psf (kernel) ``h`` via Lucy-Richardson.

    ``data`` and ``h`` are host arrays of identical shape.  Both are uploaded
    as complex64, the kernel and its index-reversed copy are Fourier
    transformed once, and ``Niter`` update steps are run on the device.

    Args:
        data: observed image (host array).
        h: psf, same shape as ``data``.
        Niter: number of iterations.
        h_is_fftshifted: if False, ``np.fft.fftshift`` is applied to ``h``
            first.

    Returns:
        The absolute value of the final estimate, read back to the host.

    Raises:
        ValueError: if ``data`` and ``h`` differ in shape.
    """
    if data.shape != h.shape:
        raise ValueError("data and h have to be same shape")

    if not h_is_fftshifted:
        h = np.fft.fftshift(h)

    # NOTE(review): only the first two axes are reversed, so ndim >= 2 is
    # required and any further axes are left unflipped.
    hflip = h[::-1, ::-1]

    # set up some gpu buffers
    y_g = OCLArray.from_array(data.astype(np.complex64))
    u_g = OCLArray.from_array(data.astype(np.complex64))

    tmp_g = OCLArray.empty(data.shape, np.complex64)

    hf_g = OCLArray.from_array(h.astype(np.complex64))
    hflip_f_g = OCLArray.from_array(hflip.astype(np.complex64))

    # NOTE(review): the plan is created but not handed to any call below.
    plan = fft_plan(data.shape)

    # transform psf
    fft(hf_g, inplace=True)
    fft(hflip_f_g, inplace=True)

    for i in range(Niter):
        # print() with one argument behaves the same on Python 2 and 3
        print(i)
        fft_convolve(u_g, hf_g,
                     res_g=tmp_g,
                     kernel_is_fft=True)

        # presumably leaves y_g / tmp_g in tmp_g -- confirm against the helper
        _complex_divide_inplace(y_g, tmp_g)

        fft_convolve(tmp_g, hflip_f_g,
                     inplace=True,
                     kernel_is_fft=True)

        # presumably scales u_g by tmp_g in place -- confirm against the helper
        _complex_multiply_inplace(u_g, tmp_g)

    return np.abs(u_g.get())
Ejemplo n.º 6
0
def _deconv_rl_np_fft(data, h, Niter=10, h_is_fftshifted=False):
    """Lucy-Richardson deconvolution of host array ``data`` with psf ``h``.

    Both inputs must share one shape.  Unless ``h_is_fftshifted`` is set the
    psf is passed through ``np.fft.fftshift`` first.  The psf and a copy
    reversed along its first two axes are transformed once; ``Niter`` update
    steps then run on the device and each iteration is logged.

    Returns:
        ``np.abs`` of the final estimate, as a host array.

    Raises:
        ValueError: when the shapes of ``data`` and ``h`` differ.
    """
    if data.shape != h.shape:
        raise ValueError("data and h have to be same shape")

    kernel = h if h_is_fftshifted else np.fft.fftshift(h)
    kernel_reversed = kernel[::-1, ::-1]

    # device buffers: observation, running estimate, scratch
    observed_g = OCLArray.from_array(data.astype(np.complex64))
    estimate_g = OCLArray.from_array(data.astype(np.complex64))
    scratch_g = OCLArray.empty(data.shape, np.complex64)

    kernel_f_g = OCLArray.from_array(kernel.astype(np.complex64))
    kernel_reversed_f_g = OCLArray.from_array(
        kernel_reversed.astype(np.complex64))

    # created as in the original; not passed to the calls below
    plan = fft_plan(data.shape)

    # both kernels are used in Fourier space from here on
    for psf_g in (kernel_f_g, kernel_reversed_f_g):
        fft(psf_g, inplace=True)

    for step in range(Niter):
        logger.info("Iteration: {}".format(step))

        fft_convolve(estimate_g, kernel_f_g,
                     res_g=scratch_g,
                     kernel_is_fft=True)
        _complex_divide_inplace(observed_g, scratch_g)
        fft_convolve(scratch_g, kernel_reversed_f_g,
                     inplace=True,
                     kernel_is_fft=True)
        _complex_multiply_inplace(estimate_g, scratch_g)

    result = estimate_g.get()
    return np.abs(result)
Ejemplo n.º 7
0
def _deconv_rl_gpu_fft(data_g, h_g, Niter=10):
    """Lucy-Richardson style deconvolution on device buffers via fft_convolve.

    Args:
        data_g: device array holding the observed data.
        h_g: device array holding the psf, same shape as ``data_g``.
            NOTE: ``h_g`` is Fourier transformed in place, so the caller's
            buffer holds the transformed kernel afterwards.
        Niter: number of iterations.

    Returns:
        The device array with the final estimate (complex64).

    Raises:
        ValueError: if ``data_g`` and ``h_g`` differ in shape.
    """
    if data_g.shape != h_g.shape:
        raise ValueError("data and h have to be same shape")

    # set up some gpu buffers
    u_g = OCLArray.empty(data_g.shape, np.complex64)

    u_g.copy_buffer(data_g)

    tmp_g = OCLArray.empty(data_g.shape, np.complex64)

    # fix this: the reversed kernel is built through a host round trip, and
    # only the first two axes are reversed
    hflip_g = OCLArray.from_array((h_g.get()[::-1, ::-1]).copy())

    # NOTE(review): the plan is created but not handed to any call below.
    plan = fft_plan(data_g.shape)

    # transform psf
    fft(h_g, inplace=True)
    fft(hflip_g, inplace=True)

    for i in range(Niter):
        # print() with one argument behaves the same on Python 2 and 3
        print(i)
        fft_convolve(u_g, h_g,
                     res_g=tmp_g,
                     kernel_is_fft=True)

        # presumably leaves data_g / tmp_g in tmp_g -- confirm against helper
        _complex_divide_inplace(data_g, tmp_g)

        fft_convolve(tmp_g, hflip_g,
                     inplace=True,
                     kernel_is_fft=True)

        # presumably scales u_g by tmp_g in place -- confirm against helper
        _complex_multiply_inplace(u_g, tmp_g)

    return u_g
Ejemplo n.º 8
0
def get_gpu(N=256, niter=100, sig=1.):
    """Track the relative error of repeated GPU fft / inverse-fft round trips.

    A seeded (seed 0) normal random ``N x N`` field is uploaded as complex64.
    Each of the ``niter`` iterations transforms it forward into a second
    buffer and back again, then records the maximum absolute deviation from
    the original divided by the original's maximum magnitude.

    Returns:
        np.ndarray: one relative error per iteration.
    """
    np.random.seed(0)
    reference = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    start_values = (1. * reference.copy()).astype(np.complex64)

    spectrum_g = OCLArray.empty_like(start_values)
    signal_g = OCLArray.from_array(start_values)
    plan = fft_plan((N, N), fast_math=False)

    errors = []
    for _ in range(niter):
        # forward into the scratch buffer, inverse back into the signal buffer
        fft(signal_g, res_g=spectrum_g, plan=plan)
        fft(spectrum_g, res_g=signal_g, inverse=True, plan=plan)

        deviation = np.amax(np.abs(reference - signal_g.get()))
        errors.append(deviation / np.amax(np.abs(reference)))

    return np.array(errors)
Ejemplo n.º 9
0
def get_gpu(N=256, niter=100, sig=1.):
    """Return the per-iteration relative error of GPU fft round trips.

    The random generator is seeded with 0, an ``N x N`` normal field with
    standard deviation ``sig`` is drawn and uploaded, and ``niter`` forward
    and inverse transforms are chained through two device buffers using one
    shared plan (``fast_math=False``).
    """
    np.random.seed(0)
    a = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    b = (1. * a.copy()).astype(np.complex64)

    c_g = OCLArray.empty_like(b)
    b_g = OCLArray.from_array(b)
    p = fft_plan((N, N), fast_math=False)

    def _round_trip_error():
        # one forward/inverse pass, then compare the device state with `a`
        fft(b_g, res_g=c_g, plan=p)
        fft(c_g, res_g=b_g, inverse=True, plan=p)
        return np.amax(np.abs(a - b_g.get())) / np.amax(np.abs(a))

    rels = [_round_trip_error() for _ in range(niter)]
    return np.array(rels)
Ejemplo n.º 10
0
def test_parseval():
    """Record the signal energy over 100 fft/ifft round trips, GPU vs. numpy.

    A random 512x512 complex64 field is round-tripped in place on the device
    (``fast_math=False``) and, separately, on the host with numpy.  After
    every round trip ``sum(|x|**2)`` is appended to the respective list.  The
    iteration index is printed as progress output.

    Returns:
        tuple: ``(gpu_energies, cpu_energies)``, two lists of 100 values.
    """
    side, rounds = 512, 100
    host = np.random.uniform(-1, 1, (side, side)).astype(np.complex64)
    device = OCLArray.from_array(host.astype(np.complex64))

    gpu_energies = []
    for k in range(rounds):
        print(k)
        fft(device, inplace=True, fast_math=False)
        fft(device, inverse=True, inplace=True, fast_math=False)
        gpu_energies.append(np.sum(np.abs(device.get())**2))

    cpu_energies = []
    for k in range(rounds):
        print(k)
        host = np.fft.fftn(host).astype(np.complex64)
        host = np.fft.ifftn(host).astype(np.complex64)
        cpu_energies.append(np.sum(np.abs(host)**2))

    return gpu_energies, cpu_energies
Ejemplo n.º 11
0
def test_parseval():
    """Compare energy drift of repeated fft round trips on GPU and in numpy.

    Runs 100 forward/inverse transform pairs on a random 512x512 complex64
    field, once in place on the device and once on the host, storing the
    energy ``sum(|x|**2)`` after each pair and printing the loop index.

    Returns:
        tuple: ``(s1, s2)`` with the GPU and the numpy energies.
    """
    def energy(arr):
        # total energy of a complex field
        return np.sum(np.abs(arr)**2)

    Nx = 512
    Nz = 100
    d = np.random.uniform(-1, 1, (Nx, Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    s1 = []
    for i in range(Nz):
        print(i)
        fft(d_g, inplace=True, fast_math=False)
        fft(d_g, inverse=True, inplace=True, fast_math=False)
        s1.append(energy(d_g.get()))

    s2 = []
    for i in range(Nz):
        print(i)
        d = np.fft.ifftn(np.fft.fftn(d).astype(np.complex64)).astype(np.complex64)
        s2.append(energy(d))

    return s1, s2
Ejemplo n.º 12
0
    def _propagate_to_img(self, u0=None, im=None, free_prop=False, **kwargs):
        """Propagate a start field plane by plane and store each plane in ``im``.

        Args:
            u0: start field; ``self.u0_plane()`` is used when None.
            im: destination; ``self.result_im`` is used when None.
            free_prop: skip the ``_mult_dn`` step.  Also forced on when
                ``self.dn`` is None.
            **kwargs: accepted and ignored.

        Returns:
            The destination ``im``.
        """
        skip_dn = free_prop or (self.dn is None)

        # kept from the original; not used further down
        res_type = Bpm3d._real_type

        start_field = self.u0_plane() if u0 is None else u0

        _, _, n_planes = self.shape

        target = self.result_im if im is None else im

        self._buf_plane.write_array(start_field)

        # plane 0 is the start field itself
        self._img_xy.copy_buffer(self._buf_plane)
        self._copy_down_img_to_img(self._img_xy, target, 0)

        for plane in range(n_planes - 1):
            for substep in range(self.simul_z):
                # forward fft, multiply with self._buf_H, inverse fft
                fft(self._buf_plane, inplace=True, plan=self._plan)
                self._mult_complex(self._buf_plane, self._buf_H)
                fft(self._buf_plane,
                    inplace=True,
                    inverse=True,
                    plan=self._plan)
                if skip_dn:
                    continue
                z_position = (plane + (substep + 1.) / self.simul_z)
                self._mult_dn(self._buf_plane, z_position, self.n0)

            self._img_xy.copy_buffer(self._buf_plane)
            self._copy_down_img_to_img(self._img_xy, target, plane + 1)

        return target
Ejemplo n.º 13
0
    def apply(self, data):
        """Return the normalised, fft-shifted magnitude spectrum of ``data``.

        ``data`` is cast to complex64 and padded with
        ``gputools.pad_to_power2`` (mode "wrap"), transformed with
        ``gputools.fft``, scaled by ``1/sqrt(size)`` of the padded array,
        shifted with ``np.fft.fftshift`` and passed through
        ``gputools.pad_to_shape`` with the input's shape.  When ``self.log``
        is truthy, ``log2(0.001 + spectrum)`` is returned instead.
        """
        target_shape = data.shape

        padded = gputools.pad_to_power2(data.astype(np.complex64), mode="wrap")

        scale = 1. / np.sqrt(padded.size)
        spectrum = scale * np.fft.fftshift(abs(gputools.fft(padded)))

        spectrum = gputools.pad_to_shape(spectrum, target_shape)

        if not self.log:
            return spectrum
        # the small offset keeps log2 finite where the spectrum is zero
        return np.log2(0.001 + spectrum)
Ejemplo n.º 14
0
    def apply(self, data):
        """Compute the (optionally log-scaled) magnitude spectrum of ``data``.

        Steps: cast to complex64, ``gputools.pad_to_power2`` with wrap mode,
        ``gputools.fft``, absolute value, ``np.fft.fftshift``, scaling by
        ``1/sqrt(n)`` where ``n`` is the padded size, and finally
        ``gputools.pad_to_shape`` with the original shape.

        Returns:
            ``np.log2(0.001 + result)`` if ``self.log`` is truthy, else the
            result itself.
        """
        dshape = data.shape

        as_complex = data.astype(np.complex64)
        res = gputools.pad_to_power2(as_complex, mode="wrap")

        normalisation = 1. / np.sqrt(res.size)
        res = normalisation * np.fft.fftshift(abs(gputools.fft(res)))

        res = gputools.pad_to_shape(res, dshape)

        return np.log2(0.001 + res) if self.log else res
Ejemplo n.º 15
0
    def __init__(self,
                 psf: np.ndarray,
                 psf_is_fftshifted: bool = False,
                 n_iter=10):
        """Prepare device buffers for deconvolution with a fixed psf.

        Stores the psf shape and iteration count, uploads the (fft-shifted)
        psf and an index-reversed copy as complex64, builds an fft plan for
        the shape, transforms both kernels in place and allocates a scratch
        buffer of the same shape.

        Args:
            psf: point spread function.
            psf_is_fftshifted: when False, ``np.fft.fftshift`` is applied.
            n_iter: iteration count stored on the instance.
        """
        self.shape = psf.shape
        self.n_iter = n_iter

        shifted = psf if psf_is_fftshifted else np.fft.fftshift(psf)

        # NOTE(review): only the first two axes are reversed here.  For a 3D
        # psf the last axis stays unflipped -- confirm whether that is
        # intended.
        reversed_psf = shifted[::-1, ::-1]

        self.psf_g = OCLArray.from_array(shifted.astype(np.complex64))
        self.psfflip_f_g = OCLArray.from_array(
            reversed_psf.astype(np.complex64))
        self.plan = fft_plan(self.shape)

        # both kernels are kept in Fourier space
        for kernel_g in (self.psf_g, self.psfflip_f_g):
            fft(kernel_g, inplace=True)

        # scratch buffer
        self.tmp_g = OCLArray.empty(shifted.shape, np.complex64)
Ejemplo n.º 16
0
def _deconv_rl_gpu_fft(data_g, h_g, Niter=10):
    """Iterative deconvolution on device arrays built on ``fft_convolve``.

    ``h_g`` is Fourier transformed in place (the caller's buffer is
    modified).  A copy of the kernel reversed along its first two axes is
    created through a host round trip.  Each of the ``Niter`` iterations is
    logged.

    Returns:
        The complex64 device array holding the final estimate.

    Raises:
        ValueError: if the two inputs differ in shape.
    """
    if data_g.shape != h_g.shape:
        raise ValueError("data and h have to be same shape")

    shape = data_g.shape

    # running estimate starts as a copy of the data
    estimate_g = OCLArray.empty(shape, np.complex64)
    estimate_g.copy_buffer(data_g)

    scratch_g = OCLArray.empty(shape, np.complex64)

    # fix this: the reversed kernel goes through host memory
    reversed_host = h_g.get()[::-1, ::-1].copy()
    hflip_g = OCLArray.from_array(reversed_host)

    # created as in the original; not passed to the calls below
    plan = fft_plan(shape)

    for kernel_g in (h_g, hflip_g):
        fft(kernel_g, inplace=True)

    for step in range(Niter):
        logger.info("Iteration: {}".format(step))

        fft_convolve(estimate_g, h_g, res_g=scratch_g, kernel_is_fft=True)
        _complex_divide_inplace(data_g, scratch_g)
        fft_convolve(scratch_g, hflip_g, inplace=True, kernel_is_fft=True)
        _complex_multiply_inplace(estimate_g, scratch_g)

    return estimate_g
Ejemplo n.º 17
0
    def _propagate_to_img(self, u0=None, im=None, free_prop=False, **kwargs):
        """Run the plane-wise propagation and write every plane into ``im``.

        Defaults: ``u0`` falls back to ``self.u0_plane()`` and ``im`` to
        ``self.result_im``.  ``free_prop`` is forced on when ``self.dn`` is
        None, in which case ``_mult_dn`` is never applied.  Extra keyword
        arguments are ignored.

        Returns:
            ``im`` (the destination that was written to).
        """
        free_prop = free_prop or (self.dn is None)

        # kept from the original; not used further down
        res_type = Bpm3d._real_type

        if u0 is None:
            u0 = self.u0_plane()

        n_planes = self.shape[2] if len(self.shape) == 3 else None
        Nx, Ny, Nz = self.shape

        if im is None:
            im = self.result_im

        def store_plane(index):
            # current field -> intermediate image -> slot `index` of im
            self._img_xy.copy_buffer(self._buf_plane)
            self._copy_down_img_to_img(self._img_xy, im, index)

        self._buf_plane.write_array(u0)

        # copy the first plane
        store_plane(0)

        for i in range(Nz - 1):
            for j in range(self.simul_z):
                fft(self._buf_plane, inplace=True, plan=self._plan)
                self._mult_complex(self._buf_plane, self._buf_H)
                fft(self._buf_plane, inplace=True, inverse=True,
                    plan=self._plan)
                if not free_prop:
                    self._mult_dn(self._buf_plane,
                                  (i + (j + 1.) / self.simul_z),
                                  self.n0)

            store_plane(i + 1)

        return im
Ejemplo n.º 18
0
    def convolve(self, convolved_glyph_image):
        """Filter the magnitude of a glyph image with the two filter banks.

        Per the original author's note, the input is a complex64 image of
        shape <s, o, h, w> and the two outputs are a distance and a fullness
        image for each.

        The magnitude is zero padded along the last two axes (by
        ``ceil(hh)``/``int(hh)`` rows and ``ceil(ww)``/``int(ww)`` columns),
        tiled twice along axis 1, transformed on the GPU over the last two
        axes, multiplied with ``self.filter1_fft`` and ``self.filter2_fft``,
        transformed back and cropped to ``box_height`` x ``box_width``.

        Fix relative to the earlier revision: the final ``np.fft.fftshift``
        is restricted to the two spatial axes.  It used to run over all axes,
        which also rolled the scale and orientation axes.  The forward
        ``ifftshift`` and the former CPU reference implementation both shift
        only the spatial axes.

        Returns:
            tuple: ``(filtered1, filtered2)``, the cropped complex results
            for the two filter banks.
        """
        spatial_axes = (-2, -1)
        pad_top, pad_bottom = int(np.ceil(self.hh)), int(self.hh)
        pad_left, pad_right = int(np.ceil(self.ww)), int(self.ww)

        abs_input = np.abs(convolved_glyph_image)

        padded_input = np.pad(
            abs_input,
            [[0, 0], [0, 0], [pad_top, pad_bottom], [pad_left, pad_right]])
        # double up the orientations
        padded_input = np.tile(padded_input, [1, 2, 1, 1])

        # NOTE (original author): this ran out of memory although the
        # estimate per glyph was only about 108 megabytes.

        # Shift once on the host; each filter bank needs its own device copy
        # because the transforms and products below run in place.
        shifted_input = np.fft.ifftshift(
            padded_input, spatial_axes).astype(np.complex64)
        padded_input1_ocl = gputools.OCLArray.from_array(shifted_input)
        padded_input2_ocl = gputools.OCLArray.from_array(shifted_input)

        # TODO MAKE GPU PLAN FIRST, and reuse it
        # TODO Make the broadcasting more efficient
        # TODO Reuse the filter bank across multiple letters.
        gputools.fft(padded_input1_ocl, axes=spatial_axes, inplace=True)
        gputools.fft(padded_input2_ocl, axes=spatial_axes, inplace=True)

        filter1_fft_ocl = gputools.OCLArray.from_array(self.filter1_fft)
        filter2_fft_ocl = gputools.OCLArray.from_array(self.filter2_fft)

        # pointwise product in Fourier space, in place
        padded_input1_ocl *= filter1_fft_ocl
        padded_input2_ocl *= filter2_fft_ocl

        gputools.fft(padded_input1_ocl,
                     axes=spatial_axes,
                     inplace=True,
                     inverse=True)
        gputools.fft(padded_input2_ocl,
                     axes=spatial_axes,
                     inplace=True,
                     inverse=True)

        # undo the ifftshift on the spatial axes only
        filtered1 = np.fft.fftshift(padded_input1_ocl.get(),
                                    axes=spatial_axes)
        filtered2 = np.fft.fftshift(padded_input2_ocl.get(),
                                    axes=spatial_axes)

        # crop away the padding added above
        rows = slice(pad_top, int(self.box_height + np.ceil(self.hh)))
        cols = slice(pad_left, int(self.box_width + np.ceil(self.ww)))

        return (
            filtered1[:, :, rows, cols],
            filtered2[:, :, rows, cols],
        )
Ejemplo n.º 19
0
    def _propagate_core(self,
                        u0=None,
                        dn_ind_start=0,
                        dn_ind_end=1,
                        dn_ind_offset=0,
                        return_comp="field",
                        return_shape="full",
                        free_prop=False,
                        dn_mean_method="none",
                        **kwargs):
        """
        The core propagation method. The refractive index dn is
        assumed to be already residing in gpu memory, and the initial
        field is taken from self._buf_plane.

        NOTE: u0 is currently ignored -- the code that would write it into
        self._buf_plane is commented out below.

        The field is advanced over Nz = dn_ind_end - dn_ind_start planes;
        every plane step consists of self.simul_z sub-steps of
        fft -> multiply by self._buf_H -> inverse fft -> (optionally) the
        dn multiplication.

        kwargs:
            return_comp in ["field", "intens"]
                selects the dtype of the result buffer
            return_shape in ["last", "full"]
                "full" returns all Nz planes, "last" only self._buf_plane
            free_prop = False | True
                skip the dn multiplication; forced to True if self.dn is None
            dn_mean_method = "none", "global", "local"
                how the propagator is refreshed between planes

        Raises:
            ValueError: for an unknown return_comp or return_shape.
            AssertionError: if dn_ind_start is negative.
        """

        # debugging output
        print("mean method: ", dn_mean_method)

        free_prop = free_prop or (self.dn is None)

        if return_comp=="field":
            res_type = Bpm3d._complex_type
        elif return_comp=="intens":
            res_type = Bpm3d._real_type
        else:
            raise ValueError(return_comp)

        if not return_shape in ["last", "full"]:
            raise ValueError()

        Nx, Ny, _ = self.shape
        Nz = dn_ind_end-dn_ind_start

        assert dn_ind_start>=0

        # if not u0 is None:
        #     print "huhu"
        #     self._buf_plane.write_array(u0.astype(np.complex64,copy=False))



        # the result volume only exists for return_shape=="full"
        if return_shape=="full":
            u = OCLArray.empty((Nz, Ny, Nx), dtype=res_type)

        # copy the first plane
        # NOTE(review): this branch tests only self._is_subsampled, while the
        # per-plane copy in the loop also tests self.simul_xy!=self.shape[:2]
        if return_shape=="full":
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # current mean dn used by the "global" method and by _mult_dn
        dn0 = 0

        # "local": seed the one-element running sums on the device
        if dn_mean_method=="local" and not self.dn is None and not free_prop:
            self.intens_sum_g = OCLArray.from_array(np.ones(1,dtype=Bpm3d._real_type))
            self.intens_dn_sum_g = OCLArray.from_array((self.dn_mean[dn_ind_start+dn_ind_offset]*
                                                       np.ones(1)).astype(dtype=Bpm3d._real_type))
            #self._fill_propagator_buf(self.n0, self.intens_dn_sum_g, self.intens_sum_g)



        # start with the propagator for the background index n0
        self._fill_propagator(self.n0)

        for i in range(Nz-1):

            for j in range(self.simul_z):


                # to Fourier space
                fft(self._buf_plane, inplace=True, plan=self._plan)

                # apply the propagator held in self._buf_H
                self._mult_complex(self._buf_plane, self._buf_H)



                # back to real space
                fft(self._buf_plane, inplace=True, inverse=True, plan=self._plan)


                if not free_prop:
                    #FIXME here we make  a slight error for the first time point, as we
                    #FIXME set dn0 first and the compute the new propagator
                    if dn_mean_method=="local":
                        self._mult_dn_local(self._buf_plane, (i+dn_ind_start+(j+1.)/self.simul_z),
                                            self.intens_sum_g,
                                            self.intens_dn_sum_g,
                                            self.intens_g,
                                            self.intens_dn_g)


                    else:
                        self._mult_dn(self._buf_plane, (i+dn_ind_start+(j+1.)/self.simul_z), dn0)



            # refresh the propagator for the next plane
            if not self.dn is None and not free_prop:
                if dn_mean_method=="local":
                    # reduce the per-pixel buffers into the one-element sums
                    self._kernel_reduction(self.intens_g, self.intens_dn_g,
                                           outs=[self.intens_sum_g, self.intens_dn_sum_g])



                    self._fill_propagator_buf(self.n0, self.intens_dn_sum_g, self.intens_sum_g)


                    #print(self.intens_dn_sum_g.get(), self.n0)
                    #print("mean dn: ",self.intens_dn_sum_g.get()/self.intens_sum_g.get())

                elif dn_mean_method=="global":
                    # only rebuild the propagator when the plane mean changes
                    if self.dn_mean[i+dn_ind_start+dn_ind_offset]!=dn0:
                        dn0 = self.dn_mean[i+dn_ind_start+dn_ind_offset]
                        self._fill_propagator(self.n0+dn0)

            # store plane i+1; the last argument is a flat element offset
            if return_shape=="full":
                if self._is_subsampled and self.simul_xy!=self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i+1)*(Nx*Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u, (i+1)*(Nx*Ny))

        if return_shape=="full":
            return u.get()
        else:
            return self._buf_plane.get()
Ejemplo n.º 20
0
def convolve_spatial3(im, hs,
                      mode="constant",
                      plan=None,
                      return_plan=False,
                      pad_factor=2):
    """
    spatial varying convolution of an 3d image with a 3d grid of psfs

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    the input image im is subdivided into (Gx,Gy,Gz) blocks
    hs[k,j,i] is the psf at the center of each block (i,j,k)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0
    Nz % Gz == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan: optional fft plan for the patch shape; created when None
    return_plan: if True, return (result, plan) instead of just the result
    pad_factor: every patch is _next_power_of_2(pad_factor * block size)

    raises ValueError for wrong input dimensionality and
    NotImplementedError if the image shape is not divisible by the grid.

    Block sizes and patch offsets use floor division (//) so they stay
    integers on Python 3 as well; the debug output uses print() calls.
    """
    if im.ndim != 3 or hs.ndim != 6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:3])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s !"%(str(hs.shape[:3])))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP",
                "wrap": "CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])

    # the size of each block within the grid (exact, see divisibility check)
    Nblocks = [n // g for n, g in zip(Ns, Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor * nb) for nb in Nblocks])

    print(hs.shape)
    hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start coordinate of every block along each axis
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    print(Nblocks)
    # this loops over all i,j,k
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3", Npatchs[::-1], None,
                        im_g,
                        # patch origin: block centre minus half a patch
                        np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
                        np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
                        np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2),
                        patches_g.data,
                        # flat offset of patch (k, j, i) inside patches_g
                        np.int32(i * np.prod(Npatchs) +
                                 j * Gs[2] * np.prod(Npatchs) +
                                 k * Gs[2] * Gs[1] * np.prod(Npatchs)))

    print("%s %s" % (patches_g.shape, h_g.shape))

    # convolution
    fft(patches_g, inplace=True, batch=np.prod(Gs), plan=plan)
    fft(h_g, inplace=True, batch=np.prod(Gs), plan=plan)
    prog.run_kernel("mult_inplace", (np.prod(Npatchs) * np.prod(Gs),), None,
                    patches_g.data, h_g.data)

    fft(patches_g,
        inplace=True,
        inverse=True,
        batch=np.prod(Gs),
        plan=plan)

    # accumulate
    res_g = OCLArray.zeros(im.shape, np.float32)

    for k, j, i in product(*[range(g + 1) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]), np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 21
0
def fft_gpu(d_g):
    """Apply ``gputools.fft`` to ``d_g`` in place and return its result."""
    transformed = gputools.fft(d_g, inplace=True)
    return transformed
Ejemplo n.º 22
0
    def _propagate(self, u0=None, offset=0,
                   return_comp="field",
                   return_shape="full",
                   free_prop=False,
                   slow_mean=False,
                   **kwargs):
        """
        Propagate the field u0 from plane `offset` to the last plane.

        u0 defaults to self.u0_plane() and is cast to complex64 before it is
        written into self._buf_plane. Every plane step consists of
        self.simul_z sub-steps of fft -> multiply by self._buf_H ->
        inverse fft -> (optionally) the dn multiplication.

        kwargs:
            return_comp in ["field", "intens"]
                selects the dtype of the result buffer
            return_shape in ["last", "full"]
                "full" returns the planes offset..Nz-1, "last" only
                self._buf_plane
            free_prop = False | True
                skip the dn handling; forced to True if self.dn is None
            slow_mean = False | True
                recompute the mean dn on the host for every plane instead of
                using self.dn_mean; not implemented for return_shape "full"

        Raises:
            ValueError: for an unknown return_comp or return_shape.
            AssertionError: if offset is outside [0, Nz-1).
            NotImplementedError: for slow_mean with return_shape "full".
        """

        free_prop = free_prop or (self.dn is None)

        if return_comp=="field":
            res_type = Bpm3d._complex_type
        elif return_comp=="intens":
            res_type = Bpm3d._real_type
        else:
            raise ValueError(return_comp)

        if not return_shape in ["last", "full"]:
            raise ValueError()

        if u0 is None:
            u0 = self.u0_plane()

        u0 = u0.astype(np.complex64, copy=False)

        Nx, Ny, Nz = self.shape

        assert offset>=0 and offset<(Nz-1)

        # the result volume only exists for return_shape=="full"
        if return_shape=="full":
            u = OCLArray.empty((Nz-offset, Ny, Nx), dtype=res_type)

        self._buf_plane.write_array(u0)

        # copy the first plane
        # NOTE(review): this branch tests only self._is_subsampled, while the
        # per-plane copy in the loop also tests self.simul_xy!=self.shape[:2]
        if return_shape=="full":
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # mean dn the current propagator was built for
        dn0 = 0

        for i in range(Nz-1-offset):
            # refresh the propagator before stepping to the next plane
            if not self.dn is None and not free_prop:
                if slow_mean:
                    if return_shape=="full":
                        raise NotImplementedError()
                    else:
                        # read the current plane back to the host
                        tmp = OCLArray.empty((1, Ny, Nx), dtype=res_type)
                        if self._is_subsampled:
                            self._img_xy.copy_buffer(self._buf_plane)
                            self._copy_down_img(self._img_xy, tmp, 0)
                        else:
                            self._copy_down_buf(self._buf_plane, tmp, 0)

                        # NOTE(review): indexes self.dn[i], whereas the other
                        # branch uses i+offset -- confirm which is intended
                        dn0 = np.sum(np.abs(self.dn[i])*tmp.get())/np.sum(np.abs(self.dn[i])+1.e-10)

                        self._fill_propagator(self.n0+dn0)
                else:
                    # only rebuild the propagator when the plane mean changes
                    if self.dn_mean[i+offset]!=dn0:
                        dn0 = self.dn_mean[i+offset]
                        self._fill_propagator(self.n0+dn0)

            for j in range(self.simul_z):

                fft(self._buf_plane, inplace=True, plan=self._plan)
                self._mult_complex(self._buf_plane, self._buf_H)
                fft(self._buf_plane, inplace=True, inverse=True, plan=self._plan)
                if not free_prop:
                    self._mult_dn(self._buf_plane, (i+offset+(j+1.)/self.simul_z), dn0)

            # store plane i+1; the last argument is a flat element offset
            if return_shape=="full":
                if self._is_subsampled and self.simul_xy!=self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i+1)*(Nx*Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u, (i+1)*(Nx*Ny))

        if return_shape=="full":
            return u.get()
        else:
            return self._buf_plane.get()
Ejemplo n.º 23
0
def _single_batched(d, axes):
    """Transform ``d`` along ``axes`` with numpy and with ``fft``.

    Returns:
        tuple: ``(numpy_result, fft_result)``; the numpy transform is
        computed first.
    """
    return np.fft.fftn(d, axes=axes), fft(d, axes=axes)
Ejemplo n.º 24
0
#init_device(id_platform = 0, id_device = 1)

def report_str(success):
    """Return a coloured status tag: blue "[OK]" if ``success`` else red "[FAIL]"."""
    if success:
        return colored("\t[OK]", "blue")
    return colored("\t[FAIL]", "red")

def _compare_fft_np(d):
    """Return ``(np.fft.fftn(d), fft(d, fast_math=True))`` for comparison."""
    reference = np.fft.fftn(d)
    candidate = fft(d, fast_math=True)
    return reference, candidate

def test_compare():
    """Check ``fft`` against numpy for all 1D-3D shapes built from 32/64/128.

    For every shape a random complex64 array is transformed by both
    implementations and compared with ``assert_allclose``.  Note that the
    tolerances in use (``rtol=1.0``, ``atol=0.1``) are very loose.
    """
    sizes = [32, 64, 128]
    shapes = (shape
              for ndim in [1, 2, 3]
              for shape in product(sizes, repeat=ndim))

    for dshape in shapes:
        d = np.random.uniform(-1, 1, dshape).astype(np.complex64)
        expected, actual = _compare_fft_np(d)
        print("validating fft of size", d.shape)
        npt.assert_allclose(expected, actual, rtol=1.e-0, atol=1.e-1)





if __name__ == '__main__':
    # test_compare()
    #
    # Ad-hoc check: transform one seeded random 128x128 field with numpy and
    # with fft, leaving both results in res1/res2 for inspection.
    np.random.seed(0)
    dshape = (128, 128)
    d = np.random.uniform(-1, 1, dshape).astype(np.complex64)
    res1, res2 = np.fft.fftn(d), fft(d)
Ejemplo n.º 25
0
def _convolve_spatial2(im,
                       hs,
                       mode="constant",
                       grid_dim=None,
                       pad_factor=2,
                       plan=None,
                       return_plan=False):
    """
    spatial varying convolution of an 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0
    (this function does not check it; block sizes are floor divided)

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    grid_dim: if given, it is used as (Gy, Gx) and the psf patches are cut
        out of hs on the device with the "fill_psf_grid2" kernel; otherwise
        the grid is hs.shape[:2] and hs is padded and fft-shifted on the host
        (presumably hs is then a single image of im's size -- TODO confirm)
    pad_factor: every patch is _next_power_of_2(pad_factor * block size)
    plan: optional fft plan for shape (Gy, Gx, Npatch_y, Npatch_x) over the
        last two axes; created when None
    return_plan: if True, return (result, plan) instead of just the result
    """

    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]

    # an unknown mode raises KeyError when the build options are assembled
    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs

    # the size of each block within the grid
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(
        pad_factor * Nblock_x), _next_power_of_2(pad_factor * Nblock_y)

    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Gy, Gx, Npatch_y, Npatch_x), axes=(-2, -1))

    # start coordinate of every block along x and y
    x0s = Nblock_x * np.arange(Gx)
    y0s = Nblock_y * np.arange(Gy)

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    #prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros((Gy, Gx, Npatch_y, Npatch_x), np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy=False))
        for i, _x0 in enumerate(x0s):
            for j, _y0 in enumerate(y0s):
                # the last argument is the flat offset of patch (j, i) in h_g
                prog.run_kernel(
                    "fill_psf_grid2", (Nblock_x, Nblock_y), None, tmp_g.data,
                    np.int32(Nx),
                    np.int32(i * Nblock_x), np.int32(j * Nblock_y), h_g.data,
                    np.int32(Npatch_x), np.int32(Npatch_y),
                    np.int32(-Nblock_x // 2 + Npatch_x // 2),
                    np.int32(-Nblock_y // 2 + Npatch_y // 2),
                    np.int32(i * Npatch_x * Npatch_y +
                             j * Gx * Npatch_x * Npatch_y))
    else:
        hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)),
                             axes=(2, 3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    #prepare image
    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            # patch origin: block centre minus half a patch; the last
            # argument is the flat offset of patch (j, i) in patches_g
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None, im_g,
                np.int32(_x0 + Nblock_x // 2 - Npatch_x // 2),
                np.int32(_y0 + Nblock_y // 2 - Npatch_y // 2), patches_g.data,
                np.int32(i * Npatch_x * Npatch_y +
                         j * Gx * Npatch_x * Npatch_y))

    #return np.abs(patches_g.get())
    # convolution
    # transform patches and psfs, multiply them, transform back
    fft(patches_g, inplace=True, plan=plan)
    fft(h_g, inplace=True, plan=plan)
    prog.run_kernel("mult_inplace", (Npatch_x * Npatch_y * Gx * Gy, ), None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse=True, plan=plan)

    logger.debug("Nblock_x: {}, Npatch_x: {}".format(Nblock_x, Npatch_x))
    #return np.abs(patches_g.get())
    #accumulate
    # NOTE(review): res_g is allocated with empty(), not zeros() -- presumably
    # "interpolate2" writes every output pixel; confirm against the kernel
    res_g = OCLArray.empty(im.shape, np.float32)

    # one kernel launch per grid node, including the outer border nodes
    for j in range(Gy + 1):
        for i in range(Gx + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y),
                            None, patches_g.data, res_g.data, np.int32(i),
                            np.int32(j), np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 26
0
def convolve_spatial2(im, hs,
                      mode = "constant",
                      plan = None,
                      return_plan = False):
    """
    Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx,Hy,Hx)

    The input image im is subdivided into (Gy,Gx) blocks and
    hs[j,i] is the psf at the center of block (i,j).

    As of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan is an optional fft plan for patches of shape (Npatch_y,Npatch_x);
    if None, one is created with fft_plan.

    Returns the convolved image as a float32 array of shape im.shape, or the
    tuple (result, plan) if return_plan is True.

    Raises ValueError if im is not 2d or hs is not 4d, NotImplementedError if
    the image shape is not divisible by the grid shape and KeyError for an
    unknown mode.
    """

    if im.ndim != 2 or hs.ndim != 4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:2])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s shape mismatch"%(str(hs.shape[:2])))

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]

    # the size of each block within the grid; floor division keeps the sizes
    # integral on Python 3 as well (kernel sizes and offsets must be ints)
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(3*Nblock_x), _next_power_of_2(3*Nblock_y)

    # debug output, written so that Python 2 and 3 print the same text
    print("%s %s" % (Nblock_x, Npatch_x))

    # pad every psf to the patch size and move its center to the origin
    hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)), axes=(2, 3))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y, Npatch_x))

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # upper left corner of every block
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)

    print(x0s)

    # cut the patch around every block center out of the image
    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2", (Npatch_x, Npatch_y), None,
                    im_g,
                    np.int32(_x0+Nblock_x//2-Npatch_x//2),
                    np.int32(_y0+Nblock_y//2-Npatch_y//2),
                    patches_g.data,
                    np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g, inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace", (Npatch_x*Npatch_y*Gx*Gy,), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse = True, batch = Gx*Gy, plan = plan)

    # accumulate the convolved patches into the result image
    res_g = OCLArray.empty(im.shape, np.float32)

    for i in range(Gx+1):
        for j in range(Gy+1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y), None,
                            patches_g.data, res_g.data,
                            np.int32(i), np.int32(j),
                            np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 27
0
def _bpm_3d_image(size,
            units,
            lam = .5,
            u0 = None, dn = None,
            subsample = 1,
            n0 = 1.,
            return_scattering = False,
            return_g = False,
            return_full_last = False,
            use_fresnel_approx = False,
            ):
    """
    simulates the propagation of monochromativ wave of wavelength lam with initial conditions u0 along z in a media filled with dn

    size     -    the dimension of the image to be calulcated  in pixels (Nx,Ny,Nz)
    units    -    the unit lengths of each dimensions in microns
    lam      -    the wavelength
    u0       -    the initial field distribution, if u0 = None an incident  plane wave is assumed
    dn       -    the refractive index of the medium (can be complex)

    """
    clock = StopWatch()

    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # subsampling
    Nx2, Ny2, Nz2 = (subsample*N for N in size)
    dx2, dy2, dz2 = (1.*d/subsample for d in units)

    #setting up the propagator
    k0 = 2.*np.pi/lam

    kxs = 2.*np.pi*np.fft.fftfreq(Nx2,dx2)
    kys = 2.*np.pi*np.fft.fftfreq(Ny2,dy2)

    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    #H0 = np.sqrt(0.j+n0**2*k0**2-KX**2-KY**2)
    H0 = np.sqrt(n0**2*k0**2-KX**2-KY**2)

    if use_fresnel_approx:
        H0  = 0.j+n0**2*k0-.5*(KX**2+KY**2)


    outsideInds = np.isnan(H0)

    H = np.exp(-1.j*dz2*H0)

    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    if u0 is None:
        u0 = np.ones((Ny2,Nx2),np.complex64)
    else:
        if subsample >1:
            u0 = zoom(np.real(u0),subsample) + 1.j*zoom(np.imag(u0),subsample)

    # setting up the gpu buffers and kernels

    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = fft_plan((Ny2,Nx2))
    plane_g = OCLArray.from_array(u0.astype(np.complex64))

    h_g = OCLArray.from_array(H.astype(np.complex64))

    if dn is not None:
        if isinstance(dn,OCLImage):
            dn_g = dn
        else:
            if dn.dtype.type in (np.complex64,np.complex128):

                dn_complex = np.zeros(dn.shape+(2,),np.float32)
                dn_complex[...,0] = np.real(dn)
                dn_complex[...,1] = np.imag(dn)
                dn_g = OCLImage.from_array(dn_complex)

            else:
                dn_g = OCLImage.from_array(dn.astype(np.float32))

        isComplexDn = dn.dtype.type in (np.complex64,np.complex128)

    else:
        #dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float16)


    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # = cos(theta)
        scatter_weights = cos_theta

        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2

        gfactor_weights_g = OCLArray.from_array(gfactor_weights.astype(np.float32))


        #return None,None,scatter_weights, gfactor_weights

        scatter_cross_sec_g = OCLArray.zeros(Nz,"float32")
        gfactor_g = OCLArray.zeros(Nz,"float32")

        plain_wave_dct = Nx2*Ny2*np.exp(-1.j*k0*n0*np.arange(Nz)*dz).astype(np.complex64)


        reduce_kernel = OCLReductionKernel(
        np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

        # reduce_kernel = OCLReductionKernel(
        # np.float32, neutral="0",
        #     reduce_expr="a+b",
        #     map_expr = "weights[i]*(i!=0)*cfloat_abs(field[i])*cfloat_abs(field[i])",
        #     arguments = "__global cfloat_t *field, __global float * weights,cfloat_t plain")


    u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.complex64)

    program.run_kernel("copy_subsampled_buffer",(Nx,Ny),None,
                           u_g.data,plane_g.data,
                           np.int32(subsample),
                           np.int32(0))


    clock.toc("setup")

    clock.tic("run")

    for i in range(Nz-1):
        for substep in range(subsample):
            fft(plane_g,inplace = True, plan  = plan)

            program.run_kernel("mult",(Nx2*Ny2,),None,
                               plane_g.data,h_g.data)

            if return_scattering and substep == (subsample-1):
                scatter_cross_sec_g[i+1] = reduce_kernel(plane_g,
                                                     scatter_weights_g,
                                                     plain_wave_dct[i+1])
                gfactor_g[i+1] = reduce_kernel(plane_g,
                                                     gfactor_weights_g,
                                                     plain_wave_dct[i+1])

            fft(plane_g,inplace = True, inverse = True,  plan  = plan)

            if dn is not None:
                if isComplexDn:

                    program.run_kernel("mult_dn_complex_image",(Nx2,Ny2),None,
                                   plane_g.data,dn_g,
                                   np.float32(k0*dz2),
                                   np.float32(n0),
                                   np.int32(subsample*(i+1.)+substep),
                                   np.int32(subsample))
                else:
                    program.run_kernel("mult_dn_image",(Nx2,Ny2),None,
                                   plane_g.data,dn_g,
                                   np.float32(k0*dz2),
                                   np.float32(n0),
                                   np.int32(subsample*(i+1.)+substep),
                                   np.int32(subsample))


        program.run_kernel("copy_subsampled_buffer",(Nx,Ny),None,
                           u_g.data,plane_g.data,
                           np.int32(subsample),
                           np.int32((i+1)*Nx*Ny))


    clock.toc("run")

    print clock
    result = (u_g.get(), dn_g.get(),)

    if return_scattering:
        # normalizing prefactor dkx = dx2/Nx2
        # prefac = 1./Nx2/Ny2*dx2*dy2/4./np.pi/n0
        prefac = 1./Nx2/Ny2*dx2*dy2
        p = prefac*scatter_cross_sec_g.get()
        result += (p,)

    if return_g:
        prefac = 1./Nx2/Ny2*dx2*dy2
        g = prefac*gfactor_g.get()/p
        result += (g,)

    if return_full_last:
        result += (plane_g.get(),)

    return result
Ejemplo n.º 28
0
def _bpm_3d2(size,
            units,
            lam = .5,
            u0 = None,
            dn = None,
            subsample = 1,
            n0 = 1.,
            return_scattering = False,
            return_g = False,
            return_full = True,
            return_field = True,
            use_fresnel_approx = False,
            absorbing_width = 0,
            scattering_plane_ind = 0,
            return_last_plane = False,
            store_dn_as_half = False):
    """
    simulates the propagation of monochromatic wave of wavelength lam with initial conditions u0 along z in a media filled with dn

    size     -    the dimension of the image to be calulcated  in pixels (Nx,Ny,Nz)
    units    -    the unit lengths of each dimensions in microns
    lam      -    the wavelength
    u0       -    the initial field distribution, if u0 = None an incident  plane wave is assumed
    dn       -    the refractive index of the medium (can be complex)

    """


    if subsample != 1:
        raise NotImplementedError("subsample still has to be 1")

    clock = StopWatch()

    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units


    #setting up the propagator
    k0 = 2.*np.pi/lam

    kxs = 2.*np.pi*np.fft.fftfreq(Nx,dx)
    kys = 2.*np.pi*np.fft.fftfreq(Ny,dy)

    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    #H0 = np.sqrt(0.j+n0**2*k0**2-KX**2-KY**2)
    H0 = np.sqrt(n0**2*k0**2-KX**2-KY**2)

    if use_fresnel_approx:
        H0  = 0.j+n0*k0-.5*(KX**2+KY**2)/n0/k0


    outsideInds = np.isnan(H0)

    H = np.exp(-1.j*dz*H0)

    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    if u0 is None:
        u0 = np.ones((Ny,Nx),np.complex64)

    # setting up the gpu buffers and kernels

    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = fft_plan((Ny,Nx))
    plane_g = OCLArray.from_array(u0.astype(np.complex64, copy = False))
    h_g = OCLArray.from_array(H.astype(np.complex64))

    if dn is not None:
        if isinstance(dn,OCLArray):
            dn_g = dn
        else:
            if dn.dtype.type in (np.complex64,np.complex128):
                isComplexDn = True
                dn_g = OCLArray.from_array(dn.astype(np.complex64,copy= False))

            else:
                isComplexDn = False
                if store_dn_as_half:
                    dn_g = OCLArray.from_array(dn.astype(np.float16,copy= False))
                else:
                    dn_g = OCLArray.from_array(dn.astype(np.float32,copy= False))

    else:
        #dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float32)


    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # _H = np.sqrt(n0**2*k0**2-KX**2-KY**2)
        # _H[np.isnan(_H)] = 0.
        #
        # cos_theta = _H/n0/k0
        # # = cos(theta)
        scatter_weights = cos_theta

        #scatter_weights = np.sqrt(KX**2+KY**2)/k0/np.real(H0)
        #scatter_weights[outsideInds] = 0.

        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2

        gfactor_weights_g = OCLArray.from_array(gfactor_weights.astype(np.float32))


        #return None,None,scatter_weights, gfactor_weights

        scatter_cross_sec_g = OCLArray.zeros(Nz,"float32")
        gfactor_g = OCLArray.zeros(Nz,"float32")

        plain_wave_dct = Nx*Ny*np.exp(-1.j*k0*n0*(scattering_plane_ind+np.arange(Nz))*dz).astype(np.complex64)


        reduce_kernel = OCLReductionKernel(
        np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

        # reduce_kernel = OCLReductionKernel(
        # np.float32, neutral="0",
        #     reduce_expr="a+b",
        #     map_expr = "weights[i]*(i!=0)*cfloat_abs(field[i])*cfloat_abs(field[i])",
        #     arguments = "__global cfloat_t *field, __global float * weights,cfloat_t plain")

    if return_full:
        if return_field:
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.complex64)
            u_g[0] = plane_g
        else:
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.float32)
            program.run_kernel("copy_intens",(Nx*Ny,),None,
                           plane_g.data,u_g.data, np.int32(0))


    clock.toc("setup")

    clock.tic("run")


    for i in range(Nz-1):
        fft(plane_g,inplace = True, plan  = plan)

        program.run_kernel("mult",(Nx*Ny,),None,
                           plane_g.data,h_g.data)


        #a =  dn_g.sum()
        if return_scattering:
            scatter_cross_sec_g[i+1] = reduce_kernel(plane_g,
                                                     scatter_weights_g,
                                                     plain_wave_dct[i+1])
            gfactor_g[i+1] = reduce_kernel(plane_g,
                                                     gfactor_weights_g,
                                                     plain_wave_dct[i+1])

        fft(plane_g,inplace = True, inverse = True,  plan  = plan)

        if dn is not None:
            if isComplexDn:

                kernel_str = "mult_dn_complex"
            else:
                if dn_g.dtype.type == np.float16:
                    kernel_str = "mult_dn_half"
                else:
                    kernel_str = "mult_dn"


            program.run_kernel(kernel_str,(Nx,Ny,),None,
                                   plane_g.data,dn_g.data,
                                   np.float32(k0*dz),
                                   np.int32(Nx*Ny*(i+1)),
                               np.int32(absorbing_width))




        if return_full:
            if return_field:
                u_g[i+1] = plane_g
            else:
                program.run_kernel("copy_intens",(Nx*Ny,),None,
                           plane_g.data,u_g.data, np.int32(Nx*Ny*(i+1)))

    clock.toc("run")

    print clock

    if return_full:
        u = u_g.get()
    else:
        u = plane_g.get()
        if not return_field:
            u = np.abs(u)**2

    if return_scattering:
        # normalizing prefactor dkx = dx/Nx
        # prefac = 1./Nx/Ny*dx*dy/4./np.pi/n0
        prefac = 1./Nx/Ny*dx*dy
        p = prefac*scatter_cross_sec_g.get()


    if return_g:
        prefac = 1./Nx/Ny*dx*dy
        g = prefac*gfactor_g.get()/p



    if return_scattering:
        if return_g:
            result = u,  p, g
        else:
            result =  u,  p
    else:
        result = u

    if return_last_plane:
        if isinstance(result,tuple):
            result = result + (plane_g.get(),)
        else:
            result = (result, plane_g.get())


    return result
Ejemplo n.º 29
0
    def _propagate_single(self, u0 = None,
                          return_full = True,
                          return_intensity = False,
                          absorbing_width = 0, **kwargs):
        """
        Propagate a field plane by plane through the volume.

        :param u0: initial complex field distribution, if None, plane wave is assumed
        :param return_full: if True all Nz planes are returned, otherwise only the last one
        :param return_intensity: together with return_full, every plane is
            stored through the "fill_with_energy" kernel in a float32 stack
            instead of as complex field
        :param absorbing_width: passed as int32 to the dn multiplication kernel
        :param kwargs: not used
        :return: the downloaded (Nz,Ny,Nx) stack if return_full, else the
            downloaded last complex plane
        """
        # plane wave if none
        if u0 is None:
            u0 = np.ones(self.size2d[::-1], np.complex64)

        Nx, Ny, Nz = self.size
        _dx, _dy, dz = self.units
        n_pix = Nx*Ny

        plane_g = OCLArray.from_array(u0.astype(np.complex64, copy = False))

        def _store_plane(index):
            # write the current plane into slot `index` of the output stack
            if return_intensity:
                self.bpm_program.run_kernel("fill_with_energy", (n_pix,), None,
                                            u_g.data, plane_g.data,
                                            np.int32(index*n_pix))
            else:
                u_g[index] = plane_g

        if return_full:
            stack_dtype = np.float32 if return_intensity else np.complex64
            u_g = OCLArray.empty((Nz, Ny, Nx), dtype=stack_dtype)
            _store_plane(0)

        for i in range(Nz-1):
            # free space step: multiply with the propagator in Fourier space
            fft(plane_g, inplace = True, plan = self._plan)
            self.bpm_program.run_kernel("mult", (n_pix,), None,
                                        plane_g.data, self._H_g.data)
            fft(plane_g, inplace = True, inverse = True, plan = self._plan)

            # refractive step with slice i+1 of dn
            if self.dn is not None:
                kernel_str = "mult_dn_complex" if self._is_complex_dn else "mult_dn"
                self.bpm_program.run_kernel(kernel_str, (Nx, Ny,), None,
                                            plane_g.data, self.dn_g.data,
                                            np.float32(self.k0*dz),
                                            np.int32(n_pix*(i+1)),
                                            np.int32(absorbing_width))

            if return_full:
                _store_plane(i+1)

        if return_full:
            return u_g.get()
        return plane_g.get()
Ejemplo n.º 30
0
    def _propagate_core(self,
                        u0=None,
                        dn_ind_start=0,
                        dn_ind_end=1,
                        dn_ind_offset=0,
                        return_comp="field",
                        return_shape="full",
                        free_prop=False,
                        dn_mean_method="none",
                        **kwargs):
        """
        the core propagation method, the refractive index dn is
        assumed to be already residing in gpu memory
        if u0 is None, assumes that the initial field
        to be residing in self._buf_plane

        NOTE(review): the upload of u0 is commented out below, so the field
        currently in self._buf_plane is used whatever u0 is - confirm intent

        the number of planes is Nz = dn_ind_end - dn_ind_start and
        Nz - 1 propagation steps are performed, each consisting of
        self.simul_z sub steps; dn_ind_offset is added to the plane index
        when looking up self.dn_mean

        kwargs:
            return_comp in ["field", "intens"]
            return_shape in ["last", "full"]
            free_prop = False | True
            dn_mean_method = "none", "global", "local"

        returns the downloaded stack of shape (Nz, Ny, Nx) for
        return_shape == "full", otherwise the downloaded self._buf_plane
        (return_comp only selects the dtype of the full stack)

        raises ValueError for an unknown return_comp or return_shape and
        AssertionError for a negative dn_ind_start
        """

        print("mean method: ", dn_mean_method)

        # without a refractive index there is nothing but free propagation
        free_prop = free_prop or (self.dn is None)

        if return_comp == "field":
            res_type = Bpm3d._complex_type
        elif return_comp == "intens":
            res_type = Bpm3d._real_type
        else:
            raise ValueError(return_comp)

        if not return_shape in ["last", "full"]:
            raise ValueError()

        Nx, Ny, _ = self.shape
        Nz = dn_ind_end - dn_ind_start

        assert dn_ind_start >= 0

        # if not u0 is None:
        #     print "huhu"
        #     self._buf_plane.write_array(u0.astype(np.complex64,copy=False))

        if return_shape == "full":
            u = OCLArray.empty((Nz, Ny, Nx), dtype=res_type)

        # copy the first plane
        if return_shape == "full":
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # the mean dn the current propagator was built for ("global" method)
        dn0 = 0

        # "local" method: start the running sums with an intensity sum of 1
        # and the mean dn of the first plane
        if dn_mean_method == "local" and not self.dn is None and not free_prop:
            self.intens_sum_g = OCLArray.from_array(
                np.ones(1, dtype=Bpm3d._real_type))
            self.intens_dn_sum_g = OCLArray.from_array(
                (self.dn_mean[dn_ind_start + dn_ind_offset] *
                 np.ones(1)).astype(dtype=Bpm3d._real_type))
            #self._fill_propagator_buf(self.n0, self.intens_dn_sum_g, self.intens_sum_g)

        self._fill_propagator(self.n0)

        for i in range(Nz - 1):

            for j in range(self.simul_z):

                # free space sub step: multiply with self._buf_H in Fourier space
                fft(self._buf_plane, inplace=True, plan=self._plan)

                self._mult_complex(self._buf_plane, self._buf_H)

                fft(self._buf_plane,
                    inplace=True,
                    inverse=True,
                    plan=self._plan)

                if not free_prop:
                    #FIXME here we make  a slight error for the first time point, as we
                    #FIXME set dn0 first and the compute the new propagator
                    # the second argument is the fractional z position of
                    # this sub step in units of planes
                    if dn_mean_method == "local":
                        self._mult_dn_local(
                            self._buf_plane,
                            (i + dn_ind_start + (j + 1.) / self.simul_z),
                            self.intens_sum_g, self.intens_dn_sum_g,
                            self.intens_g, self.intens_dn_g)

                    else:
                        self._mult_dn(self._buf_plane,
                                      (i + dn_ind_start +
                                       (j + 1.) / self.simul_z), dn0)

            # update the propagator for the next plane
            if not self.dn is None and not free_prop:
                if dn_mean_method == "local":
                    # reduce the per pixel buffers to the two running sums
                    # and rebuild the propagator from them
                    self._kernel_reduction(
                        self.intens_g,
                        self.intens_dn_g,
                        outs=[self.intens_sum_g, self.intens_dn_sum_g])

                    self._fill_propagator_buf(self.n0, self.intens_dn_sum_g,
                                              self.intens_sum_g)

                    #print(self.intens_dn_sum_g.get(), self.n0)
                    #print("mean dn: ",self.intens_dn_sum_g.get()/self.intens_sum_g.get())

                elif dn_mean_method == "global":
                    # only rebuild the propagator if the mean dn changed
                    if self.dn_mean[i + dn_ind_start + dn_ind_offset] != dn0:
                        dn0 = self.dn_mean[i + dn_ind_start + dn_ind_offset]
                        self._fill_propagator(self.n0 + dn0)

            # store plane i + 1 in the output stack
            # NOTE(review): this condition also tests self.simul_xy, unlike
            # the copy of the first plane above - confirm that is intended
            if return_shape == "full":
                if self._is_subsampled and self.simul_xy != self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u,
                                        (i + 1) * (Nx * Ny))

        if return_shape == "full":
            return u.get()
        else:
            return self._buf_plane.get()
Ejemplo n.º 31
0
def bpm_3d_free(size, units, dz, lam = .5, u0 = None,
                n0 = 1.,
                use_fresnel_approx = False):
    """propagates the field u0 to distance dz

    size     -    the lateral dimension in pixels (Nx,Ny)
    units    -    the lateral pixel sizes (dx,dy)
    dz       -    the propagation distance
    lam      -    the wavelength
    u0       -    the initial field of shape (Ny,Nx), if u0 = None a plane wave is assumed
    n0       -    NOTE(review): accepted but not used by the propagator
                  below, which is built from 1/lam only - confirm intent
    use_fresnel_approx - use the paraxial approximation of the propagator

    returns the propagated complex field of shape (Ny,Nx)
    """
    clock = StopWatch()

    clock.tic("setup")
    Nx, Ny = size
    dx, dy = units

    # setting up the propagator: spatial frequencies in fft ordering, so the
    # transfer function needs no fftshift and odd sizes are handled as well
    kxs = np.fft.fftfreq(Nx, dx)
    kys = np.fft.fftfreq(Ny, dy)

    # y varies along axis 0, so that H has the shape (Ny,Nx) of the field
    KY, KX = np.meshgrid(kys, kxs, indexing= "ij")

    H0 = np.sqrt(0.j+(1./lam)**2-KX**2-KY**2)

    if use_fresnel_approx:
        H0 = 1./lam*(0.j+1.-.5*lam**2*(KX**2+KY**2))

    outsideInds = np.isnan(H0)
    H = np.exp(2.j*np.pi*dz*H0)
    H[outsideInds] = 0.

    H = H.astype(np.complex64)

    if u0 is None:
        u0 = np.ones((Ny, Nx), np.complex64)

    # setting up the gpu buffers and kernels

    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny, Nx))
    plane_g = OCLArray.from_array(u0.astype(np.complex64))

    h_g = OCLArray.from_array(H.astype(np.complex64))

    clock.toc("setup")
    clock.tic("run")

    # multiply with the transfer function in Fourier space
    fft(plane_g, inplace = True, plan = plan)

    program.run_kernel("mult", (Nx*Ny,), None,
                           plane_g.data, h_g.data)

    fft(plane_g, inplace = True, inverse = True, plan = plan)

    clock.toc("run")

    return plane_g.get()
Ejemplo n.º 32
0
def test_fft_np():
    """Smoke test: fft has to accept a plain 128 x 128 numpy array of ones."""
    side = 128
    ones_2d = np.ones((side, side))
    fft(ones_2d)
Ejemplo n.º 33
0
def _convolve_spatial2(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      pad_factor = 2,
                      plan = None,
                      return_plan = False):
    """
    Spatially varying convolution of a 2d image with a 2d grid of psfs.

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx,Hy,Hx)

    The input image im is subdivided into (Gy,Gx) blocks and
    hs[j,i] is the psf at the center of block (i,j).

    If grid_dim = (Gy,Gx) is given, hs is instead a single 2d array that is
    cut into Gy x Gx blocks on the gpu, every block being used as psf
    (presumably hs then has the same shape as im - verify against caller).

    As of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    pad_factor scales the block size before it is rounded up to the next
    power of two to obtain the patch size.

    plan is an optional fft plan for patches of shape (Npatch_y,Npatch_x);
    if None, one is created with fft_plan.

    Returns the convolved image as a float32 array of shape im.shape, or the
    tuple (result, plan) if return_plan is True.
    """

    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]

    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs

    # the size of each block within the grid; floor division keeps the sizes
    # integral on Python 3 as well (kernel sizes and offsets must be ints)
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(pad_factor*Nblock_x), _next_power_of_2(pad_factor*Nblock_y)

    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y, Npatch_x))

    # upper left corner of every block
    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    #prepare psfs
    if grid_dim:
        # copy every block of hs into its zero padded patch on the gpu
        h_g = OCLArray.zeros((Gy, Gx, Npatch_y, Npatch_x), np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for i, _x0 in enumerate(x0s):
            for j, _y0 in enumerate(y0s):
                prog.run_kernel("fill_psf_grid2",
                                (Nblock_x, Nblock_y), None,
                        tmp_g.data,
                        np.int32(Nx),
                        np.int32(i*Nblock_x),
                        np.int32(j*Nblock_y),
                        h_g.data,
                        np.int32(Npatch_x),
                        np.int32(Npatch_y),
                        np.int32(-Nblock_x//2+Npatch_x//2),
                        np.int32(-Nblock_y//2+Npatch_y//2),
                        np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y)
                            )
    else:
        # pad every psf to the patch size and move its center to the origin
        hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)), axes=(2, 3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    #prepare image
    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # cut the patch around every block center out of the image
    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2", (Npatch_x, Npatch_y), None,
                    im_g,
                    np.int32(_x0+Nblock_x//2-Npatch_x//2),
                    np.int32(_y0+Nblock_y//2-Npatch_y//2),
                    patches_g.data,
                    np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g, inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace", (Npatch_x*Npatch_y*Gx*Gy,), None,
                    patches_g.data, h_g.data)
    fft(patches_g, inplace=True, inverse = True, batch = Gx*Gy, plan = plan)

    # debug output, written so that Python 2 and 3 print the same text
    print("%s %s" % (Nblock_x, Npatch_x))

    # accumulate the convolved patches into the result image
    res_g = OCLArray.empty(im.shape, np.float32)

    for j in range(Gy+1):
        for i in range(Gx+1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y), None,
                            patches_g.data, res_g.data,
                            np.int32(i), np.int32(j),
                            np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 34
0
def _compare_fft_np(d):
    """Return the pair (np.fft.fftn(d), gputools.fft(d, fast_math=True))."""
    reference = np.fft.fftn(d)
    return reference, gputools.fft(d, fast_math=True)
Ejemplo n.º 35
0
    def _propagate_single(self,
                          u0=None,
                          return_full=True,
                          return_intensity=False,
                          absorbing_width=0,
                          **kwargs):
        """
        Propagate a field plane by plane through the volume.

        :param u0: initial complex field distribution, if None, plane wave is assumed
        :param return_full: if True all Nz planes are returned, otherwise only the last one
        :param return_intensity: together with return_full, every plane is
            stored through the "fill_with_energy" kernel in a float32 stack
            instead of as complex field
        :param absorbing_width: passed as int32 to the dn multiplication kernel
        :param kwargs: not used
        :return: the downloaded (Nz, Ny, Nx) stack if return_full, else the
            downloaded last complex plane
        """
        # default to a plane wave
        if u0 is None:
            u0 = np.ones(self.size2d[::-1], np.complex64)

        Nx, Ny, Nz = self.size
        _dx, _dy, dz = self.units
        plane_size = Nx * Ny

        plane_g = OCLArray.from_array(u0.astype(np.complex64, copy=False))

        # allocate the output stack and store the initial plane in slot 0
        if return_full and return_intensity:
            u_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)
            self.bpm_program.run_kernel("fill_with_energy", (plane_size, ),
                                        None, u_g.data, plane_g.data,
                                        np.int32(0))
        elif return_full:
            u_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.complex64)
            u_g[0] = plane_g

        for z_index in range(1, Nz):
            # free space step: multiply with the propagator in Fourier space
            fft(plane_g, inplace=True, plan=self._plan)
            self.bpm_program.run_kernel("mult", (plane_size, ), None,
                                        plane_g.data, self._H_g.data)
            fft(plane_g, inplace=True, inverse=True, plan=self._plan)

            # refractive step with slice z_index of dn
            if self.dn is not None:
                if self._is_complex_dn:
                    kernel_str = "mult_dn_complex"
                else:
                    kernel_str = "mult_dn"
                self.bpm_program.run_kernel(kernel_str, (Nx, Ny), None,
                                            plane_g.data, self.dn_g.data,
                                            np.float32(self.k0 * dz),
                                            np.int32(plane_size * z_index),
                                            np.int32(absorbing_width))

            if not return_full:
                continue

            # store the plane in slot z_index of the output stack
            if return_intensity:
                self.bpm_program.run_kernel("fill_with_energy",
                                            (plane_size, ), None, u_g.data,
                                            plane_g.data,
                                            np.int32(z_index * plane_size))
            else:
                u_g[z_index] = plane_g

        return u_g.get() if return_full else plane_g.get()
Ejemplo n.º 36
0
def _convolve_spatial3(im,
                       hs,
                       mode="constant",
                       grid_dim=None,
                       plan=None,
                       return_plan=False,
                       pad_factor=2):
    """Spatially varying convolution of a 3d image with a 3d grid of psfs.

    The image is divided into ``Gs = (Gz, Gy, Gx)`` blocks. For every block
    a padded patch is filled from the image, all patches and psfs are
    Fourier transformed, multiplied, transformed back and finally combined
    into the result by the ``interpolate3`` kernel.

    Parameters
    ----------
    im : ndarray
        3 dimensional input image.
    hs : ndarray
        The psfs. Either 6 dimensional, in which case the grid dimensions
        are ``hs.shape[:3]``, or (only if ``grid_dim`` is given)
        3 dimensional with the same shape as ``im``.
    mode : str
        One of "constant", "wrap", "edge" or "reflect"; selects the
        ``ADDRESSMODE`` define the OpenCL program is built with.
    grid_dim : sequence of int, optional
        Grid dimensions to use when ``hs`` has the shape of ``im``.
    plan : optional
        fft plan to reuse; created with ``fft_plan`` if None.
    return_plan : bool
        If True, the plan is returned together with the result.
    pad_factor : int
        Factor applied to the block size before it is passed through
        ``next_power_of_2`` to obtain the patch size.

    Returns
    -------
    res or (res, plan)
        ``res`` is the float32 result with the shape of ``im``.

    Raises
    ------
    ValueError
        If ``im`` or ``hs`` have the wrong number of dimensions or shape.
    NotImplementedError
        If the image shape is not divisible by the grid dimensions.
    KeyError
        If ``mode`` is not one of the known modes.
    """
    if im.ndim != 3:
        raise ValueError("wrong dimensions of input!")

    if not (hs.ndim == 6 or (hs.ndim == 3 and grid_dim)):
        raise ValueError("wrong dimensions of psf grid!")

    if grid_dim:
        if hs.shape != im.shape:
            raise ValueError("if grid_dim is set, then im.shape = hs.shape !")
        Gs = tuple(grid_dim)
    else:
        # the check above already guarantees hs.ndim == 6 on this path
        Gs = hs.shape[:3]

    if not np.all([n % g == 0 for n, g in zip(im.shape, Gs)]):
        # report the grid that was actually used (hs.shape is the image
        # shape when grid_dim is given, so it must not be quoted here)
        raise NotImplementedError(
            "shape of image %s has to be divisible by the grid dimensions "
            "(Gz, Gy, Gx) = %s" % (str(tuple(im.shape)), str(tuple(Gs))))

    mode_str = {
        "constant": "CLK_ADDRESS_CLAMP",
        "wrap": "CLK_ADDRESS_REPEAT",
        "edge": "CLK_ADDRESS_CLAMP_TO_EDGE",
        "reflect": "CLK_ADDRESS_MIRRORED_REPEAT"
    }

    Ns = im.shape

    # the size of each block within the grid
    Nblocks = [n // g for n, g in zip(Ns, Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([next_power_of_2(pad_factor * nb) for nb in Nblocks])

    prog = OCLProgram(abspath("kernels/conv_spatial3.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Gs + Npatchs, axes=(-3, -2, -1))

    # start coordinates of the blocks along z, y and x
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    # number of elements of a single patch (loop invariant)
    patch_len = np.prod(Npatchs)

    def patch_offset(k, j, i):
        # flat element offset of patch (k, j, i) inside a Gs + Npatchs buffer
        return np.int32(i * patch_len + j * Gs[2] * patch_len +
                        k * Gs[2] * Gs[1] * patch_len)

    # position of a block inside its (larger) patch, per axis
    block_shift = [-nb // 2 + npatch // 2
                   for nb, npatch in zip(Nblocks, Npatchs)]

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    # prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros(Gs + Npatchs, np.complex64)

        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy=False))
        for k, j, i in product(*[range(g) for g in Gs]):
            prog.run_kernel(
                "fill_psf_grid3", Nblocks[::-1], None, tmp_g.data,
                np.int32(im.shape[2]), np.int32(im.shape[1]),
                np.int32(i * Nblocks[2]), np.int32(j * Nblocks[1]),
                np.int32(k * Nblocks[0]), h_g.data, np.int32(Npatchs[2]),
                np.int32(Npatchs[1]), np.int32(Npatchs[0]),
                np.int32(block_shift[2]),
                np.int32(block_shift[1]),
                np.int32(block_shift[0]),
                patch_offset(k, j, i))

    else:
        hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # this loops over all i,j,k
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel(
            "fill_patch3", Npatchs[::-1], None, im_g,
            np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
            np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
            np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2), patches_g.data,
            patch_offset(k, j, i))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, plan=plan)
    fft(h_g, inplace=True, plan=plan)
    prog.run_kernel("mult_inplace", (patch_len * np.prod(Gs), ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, plan=plan)

    # accumulate; the kernel is launched for g + 1 positions per axis
    res_g = OCLArray.zeros(im.shape, np.float32)

    for k, j, i in product(*[range(g + 1) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None, patches_g.data,
                        res_g.data, np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 37
0
def _convolve_spatial3(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      plan = None,
                      return_plan = False,
                      pad_factor = 2):
    """Spatially varying convolution of a 3d image with a 3d grid of psfs.

    The image is divided into ``Gs = (Gz, Gy, Gx)`` blocks. For every block
    a padded patch is filled from the image, all patches and psfs are
    Fourier transformed (batched over the grid), multiplied, transformed
    back and finally combined into the result by the ``interpolate3``
    kernel.

    Parameters
    ----------
    im : ndarray
        3 dimensional input image.
    hs : ndarray
        The psfs. Either 6 dimensional, in which case the grid dimensions
        are ``hs.shape[:3]``, or (only if ``grid_dim`` is given)
        3 dimensional with the same shape as ``im``.
    mode : str
        "constant" or "wrap"; selects the ``ADDRESSMODE`` define the
        OpenCL program is built with.
    grid_dim : sequence of int, optional
        Grid dimensions to use when ``hs`` has the shape of ``im``.
    plan : optional
        fft plan to reuse; created with ``fft_plan(Npatchs)`` if None.
    return_plan : bool
        If True, the plan is returned together with the result.
    pad_factor : int
        Factor applied to the block size before it is passed through
        ``_next_power_of_2`` to obtain the patch size.

    Returns
    -------
    res or (res, plan)
        ``res`` is the float32 result with the shape of ``im``.

    Raises
    ------
    ValueError
        If ``im`` or ``hs`` have the wrong number of dimensions or shape.
    NotImplementedError
        If the image shape is not divisible by the grid dimensions.
    KeyError
        If ``mode`` is not one of the known modes.
    """
    if im.ndim != 3:
        raise ValueError("wrong dimensions of input!")

    if not (hs.ndim == 6 or (hs.ndim == 3 and grid_dim)):
        raise ValueError("wrong dimensions of psf grid!")

    if grid_dim:
        if hs.shape != im.shape:
            raise ValueError("if grid_dim is set, then im.shape = hs.shape !")
        Gs = tuple(grid_dim)
    else:
        # the check above already guarantees hs.ndim == 6 on this path
        Gs = hs.shape[:3]

    if not np.all([n % g == 0 for n, g in zip(im.shape, Gs)]):
        # report the grid that was actually used (hs.shape is the image
        # shape when grid_dim is given, so it must not be quoted here)
        raise NotImplementedError(
            "shape of image %s has to be divisible by the grid dimensions "
            "(Gz, Gy, Gx) = %s" % (str(tuple(im.shape)), str(tuple(Gs))))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP",
                "wrap": "CLK_ADDRESS_REPEAT"}

    Ns = im.shape

    # the size of each block within the grid
    # (floor division: sizes must stay integers under Python 3 as well)
    Nblocks = [n // g for n, g in zip(Ns, Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor * nb) for nb in Nblocks])

    prog = OCLProgram(abspath("kernels/conv_spatial3.cl"),
                      build_options=["-D", "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    # start coordinates of the blocks along z, y and x
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    # number of elements of a single patch and number of patches
    patch_len = np.prod(Npatchs)
    n_patches = np.prod(Gs)

    def patch_offset(k, j, i):
        # flat element offset of patch (k, j, i) inside a Gs + Npatchs buffer
        return np.int32(i * patch_len + j * Gs[2] * patch_len +
                        k * Gs[2] * Gs[1] * patch_len)

    # position of a block inside its (larger) patch, per axis
    block_shift = [-nb // 2 + npatch // 2
                   for nb, npatch in zip(Nblocks, Npatchs)]

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    # prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros(Gs + Npatchs, np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy=False))
        for k, j, i in product(*[range(g) for g in Gs]):
            prog.run_kernel("fill_psf_grid3",
                            Nblocks[::-1], None,
                            tmp_g.data,
                            np.int32(im.shape[2]),
                            np.int32(im.shape[1]),
                            np.int32(i * Nblocks[2]),
                            np.int32(j * Nblocks[1]),
                            np.int32(k * Nblocks[0]),
                            h_g.data,
                            np.int32(Npatchs[2]),
                            np.int32(Npatchs[1]),
                            np.int32(Npatchs[0]),
                            np.int32(block_shift[2]),
                            np.int32(block_shift[1]),
                            np.int32(block_shift[0]),
                            patch_offset(k, j, i))

    else:
        hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))
        h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # this loops over all i,j,k
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3", Npatchs[::-1], None,
                        im_g,
                        np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
                        np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
                        np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2),
                        patches_g.data,
                        patch_offset(k, j, i))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, batch=n_patches, plan=plan)
    fft(h_g, inplace=True, batch=n_patches, plan=plan)
    prog.run_kernel("mult_inplace", (patch_len * n_patches,), None,
                    patches_g.data, h_g.data)

    fft(patches_g,
        inplace=True,
        inverse=True,
        batch=n_patches,
        plan=plan)

    # accumulate; the kernel is launched for g + 1 positions per axis
    res_g = OCLArray.zeros(im.shape, np.float32)

    for k, j, i in product(*[range(g + 1) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 38
0
def convolve_spatial2(im, hs, mode="constant", plan=None, return_plan=False):
    """
    spatial varying convolution of a 2d image with a 2d grid of psfs

    shape(im) = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan is an optional fft plan to reuse; if it is None a new one is
    created with fft_plan((Npatch_y, Npatch_x))

    returns the float32 result with the shape of im, or the tuple
    (result, plan) if return_plan is set

    raises ValueError for wrong input dimensions, NotImplementedError if
    the image shape is not divisible by the grid and KeyError for an
    unknown mode
    """

    if im.ndim != 2 or hs.ndim != 4:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:2])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy  = %s shape mismatch"
            % (str(hs.shape[:2])))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = hs.shape[:2]

    # the size of each block within the grid
    # (floor division: the sizes are used as array shapes and kernel work
    # sizes, so they have to stay integers under Python 3 as well)
    Nblock_y, Nblock_x = Ny // Gy, Nx // Gx

    # the size of the overlapping patches with safety padding
    Npatch_x = _next_power_of_2(3 * Nblock_x)
    Npatch_y = _next_power_of_2(3 * Nblock_y)

    hs = np.fft.fftshift(pad_to_shape(hs, (Gy, Gx, Npatch_y, Npatch_x)),
                         axes=(2, 3))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y, Npatch_x))

    patches_g = OCLArray.empty((Gy, Gx, Npatch_y, Npatch_x), np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start coordinates of the blocks along x and y
    x0s = Nblock_x * np.arange(Gx)
    y0s = Nblock_y * np.arange(Gy)

    # number of elements of a single patch
    patch_len = Npatch_x * Npatch_y

    for i, _x0 in enumerate(x0s):
        for j, _y0 in enumerate(y0s):
            prog.run_kernel(
                "fill_patch2", (Npatch_x, Npatch_y), None, im_g,
                np.int32(_x0 + Nblock_x // 2 - Npatch_x // 2),
                np.int32(_y0 + Nblock_y // 2 - Npatch_y // 2), patches_g.data,
                # flat element offset of patch (j, i) inside patches_g
                np.int32(i * patch_len + j * Gx * patch_len))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, batch=Gx * Gy, plan=plan)
    fft(h_g, inplace=True, batch=Gx * Gy, plan=plan)
    prog.run_kernel("mult_inplace", (patch_len * Gx * Gy, ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=Gx * Gy, plan=plan)

    # accumulate
    # NOTE(review): the buffer is allocated uninitialized (the 3d variants
    # use OCLArray.zeros) - confirm that "interpolate2" writes every pixel
    # rather than adding to it.
    res_g = OCLArray.empty(im.shape, np.float32)

    for i in range(Gx + 1):
        for j in range(Gy + 1):
            prog.run_kernel("interpolate2", (Nblock_x, Nblock_y),
                            None, patches_g.data, res_g.data, np.int32(i),
                            np.int32(j), np.int32(Gx), np.int32(Gy),
                            np.int32(Npatch_x), np.int32(Npatch_y))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 39
0
def convolve_spatial3(im,
                      hs,
                      mode="constant",
                      plan=None,
                      return_plan=False,
                      pad_factor=2):
    """
    spatial varying convolution of a 3d image with a 3d grid of psfs

    shape(im) = (Nz,Ny,Nx)
    shape(hs) = (Gz,Gy,Gx, Hz,Hy,Hx)

    the input image im is subdivided into (Gx,Gy,Gz) blocks
    hs[k,j,i] is the psf at the center of each block (i,j,k)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0
    Nz % Gz == 0

    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition

    plan is an optional fft plan to reuse; if it is None a new one is
    created with fft_plan(Npatchs)

    pad_factor is multiplied with the block size before it is passed
    through _next_power_of_2 to obtain the patch size

    returns the float32 result with the shape of im, or the tuple
    (result, plan) if return_plan is set

    raises ValueError for wrong input dimensions, NotImplementedError if
    the image shape is not divisible by the grid and KeyError for an
    unknown mode
    """
    if im.ndim != 3 or hs.ndim != 6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n % g == 0 for n, g in zip(im.shape, hs.shape[:3])]):
        raise NotImplementedError(
            "shape of image has to be divisible by Gx Gy  = %s !" %
            (str(hs.shape[:3])))

    mode_str = {"constant": "CLK_ADDRESS_CLAMP", "wrap": "CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])

    # the size of each block within the grid
    # (floor division: the sizes are used as kernel work sizes and offsets,
    # so they have to stay integers under Python 3 as well)
    Nblocks = [n // g for n, g in zip(Ns, Gs)]

    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor * nb) for nb in Nblocks])

    hs = np.fft.fftshift(pad_to_shape(hs, Gs + Npatchs), axes=(3, 4, 5))

    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D",
                                     "ADDRESSMODE=%s" % mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs + Npatchs, np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32, copy=False))

    # start coordinates of the blocks along z, y and x
    Xs = [nb * np.arange(g) for nb, g in zip(Nblocks, Gs)]

    # number of elements of a single patch and number of patches
    patch_len = np.prod(Npatchs)
    n_patches = np.prod(Gs)

    # this loops over all i,j,k
    for (k, _z0), (j, _y0), (i, _x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel(
            "fill_patch3", Npatchs[::-1], None, im_g,
            np.int32(_x0 + Nblocks[2] // 2 - Npatchs[2] // 2),
            np.int32(_y0 + Nblocks[1] // 2 - Npatchs[1] // 2),
            np.int32(_z0 + Nblocks[0] // 2 - Npatchs[0] // 2), patches_g.data,
            # flat element offset of patch (k, j, i) inside patches_g
            np.int32(i * patch_len + j * Gs[2] * patch_len +
                     k * Gs[2] * Gs[1] * patch_len))

    # convolution: multiply patches and psfs in Fourier space
    fft(patches_g, inplace=True, batch=n_patches, plan=plan)
    fft(h_g, inplace=True, batch=n_patches, plan=plan)
    prog.run_kernel("mult_inplace", (patch_len * n_patches, ), None,
                    patches_g.data, h_g.data)

    fft(patches_g, inplace=True, inverse=True, batch=n_patches, plan=plan)

    # accumulate; the kernel is launched for g + 1 positions per axis
    res_g = OCLArray.zeros(im.shape, np.float32)

    for k, j, i in product(*[range(g + 1) for g in Gs]):
        prog.run_kernel("interpolate3", Nblocks[::-1], None, patches_g.data,
                        res_g.data, np.int32(i), np.int32(j), np.int32(k),
                        np.int32(Gs[2]), np.int32(Gs[1]), np.int32(Gs[0]),
                        np.int32(Npatchs[2]), np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]))

    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
Ejemplo n.º 40
0
    def _propagate(self,
                   u0=None,
                   offset=0,
                   return_comp="field",
                   return_shape="full",
                   free_prop=False,
                   slow_mean=False,
                   **kwargs):
        """
        Propagate a plane step by step along z and return the result.

        The loop runs ``Nz - 1 - offset`` steps. Every step consists of
        ``self.simul_z`` sub-steps, each of which Fourier transforms the
        plane buffer, multiplies it with the propagator buffer, transforms
        back and, unless free propagation is active, calls
        ``self._mult_dn``.

        kwargs:
            u0: starting plane, ``self.u0_plane()`` is used if None;
                it is cast to complex64
            offset: z offset, has to satisfy 0 <= offset < Nz - 1; the
                lookups into ``self.dn_mean`` are shifted by it
            return_comp in ["field", "intens"]
                selects the dtype (``Bpm3d._complex_type`` or
                ``Bpm3d._real_type``) of the buffers allocated here
            return_shape in ["last", "full"]
                "full" returns the buffer of shape (Nz - offset, Ny, Nx),
                "last" returns the content of the plane buffer after the
                last step
            free_prop = False | True
                always treated as True if ``self.dn`` is None
            slow_mean = False | True
                if set, dn0 is recomputed in every step from the current
                plane; only available with return_shape = "last"
            any further keyword arguments are accepted and ignored

        raises:
            ValueError if return_comp or return_shape is not a valid choice
            AssertionError if offset is out of range
            NotImplementedError for slow_mean with return_shape = "full"
                (raised inside the loop, when not in free propagation)
        """

        # without a dn distribution only free propagation is possible
        free_prop = free_prop or (self.dn is None)

        # validate both choices up front, with the offending value in the
        # error, before anything is allocated
        if return_comp not in ("field", "intens"):
            raise ValueError(return_comp)

        if return_shape not in ("last", "full"):
            raise ValueError(
                "return_shape has to be 'last' or 'full', got %r" %
                (return_shape, ))

        if return_comp == "field":
            res_type = Bpm3d._complex_type
        else:
            res_type = Bpm3d._real_type

        return_full = return_shape == "full"

        if u0 is None:
            u0 = self.u0_plane()

        u0 = u0.astype(np.complex64, copy=False)

        Nx, Ny, Nz = self.shape

        assert offset >= 0 and offset < (Nz - 1)

        if return_full:
            u = OCLArray.empty((Nz - offset, Ny, Nx), dtype=res_type)

        self._buf_plane.write_array(u0)

        # copy the first plane
        if return_full:
            if self._is_subsampled:
                self._img_xy.copy_buffer(self._buf_plane)
                self._copy_down_img(self._img_xy, u, 0)
            else:
                self._copy_down_buf(self._buf_plane, u, 0)

        # the dn mean the propagator was last filled with
        dn0 = 0

        for i in range(Nz - 1 - offset):
            if self.dn is not None and not free_prop:
                if slow_mean:
                    if return_full:
                        raise NotImplementedError()
                    else:
                        # fetch the current plane to weight it with |dn|
                        tmp = OCLArray.empty((1, Ny, Nx), dtype=res_type)
                        if self._is_subsampled:
                            self._img_xy.copy_buffer(self._buf_plane)
                            self._copy_down_img(self._img_xy, tmp, 0)
                        else:
                            self._copy_down_buf(self._buf_plane, tmp, 0)

                        # NOTE(review): this indexes self.dn[i] while the
                        # other branch uses dn_mean[i + offset] - confirm
                        # whether offset should be applied here as well.
                        abs_dn = np.abs(self.dn[i])
                        dn0 = np.sum(abs_dn * tmp.get()) / np.sum(abs_dn +
                                                                  1.e-10)

                        self._fill_propagator(self.n0 + dn0)
                else:
                    # only refill the propagator when the mean changes
                    if self.dn_mean[i + offset] != dn0:
                        dn0 = self.dn_mean[i + offset]
                        self._fill_propagator(self.n0 + dn0)

            for j in range(self.simul_z):

                fft(self._buf_plane, inplace=True, plan=self._plan)
                self._mult_complex(self._buf_plane, self._buf_H)
                fft(self._buf_plane,
                    inplace=True,
                    inverse=True,
                    plan=self._plan)
                if not free_prop:
                    self._mult_dn(self._buf_plane,
                                  (i + offset + (j + 1.) / self.simul_z), dn0)

            if return_full:
                # store the plane at flat offset (i + 1) * Nx * Ny of u
                if self._is_subsampled and self.simul_xy != self.shape[:2]:
                    self._img_xy.copy_buffer(self._buf_plane)
                    self._copy_down_img(self._img_xy, u, (i + 1) * (Nx * Ny))
                else:
                    self._copy_down_buf(self._buf_plane, u,
                                        (i + 1) * (Nx * Ny))

        if return_full:
            return u.get()
        else:
            return self._buf_plane.get()
Ejemplo n.º 41
0
    def convolve(self, input_image):
        """
        Convolve the input with the filter bank via FFTs run by gputools.

        Input image should have dimensions <h, w> or <s, o, h, w> or <b, s, o, h, w, d>.
        Filter bank should have dimensions <s, o, h, w>

        The input is brought into <b, d, s, o, h, w> layout, padded along
        its last two axes, tiled ``n_scales`` x ``n_orientations`` times
        along the s and o axes, transformed along the last two axes,
        multiplied with ``self.filter_bank`` and transformed back. The
        result is cropped to ``box_height`` x ``box_width``.

        Returns a 4-d array for 2-d and 4-d inputs and a 6-d array for 6-d
        inputs.

        Raises ValueError if the input is not 2-, 4- or 6-dimensional.
        """
        ndim = len(input_image.shape)
        if ndim == 2:
            bdsohw_input_image = input_image[None, None, None, None, :, :]
        elif ndim == 4:
            bdsohw_input_image = input_image[None, None, :, :, :, :]
        elif ndim == 6:
            bdsohw_input_image = np.einsum("bsohwd->bdsohw", input_image)
        else:
            # fail early with a clear message instead of an UnboundLocalError
            raise ValueError(
                "input_image must have 2, 4 or 6 dimensions, got %d" % ndim)

        # padding in front of the data and the matching crop window
        pad_top = int(np.ceil(self.box_height / 2))
        pad_left = int(np.ceil(self.box_width / 2))
        rows = slice(pad_top,
                     int(self.box_height + np.ceil(self.box_height / 2)))
        cols = slice(pad_left,
                     int(self.box_width + np.ceil(self.box_width / 2)))

        padded_input = np.pad(
            bdsohw_input_image,
            [[0, 0], [0, 0], [0, 0], [0, 0],
             [pad_top, int(self.box_height / 2)],
             [pad_left, int(self.box_width / 2)]])

        padded_input = np.tile(
            padded_input, [1, 1, self.n_scales, self.n_orientations, 1, 1])

        padded_input_ocl = gputools.OCLArray.from_array(
            np.fft.ifftshift(padded_input, (-2, -1)).astype(np.complex64))

        # TODO MAKE GPU PLAN FIRST, and reuse it
        # TODO Make the broadcasting more efficient
        # TODO Reuse the filter bank across multiple letters.
        gputools.fft(padded_input_ocl, axes=(-2, -1), inplace=True)

        filter_bank_ocl_fft = gputools.OCLArray.from_array(
            self.filter_bank[None, None, :, :, :, :])

        padded_input_ocl *= filter_bank_ocl_fft

        gputools.fft(padded_input_ocl,
                     axes=(-2, -1),
                     inplace=True,
                     inverse=True)

        # NOTE(review): fftshift without ``axes`` shifts every axis, not
        # only the two transformed ones - confirm this is intended.
        padded_result = np.fft.fftshift(padded_input_ocl.get())

        if ndim == 6:
            # NOTE(review): by the einsum labels, axes 2 and 3 are s and o
            # here, and after the einsum the crop below is applied to the
            # w and d axes - confirm against the intended layout.
            presult = np.einsum("bdsohw->bsohwd",
                                np.fft.fftshift(padded_result, axes=[2, 3]))
            return presult[:, :, :, :, rows, cols]

        # 2-d and 4-d inputs: drop the two leading axes
        # Return <s, o, h, w>
        presult = np.fft.fftshift(padded_result[0, 0, :, :, :, :],
                                  axes=[2, 3])
        return presult[:, :, rows, cols]