def cufft_conv(x, y):
    """Multiply the FFTs of ``x`` and ``y`` on the GPU and inverse-transform.

    Both inputs are cast to ``complex64``.  When their shapes differ the
    function returns ``-1`` before any GPU work is done.  Otherwise it
    returns the host copy of the scaled inverse FFT of the element-wise
    product of the two forward transforms.
    """
    x = x.astype(np.complex64)
    y = y.astype(np.complex64)

    # Guard clause: mismatched shapes are reported with the sentinel -1.
    if x.shape != y.shape:
        return -1

    dims = x.shape
    forward_plan = fft.Plan(dims, np.complex64, np.complex64)
    backward_plan = fft.Plan(dims, np.complex64, np.complex64)

    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)

    x_fft = gpuarray.empty_like(x_gpu, dtype=np.complex64)
    y_fft = gpuarray.empty_like(y_gpu, dtype=np.complex64)
    out_gpu = gpuarray.empty_like(x_gpu, dtype=np.complex64)

    fft.fft(x_gpu, x_fft, forward_plan)
    fft.fft(y_gpu, y_fft, forward_plan)

    # The product is read back from y_fft below, i.e. this code relies on
    # multiply(..., overwrite=True) storing its result in the second operand.
    linalg.multiply(x_fft, y_fft, overwrite=True)
    fft.ifft(y_fft, out_gpu, backward_plan, scale=True)
    conv_out = out_gpu.get()

    # Release every device buffer explicitly, in allocation order.
    for device_buffer in (x_gpu, y_gpu, x_fft, y_fft, out_gpu):
        device_buffer.gpudata.free()

    return conv_out
def ifft2_gpu(y, fftshift=False):
    '''Complex-to-real inverse 2D FFT computed on the GPU.

    ``y`` is a 2D complex numpy array (converted to complex64 if needed).
    Only the first ``n2 // 2 + 1`` columns are uploaded; when ``fftshift``
    is anything other than ``False`` the input is passed through
    ``numpy.fft.ifftshift`` first.  The returned host array has shape
    ``(n1, n2)`` and is divided by ``n1 * n2``.
    '''
    if y.dtype != 'complex64':
        y = y.astype('complex64')

    n1, n2 = y.shape
    kept_columns = n2 // 2 + 1

    # Keep only the non-redundant half of the spectrum.
    spectrum = y if fftshift is False else np.fft.ifftshift(y)
    half_spectrum = np.asarray(spectrum[:, :kept_columns], np.complex64)
    ygpu = gpuarray.to_gpu(half_spectrum)

    # Real-valued output buffer of the full image size.
    image_gpu = gpuarray.empty((n1, n2), np.float32)

    plan_backward = cu_fft.Plan((n1, n2), np.complex64, np.float32)
    cu_fft.ifft(ygpu, image_gpu, plan_backward)

    # No scale flag is passed to ifft, so normalise by the pixel count here.
    return image_gpu.get() / n1 / n2
Ejemplo n.º 3
0
def cross_correlate(plan, normalize=True):
    """Correlate ``plan.volume`` with ``plan.template`` through the FFT.

    The forward transforms are written to ``plan.volume_fft`` and
    ``plan.template_fft``; the inverse transform of their product (with the
    template spectrum conjugated in place) is written to ``plan.ccc_map``.
    Nothing is returned and ``normalize`` is not used by this version.
    """
    fft(plan.volume, plan.volume_fft, plan.fwd_plan)
    fft(plan.template.d_data, plan.template_fft, plan.fwd_plan)

    # Conjugate the template spectrum in place before multiplying.
    conj(plan.template_fft, overwrite=True)
    spectrum_product = plan.volume_fft * plan.template_fft

    ifft(spectrum_product, plan.ccc_map, plan.inv_plan)
Ejemplo n.º 4
0
    def __ifft(self, F):
        """__ifft(self, F) -> numpy.2darray
        Apply a 2D inverse Fourier transform with the configured backend.

        Parameters
        ----------
        F : numpy.2darray
            Array to transform.

        Returns
        -------
        numpy.2darray

        Raises
        ------
        ValueError
            If the configured fft type is not 'numpy', 'fftw' or 'cufft'.
        """
        backend = self.__fft_type
        if backend not in ['numpy', 'fftw', 'cufft']:
            raise ValueError('Invalid parameter for the keyword "fft_type."')

        # pyfftw path: only taken when the library was found at import time.
        if found_pyfftw is True and backend == 'fftw':
            pyfftw.forget_wisdom()
            transform = pyfftw.builders.ifft2(F,
                                              overwrite_input=True,
                                              planner_effort='FFTW_ESTIMATE',
                                              threads=CPU_COUNT)
            return transform()

        # cuFFT path: reuse the pre-allocated device buffers and plan.
        if found_cufft is True and backend == 'cufft':
            self.__x_gpu.set(F.astype(np.complex64))
            cu_fft.ifft(self.__x_gpu, self.__xf_gpu, self.__plan, True)
            return self.__xf_gpu.get()

        # Everything else (including an unavailable accelerated backend)
        # falls back to the plain ifft2 in scope.
        return ifft2(F)
Ejemplo n.º 5
0
 def _solve_kernel_fast(self):
     '''Fast kernel, use when save_memory is False.

     Forward-transforms ``self.tmpspace`` in place, multiplies it by
     ``self.fgreentr`` and inverse-transforms the product back into
     ``self.tmpspace``.
     '''
     workspace = self.tmpspace
     cu_fft.fft(workspace, workspace, plan=self.plan_forward)
     product = workspace * self.fgreentr
     cu_fft.ifft(product, workspace, plan=self.plan_backward)
Ejemplo n.º 6
0
def gpu_irfft(dev_a, n=0, result=None, caller_id=None):
    """Inverse real FFT of the device array ``dev_a``.

    Parameters
    ----------
    dev_a : device array of complex coefficients.
    n : length of the real output; ``0`` means ``2 * (dev_a.size - 1)``.
    result : kept for interface compatibility.  The function always
        allocates (or looks up) its own output buffer and rebinds this name.
    caller_id : when ``None`` a fresh output array is allocated, otherwise
        the buffer is fetched through ``get_gpuarray`` with a key built from
        ``n``, the real dtype, ``caller_id`` and ``'irfft'``.

    Returns
    -------
    The device array holding the scaled inverse transform.
    """
    # Resolve the output length before allocating anything, so that the
    # output buffer and the plan always agree on the transform size.
    if n == 0:
        n = 2 * (dev_a.size - 1)

    if caller_id is None:
        result = gpuarray.empty(n, dtype=bm.precision.real_t)
    else:
        key = (n, bm.precision.real_t, caller_id, 'irfft')
        result = get_gpuarray(key)

    out_size = n
    in_size = dev_a.size
    # Number of complex coefficients a length-`out_size` real signal needs.
    n_coeffs = out_size // 2 + 1

    if n_coeffs == in_size:
        dev_in = dev_a
    else:
        # Input has to be truncated or padded to n_coeffs entries; both
        # cases go through the same copy call.
        dev_in = get_gpuarray((n_coeffs, bm.precision.complex_t, 0, 'irfft'))
        gpu_complex_copy(dev_in, dev_a, slice=slice(0, n_coeffs))

    inverse_plan = inverse_find_plan(out_size)
    fft.ifft(dev_in, result, inverse_plan, scale=True)
    return result
Ejemplo n.º 7
0
def scikit_gpu_fft_pipeline(filename):
    """Time a chain of GPU FFT / inverse FFT passes over data read from a file.

    The file is read in gulps of ``GULP_SIZE * GULP_FRAME_FFT`` complex64
    samples; each gulp is uploaded and pushed through ``NUMBER_FFT``
    forward/inverse pairs.  Returns the difference between two ``timer()``
    readings taken around the whole run.
    """
    start = timer()
    # The samples are raw complex64 values, so the file must be opened in
    # binary mode rather than text mode.
    with open(filename, 'rb') as file_obj:
        for _ in range(((32768 * 1024 * SIZE_MULTIPLIER // GULP_SIZE) //
                        COMPLEX_MULTIPLIER) // GULP_FRAME_FFT):
            data = np.fromfile(file_obj,
                               dtype=np.complex64,
                               count=GULP_SIZE * GULP_FRAME_FFT).reshape(
                                   (GULP_FRAME_FFT, GULP_SIZE))
            g_data = gpuarray.to_gpu(data)
            # Plans are deliberately rebuilt per gulp: that cost is part of
            # what this benchmark measures.
            plan = Plan(data.shape[1],
                        np.complex64,
                        np.complex64,
                        batch=GULP_FRAME_FFT)
            plan_inverse = Plan(data.shape[1],
                                np.complex64,
                                np.complex64,
                                batch=GULP_FRAME_FFT)
            tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
            tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
            fft(g_data, tmp1, plan)
            ifft(tmp1, tmp2, plan_inverse)
            for _ in range(NUMBER_FFT - 1):
                # Can't do FFT in place for fairness (emulating full pipeline)
                tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
                fft(tmp2, tmp1, plan)
                tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
                ifft(tmp1, tmp2, plan_inverse)
    end = timer()
    return end - start
Ejemplo n.º 8
0
def meanVolUnderMask(volume, mask, out, p=1):
    """Correlate ``volume`` with ``mask`` via FFT and normalise by ``plan.p``.

    ``plan`` is not a parameter; it is looked up in the enclosing scope and
    supplies the scratch spectra, the FFT plans and the divisor ``p``.  The
    ``p`` argument itself is not used.

    The scaled inverse transform is written into ``out``.  The normalised
    array ``out / plan.p`` is a new object; it is returned so the caller can
    actually use it (``out`` itself keeps the un-normalised values).
    """
    fft(volume, plan.volume_fft, plan.fwd_plan)
    fft(mask, plan.template_fft, plan.fwd_plan)
    # Conjugate the mask spectrum in place before multiplying.
    conj(plan.template_fft, overwrite=True)
    volume_fft = plan.volume_fft * plan.template_fft
    ifft(volume_fft, out, plan.inv_plan, scale=True)
    # Previously this quotient was bound to a local name and then dropped.
    return out / (plan.p)
Ejemplo n.º 9
0
def cuda_efftn(H, axes, forward):
    """Batched N-dimensional FFT (or scaled inverse FFT) over ``axes`` on the GPU.

    Parameters
    ----------
    H : numpy array whose dtype is used for both sides of the plan.
    axes : sequence of axes of ``H`` to transform.
    forward : truthy for the forward transform, falsy for the inverse
        transform (called with the scale flag set).

    Returns
    -------
    numpy array with the same shape as ``H``.
    """
    orig_shape = H.shape
    ndim = len(orig_shape)
    fft_ndim = len(axes)
    n_batch_axes = ndim - fft_ndim
    trailing_axes = list(range(n_batch_axes, ndim))

    # Move the transform axes to the end and make the memory contiguous so
    # the leading axes can be flattened into one batch axis.
    H = np.ascontiguousarray(np.moveaxis(H, axes, trailing_axes))
    moved_shape = H.shape

    batch_size = 1
    for extent in moved_shape[:n_batch_axes]:
        batch_size *= extent

    # (batch, <transform dims>) for any number of transform axes, instead of
    # assuming exactly three.
    H = np.reshape(H, (batch_size,) + tuple(moved_shape[n_batch_axes:]))

    H_gpu = gpuarray.to_gpu(H)
    plan = skfft.Plan(H_gpu.shape[1:], H.dtype, H.dtype, H_gpu.shape[0])

    if forward:
        skfft.fft(H_gpu, H_gpu, plan)
    else:
        skfft.ifft(H_gpu, H_gpu, plan, True)

    # Undo the two layout steps in reverse order: un-flatten the batch axis,
    # then move the transform axes back to where they came from.
    H = np.reshape(H_gpu.get(), moved_shape)
    return np.ascontiguousarray(np.moveaxis(H, trailing_axes, axes))
Ejemplo n.º 10
0
def ifft2_gpu(y, fftshift=False):
    """
    Complex-to-complex inverse 2D FFT computed on the GPU.

    ``y`` is a 2D complex numpy array.  When ``fftshift`` is truthy the
    input is passed through ``numpy.fft.ifftshift`` first.  Input and output
    have the same ``(n1, n2)`` shape; the result is divided by ``n1 * n2``.
    """
    n1, n2 = y.shape

    if fftshift:
        y = np.fft.ifftshift(y)

    # The plan below is complex128 -> complex128, so make sure the uploaded
    # buffer really is C-contiguous complex128 whatever the caller passed.
    ygpu = gpuarray.to_gpu(np.ascontiguousarray(y, dtype=np.complex128))

    # Output buffer of the same size as the input.
    x = gpuarray.empty((n1, n2), np.complex128)

    plan_backward = cu_fft.Plan((n1, n2), np.complex128, np.complex128)
    cu_fft.ifft(ygpu, x, plan_backward)

    # No scale flag is passed to ifft, so normalise by the pixel count here.
    xout = x.get() / n1 / n2

    return xout
Ejemplo n.º 11
0
def gpu_irfft(dev_a, n=0, result=None, caller_id=None):
    """Inverse real FFT of the device array ``dev_a``.

    Parameters
    ----------
    dev_a : device array of complex coefficients.
    n : length of the real output; ``0`` means ``2 * (dev_a.size - 1)``.
    result : kept for interface compatibility.  The function always
        allocates (or looks up) its own output buffer and rebinds this name.
    caller_id : when ``None`` a fresh output array is allocated, otherwise
        the buffer is fetched through ``get_gpuarray`` with a key built from
        ``n``, the real dtype, ``caller_id`` and ``'irfft'``.

    Returns
    -------
    The device array holding the scaled inverse transform.
    """
    # Resolve the output length first so the output buffer is never sized
    # from an unresolved n == 0.
    if n == 0:
        n = 2 * (dev_a.size - 1)

    if caller_id is None:
        result = gpuarray.empty(n, dtype=bm.precision.real_t)
    else:
        key = (n, bm.precision.real_t, caller_id, 'irfft')
        result = get_gpuarray(key)

    out_size = n
    in_size = dev_a.size
    # Number of complex coefficients a length-`out_size` real signal needs.
    n_coeffs = out_size // 2 + 1

    if n_coeffs == in_size:
        dev_in = dev_a
    else:
        # Truncation and padding share one code path: a zero-filled scratch
        # buffer receives the first n_coeffs entries of the input.
        dev_in = get_gpuarray((n_coeffs, bm.precision.complex_t, 0, 'irfft'),
                              zero_fills=True)
        gpu_complex_copy(dev_in, dev_a, slice=slice(0, n_coeffs))

    inverse_plan = inverse_find_plan(out_size)
    fft.ifft(dev_in, result, inverse_plan, scale=True)
    return result
Ejemplo n.º 12
0
def calc_stdV(plan):
    """Fill ``plan.stdV`` from the FFT correlation of the volume and padded mask.

    The scaled inverse transform is written into ``plan.stdV``, which is
    then rebound to that array divided by ``plan.p``.  Nothing is returned.
    """
    fft(plan.volume, plan.volume_fft, plan.fwd_plan)
    fft(plan.maskPadded, plan.template_fft, plan.fwd_plan)

    # Conjugate the mask spectrum in place before multiplying.
    conj(plan.template_fft, overwrite=True)
    spectrum_product = plan.volume_fft * plan.template_fft

    ifft(spectrum_product, plan.stdV, plan.inv_plan, scale=True)
    plan.stdV = plan.stdV / (plan.p)
Ejemplo n.º 13
0
    def run_gpu(self, complex_wave):
        """
        Run the CTF simulation on the GPU.

        The result is left in ``self.fsdata`` (a GPU array); nothing is
        returned.  When ``complex_wave`` is falsy the stored waves are
        replaced by their squared modulus, and, if the focal-series metadata
        has the item 'ModImage.convergence_semiangle', that result is
        additionally filtered with ``self.Esdata`` in Fourier space.
        """
        n_planes, n_rows, n_cols = self.shape
        wave_spectrum = gpuarray.empty((n_rows, n_cols), np.complex64)
        plan_2d = self.pft2dcc
        plan_3d = self.pft3dcc

        # Psi(x,y,z) = convolve[Psi(x,y,0), CTF(x,y,z)], done as a product
        # in Fourier space, one plane at a time.
        cu_fft.fft(self.wdata, wave_spectrum, plan_2d)
        for plane in range(n_planes):
            self.fsdata[plane, :, :] = self.ctfd[plane, :, :] * wave_spectrum
        cu_fft.ifft(self.fsdata, self.fsdata, plan_3d, True)

        if complex_wave:
            return

        # Intensities: Isim = |Psi|**2
        self.fsdata = self.fsdata * self.fsdata.conj()

        has_envelope = self.focal_series.metadata.has_item(
            'ModImage.convergence_semiangle')
        if not has_envelope:
            return

        # Isim = convolve[Isim, Es], again as a product in Fourier space.
        cu_fft.fft(self.fsdata, self.fsdata, plan_3d)
        damped_spectrum = self.fsdata * self.Esdata
        cu_fft.ifft(damped_spectrum, self.fsdata, plan_3d, True)
Ejemplo n.º 14
0
 def test_ifft_complex128_to_float64_1d(self):
     """Scaled complex128 -> float64 inverse FFT recovers a random 1D signal."""
     length = self.N
     expected = np.asarray(np.random.rand(length), np.float64)
     spectrum = np.asarray(np.fft.rfftn(expected), np.complex128)

     spectrum_gpu = gpuarray.to_gpu(spectrum)
     recovered_gpu = gpuarray.empty(length, np.float64)

     inverse_plan = fft.Plan(expected.shape, np.complex128, np.float64)
     fft.ifft(spectrum_gpu, recovered_gpu, inverse_plan, True)

     assert np.allclose(expected, recovered_gpu.get(), atol=atol_float64)
Ejemplo n.º 15
0
 def _solve_kernel_slow(self):
     ''' Slow version, use when save_memory is True: stores only 1 slice
     of the fgreentr function and loops over all slices.

     ``self.tmpspace`` is forward-transformed in place, each of its
     ``self.mesh.nz`` leading-index slices is multiplied by
     ``self.fgreentr``, and the result is inverse-transformed in place.
     '''
     cu_fft.fft(self.tmpspace, self.tmpspace, plan=self.plan_forward)
     # range() instead of xrange() so the loop also runs on Python 3.
     for i in range(self.mesh.nz):
         self.tmpspace[i, :, :] = self.tmpspace[i, :, :] * self.fgreentr
     cu_fft.ifft(self.tmpspace, self.tmpspace, plan=self.plan_backward)
Ejemplo n.º 16
0
 def test_ifft_complex128_to_float64_1d(self):
     """Round trip: host rfftn, then scaled GPU inverse FFT, must match the input."""
     signal = np.asarray(np.random.rand(self.N), np.float64)

     # Reference spectrum computed on the host, uploaded as complex128.
     coeffs_gpu = gpuarray.to_gpu(
         np.asarray(np.fft.rfftn(signal), np.complex128))
     output_gpu = gpuarray.empty(self.N, np.float64)

     c2r_plan = fft.Plan(signal.shape, np.complex128, np.float64)
     fft.ifft(coeffs_gpu, output_gpu, c2r_plan, True)

     assert np.allclose(signal, output_gpu.get(), atol=atol_float64)
Ejemplo n.º 17
0
def cross_correlate(plan, normalize=True):
    """Correlate the stored volume spectrum with ``plan.template`` via FFT.

    ``plan.volume_fft`` is read but not computed here, so it must already
    hold the volume spectrum.  The inverse transform is written into
    ``plan.ccc_map``.  When ``normalize`` is truthy ``plan.ccc_map`` is
    rebound to that map divided by ``plan.norm_volume`` and by the sum of
    the template data.  Nothing is returned.
    """
    template_sum = np.sum(plan.template.d_data)

    fft(plan.template.d_data, plan.template_fft, plan.fwd_plan)
    # Conjugate the template spectrum in place before multiplying.
    conj(plan.template_fft, overwrite=True)
    spectrum_product = plan.volume_fft * plan.template_fft
    ifft(spectrum_product, plan.ccc_map, plan.inv_plan)

    if not normalize:
        return
    plan.ccc_map = plan.ccc_map / plan.norm_volume / template_sum
Ejemplo n.º 18
0
 def _solve_kernel_slow(self):
     ''' Slow version, use when save_memory is True: stores only 1 slice
     of the fgreentr function and loops over all slices.

     Transforms ``self.tmpspace`` forward in place, multiplies every one of
     its ``self.mesh.nz`` leading-index slices by ``self.fgreentr`` and
     transforms back in place.
     '''
     cu_fft.fft(self.tmpspace, self.tmpspace, plan=self.plan_forward)
     # range() replaces the Python-2-only xrange(); it works on both.
     for i in range(self.mesh.nz):
         self.tmpspace[i, :, :] = self.tmpspace[i, :, :] * self.fgreentr
     cu_fft.ifft(self.tmpspace, self.tmpspace,
                 plan=self.plan_backward)
Ejemplo n.º 19
0
def inplaceFractShift(img, dx, dy, PhaseShiftFunc, bInverse=False):
    """Shift ``img`` in place by ``(dx, dy)`` using a Fourier-space phase kernel.

    Does nothing when both shifts equal zero.  Otherwise ``img`` is
    transformed into the module-level buffer ``FT``, ``PhaseShiftFunc`` is
    applied to it with the shifts converted to float32, and the scaled
    inverse transform is written back into ``img``.  ``bInverse`` is not
    used.  Always returns ``None``.
    """
    global plan, FT

    if dx == 0 and dy == 0:
        return

    # Called with the image shape before the module-level plan/FT are used.
    Cache(img.shape)

    cu_fft.fft(img, FT, plan)
    shift_x = np.float32(dx)
    shift_y = np.float32(dy)
    PhaseShiftFunc(FT, kxx, kyy, shift_x, shift_y)
    cu_fft.ifft(FT, img, plan, True)
Ejemplo n.º 20
0
def process_video_cuda(data):
    """Run one block of samples through the GPU video pipeline.

    On the first call the video filters and CUDA state are prepared.  The
    input is converted to float32, uploaded, and passed through two
    FFT -> filter -> inverse-FFT stages with an angle-difference kernel in
    between, then a clamp kernel.  Returns the clamped output with 512
    samples removed from each end.
    """
    global cs, cs_first

    # Lazy one-time initialisation of filters and GPU state.
    if cs_first:
        prepare_video_filters(SysParams)
        prepare_video_cuda()
        cs_first = False

    fdata = np.float32(data)

    gpudata = gpuarray.to_gpu(fdata)

    # first fft->ifft cycle applies pre-decoding filtering (low pass filters, CAV/CLV emphasis)
    # and very importantly, performs the Hilbert transform
    fft.fft(gpudata, cs['fft1_out'], cs['plan1'])

    if Inner:
        cs['fft1_out'] *= cs['filt_video_inner']
    else:
        cs['fft1_out'] *= cs['filt_video']

    fft.ifft(cs['fft1_out'], cs['filtered1'], cs['plan1i'], True)

    cs['doanglediff'](cs['fm_demod'],
                      cs['filtered1'],
                      block=(1024, 1, 1),
                      grid=(blocklenk, 1))

    # post-processing:  output low-pass filtering and deemphasis
    fft.fft(cs['fm_demod'], cs['fft2_out'], cs['plan2'])
    cs['fft2_out'] *= cs['filt_post']
    fft.ifft(cs['fft2_out'], cs['postlpf'], cs['plan2i'], True)

    cs['doclamp16'](cs['clipped_gpu'],
                    cs['postlpf'],
                    np.float32(-SysParams['output_minfreq']),
                    np.float32(SysParams['output_scale']),
                    block=(1024, 1, 1),
                    grid=(blocklenk, 1))

    output_16 = cs['clipped_gpu'].get()

    # Drop the first and last `chop` samples of the block.
    chop = 512
    return output_16[chop:len(output_16) - chop]
Ejemplo n.º 21
0
def process_video_cuda(data):
    """Run one block of samples through the GPU video pipeline.

    On the first call the video filters and CUDA state are prepared.  The
    input is converted to float32, uploaded, and passed through two
    FFT -> filter -> inverse-FFT stages with an angle-difference kernel in
    between, then a clamp kernel.  Returns the clamped output with 512
    samples removed from each end.
    """
    global cs, cs_first

    # Lazy one-time initialisation of filters and GPU state.
    if cs_first:
        prepare_video_filters()
        prepare_video_cuda()
        cs_first = False

    fdata = np.float32(data)

    gpudata = gpuarray.to_gpu(fdata)

    # first fft->ifft cycle applies pre-decoding filtering (low pass filters, CAV/CLV emphasis)
    # and very importantly, performs the Hilbert transform
    fft.fft(gpudata, cs["fft1_out"], cs["plan1"])

    if Inner:
        cs["fft1_out"] *= cs["filt_video_inner"]
    else:
        cs["fft1_out"] *= cs["filt_video"]

    fft.ifft(cs["fft1_out"], cs["filtered1"], cs["plan1i"], True)

    cs["doanglediff"](cs["fm_demod"], cs["filtered1"], block=(1024, 1, 1), grid=(blocklenk, 1))

    # post-processing:  output low-pass filtering and deemphasis
    fft.fft(cs["fm_demod"], cs["fft2_out"], cs["plan2"])
    cs["fft2_out"] *= cs["filt_post"]
    fft.ifft(cs["fft2_out"], cs["postlpf"], cs["plan2i"], True)

    cs["doclamp16"](
        cs["clipped_gpu"],
        cs["postlpf"],
        np.float32(-SP["output_minfreq"]),
        np.float32(SP["output_scale"]),
        block=(1024, 1, 1),
        grid=(blocklenk, 1),
    )

    output_16 = cs["clipped_gpu"].get()

    # Drop the first and last `chop` samples of the block.
    chop = 512
    return output_16[chop : len(output_16) - chop]
Ejemplo n.º 22
0
def FractShift(src, dest, dx, dy, PhaseShiftFunc):
    """Write ``src`` shifted by ``(dx, dy)`` into ``dest`` via Fourier space.

    Returns immediately, leaving ``dest`` untouched, when both shifts equal
    zero.  Otherwise ``src`` is transformed into the module-level buffer
    ``FT``, the phase shift is applied to it, and the scaled inverse
    transform is written to ``dest``.  Always returns ``None``.

    NOTE(review): the shift is applied by the name ``PhaseShift`` from the
    enclosing scope, not by the ``PhaseShiftFunc`` argument, which is
    unused - confirm that this is intended.
    """
    if dx == 0 and dy == 0:
        return
    global plan
    global FT

    # Called with the source shape before the module-level plan/FT are used.
    Cache(src.shape)

    # The fourth positional parameter of fft() is its scale flag; the phase
    # kernel must not be passed there.
    cu_fft.fft(src, FT, plan)
    PhaseShift(FT, dx, dy)
    cu_fft.ifft(FT, dest, plan, True)
Ejemplo n.º 23
0
def cross_correlate(plan, normalize=True):
    """Correlate ``plan.volume`` with ``plan.templatePadded`` via FFT.

    The scaled inverse transform of the spectrum product (template spectrum
    conjugated in place) is written to ``plan.ccc_map``, which is then
    divided in place by ``float32(plan.p.get()) * plan.stdV``.  Nothing is
    returned and ``normalize`` is not used by this version.
    """
    fft(plan.volume, plan.volume_fft, plan.fwd_plan)
    fft(plan.templatePadded, plan.template_fft, plan.fwd_plan)

    # Conjugate the template spectrum in place before multiplying.
    conj(plan.template_fft, overwrite=True)
    spectrum_product = plan.volume_fft * plan.template_fft
    ifft(spectrum_product, plan.ccc_map, plan.inv_plan, scale=True)

    divisor = np.float32(plan.p.get()) * plan.stdV
    plan.ccc_map /= divisor
Ejemplo n.º 24
0
    def propagate_eager(self, wavelength, wavefront):
        """
        'Not-Too-Good' version of the propagation on the GPU (lots of
        memory issues...).  Remove in the future.

        Prints the fraction of free GPU memory at four points along the way.

        NOTE(review): the phase term is built from the pupil mask itself and
        ``wavefront`` is never read - confirm that this is intended.

        :param wavelength: key into ``self.pupil_masks``; also the divisor
            of the phase term
        :param wavefront: not used by this implementation
        :return: fft-shifted sum over slices of the squared modulus of the
            final field
        """

        def _report_free(template):
            # Print the currently free share of GPU memory.
            free, total = cuda.mem_get_info()
            print(template % (free / total * 100))

        N = self.N_PIX
        _report_free("Free: %.2f percent")

        # Pupil Plane -> Image Slicer
        pupil_mask = self.pupil_masks[wavelength]
        pupil_field = pupil_mask * np.exp(
            1j * 2 * np.pi * self.pupil_masks[wavelength] / wavelength)
        pupil_gpu = gpuarray.to_gpu(np.asarray(pupil_field, np.complex64))
        plan = cu_fft.Plan(pupil_gpu.shape, np.complex64, np.complex64)
        cu_fft.fft(pupil_gpu, pupil_gpu, plan, scale=True)

        # Bring the field back to the host and release the device copy
        # before the stacked version is uploaded.
        slicer_host = pupil_gpu.get()
        pupil_gpu.gpudata.free()

        _report_free("*Free: %.2f percent")

        # One copy of the field per slice, each multiplied by its mask.
        slicer_host = np.stack([slicer_host] * self.N_slices)
        slicer_gpu = gpuarray.to_gpu(slicer_host)
        slicer_masks_gpu = gpuarray.to_gpu(self.slicer_masks_fftshift)
        clinalg.multiply(slicer_masks_gpu, slicer_gpu, overwrite=True)
        slicer_masks_gpu.gpudata.free()

        _report_free("**Free: %.2f percent")

        # Slicer -> Pupil Mirror
        plan = cu_fft.Plan((N, N), np.complex64, np.complex64, self.N_slices)
        cu_fft.ifft(slicer_gpu, slicer_gpu, plan, scale=True)
        mirror_mask_gpu = gpuarray.to_gpu(self.pupil_mirror_masks_fft)
        clinalg.multiply(mirror_mask_gpu, slicer_gpu, overwrite=True)

        # Pupil Mirror -> Slits
        cu_fft.fft(slicer_gpu, slicer_gpu, plan)
        slits = slicer_gpu.get()
        slicer_gpu.gpudata.free()
        mirror_mask_gpu.gpudata.free()
        slit = fftshift(np.sum((np.abs(slits))**2, axis=0))

        _report_free("***Free: %.2f percent")

        return slit
Ejemplo n.º 25
0
 def irfft(a, normalize=True, nthreads=0):
     """Complex-to-real inverse FFT of ``a``, on the GPU when memory allows.

     When ``is_memory_enough(a)`` is falsy the call is delegated to
     ``_irfft(a)``.  Otherwise ``a`` is uploaded and transformed into an
     array whose last axis has length ``(a.shape[-1] - 1) * 2``; the real
     dtype is looked up in ``G_CTYPES``.  No scale flag is passed to the GPU
     transform, and ``normalize`` / ``nthreads`` are not used here.
     """
     if not is_memory_enough(a):
         return _irfft(a)

     spectrum_gpu = gpuarray.to_gpu(a)

     out_shape = list(a.shape)
     out_shape[-1] = (out_shape[-1] - 1) * 2

     real_dtype = G_CTYPES[a.dtype.type]
     output_gpu = gpuarray.empty(out_shape, real_dtype)

     c2r_plan = fft.Plan(out_shape, a.dtype.type, real_dtype)
     fft.ifft(spectrum_gpu, output_gpu, c2r_plan)
     return output_gpu.get()
Ejemplo n.º 26
0
    def test_ifft_complex64_to_float32_2d(self):
        """Scaled complex64 -> float32 inverse FFT recovers a random 2D array."""
        rows, cols = self.N, self.M
        expected = np.asarray(np.random.rand(rows, cols), np.float32)

        # The host spectrum is passed through ascontiguousarray so that the
        # GPU receives a C-ordered buffer.
        spectrum = np.asarray(np.fft.rfftn(expected), np.complex64)
        spectrum_gpu = gpuarray.to_gpu(np.ascontiguousarray(spectrum))
        recovered_gpu = gpuarray.empty((rows, cols), np.float32)

        inverse_plan = fft.Plan(expected.shape, np.complex64, np.float32)
        fft.ifft(spectrum_gpu, recovered_gpu, inverse_plan, True)

        assert np.allclose(expected, recovered_gpu.get(), atol=atol_float32)
Ejemplo n.º 27
0
    def test_batch_ifft_complex128_to_float64_1d(self):
        """Batched 1D complex128 -> float64 inverse FFT recovers random rows."""
        n_batch, length = self.B, self.N
        expected = np.asarray(np.random.rand(n_batch, length), np.float64)

        # Transform each row on the host; upload a C-ordered copy.
        spectrum = np.asarray(np.fft.rfft(expected, axis=1), np.complex128)
        spectrum_gpu = gpuarray.to_gpu(np.ascontiguousarray(spectrum))
        recovered_gpu = gpuarray.empty((n_batch, length), np.float64)

        inverse_plan = fft.Plan(expected.shape[1], np.complex128, np.float64,
                                batch=n_batch)
        fft.ifft(spectrum_gpu, recovered_gpu, inverse_plan, True)

        assert np.allclose(expected, recovered_gpu.get(), atol=atol_float64)
Ejemplo n.º 28
0
    def test_batch_ifft_complex128_to_float64_2d(self):
        """Batched 2D complex128 -> float64 inverse FFT recovers random images."""
        n_batch, rows, cols = self.B, self.N, self.M
        expected = np.asarray(np.random.rand(n_batch, rows, cols), np.float64)

        # Transform the two trailing axes on the host; upload a C-ordered copy.
        spectrum = np.asarray(np.fft.rfftn(expected, axes=(1,2)), np.complex128)
        spectrum_gpu = gpuarray.to_gpu(np.ascontiguousarray(spectrum))
        recovered_gpu = gpuarray.empty((n_batch, rows, cols), np.float64)

        inverse_plan = fft.Plan([rows, cols], np.complex128, np.float64,
                                batch=n_batch)
        fft.ifft(spectrum_gpu, recovered_gpu, inverse_plan, True)

        assert np.allclose(expected, recovered_gpu.get(), atol=atol_float64)
Ejemplo n.º 29
0
    def test_ifft_complex64_to_float32_2d(self):
        """Round trip: host rfftn, then scaled GPU inverse FFT, must match the input."""
        image = np.asarray(np.random.rand(self.N, self.M), np.float32)

        # Upload a C-ordered complex64 copy of the host spectrum.
        coeffs = np.asarray(np.fft.rfftn(image), np.complex64)
        coeffs_gpu = gpuarray.to_gpu(np.ascontiguousarray(coeffs))

        output_gpu = gpuarray.empty((self.N, self.M), np.float32)
        c2r_plan = fft.Plan(image.shape, np.complex64, np.float32)
        fft.ifft(coeffs_gpu, output_gpu, c2r_plan, True)

        assert np.allclose(image, output_gpu.get(), atol=atol_float32)
Ejemplo n.º 30
0
    def filter(self):
        """Convolve ``self.series[0]`` with ``self.series[1]`` on the GPU via FFT.

        Both series are zero-padded to ``determine_size(len(signal) +
        len(window) - 1)`` samples, transformed, multiplied, scaled by 2.0
        and inverse-transformed (scaled).  A CUDA context is created for the
        call and is always popped again, even when a step raises.

        Returns a host array of dtype ``self.precision['complex']``.
        """
        import pycuda.gpuarray as gpuarray
        import skcuda.fft as cu_fft
        import skcuda.linalg as linalg
        import pycuda.driver as cuda
        from pycuda.tools import make_default_context
        cuda.init()
        context = make_default_context()
        try:
            signal = self.series[0]
            window = self.series[1]
            linalg.init()
            nfft = determine_size(len(signal) + len(window) - 1)

            # Move zero-padded copies of both series to the GPU.
            sig_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
            win_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
            sig_gpu = gpuarray.zeros(sig_zero_pad.shape,
                                     dtype=self.precision['float'])
            win_gpu = gpuarray.zeros(win_zero_pad.shape,
                                     dtype=self.precision['float'])
            sig_zero_pad[0:len(signal)] = signal
            win_zero_pad[0:len(window)] = window
            sig_gpu.set(sig_zero_pad)
            win_gpu.set(win_zero_pad)

            # Forward transforms.
            sig_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex'])
            win_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex'])
            sig_plan_forward = cu_fft.Plan(sig_fft_gpu.shape,
                                           self.precision['float'],
                                           self.precision['complex'])
            win_plan_forward = cu_fft.Plan(win_fft_gpu.shape,
                                           self.precision['float'],
                                           self.precision['complex'])
            cu_fft.fft(sig_gpu, sig_fft_gpu, sig_plan_forward)
            cu_fft.fft(win_gpu, win_fft_gpu, win_plan_forward)

            # Convolve: product of the spectra, then scale by 2.0.
            out_fft = linalg.multiply(sig_fft_gpu, win_fft_gpu, overwrite=True)
            linalg.scale(2.0, out_fft)

            # Inverse transform and copy back to the host.
            out_gpu = gpuarray.zeros_like(out_fft)
            plan_inverse = cu_fft.Plan(out_fft.shape, self.precision['complex'],
                                       self.precision['complex'])
            cu_fft.ifft(out_fft, out_gpu, plan_inverse, True)
            out_np = np.zeros(len(out_gpu), self.precision['complex'])
            out_gpu.get(out_np)
            return out_np
        finally:
            # Release the context on every exit path.
            context.pop()
Ejemplo n.º 31
0
def RunCorrection(neib,ROI,DifPad,rspace,kspace,exitWave,buffer_exitWave,finalObj,offsetx,offsety,objsizex,roisizex,CopyFromROI,ExitwaveAndBuffer,ApplyDifPad,cufftplan,aperture,fcachevector):
    """Evaluate an error value for every offset in a (2*neib+1)^2 neighbourhood.

    For each ``(jpos, ipos)`` in ``[-neib, neib]`` the region at
    ``(offsety + jpos, offsetx + ipos)`` is copied out of ``finalObj``, an
    exit wave is computed, pushed through FFT -> ``ApplyDifPad`` -> scaled
    inverse FFT, and the summed squared modulus of its change is recorded.
    Returns ``GetMin(errors, neib)``.
    """
    errors = []
    offsets = range(-neib, neib + 1)
    for jpos in offsets:
        for ipos in offsets:
            row = np.int32(offsety + jpos)
            col = np.int32(offsetx + ipos)
            CopyFromROI(rspace, finalObj, row, col, roisizex, objsizex)

            # Compute the exit wave and keep a copy in buffer_exitWave.
            ExitwaveAndBuffer(exitWave, buffer_exitWave, aperture, rspace)
            cu_fft.fft(exitWave, kspace, cufftplan)  # kspace = wave at detector
            ApplyDifPad(kspace, DifPad, fcachevector)  # replace amplitudes
            cu_fft.ifft(kspace, exitWave, cufftplan, True)  # new exit wave

            residual = exitWave - buffer_exitWave
            errori = np.sum((abs(residual)**2).get())
            errors.append(errori + 0)
    return GetMin(errors, neib)
Ejemplo n.º 32
0
        def thunk():
            """Run the batched complex-to-real inverse FFT for this node.

            Reads the input array from ``inputs[0][0]`` and the transform
            shape from ``inputs[1][0]``; writes the result, computed without
            a scale flag, into ``outputs[0][0]`` (allocated here when
            missing or of the wrong shape).
            """
            source = inputs[0][0]
            input_shape = source.shape
            s = inputs[1][0]

            # Padding is not supported, so the input dimensions must match
            # the requested transform size exactly.
            assert (input_shape[1:-2] == s[:-1]).all()
            assert ((input_shape[-2] - 1) * 2 + s[-1] % 2 == s[-1]).all()

            # Output shape: the batch dimension followed by the transform shape.
            output_shape = tuple([input_shape[0]] + list(s))

            out_cell = outputs[0]

            # Reuse a previous allocation when it already has the right shape.
            if out_cell[0] is None or out_cell[0].shape != output_shape:
                out_cell[0] = pygpu.zeros(output_shape,
                                          context=source.context,
                                          dtype="float32")

            # `source` is a float32 array with an extra dimension, but will
            # be interpreted by skcuda as a complex64 array instead.
            target = out_cell[0]

            with source.context:
                # Build the plan only when none exists or the input shape changed.
                if plan[0] is None or plan_input_shape[0] != input_shape:
                    plan_input_shape[0] = input_shape
                    plan[0] = fft.Plan(s,
                                       np.complex64,
                                       np.float32,
                                       batch=output_shape[0])

                # Sync GPU variables before computation
                source.sync()
                target.sync()

                # Rescaling is not enabled here (it was found to be very
                # slow); it is left to be done manually afterwards.
                fft.ifft(source, target, plan[0])

                # Sync results to ensure output contains completed computation
                pycuda.driver.Context.synchronize()
Ejemplo n.º 33
0
    def test_ffts(self):
        """GPU inverse FFT without scaling equals fftpack.ifft times the length."""
        t, tsc, y, err = data()
        n_samples = len(y)

        output_host = np.empty(n_samples)

        yg = gpuarray.to_gpu(y.astype(np.complex128))
        yghat = gpuarray.to_gpu(output_host.astype(np.complex128))

        plan = cufft.Plan(n_samples, np.complex128, np.complex128)
        cufft.ifft(yg, yghat, plan)

        # The host reference is multiplied by the length because the GPU
        # call above passes no scale flag.
        expected = fftpack.ifft(y) * n_samples

        tolerances = dict(rtol=nfft_rtol, atol=nfft_atol)
        assert_allclose(expected, yghat.get(), **tolerances)
Ejemplo n.º 34
0
def ifft2c2c_cuda(x, axes=(0, 1)):
    """Complex-to-complex inverse FFT over the leading ``len(axes)`` axes.

    ``x`` is converted to a complex64 array and uploaded.  When ``x`` has
    more dimensions than ``len(axes)`` the remaining trailing dimensions
    are treated as a batch with an advanced-layout plan.  The host result
    is divided by the product of the transformed extents.  Only the length
    of ``axes`` is used.
    """
    rank = len(axes)
    x = np.array(x).astype(np.complex64)
    x_gpu = gpuarray.to_gpu(x)
    xf_gpu = gpuarray.empty(x.shape, np.complex64)

    fft_shape = x.shape[0:rank]
    if len(x.shape) <= rank:
        plan = Plan(fft_shape, np.complex64, np.complex64)
    else:
        # Batch over every axis after the transformed ones; the same
        # embed / stride / dist triple is used for input and output.
        batch = np.prod(x.shape[rank:len(x.shape)])
        plan = Plan(fft_shape, np.complex64, np.complex64, batch, None, 1,
                    np.array(fft_shape).astype(np.int32),
                    np.prod(x.shape[rank:len(x.shape)]), 1,
                    np.array(fft_shape).astype(np.int32),
                    np.prod(x.shape[rank:len(x.shape)]), 1)

    ifft(x_gpu, xf_gpu, plan)
    return xf_gpu.get() / np.prod(fft_shape)
Ejemplo n.º 35
0
def recon_gpu_2d(kdata):
    """Inverse-FFT every ``kdata[:, :, i]`` slice on the GPU.

    Prints ``kdata.shape``.  Each slice is uploaded as C-contiguous
    complex128, inverse-transformed, divided by the product of its first
    two extents, fft-shifted along axis 1 and stored in the output.
    Returns ``abs()`` of the assembled array, which has the shape of
    ``kdata``.
    """
    print(kdata.shape)
    image_data = np.empty(kdata.shape, np.complex128)

    n_slices = kdata.shape[2]
    if n_slices == 0:
        # Nothing to transform; do not touch the GPU at all.
        return abs(image_data)

    # Every slice has the same shape, so the plan and the output buffer are
    # created once and reused.
    plane_shape = kdata[:, :, 0].shape
    plan_inverse = cu_fft.Plan(plane_shape, np.complex128, np.complex128)
    result_gpu = gpuarray.empty(plane_shape, np.complex128)

    for slice_idx in range(n_slices):
        # kdata[:, :, i] is a strided view of unknown dtype; the complex128
        # plan needs a contiguous complex128 buffer.
        slice_data = np.ascontiguousarray(kdata[:, :, slice_idx],
                                          dtype=np.complex128)
        data_gpu = gpuarray.to_gpu(slice_data)

        cu_fft.ifft(data_gpu, result_gpu, plan_inverse, False)

        # Scaling is disabled above, so normalise on the host.
        result = result_gpu.get() / plane_shape[0] / plane_shape[1]
        image_data[:, :, slice_idx] = np.fft.fftshift(result, 1)
    return abs(image_data)
Ejemplo n.º 36
0
 def poisson_solve(self, rho):
     ''' Solve the poisson equation with the given charge distribution
     Args:
         rho: Charge distribution (same dimensions as mesh)
     Returns:
         Phi (same dimensions as rho)
     '''
     rho = rho.astype(np.complex128)

     # Point both copy objects at the buffer of the converted rho.
     self._cpyrho2tmp.set_src_device(rho.gpudata)
     self._cpytmp2rho.set_dst_device(rho.gpudata)

     # tmpspace may still hold the previous potential: clear it first.
     self.tmpspace.fill(0)
     self._cpyrho2tmp()

     cu_fft.fft(self.tmpspace, self.tmpspace, plan=self.plan_forward)
     convolved = self.tmpspace * self.fgreentr
     cu_fft.ifft(convolved, self.tmpspace, plan=self.plan_backward)

     # The result is copied back into the rho array to save space.
     self._cpytmp2rho()

     # Scale by hand: no scale flag was passed to the transforms above.
     dimension = self.mesh.dimension
     phi = rho.real/(2**dimension * self.mesh.n_nodes)
     phi *= self.mesh.volume_elem/(2**(dimension-1)*np.pi*epsilon_0)
     return phi
Ejemplo n.º 37
0
def ifft(invec, outvec, prec, itype, otype):
    """Inverse-FFT ``invec.data`` into ``outvec.data`` with a looked-up plan.

    The plan comes from ``_get_inv_plan`` using both dtypes and
    ``len(outvec)``.  ``prec``, ``itype`` and ``otype`` are not used here.
    """
    inverse_plan = _get_inv_plan(invec.dtype, outvec.dtype, len(outvec))
    source, target = invec.data, outvec.data
    cu_fft.ifft(source, target, inverse_plan)
Ejemplo n.º 38
0
 def _solve_kernel_fast(self):
     '''Fast kernel, use when save_memory is False.

     In-place forward FFT of ``self.tmpspace``, multiplication by
     ``self.fgreentr``, then inverse FFT of the product back into
     ``self.tmpspace``.
     '''
     buffer_gpu = self.tmpspace
     cu_fft.fft(buffer_gpu, buffer_gpu, plan=self.plan_forward)
     filtered = buffer_gpu * self.fgreentr
     cu_fft.ifft(filtered, buffer_gpu, plan=self.plan_backward)
Ejemplo n.º 39
0
# Fetch the "test" kernel from mod2 and declare the argument layouts of
# both kernels.
psiNonlinear = mod2.get_function("test")
modSquared.prepare(["P", "P", "I"])
psiNonlinear.prepare("FFFPPPI")
block = (16, 16, 1)
grid = (64, 64)

for n in np.arange(N_RUNS):
    start = time.time()

    # range() instead of the Python-2-only xrange() so this also runs on
    # Python 3.
    for step in range(N_TIMESTEPS):
        # Split-step method: apply the kinetic factor in Fourier space.
        cu_fft.fft(psi_gpu, psi_gpu, plan_forward)
        psi_gpu *= kineticFactorHalf_gpu
        cu_fft.ifft(psi_gpu, psi_gpu, plan_inverse, scale=True)

        # Density |psi|^2, computed as the real part of psi * conj(psi).
        currentDensity_gpu = (psi_gpu * psi_gpu.conj()).real

        # Update the reservoir.
        # NOTE(review): an earlier commented-out form of this line had
        # "+ Rdt * density" in the exponent; here that term is negated -
        # confirm the intended sign.
        n_gpu *= cumath.exp(misc.add(- gammaRdt_gpu,
                                     - misc.multiply(Rdt_gpu, currentDensity_gpu)))
        n_gpu += Pdt_gpu

        # Update the wavefunction with the three exponent contributions.
        psi_gpu *= cumath.exp(
            misc.add(
                misc.add(misc.multiply(expFactorPolFirst_gpu, n_gpu),
                         misc.multiply(expFactorPolSecond_gpu, currentDensity_gpu)),
                expFactorPolThird_gpu))
Ejemplo n.º 40
0
def process_audio_cuda(data):
    """Run one block of samples through the GPU audio pipeline.

    On the first call the audio filters and CUDA state are prepared.  The
    input is converted to float32, uploaded and transformed; the left and
    right channel filters are applied to the spectrum, each channel is
    inverse-transformed, passed through the angle-difference kernel,
    low-pass filtered in Fourier space and finally combined by the
    scaling kernel.

    Returns a tuple ``(output, len(output) * 80 / 2)`` where ``output`` is
    the scaled result with 256 samples removed from each end.
    """
    global cs, csa, csa_first

    # Lazy one-time initialisation of filters and GPU state.
    if csa_first:
        prepare_audio_filters()
        prepare_audio_cuda()
        csa_first = False

    fdata = np.float32(data)
    gpudata = gpuarray.to_gpu(fdata)

    fft.fft(gpudata, cs["fft1_out"], cs["plan1"])

    # Apply the per-channel filters and keep the first ablocklen//2 + 1 bins.
    cs["left_fft1"] = (cs["fft1_out"] * cs["filt_audio_left"])[
        0 : (ablocklen // 2) + 1
    ]
    cs["right_fft1"] = (cs["fft1_out"] * cs["filt_audio_right"])[0 : (ablocklen // 2) + 1]

    fft.ifft(cs["left_fft1"], cs["fm_left"], cs["plan1i"], True)
    fft.ifft(cs["right_fft1"], cs["fm_right"], cs["plan1i"], True)

    cs["doanglediff_mac"](
        cs["left_clipped"],
        cs["fm_left"],
        np.float32((afreq_hz / 1.0 / np.pi)),
        np.float32(-SysParams["audio_lfreq"]),
        block=(1024, 1, 1),
        grid=(ablocklenk, 1),
    )
    cs["doanglediff_mac"](
        cs["right_clipped"],
        cs["fm_right"],
        np.float32((afreq_hz / 1.0 / np.pi)),
        np.float32(-SysParams["audio_rfreq"]),
        block=(1024, 1, 1),
        grid=(ablocklenk, 1),
    )

    fft.fft(cs["left_clipped"], cs["left_fft2"], cs["plan2"])
    fft.fft(cs["right_clipped"], cs["right_fft2"], cs["plan2"])

    cs["left_fft2"] *= cs["filt_audiolpf"]
    cs["right_fft2"] *= cs["filt_audiolpf"]

    fft.ifft(cs["left_fft2"], cs["left_out"], cs["plan2i"], True)
    fft.ifft(cs["right_fft2"], cs["right_out"], cs["plan2i"], True)

    aclip = 256

    outlen = ablocklen

    cs["doaudioscale"](
        cs["scaledout"],
        cs["left_out"],
        cs["right_out"],
        np.float32(20),
        np.float32(0),
        block=(32, 1, 1),
        grid=(outlen // 32, 1),
    )

    # Drop the first and last `aclip` samples of the block.
    output = cs["scaledout"].get()[aclip:-aclip]

    return output, len(output) * 80 / 2
Ejemplo n.º 41
0
 def execute(self):
     """Run the inverse FFT from ``self.invec`` into ``self.outvec`` using ``self.plan``."""
     source, target = self.invec, self.outvec
     cu_fft.ifft(source, target, self.plan)
Ejemplo n.º 42
0
# Demo 1: real-to-complex forward FFT followed by a scaled complex-to-real
# inverse FFT on the GPU, compared against a numpy round trip.
print('Testing fft/ifft..')
N = 1024
M = N//2

# Host reference: y is the real part of ifft2(fft2(x)).
x = np.asarray(np.random.rand(N, M), np.float32)
xf = np.fft.fft2(x)
y = np.real(np.fft.ifft2(xf))

# The complex output buffer keeps only x.shape[1]//2 + 1 columns.
x_gpu = gpuarray.to_gpu(x)
xf_gpu = gpuarray.empty((x.shape[0], x.shape[1]//2+1), np.complex64)
plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)

# Inverse transform back into a real buffer, with the scale flag set.
y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)

print('Success status: %r' % np.allclose(y, y_gpu.get(), atol=1e-6))

# Demo 2: complex-to-complex forward and scaled inverse FFT, both done in
# place on the same device buffer; the result is compared with the input.
print('Testing in-place fft..')
x = np.asarray(np.random.rand(N, M) + 1j * np.random.rand(N, M), np.complex64)
x_gpu = gpuarray.to_gpu(x)

# One plan serves both directions.
plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)
cu_fft.fft(x_gpu, x_gpu, plan)

cu_fft.ifft(x_gpu, x_gpu, plan, True)

print('Success status: %r' % np.allclose(x, x_gpu.get(), atol=1e-6))