def __init__(self, size, fftw_flags=['measure', 'unaligned'], **keywords):
    """Create the FFTW plans for half-complex transforms of length *size*.

    Four single-threaded plans are built on scratch arrays of dtype
    ``var.FLOAT_DTYPE``: out-of-place forward/backward plans (``fplan``,
    ``bplan``) and in-place forward/backward plans (``ifplan``,
    ``ibplan``).

    Parameters
    ----------
    size : int-like
        Transform length; converted with ``int()``.
    fftw_flags : sequence of str
        Planner flags handed to every ``fftw3.Plan``.
    **keywords
        Forwarded unchanged to ``Operator.__init__``.
    """
    size = int(size)
    # Copy the flags: the default list is one object shared by every call,
    # so storing it directly would let a mutation of one instance's
    # ``fftw_flags`` silently change the default of all later instances.
    fftw_flags = list(fftw_flags)

    array1 = np.empty(size, dtype=var.FLOAT_DTYPE)
    array2 = np.empty(size, dtype=var.FLOAT_DTYPE)

    # Out-of-place plans (array1 -> array2).
    fplan = fftw3.Plan(array1, array2, direction='forward',
                       flags=fftw_flags, realtypes=['halfcomplex r2c'],
                       nthreads=1)
    bplan = fftw3.Plan(array1, array2, direction='backward',
                       flags=fftw_flags, realtypes=['halfcomplex c2r'],
                       nthreads=1)
    # In-place plans (array1 only).
    ifplan = fftw3.Plan(array1, direction='forward', flags=fftw_flags,
                        realtypes=['halfcomplex r2c'], nthreads=1)
    ibplan = fftw3.Plan(array1, direction='backward', flags=fftw_flags,
                        realtypes=['halfcomplex c2r'], nthreads=1)

    self.size = size
    self.fftw_flags = fftw_flags
    self.fplan = fplan
    self.bplan = bplan
    self.ifplan = ifplan
    self.ibplan = ibplan
    Operator.__init__(self, **keywords)
def shiftFFT(inp, shift, method="fftw"):
    """
    Translate a 2D image with wrap-around using the Fourier shift theorem.

    Behaves like scipy.ndimage.interpolation.shift(input, shift,
    mode="wrap", order="infinity") but faster.

    @param inp: 2d numpy array
    @param shift: 2-tuple of float, offset along axis 0 and axis 1
    @param method: names starting with "fftw" select FFTW when the fftw3
                   module is available; anything else uses numpy.fft
    @return: modulus of the shifted image
    """
    rows, cols = inp.shape
    off0, off1 = shift

    # Integer frequency indices laid out in FFT order for each axis.
    freq0 = numpy.fft.ifftshift(numpy.arange(-rows // 2, rows // 2))
    freq1 = numpy.fft.ifftshift(numpy.arange(-cols // 2, cols // 2))
    grid1, grid0 = numpy.meshgrid(freq1, freq0)

    # Linear phase ramp that encodes the translation in Fourier space.
    ramp0 = numpy.exp(-2j * numpy.pi * off0 * grid0 / float(rows))
    ramp1 = numpy.exp(-2j * numpy.pi * off1 * grid1 / float(cols))
    ramp = ramp0 * ramp1

    if not (method.startswith("fftw") and (fftw3 is not None)):
        return abs(numpy.fft.ifft2(numpy.fft.fft2(inp) * ramp))

    spatial = numpy.zeros((rows, cols), dtype=complex)
    spectrum = numpy.zeros((rows, cols), dtype=complex)
    # Plan creation is done while holding the module semaphore.
    with sem:
        forward = fftw3.Plan(spatial, spectrum, direction='forward',
                             flags=['estimate'])
        backward = fftw3.Plan(spectrum, spatial, direction='backward',
                              flags=['estimate'])
    spatial[:, :] = inp.astype(complex)
    forward()
    spectrum *= ramp
    backward()
    # The backward transform is not normalised, hence the division.
    shifted = spatial / spatial.size
    return abs(shifted)
def dog_filter(input_img, sigma1, sigma2, mode="reflect", cval=0.0):
    """
    2-dimensional Difference of Gaussian filter implemented with FFTw

    @param input_img: input_img array to filter
    @type input_img: array-like
    @param sigma1: standard deviation of the first Gaussian
    @param sigma2: standard deviation of the second Gaussian; the larger of
                   the two sigmas sets the border width (4 sigma, rounded up)
    @type sigma1, sigma2: scalar or sequence of scalars
    @param mode: {'reflect','constant','nearest','mirror', 'wrap'}, optional
        Border handling; unless it is 'wrap' the image is padded with
        ``expand`` before filtering and cropped afterwards.
        Default is 'reflect'
    @param cval: scalar, optional
        Fill value handed to ``expand``. Default is 0.0
    """
    widest = max(sigma1, sigma2)
    wrapped = (mode == "wrap")
    if not wrapped:
        input_img = expand(input_img, widest, mode, cval)
    s0, s1 = input_img.shape

    # Border width along each axis: four standard deviations, rounded up.
    if isinstance(widest, (list, tuple)):
        k0, k1 = (int(ceil(4.0 * float(widest[axis]))) for axis in (0, 1))
    else:
        k0 = k1 = int(ceil(4.0 * float(widest)))

    if fftw3:
        sum_init = input_img.astype(numpy.float32).sum()
        spectrum = numpy.zeros((s0, s1), dtype=complex)
        spatial = numpy.zeros((s0, s1), dtype=complex)
        with sem:
            forward = fftw3.Plan(spatial, spectrum, direction='forward')
            backward = fftw3.Plan(spectrum, spatial, direction='backward')

        # Conjugated spectrum of the kernel moved by half the image size.
        spatial[:, :] = shift(dog(sigma1, sigma2, (s0, s1)),
                              (s0 // 2, s1 // 2)).astype(complex)
        forward()
        kernel_spectrum = numpy.zeros((s0, s1), dtype=complex)
        kernel_spectrum[:, :] = spectrum.conjugate()

        spatial[:, :] = input_img.astype(complex)
        forward()
        spectrum *= kernel_spectrum
        backward()

        filtered = spatial.real.astype(numpy.float32)
        sum_out = filtered.sum()
        # Rescale so that the output sum equals the input sum.
        res = filtered * sum_init / sum_out
    else:
        kernel = shift(dog(sigma1, sigma2, (s0, s1)), (s0 // 2, s1 // 2))
        res = numpy.fft.ifft2(
            numpy.fft.fft2(input_img.astype(complex)) *
            numpy.fft.fft2(kernel.astype(complex)).conjugate())

    if wrapped:
        return res
    return res[k0:-k0, k1:-k1]
def _alloc_fft(N, _cache={}):
    """Return cached FFTW buffers and plans for 1D complex FFTs of length N.

    The mutable default ``_cache`` is used on purpose as a per-process
    memo keyed by ``N``.

    Returns ``((line, fline), (fft, ifft))``: ``fft`` transforms ``line``
    into ``fline`` and ``ifft`` transforms ``fline`` back into ``line``.
    """
    try:
        return _cache[N]
    except KeyError:
        pass

    # First request for this length: allocate the pair of buffers and plan.
    time_buf = numpy.empty((N, ), dtype=complex)
    freq_buf = numpy.empty((N, ), dtype=complex)
    forward = fftw3.Plan(time_buf, freq_buf, direction='forward',
                         flags=['estimate'])
    backward = fftw3.Plan(freq_buf, time_buf, direction='backward',
                          flags=['estimate'])
    entry = (time_buf, freq_buf), (forward, backward)
    _cache[N] = entry
    return entry
def ifft2(d):
    """Run an in-place backward FFTW transform of *d* in the shared buffer.

    *d* is copied into the module-level buffer ``_fftw3_dat``, the cached
    backward plan ``_fftw3_rev`` is executed on it, and the buffer itself
    is returned (not a copy, and with no normalisation applied here).

    The buffer and plan are (re)created when missing or when the shape of
    *d* differs from the cached buffer.
    """
    global _fftw3_rev, _fftw3_dat
    # The original tested and assigned ``_fftw3_fwd`` here, which made that
    # name local (UnboundLocalError) and never set the ``_fftw3_rev`` plan
    # that is executed below; it also compared the shape to ``d`` itself.
    if _fftw3_dat is None or _fftw3_dat.shape != d.shape:
        # NOTE(review): any other plan built on the previous buffer (e.g. a
        # forward plan sharing ``_fftw3_dat``) still points at the old
        # array after this reallocation -- confirm against its users.
        _fftw3_dat = n.zeros(d.shape, dtype=complex)
        _fftw3_rev = None
    if _fftw3_rev is None:
        _fftw3_rev = fftw3.Plan(_fftw3_dat, None, direction='backward',
                                flags=['measure'])
    # Fill the buffer only after planning.
    _fftw3_dat[:] = d
    _fftw3_rev()
    return _fftw3_dat
def fft(data, nfft=None, in_place=False, use_old_plan=True, **kwargs_in):
    """Transform *data* with a cached module-level FFTW plan.

    Parameters
    ----------
    data : sequence
        Input samples; zero-padded up to ``nfft``.
    nfft : int, optional
        Transform length. Defaults to ``len(data)``.
    in_place : bool
        When a new plan is built, build it without a separate output array.
    use_old_plan : bool
        Reuse the cached plan when its length matches ``nfft``. A truthy
        module constant ``USE_OLD_PLAN`` overrides this argument.
    **kwargs_in
        Extra keyword arguments for ``fftw3.Plan`` (they override the
        default ``flags=['measure']``).

    Returns
    -------
    The plan's output array, or its input array for an in-place plan.
    This is the plan's own buffer, so a later call overwrites it.
    """
    global _plan
    if USE_OLD_PLAN:
        use_old_plan = (USE_OLD_PLAN == 1)
    kwargs = dict(flags=['measure'])
    kwargs.update(kwargs_in)
    if nfft is None:
        nfft = len(data)
    # A missing plan or a length mismatch forces a new plan.
    if _plan is None or (use_old_plan and nfft != len(_plan.inarray)):
        use_old_plan = False
    if not use_old_plan:
        input_ = fftw3.create_aligned_array(nfft)
        output = None if in_place else fftw3.create_aligned_array(nfft)
        _plan = fftw3.Plan(input_, output, **kwargs)
    _plan.inarray[:len(data)] = data
    _plan.inarray[len(data):] = 0
    # In-place plans have no output array; zeroing it unconditionally
    # raised TypeError on None.
    if _plan.outarray is not None:
        _plan.outarray[:] = 0
    _plan()
    if _plan.outarray is None:
        return _plan.inarray
    return _plan.outarray
def ifft(data, nfft=None, in_place=False, use_old_plan=True, **kwargs_in):
    """Inverse-transform *data* with a cached module-level FFTW plan.

    Parameters
    ----------
    data : sequence
        Input samples; zero-padded up to ``nfft``.
    nfft : int, optional
        Transform length. Defaults to ``len(data)``.
    in_place : bool
        When a new plan is built, build it without a separate output array.
    use_old_plan : bool
        Reuse the cached plan when its length matches ``nfft``. A truthy
        module constant ``USE_OLD_PLAN`` overrides this argument.
    **kwargs_in
        Extra keyword arguments for ``fftw3.Plan`` (they override the
        defaults ``direction='backward', flags=['measure']``).

    Returns
    -------
    A new array: the plan's result divided by ``nfft``.
    """
    global _plan2
    if USE_OLD_PLAN:
        use_old_plan = (USE_OLD_PLAN == 1)
    kwargs = dict(direction='backward', flags=['measure'])
    kwargs.update(kwargs_in)
    # Identity test: ``== None`` would be evaluated element-wise by
    # array-like arguments.
    if nfft is None:
        nfft = len(data)
    # A missing plan or a length mismatch forces a new plan.
    if _plan2 is None or (use_old_plan and nfft != len(_plan2.inarray)):
        use_old_plan = False
    if not use_old_plan:
        input_ = fftw3.create_aligned_array(nfft)
        output = None if in_place else fftw3.create_aligned_array(nfft)
        _plan2 = fftw3.Plan(input_, output, **kwargs)
    _plan2.inarray[:len(data)] = data
    _plan2.inarray[len(data):] = 0
    _plan2()
    if _plan2.outarray is None:
        ret = _plan2.inarray
    else:
        ret = _plan2.outarray
    # The executed transform is unnormalised; scale by the length.
    return ret / nfft
def ifftwn(array, nthreads=1):
    """Return the backward FFTW transform of *array*, divided by its size.

    The input is converted to a complex copy first, so the caller's array
    is left untouched. ``nthreads`` is passed to ``fftw3.Plan``.
    """
    work = array.astype('complex').copy()
    result = work.copy()
    plan_options = dict(direction='backward', flags=['estimate'],
                        nthreads=nthreads)
    backward = fftw3.Plan(work, result, **plan_options)
    backward.execute()
    n_elements = np.size(work)
    return result / n_elements
def raw_plot(self, fft_mode):
    """Coroutine that plots raw samples and a spectrum of the latest ones.

    Send ``(val, seq_num)`` tuples into the generator. Each sample is
    appended to ``self.raw_x`` / ``self.raw_y``. Every 32nd sequence
    number, once at least 512 samples are stored, the last 512 samples are
    transformed (only when ``fft_mode == '1'``), the first half of the
    spectrum is plotted in dB on ``self.rd_fftplot`` and the raw trace is
    redrawn on ``self.rawplot``.

    NOTE(review): the nesting of the plotting calls was reconstructed from
    a whitespace-mangled source -- confirm against the original file.
    """
    fft_size = 512
    # Integer division: with true division ``fft_size / 2`` is a float,
    # which is invalid as a range bound and as a slice index.
    half = fft_size // 2
    bins = list(range(half))
    while True:
        val, seq_num = yield
        self.raw_x.append(seq_num)
        self.raw_y.append(val)
        if seq_num % 32 == 0 and len(self.raw_y) >= fft_size:
            raw_y_list = list(self.raw_y)
            inputa = numpy.array(raw_y_list[-fft_size:], dtype=complex)
            #hann_window = numpy.hanning(fft_size)
            #inputa = inputa * hann_window
            #inputa = inputa * flattop_window
            outputa = numpy.zeros(fft_size, dtype=complex)
            if fft_mode == '1':
                fft = fftw3.Plan(inputa, outputa, direction='forward',
                                 flags=['estimate'])
                fft.execute()
            # Magnitude in dB (20*log10), first half of the spectrum only.
            outputa = (numpy.log10(numpy.abs(outputa)) * 20)[:half]
            self.rd_fftplot.plot(bins, outputa, clear=True)
            self.rawplot.plot(self.raw_x, self.raw_y, clear=True)
            pg.QtGui.QApplication.processEvents()
def shiftFFT(input_img, shift_val, method="fftw"):
    """
    Do shift using FFTs

    Shift an array like scipy.ndimage.interpolation.shift(input, shift,
    mode="wrap", order="infinity") but faster

    @param input_img: 2d numpy array
    @param shift_val: 2-tuple of float, offset along axis 0 and axis 1
    @param method: names starting with "fftw" select FFTW when the fftw3
                   module is already loaded; otherwise numpy.fft is used
    @return: modulus of the shifted image
    """
    # Look the module up among the loaded modules (None when absent).
    # The former ``dir()`` / ``"has_fftw3"`` tests were always true inside
    # the function, so they reduced to exactly this one statement.
    fftw3 = sys.modules.get("fftw3")

    d0, d1 = input_img.shape
    v0, v1 = shift_val
    # Integer frequency indices laid out in FFT order for each axis.
    f0 = numpy.fft.ifftshift(numpy.arange(-d0 // 2, d0 // 2))
    f1 = numpy.fft.ifftshift(numpy.arange(-d1 // 2, d1 // 2))
    m1, m0 = numpy.meshgrid(f1, f0)
    # Linear phase ramp that encodes the translation in Fourier space.
    e0 = numpy.exp(-2j * numpy.pi * v0 * m0 / float(d0))
    e1 = numpy.exp(-2j * numpy.pi * v1 * m1 / float(d1))
    e = e0 * e1

    if method.startswith("fftw") and (fftw3 is not None):
        input_ = numpy.zeros((d0, d1), dtype=complex)
        output = numpy.zeros((d0, d1), dtype=complex)
        # Plan creation is done while holding the module semaphore.
        with sem:
            fft = fftw3.Plan(input_, output, direction='forward',
                             flags=['estimate'])
            ifft = fftw3.Plan(output, input_, direction='backward',
                              flags=['estimate'])
        input_[:, :] = input_img.astype(complex)
        fft()
        output *= e
        ifft()
        # The backward transform is not normalised, hence the division.
        out = input_ / input_.size
    else:
        out = numpy.fft.ifft2(numpy.fft.fft2(input_img) * e)
    return abs(out)
def init(self, n, measure, outn):
    """Prepare a complex-to-complex backward FFTW plan of length *n*.

    ``measure`` selects the 'measure' planner flag instead of 'estimate'.
    ``outn`` is not used by this variant.

    Returns ``(plan, None, post)`` where ``post`` divides an array by its
    length.
    """
    effort = 'measure' if measure else 'estimate'
    buf_in = fftw3.create_aligned_array(n, dtype=complex)
    buf_out = fftw3.create_aligned_array(n, dtype=complex)
    backward = fftw3.Plan(buf_in, buf_out, direction='backward',
                          flags=(effort, ))

    def normalise(x):
        return x / len(x)

    return (backward, None, normalise)
def fft2(d):
    """Run an in-place forward FFTW transform of *d* in the shared buffer.

    The module-level buffer ``_fftw3_dat`` and plan ``_fftw3_fwd`` are
    created on first use (sized from ``d.shape``). *d* is then copied into
    the buffer, the plan is executed and the buffer itself is returned.
    """
    global _fftw3_fwd, _fftw3_dat
    need_plan = _fftw3_fwd is None
    if need_plan and _fftw3_dat is None:
        _fftw3_dat = n.zeros(d.shape, dtype=n.complex)
    if need_plan:
        # No output array is given: the plan works in place on the buffer.
        _fftw3_fwd = fftw3.Plan(_fftw3_dat, None, direction='forward',
                                flags=['measure'])
    _fftw3_dat[:] = d
    _fftw3_fwd()
    return _fftw3_dat
def init(self, n, measure, outn):
    """Prepare a real-to-complex forward FFTW plan of length *n*.

    The input buffer holds ``n`` floats and the output buffer
    ``n // 2 + 1`` complex values. ``measure`` selects the 'measure'
    planner flag instead of 'estimate'. ``outn`` is not used by this
    variant.

    Returns ``(plan, None, None)``.
    """
    effort = 'measure' if measure else 'estimate'
    real_in = fftw3.create_aligned_array(n, dtype=float)
    complex_out = fftw3.create_aligned_array(n // 2 + 1, dtype=complex)
    forward = fftw3.Plan(real_in, complex_out, direction='forward',
                         realtypes='halfcomplex r2c', flags=(effort, ))
    return (forward, None, None)
def fftw2(array, nthreads=1):
    """Return the forward FFTW transform of *array* as a new complex array.

    The input is converted to complex first; ``nthreads`` is passed to
    ``fftw3.Plan``.
    """
    source = array.astype('complex')
    result = source.copy()
    plan_options = dict(direction='forward', flags=['estimate'],
                        nthreads=nthreads)
    forward = fftw3.Plan(source, result, **plan_options)
    forward()
    return result
def wifftn(array, nthreads=4):
    """Return the backward FFTW transform of *array* as a new complex array.

    The result is returned exactly as the plan produced it; no scaling is
    applied in this function. ``nthreads`` is passed to ``fftw3.Plan``.
    """
    source = array.astype('complex')
    result = source.copy()
    plan_options = dict(direction='backward', flags=['estimate'],
                        nthreads=nthreads)
    backward = fftw3.Plan(source, result, **plan_options)
    backward()
    return result
def init(self, n, measure, outn):
    """Prepare a complex-to-real backward FFTW plan.

    The input buffer holds ``n`` complex values. The real output buffer
    has ``outn`` elements, or ``(n - 1) // 2`` when ``outn`` is None.
    ``measure`` selects the 'measure' planner flag instead of 'estimate'.

    Returns ``(plan, pre, post)``: ``pre`` keeps the first ``n`` items of
    its argument and ``post`` divides an array by its length.
    """
    effort = 'measure' if measure else 'estimate'
    if outn is not None:
        out_len = outn
    else:
        out_len = (n - 1) // 2
    complex_in = fftw3.create_aligned_array(n, dtype=complex)
    real_out = fftw3.create_aligned_array(out_len, dtype=float)
    backward = fftw3.Plan(complex_in, real_out, direction='backward',
                          realtypes='halfcomplex c2r', flags=(effort, ))

    def truncate(x):
        return x[:n]

    def normalise(x):
        return x / len(x)

    return (backward, truncate, normalise)
def make_plan(image_array, rigor):
    """Create a forward real-to-complex FFTW plan matching *image_array*.

    Parameters
    ----------
    image_array : 2D array
        Only its shape is used to size the plan's buffers.
    rigor : str
        Planner flag; it replaces any ``flags`` entry copied from
        ``pyami.fft.calc_fftw3.global_plan_kwargs``.

    Returns
    -------
    The ``fftw3.Plan``, with its buffers attached as ``input_array`` and
    ``fft_array``. The planning time is appended to ``timing['plan']``.
    """
    t0 = time.time()
    # ``numpy.float`` was only an alias of the builtin and has been removed
    # from NumPy; the builtin gives the same float64 dtype.
    input_array = numpy.empty(image_array.shape, float)
    # Integer division: the half-spectrum width is an array dimension and
    # must stay an int under true division as well.
    fftshape = image_array.shape[0], image_array.shape[1] // 2 + 1
    fft_array = numpy.empty(fftshape, dtype=complex)
    plan_kwargs = dict(pyami.fft.calc_fftw3.global_plan_kwargs)
    plan_kwargs['flags'] = [rigor]
    p = fftw3.Plan(input_array, fft_array, direction='forward',
                   **plan_kwargs)
    # Keep the buffers reachable from the plan for later use by callers.
    p.input_array = input_array
    p.fft_array = fft_array
    timing['plan'].append(time.time() - t0)
    return p
def __init__(self, fft_filter, ncorrelations, nsamples,
             fftw_flags=['measure', 'unaligned']):
    """Set up the in-place FFTW plans for an FFT-domain filter.

    Parameters
    ----------
    fft_filter : array
        Its last axis length is the transform length and its first axis
        length is the leading dimension of the operator input shape.
    ncorrelations : int
        Stored as ``self.left``.
    nsamples : int
        Second dimension of the operator input shape;
        ``self.right`` is ``filter_length - nsamples - ncorrelations``.
    fftw_flags : sequence of str
        Planner flags handed to both ``fftw3.Plan`` objects.
    """
    filter_length = fft_filter.shape[-1]
    # Copy the flags: the default list is one object shared by every call,
    # so storing it directly would let a mutation of one instance's
    # ``fftw_flags`` silently change the default of all later instances.
    fftw_flags = list(fftw_flags)
    array = np.empty(filter_length, dtype=var.FLOAT_DTYPE)

    self.fft_filter = fft_filter
    self.filter_length = filter_length
    self.left = ncorrelations
    self.right = filter_length - nsamples - ncorrelations
    self.fftw_flags = fftw_flags
    # Both plans are single-threaded and work in place on ``array``.
    self.fplan = fftw3.Plan(array, direction='forward', flags=fftw_flags,
                            realtypes=['halfcomplex r2c'], nthreads=1)
    self.bplan = fftw3.Plan(array, direction='backward', flags=fftw_flags,
                            realtypes=['halfcomplex c2r'], nthreads=1)
    Operator.__init__(self, shapein=(fft_filter.shape[0], nsamples))
def __call__(self,u,dx):
    """Return the spectral derivative of *u*.

    Parameters
    ---------
    u : tslice.timeslice
        the data to be differentiated.
    dx : sequence
        spatial step size(s); only ``dx[0]`` is used.

    The work arrays, the FFTW plans and the frequency multiplier
    ``(2*pi*1j*freq) ** self.order`` are built on the first call and
    reused afterwards. The result is divided by ``u.shape[0]``.
    """
    npoints = u.shape[0]

    if self.u is None:
        # Lazy set-up of buffers and plans, sized from the first input.
        self.u = np.empty_like(u)
        half_spectrum = (math.floor((u.shape[0]/2)+1),)
        self.ufft = np.empty(half_spectrum, dtype = np.dtype(np.complex128))
        self.dufft = np.empty_like(self.ufft)
        self.du = np.empty_like(u)
        self.fft = fftw.Plan(self.u,self.ufft,direction="forward")
        self.ifft = fftw.Plan(self.dufft,self.du,direction="backward")

    # Run the forward plan on the caller's array rather than on self.u.
    self.fft.guru_execute_dft(u,self.ufft)

    if self.dufreq is None:
        nfreq = self.ufft.shape[0]
        ufreq = np.array([self._compute_freq(idx,nfreq,dx[0])
                          for idx in range(nfreq)])
        self.dufreq = np.power(2*np.pi*1j*ufreq, self.order)

    self.dufft = self.dufreq*self.ufft
    self.ifft.guru_execute_dft(self.dufft,self.du)
    return self.du/npoints
"""Plot the magnitude of the FFTW transform of a sampled sine wave."""
import fftw3
import numpy as np
from scipy.constants import pi
from scipy import signal
import matplotlib.pyplot as plt

T_start = -10  #Beginning Time
T_end = 10  #Ending time
T = T_end - T_start  #Total time
num = 1000  #Number of samples

# Complex work buffers handed to the FFTW plan.
inputa = np.zeros(num, dtype=complex)
outputa = np.zeros(num, dtype=complex)
timea = np.linspace(T_start, T_end, num)  #Time domain
sa = np.linspace(0, T, num)  #Samples
freqa = 2 * pi * sa / T  #Frequency domain

# Leftovers of an earlier Gaussian-input experiment; unused below.
c = 0
mean = 5
stdev = 0.010

# Fill the preallocated complex buffer in place. Rebinding ``inputa`` to the
# float result of ``np.sin`` would hand the plan a real input together with
# a full-length complex output, i.e. mismatched buffers.
inputa[:] = 2 * pi * np.sin(timea)

# NOTE(review): the 'measure' flag plans after the buffer is filled; FFTW
# documents that measuring may overwrite the arrays -- confirm the intent.
fft = fftw3.Plan(inputa, outputa, direction='forward', flags=['measure'])
#ifft=fftw3.Plan(outputa, inputa, direction='backward', flags=['measure'])
fftw3.execute(fft)

plt.plot(freqa, abs(outputa))
plt.show()
def gaussian_filter(input_img, sigma, mode="reflect", cval=0.0):
    """
    2-dimensional Gaussian filter implemented with FFTw

    When the fftw3 module is loaded the filter is computed as an FFT
    convolution; otherwise, or after a MemoryError in that path, it falls
    back on scipy.ndimage.gaussian_filter.

    @param input_img: input array to filter
    @type input_img: array-like
    @param sigma: standard deviation for Gaussian kernel. The standard
        deviations of the Gaussian filter are given for each axis as a
        sequence, or as a single number, in which case it is equal for
        all axes.
    @type sigma: scalar or sequence of scalars
    @param mode: {'reflect','constant','nearest','mirror', 'wrap'}, optional
        The ``mode`` parameter determines how the array borders are
        handled, where ``cval`` is the value when mode is equal to
        'constant'. Default is 'reflect'
    @param cval: scalar, optional
        Value to fill past edges of input if ``mode`` is 'constant'.
        Default is 0.0
    @return: filtered array with the same shape as input_img
    """
    res = None
    if "fftw3" in sys.modules:
        try:
            # Work on local copies: the fallback below must see the
            # caller's image and sigma, not the padded image.
            if isinstance(sigma, (list, tuple)):
                sig = (float(sigma[0]), float(sigma[1]))
            else:
                sig = (float(sigma), float(sigma))
            # Border width along each axis: four sigma, rounded up.
            k0 = int(ceil(4.0 * sig[0]))
            k1 = int(ceil(4.0 * sig[1]))

            if mode != "wrap":
                work = expand(input_img, (k0, k1), mode, cval)
            else:
                work = input_img
            s0, s1 = work.shape
            sum_init = work.astype(numpy.float32).sum()

            fftOut = numpy.zeros((s0, s1), dtype=complex)
            fftIn = numpy.zeros((s0, s1), dtype=complex)
            with sem:
                fft = fftw3.Plan(fftIn, fftOut, direction='forward')
                ifft = fftw3.Plan(fftOut, fftIn, direction='backward')

            # Separable kernel with its halves swapped along each axis.
            g0 = gaussian(s0, sig[0])
            g1 = gaussian(s1, sig[1])
            g0 = numpy.concatenate((g0[s0 // 2:], g0[:s0 // 2]))  # faster than fftshift
            g1 = numpy.concatenate((g1[s1 // 2:], g1[:s1 // 2]))  # faster than fftshift
            g2 = numpy.outer(g0, g1)

            g2fft = numpy.zeros((s0, s1), dtype=complex)
            fftIn[:, :] = g2.astype(complex)
            fft()
            g2fft[:, :] = fftOut.conjugate()

            fftIn[:, :] = work.astype(complex)
            fft()
            fftOut *= g2fft
            ifft()

            out = fftIn.real.astype(numpy.float32)
            sum_out = out.sum()
            # Rescale so that the output sum equals the input sum.
            res = out * sum_init / sum_out
            if mode != "wrap":
                # Explicit end indices: ``res[k:-k]`` is empty when k == 0.
                res = res[k0:s0 - k0, k1:s1 - k1]
        except MemoryError:
            res = None
            logging.error("MemoryError in FFTw3 part. Falling back on Scipy")
    if res is None:
        # ``cval`` is forwarded so that mode="constant" honours it here too.
        res = scipy.ndimage.gaussian_filter(input_img, sigma,
                                            mode=(mode or "reflect"),
                                            cval=cval)
    return res
def sample_defrost_gpu(lat, func, gamma, m2_eff):
    """Calculates a sample of random values in the lattice

    lat = Lattice object; the attributes read here are mpl, n, nn, dk, dx,
          prec_real, prec_complex, dimx, dimy, dimz, cuda_block_1,
          cuda_grid, test and VL
    func = Cuda kernel, called with the device buffers and lattice sizes
    gamma = -0.25 or +0.25
    m2_eff = effective mass

    The 1D kernel is transformed on the host with FFTW ('realodd 10'),
    the 3D transforms are done on the device with CuFFT.
    Returns the host array read back from the device, cast to
    lat.prec_real and multiplied by 1/lat.VL.
    """
    import scikits.cuda.fft as fft
    import fftw3

    "Various constants:"
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    os = 16  # local factor; shadows nothing imported in this function
    nos = n * pow(os, 2)
    dk = lat.dk
    dx = lat.dx
    dkos = dk / (2. * os)
    dxos = dx / os
    kcut = nn * dk / 2.0
    norm = 0.5 / (math.sqrt(2 * pi * dk**3.) * mpl) * (dkos / dxos)

    ker = np.empty(nos, dtype=lat.prec_real)
    # In-place plan (same array as input and output), created before the
    # array is filled.
    fft1 = fftw3.Plan(ker, ker, direction='forward', flags=['measure'],
                      realtypes=['realodd 10'])

    for k in xrange(nos):
        kk = (k + 0.5) * dkos
        ker[k] = kk * (kk**2. + m2_eff)**gamma * math.exp(-(kk / kcut)**2.)

    fft1.execute()
    fftw3.destroy_plan(fft1)

    # Scale every transformed element by norm / (index + 1).
    for k in xrange(nos):
        ker[k] = norm * ker[k] / (k + 1)

    Fk_gpu = gpuarray.zeros((n / 2 + 1, n, n), dtype=lat.prec_complex)
    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.zeros((n, n, n), dtype=lat.prec_real)

    # plan: real -> complex, plan2: complex -> real.
    plan = fft.Plan(tmp_gpu.shape, lat.prec_real, lat.prec_complex)
    plan2 = fft.Plan(tmp_gpu.shape, lat.prec_complex, lat.prec_real)

    func(tmp_gpu, ker_gpu, np.uint32(nn), np.float64(os),
         np.uint32(lat.dimx), np.uint32(lat.dimy), np.uint32(lat.dimz),
         block=lat.cuda_block_1, grid=lat.cuda_grid)

    fft.fft(tmp_gpu, Fk_gpu, plan)

    if lat.test == True:
        print 'Testing mode on! Set testQ to False to disable this.\n'
        # Fixed seed so that the random factors below are reproducible.
        np.random.seed(1)

    # Complex factors with independent normal real and imaginary parts.
    rr1 = (np.random.normal(size=Fk_gpu.shape) +
           np.random.normal(size=Fk_gpu.shape) * 1j)

    # The multiplication is done on the host, then sent back to the device.
    Fk = Fk_gpu.get()
    Fk *= rr1
    Fk_gpu = gpuarray.to_gpu(Fk)

    fft.ifft(Fk_gpu, tmp_gpu, plan2)

    res = (tmp_gpu.get()).astype(lat.prec_real)
    res *= 1. / lat.VL
    return res
# Constants combined into beta = factor / bias, used in the filter below.
bias=1.1136226513850853
factor=0.308**0.6
beta=factor/bias
################################################################################
# NOTE(review): block nesting below was reconstructed from a
# whitespace-mangled source (collective Scatter/Gather calls placed outside
# the rank-0 blocks) -- confirm against the original script.
if rank==0:
    import time
    print '='*80
    print 'displacement'
    t0=time.time()
    smooth_x=Tide.LoadDataOfhdf5(Outfile+'0.000den00_s1.25.hdf5')
    halo=Tide.LoadData(Input)
    halo=np.array(halo,dtype=np.float64)
    smooth_k=np.empty((N,N,N/2+1),dtype=np.complex128)
    s_k=np.empty((N,N,N/2+1),dtype=np.complex128)
    # Forward transform of smooth_x into smooth_k on the root rank.
    fft=fftw.Plan(inarray=smooth_x,outarray=smooth_k,direction='forward',nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
# Distribute the root's arrays over the ranks.
comm.Scatter(smooth_k,recvdata_k1,root=0) #deltak
comm.Scatter(halo,mpi_halo,root=0) #deltak
# |k| on this rank's slab, and the same expression with the first two
# components set to zero.
k_mag=(mpi_fn[rank][:,None,None]**2.+fn[None,:,None]**2.+fnc[None,None,:]**2)**(1./2.)
kp=(np.zeros_like(mpi_fn[rank])[:,None,None]**2.+np.zeros_like(fn)[None,:,None]**2.+fnc[None,None,:]**2)**(1./2.)
mu=kp/k_mag
senddata_k1=-1j/bias/k_mag*recvdata_k1/(1+beta*mu**2)
# Collect the filtered slabs into s_k on the root.
comm.Gather(senddata_k1,s_k,root=0)
if rank==0:
    s_x=np.empty((N,N,N),dtype=np.float64)
    # Backward transform of s_k into s_x on the root rank.
    ifft=fftw.Plan(inarray=s_k,outarray=s_x,direction='backward',nthreads=nthreads)
    fftw.execute(ifft)
def measure_offset(img1, img2, method="fftw", withLog=False, withCorr=False):
    """
    Measure the actual offset between 2 images

    The cross-correlation of the two images is computed with FFTs; the
    offset is the centre of mass of the thresholded correlation peak.

    @param img1: ndarray, first image
    @param img2: ndarray, second image, same shape as img1
    @param method: "fftw" or "cuda" prefix selects that backend when it is
                   available, anything else falls back on numpy.fft
    @param withLog: shall we return logs as well ? boolean
    @param withCorr: shall we return the thresholded correlation as well ?
    @return: tuple of floats with the offsets, followed by the logs and/or
             the thresholded correlation array when requested
    """
    method = str(method)
    ################################################################################
    # Start convolutions
    ################################################################################
    shape = img1.shape
    logs = []
    assert img2.shape == shape
    t0 = time.time()
    # The three backends are mutually exclusive. The cuda test used to be a
    # separate ``if`` whose ``else`` recomputed (and overwrote) the FFTW
    # result with numpy every time.
    if method[:4] == "fftw" and (fftw3 is not None):
        fft_in = numpy.zeros(shape, dtype=complex)
        fft_out = numpy.zeros(shape, dtype=complex)
        # Plan creation is done while holding the module semaphore.
        with sem:
            fft = fftw3.Plan(fft_in, fft_out, direction='forward',
                             flags=['measure'])
            ifft = fftw3.Plan(fft_out, fft_in, direction='backward',
                              flags=['measure'])
        # Buffers are filled only after planning.
        fft_in[:, :] = img2.astype(complex)
        fft()
        temp = fft_out.conjugate()
        fft_in[:, :] = img1.astype(complex)
        fft()
        fft_out *= temp
        ifft()
        # The backward transform is not normalised, hence the division.
        res = fft_in.real / fft_in.size
    elif method[:4] == "cuda" and (cu_fft is not None):
        with sem:
            cuda_correlate = CudaCorrelate(shape)
            res = cuda_correlate.correlate(img1, img2)
    else:  # use numpy fftpack
        i1f = numpy.fft.fft2(img1)
        i2f = numpy.fft.fft2(img2)
        res = numpy.fft.ifft2(i1f * i2f.conjugate()).real
    t1 = time.time()
    ################################################################################
    # END of convolutions
    ################################################################################
    offset1 = maximum_position(res)
    res = shift(res, (shape[0] // 2, shape[1] // 2))
    mean = res.mean(dtype="float64")
    maxi = res.max()
    std = res.std(dtype="float64")
    SN = (maxi - mean) / std
    # Keep only what exceeds mean + 0.9 * (max - mean); the rest is zero.
    new = numpy.maximum(numpy.zeros(shape),
                        res - numpy.ones(shape) * (mean + std * SN * 0.9))
    com2 = center_of_mass(new)
    logs.append("MeasureOffset: fine result of the centered image: %s %s " % com2)
    offset2 = ((com2[0] - shape[0] // 2) % shape[0],
               (com2[1] - shape[1] // 2) % shape[1])
    # Difference between raw and refined offsets, wrapped to +/- half size.
    delta0 = (offset2[0] - offset1[0]) % shape[0]
    delta1 = (offset2[1] - offset1[1]) % shape[1]
    if delta0 > shape[0] // 2:
        delta0 -= shape[0]
    if delta1 > shape[1] // 2:
        delta1 -= shape[1]
    if (abs(delta0) > 2) or (abs(delta1) > 2):
        logs.append(
            "MeasureOffset: Raw offset is %s and refined is %s. Please investigate !"
            % (offset1, offset2))
    # Express the refined offset as a signed value around zero.
    listOffset = list(offset2)
    if listOffset[0] > shape[0] // 2:
        listOffset[0] -= shape[0]
    if listOffset[1] > shape[1] // 2:
        listOffset[1] -= shape[1]
    offset = tuple(listOffset)
    t2 = time.time()
    logs.append("MeasureOffset: fine result: %s %s" % offset)
    logs.append("MeasureOffset: execution time: %.3fs with %.3fs for FFTs" %
                (t2 - t0, t1 - t0))
    if withLog:
        if withCorr:
            return offset, logs, new
        return offset, logs
    if withCorr:
        return offset, new
    return offset
def sample_defrost_cpu(lat, func, gamma, m2_eff):
    """Calculates a sample of random values in the lattice. Taken from
    Defrost-program.

    lat = Lattice object; the attributes read here are mpl, n, nn, dk, dx,
          dimx, dimy, dimz, cuda_block_1, cuda_grid and test
    func = Cuda kernel, called with the device buffers and lattice sizes
    gamma = -0.25 or +0.25
    m2_eff = effective mass

    The 1D kernel is transformed with FFTW ('realodd 10'); the 3D
    transforms are done on the host with numpy (rfftn / irfftn).
    Returns the array produced by np.fft.irfftn.
    """
    import fftw3

    "Various constants:"
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    os = 16
    nos = n * pow(os, 2)
    dk = lat.dk
    dx = lat.dx
    dkos = dk / (2. * os)
    dxos = dx / os
    kcut = nn * dk / 2.0
    norm = 0.5 / (math.sqrt(2 * pi * dk**3.) * mpl) * (dkos / dxos)

    ker = np.empty(nos, dtype=np.float64)
    # In-place plan (same array as input and output), created before the
    # array is filled.
    fft = fftw3.Plan(ker, ker, direction='forward', flags=['measure'],
                     realtypes=['realodd 10'])

    for k in xrange(nos):
        kk = (k + 0.5) * dkos
        ker[k] = (kk * (kk**2. + m2_eff)**gamma) * math.exp(-(kk / kcut)**2.)

    fft.execute()
    fftw3.destroy_plan(fft)

    # Scale every transformed element by norm / (index + 1).
    for k in xrange(nos):
        ker[k] = norm * ker[k] / (k + 1)

    l0 = int(np.floor(np.sqrt(3) * n / 2 * os))  # not used below

    tmp = np.zeros((n, n, n), dtype=np.float64)
    Fk = np.zeros((n, n, n / 2 + 1), dtype=np.complex128)  # replaced by rfftn below

    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.to_gpu(tmp)

    func(tmp_gpu, ker_gpu, np.uint32(nn), np.float64(os),
         np.uint32(lat.dimx), np.uint32(lat.dimy), np.uint32(lat.dimz),
         block=lat.cuda_block_1, grid=lat.cuda_grid)

    # Bring the kernel output back to the host.
    tmp += tmp_gpu.get()

    Fk = np.fft.rfftn(tmp)

    if lat.test == True:
        print 'Testing mode on! Set testQ to False to disable this.\n'
        # Fixed seed so that the random factors below are reproducible.
        np.random.seed(1)

    # Complex factors with independent normal real and imaginary parts.
    rr1 = np.random.normal(
        size=Fk.shape) + np.random.normal(size=Fk.shape) * 1j

    Fk *= rr1

    tmp = np.fft.irfftn(Fk)

    # Explicitly release the device buffers.
    ker_gpu.gpudata.free()
    tmp_gpu.gpudata.free()

    return tmp
def fft_comparison_tests(size=2048, dtype=np.complex128, byte_align=False):
    """ Compare speed and test the API of pyFFTW3 and PyFFTW which are, somewhat surprisingly, completely different independent modules

    For each of numpy, pyfftw3 and pyfftw a planning pass is timed first,
    then one 2D FFT of a ``size`` x ``size`` square-aperture array is
    timed, printed and displayed (input on the top row, the central part
    of the squared modulus of the result on the bottom row).

    ``byte_align`` makes the pyfftw case use 16-byte aligned arrays from
    ``pyfftw.n_byte_align_empty``.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-mangled source -- confirm against the original file.
    """
    test_array = np.ones((size, size), dtype=dtype)
    test_array[size * 3 // 8:size * 5 // 8, size * 3 // 8:size * 5 // 8] = 1  # square aperture oversampling 2...

    ncores = multiprocessing.cpu_count()

    pl.clf()
    for FFT_direction in ['forward']:  #,'backward']:
        print("Array size: {1} x {1}\nDirection: {2}\ndtype: {3}\nncores: {4}".
              format(0, size, FFT_direction, dtype, ncores))
        print("")
        # Let's first deal with some planning to make sure wisdom is generated ahead of time.
        for i, fft_type in enumerate(['numpy', 'pyfftw3', 'pyfftw']):
            # planning using PyFFTW3
            p0 = time.time()
            print("Now planning " + fft_type + " in the " + FFT_direction + " direction")
            #if (test_array.shape, FFT_direction) not in _FFTW3_INIT.keys():
            if fft_type == 'numpy':
                print("\tno planning required")
            elif fft_type == 'pyfftw3':
                # Planned on a copy, so test_array itself is not touched.
                fftplan = fftw3.Plan(test_array.copy(), None, nthreads=ncores,
                                     direction=FFT_direction, flags=['measure'])
            else:
                pyfftw.interfaces.cache.enable()
                pyfftw.interfaces.cache.set_keepalive_time(30)
                if byte_align:
                    test_array = pyfftw.n_byte_align_empty((size, size), 16, dtype=dtype)
                test_array = pyfftw.interfaces.numpy_fft.fft2(
                    test_array, overwrite_input=True,
                    planner_effort='FFTW_MEASURE', threads=ncores)
            p1 = time.time()
            print("\tTime elapsed planning: {0:.4f} s".format(p1 - p0))
        print("")

        # Now let's run some FFTs
        for i, fft_type in enumerate(['numpy', 'pyfftw3', 'pyfftw']):
            # display
            if fft_type == 'pyfftw' and byte_align:
                test_array = pyfftw.n_byte_align_empty((size, size), 16, dtype=dtype)
                output_array = pyfftw.n_byte_align_empty((size, size), 16, dtype=dtype)  # not used below
                test_array[:, :] = 0
            else:
                test_array = np.zeros((size, size), dtype=np.complex128)
            test_array[size * 3 // 8:size * 5 // 8, size * 3 // 8:size * 5 // 8] = 1  # square aperture oversampling 2...
            pl.subplot(2, 3, 1 + i)
            pl.imshow(np.abs(test_array), vmin=0, vmax=1)
            pl.title("FFT type: {0:10s} input array".format(fft_type))
            pl.draw()

            # actual timed FFT section starts here:
            t0 = time.time()
            if fft_type == 'numpy':
                test_array = np.fft.fft2(test_array)
            elif fft_type == 'pyfftw3':
                # The plan is created inside the timed section; no output
                # array is given, so it works in place on test_array.
                fftplan = fftw3.Plan(test_array, None,
                                     nthreads=multiprocessing.cpu_count(),
                                     direction=FFT_direction, flags=['measure'])
                fftplan.execute()  # execute the plan
            elif fft_type == 'pyfftw':
                test_array = pyfftw.interfaces.numpy_fft.fft2(
                    test_array, overwrite_input=True,
                    planner_effort='FFTW_MEASURE', threads=ncores)
            t1 = time.time()

            if FFT_direction == 'forward':
                test_array = np.fft.fftshift(test_array)

            # display
            t_elapsed = t1 - t0
            summarytext = "FFT type: {0:10s}\tTime Elapsed: {3:.4f} s".format(
                fft_type, size, FFT_direction, t_elapsed)
            print(summarytext)

            pl.subplot(2, 3, 1 + i + 3)
            # Squared modulus of the transformed array.
            psf = np.real(test_array * np.conjugate(test_array))
            norm = matplotlib.colors.LogNorm(vmin=psf.max() * 1e-6, vmax=psf.max())
            cmap = matplotlib.cm.jet
            cmap.set_bad((0, 0, 0.5))
            pl.imshow(psf[size * 3 // 8:size * 5 // 8, size * 3 // 8:size * 5 // 8], norm=norm)
            pl.title(summarytext)
            pl.draw()
            pl.show(False)
            pl.pause(0.1)
# Full-size arrays only exist on the root rank; the other ranks keep None.
kappak=None
deltak=None
# Per-rank slabs that receive the scattered data (N/size planes each).
delta_k=np.empty((N/size,N,N/2+1),dtype=np.complex128)
kappa_k=np.empty((N/size,N,N/2+1),dtype=np.complex128)
# Product of sinc factors along the three axes for this rank's slab.
window_k= np.sinc(1./N*mpi_fn[rank][:,None,None])*np.sinc(1./N*fn[None,:,None])*np.sinc(1./N*fnc[None,None,:])
########################## Load data ############################################
# NOTE(review): block nesting below was reconstructed from a
# whitespace-mangled source (collective Scatter calls placed outside the
# rank-0 block) -- confirm against the original script.
if rank==0:
    print '='*80
    print 'Cal wkkappa'
    deltax=np.linspace(0,N,N**3).reshape(N,N,N)
    change=np.array(Tide.LoadData(Input2),dtype=np.float64)
    deltax[:]=change[:]
    deltax=np.array(deltax,dtype=np.float64)
    del change
    deltak=np.empty((N,N,N/2+1),dtype=np.complex128)
    # Forward transform of deltax into deltak.
    fft=fftw.Plan(inarray=deltax,outarray=deltak,direction='forward',nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
    kappax=Tide.LoadDataOfhdf5(dir+'kappa3dx1.25.hdf5')
    kappak=np.empty((N,N,N/2+1),dtype=np.complex128)
    # Forward transform of kappax into kappak.
    fft=fftw.Plan(inarray=kappax,outarray=kappak,direction='forward',nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
# Distribute both spectra over the ranks.
comm.Scatter(deltak,delta_k,root=0) #deltak
comm.Scatter(kappak,kappa_k,root=0) #deltak
delta_k/=window_k
# Logarithmically spaced bin edges: ``bins`` points from 10**0 up to (not
# including) 512, plus one more edge appended with the same log spacing.
binlog=np.linspace(0,np.log10(512),bins,endpoint=False)
dbinlog=binlog[2]-binlog[1]
binlog=np.hstack((binlog,binlog[-1]+dbinlog))
bin=10**binlog
for i in range(Ntest): b3 = anfft.fft(a0) a2 = anfft.ifft(b3) t2b = time.time() print np.sum(a2) t3 = time.time() # create a forward and backward fft plan a = fftw3.create_aligned_array(a0.shape, np.complex128, 16) b = fftw3.create_aligned_array(a0.shape, np.complex128, 16) c = fftw3.create_aligned_array(a0.shape, np.complex128, 16) a[:] = a0 ft3fft = fftw3.Plan(a, b, direction='forward', flags=['measure']) ft3ifft = fftw3.Plan(b, c, direction='backward', flags=['measure']) t4 = time.time() for i in range(Ntest): #perform a forward transformation ft3fft() # alternatively fft.execute() or fftw.execute(fft) #perform a backward transformation ft3ifft() t4b = time.time() print np.sum(c)
# |k| and the product of sinc factors on this rank's slab.
k = (mpi_fn[rank][:, None, None]**2. + fn[None, :, None]**2. +
     fnc[None, None, :]**2)**(1. / 2.)
window_k = np.sinc(1. / N * mpi_fn[rank][:, None, None]) * np.sinc(
    1. / N * fn[None, :, None]) * np.sinc(1. / N * fnc[None, None, :])
# NOTE(review): block nesting below was reconstructed from a
# whitespace-mangled source (collective Scatter/Gather calls placed outside
# the rank-0 blocks) -- confirm against the original script.
if rank == 0:
    t0 = time.time()
    ##################################Wdeltag########################################
    print '=' * 80
    print 'starting cal wdengx wdengy'
    deltagw1 = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    deltagw2 = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    deltax1 = np.empty_like(deltax, dtype=np.float64)
    deltax2 = np.empty_like(deltax, dtype=np.float64)
    deltak = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    # Forward transform of deltax into deltak.
    fft = fftw.Plan(inarray=deltax,
                    outarray=deltak,
                    direction='forward',
                    nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
    # Replace the first entry of k by a small non-zero value.
    k[0, 0, 0] = 10**-4 / Kf
comm.Scatter(deltak, recvdata_k1, root=0)  #deltak smoothed log
W = wk(k * Kf)
if rank == 0:
    W[0, 0, 0] = 1
# Spectrum times W times 1j*Kf times the first (deltak1) or second
# (deltak2) wave-number component; the zero arrays only broadcast the shape.
deltak1 = recvdata_k1 * W * 1j * Kf * (mpi_fn[rank][:, None, None] +
                                       np.zeros_like(fn)[None, :, None] +
                                       np.zeros_like(fnc)[None, None, :])
deltak2 = recvdata_k1 * W * 1j * Kf * (
    np.zeros_like(mpi_fn[rank])[:, None, None] + fn[None, :, None] +
    np.zeros_like(fnc)[None, None, :])
comm.Gather(deltak1, deltagw1, root=0)
senddata_k1=np.empty((N/(size),N,N/2+1),dtype=np.complex128) wk=Tide.Get_wk() if rank==0: import time deltax=np.linspace(0,N,N**3).reshape(N,N,N) change=np.array(Tide.LoadData(Input),dtype=np.float64) deltax[:]=change[:] deltax=np.array(deltax,dtype=np.float64) del change ###################################smooth####################################### print '='*80 print 'smoothing...' t0=time.time() deltak=np.empty((N,N,N/2+1),dtype=np.complex128) fft=fftw.Plan(inarray=deltax,outarray=deltak,direction='forward',nthreads=nthreads) fftw.execute(fft) fftw.destroy_plan(fft) smooth_k=np.empty((N,N,N/2+1),dtype=np.complex128) k=(mpi_fn[rank][:,None,None]**2.+fn[None,:,None]**2.+fnc[None,None,:]**2)**(1./2.) window_k= np.sinc(1./N*mpi_fn[rank][:,None,None])*np.sinc(1./N*fn[None,:,None])*np.sinc(1./N*fnc[None,None,:]) comm.Scatter(deltak,recvdata_k1,root=0) #deltak senddata_k1=recvdata_k1*np.exp(-0.5*Kf*Kf*k*k*Sigma**2)/window_k #smooth_k comm.Gather(senddata_k1,smooth_k,root=0) if rank==0: ifft=fftw.Plan(inarray=smooth_k,outarray=deltax,direction='backward',nthreads=nthreads) fftw.execute(ifft) fftw.destroy_plan(ifft) deltax/=N**3 # smoothed print 'smoothing end, time: %dm %ds',%((time.time()-t0)/60,(time.time()-t0)%60) t0=time.time()