def test_next_fast_len_strict(self):
    """Compare ``pyfftw.next_fast_len`` with a table of known answers."""
    # Each entry is (requested length, expected fast length).
    expected_pairs = [
        (1, 1),
        (2, 2),
        (3, 3),
        (4, 4),
        (5, 5),
        (6, 6),
        (7, 7),
        (8, 8),
        (11, 11),
        (13, 13),
        (14, 14),
        (15, 15),
        (16, 16),
        (17, 18),
        (1021, 1024),
        # 2 * 3 * 5 * 7 * 11
        (2310, 2310),
        (2310 - 1, 2310),
        # 2 * 3 * 5 * 7 * 13
        (2730, 2730),
        (2730 - 1, 2730),
        # 2**2 * 3**2 * 5**2 * 7**2 * 11
        (485100, 485100),
        (485100 - 1, 485100),
        # 2**2 * 3**2 * 5**2 * 7**2 * 13
        (573300, 573300),
        (573300 - 1, 573300),
        # Lengths containing more than one factor of 11 or 13 are not
        # accepted, so the next fast length is strictly larger.
        # 2 * 3 * 5 * 7 * 11**2
        (25410, 25872),
        # 2 * 3 * 5 * 7 * 13**2
        (35490, 35672),
        # 2 * 3 * 5 * 7 * 11 * 13
        (30030, 30576),
    ]
    for target, expected in expected_pairs:
        assert_equal(pyfftw.next_fast_len(target), expected)
def test_next_fast_len(self):
    """Check that ``pyfftw.next_fast_len`` returns valid fast lengths.

    For every ``n`` in ``1..999`` plus one large value, the result ``m``
    must satisfy ``m >= n`` and must factor completely into powers of
    2, 3, 5 and 7 times at most one single factor of 11 or 13.
    """

    def nums():
        for j in range(1, 1000):
            yield j
        yield 2**5 * 3**5 * 4**5 + 1

    for n in nums():
        m = pyfftw.next_fast_len(n)
        msg = "n=%d, m=%d" % (n, m)

        assert_(m >= n, msg)

        # Check regularity.
        # These factors come from the description in the FFTW3 docs:
        #     http://fftw.org/fftw3_doc/Complex-DFTs.html#Complex-DFTs
        remainder = m
        large_prime_used = False
        for d in (2, 3, 5, 7, 11, 13):
            while remainder % d == 0:
                if d in (11, 13):
                    # Only one factor of 11 or 13 is allowed in total.
                    # Decide *before* dividing: a surplus factor has to
                    # stay in ``remainder`` so the final assertion fails.
                    if large_prime_used:
                        break
                    large_prime_used = True
                remainder //= d
        assert_equal(remainder, 1, err_msg=msg)
def prepareFFTW(self, volumeSize):
    """Configure pyfftw globally and allocate two aligned complex buffers.

    The pyfftw interface cache is disabled, the thread count is set to
    ``cpu_count()`` and the planner effort to ``'FFTW_ESTIMATE'``.

    Parameters
    ----------
    volumeSize
        Requested length of the last buffer axis; it is rounded up with
        ``pyfftw.next_fast_len``.

    Returns
    -------
    tuple
        Two uninitialised ``complex128`` arrays aligned to 16 bytes, with
        shape ``(s, s, v)`` where ``s`` is the fast length for
        ``self.spaceSamples`` and ``v`` the fast length for ``volumeSize``.
    """
    pyfftw.interfaces.cache.disable()

    fast_samples = pyfftw.next_fast_len(self.spaceSamples)
    fast_volume = pyfftw.next_fast_len(volumeSize)

    pyfftw.config.NUM_THREADS = cpu_count()
    pyfftw.config.PLANNER_EFFORT = 'FFTW_ESTIMATE'

    padded_shape = (fast_samples, fast_samples, fast_volume)
    buffer_a, buffer_b = (
        pyfftw.empty_aligned(padded_shape, dtype='complex128', n=16)
        for _ in range(2)
    )
    return buffer_a, buffer_b
def pwr2345(n):
    """Return an FFT-friendly length that is at least ``n``.

    If ``pyfftw`` can be imported, its ``next_fast_len`` decides the
    result. Otherwise the result is the smallest pure power of 2, 3, 4 or
    5 that is not below ``n``.

    Parameters
    ----------
    n : int
        Minimum required length.

    Returns
    -------
    int
        The chosen length. In the fallback path any ``n <= 1`` yields 1.
    """
    # Only the import is guarded, so an error raised by next_fast_len
    # itself is never mistaken for "pyfftw is missing".
    try:
        import pyfftw
    except ImportError:
        pass
    else:
        return pyfftw.next_fast_len(n)

    def next_power(base):
        # Exact integer arithmetic: ceil(log(n) / log(base)) can overshoot
        # by one at exact powers because of floating-point rounding.
        power = 1
        while power < n:
            power *= base
        return power

    # Base 4 never beats base 2 (every power of 4 is a power of 2); it is
    # kept so the candidate set still matches the function name.
    return min(next_power(base) for base in (2, 3, 4, 5))
def padrightside(nbins):
    """
    Return how many samples must be appended on the right of ``nbins``.

    The target length is ``pyfftw.next_fast_len(nbins)``; the returned
    value is the difference between that length and ``nbins``, as an int.
    """
    padded_length = pyfftw.next_fast_len(nbins)
    return int(padded_length - nbins)
def fft_convergence(self, max_res, n_res, N, period, n_iter=3):
    """Measure how the FFT'd permittivity changes with grid resolution.

    A log-spaced sequence of ``n_res + 1`` resolutions is built from
    ``2 * N - 1`` up to ``max_res``. The first one gives the reference
    transforms; for every later one the transforms are recomputed
    ``n_iter`` times and compared with those of the previous resolution.

    Parameters
    ----------
    max_res
        Largest resolution, converted with ``to_vec2``.
    n_res : int
        Number of resolution steps that are timed and compared.
    N
        Converted with ``to_vec2``; forwarded to ``self.__ffts``.
    period
        Period forwarded to ``self.grid``; also used to convert between
        resolution and grid size.
    n_iter : int, optional
        Number of repetitions of ``self.__ffts`` used for the timing.

    Returns
    -------
    tuple
        ``(D, DT)``: for each step, the largest norm of the change in
        ``_fft_eps``, ``_fft_eps_ix`` and ``_fft_eps_iy``, and the mean
        wall-clock time of one ``self.__ffts`` call.
    """
    N = to_vec2(N)
    max_res = to_vec2(max_res)

    def log_spaced(first, last):
        # n_res + 1 points, geometrically spaced from first to last.
        return np.logspace(np.log(first), np.log(last), n_res + 1,
                           base=np.e)

    res = bm.Vector2d(log_spaced(2 * N.x - 1, max_res.x),
                      log_spaced(2 * N.y - 1, max_res.y))
    DT = np.zeros(n_res)
    D = np.zeros(n_res)

    def snap_resolution(target):
        # Round the grid size implied by ``target`` up to FFT-friendly
        # lengths and store the resolution that size corresponds to.
        self.resolution = target
        size = period * self.resolution
        size.x = next_fast_len(int(size.x))
        size.y = next_fast_len(int(size.y))
        self.resolution = size / period

    # Reference transforms at the coarsest resolution.
    snap_resolution(res[0])
    _, grid = self.grid(period=period, feature='eps')
    self.__ffts(grid, N)

    for i, r in enumerate(res[1:]):
        # Snapshot the previous transforms before they are overwritten.
        previous = (self._fft_eps.copy(),
                    self._fft_eps_ix.copy(),
                    self._fft_eps_iy.copy())

        snap_resolution(r)
        _, grid = self.grid(period=period, feature='eps')

        t0 = time.time()
        for _ in range(n_iter):
            self.__ffts(grid, N)
        t1 = time.time()
        DT[i] = (t1 - t0) / n_iter

        current = (self._fft_eps, self._fft_eps_ix, self._fft_eps_iy)
        D[i] = max(la.norm(new - old)
                   for new, old in zip(current, previous))

        print(f"Sim {i + 1!s}: res = {r!s}"
              f"\nTime = {round(DT[i], 3)!s}, diff = {round(D[i], 5)!s}")

    return (D, DT)
def compute_eigs(self, freq, k, period, N):
    """Recompute the cached transforms and eigen-solution when inputs change.

    Nothing is done when ``period``, ``N``, ``freq`` and ``k`` all equal
    the values stored on the instance. The permittivity transforms are
    rebuilt only when ``period`` or ``N`` changed or ``self.dispersive``
    is truthy; ``self.__eigs`` is run for any change.

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    source - confirm against the original file.
    """
    N_t = N.x * N.y

    if not (self.period != period or self.N != N
            or self.freq != freq or self.k != k):
        return

    if self.period != period or self.N != N or self.dispersive:
        # Grid size: from the explicit resolution if one is set,
        # otherwise 2 * N - 1; either way rounded up to fast FFT lengths.
        if self.resolution:
            size = period * self.resolution
        else:
            size = (2 * N - 1)
        size.x = next_fast_len(int(size.x))
        size.y = next_fast_len(int(size.y))
        res = size / period

        if self.shapes:
            _, grid = self.grid(res, period, freq, 'eps')
            self.__ffts(grid, N)
        else:
            # No shapes: each transform is the material permittivity
            # times an N_t x N_t identity matrix.
            identity = np.eye(N_t)
            self._fft_eps = self.material.get('eps', freq) * identity
            self._fft_eps_ix = self.material.get('eps', freq) * identity
            self._fft_eps_iy = self.material.get('eps', freq) * identity

        self._res = res
        self._period = period
        self._N = N

    def orders(count):
        # Integer indices from -(count // 2) to count // 2 inclusive.
        return np.arange(-(count // 2), count // 2 + 1, dtype=int)

    k0 = 2 * np.pi * freq
    m = bm.Vector2d(orders(N.x), orders(N.y))
    ki = k0 * k + 2 * np.pi * m / period
    K = 1j * ki.grid().flatten().diag() / k0
    self.__eigs(freq, K)
    self._k = k
def __init__(self, len1: int, len2: int, fftw_threads: int = 5):
    """Allocate buffers and FFTW plans for a 1-D 'valid' convolution.

    Parameters
    ----------
    len1, len2 : int
        Lengths of the two inputs. If ``len2`` is larger they are swapped
        and ``self.switch_inputs`` records that.
    fftw_threads : int, optional
        Thread count passed to every ``pyfftw.FFTW`` plan.
    """
    # 'valid' mode is set up with the longer input first.
    self.switch_inputs = len2 > len1
    if self.switch_inputs:
        len1, len2 = len2, len1
    self.len1, self.len2 = len1, len2

    # Zero-pad the full convolution length up to a size FFTW handles fast.
    self.fast_len = pyfftw.next_fast_len(len1 + len2 - 1)
    self.padding_in1 = np.zeros(self.fast_len - self.len1)
    self.padding_in2 = np.zeros(self.fast_len - self.len2)

    # Slice of the full convolution holding the 'valid' results.
    self.valid_len = len1 - len2 + 1
    first_valid = len2 - 1
    self.valid_slice = slice(first_valid, first_valid + self.valid_len)

    # Real-side buffers have fast_len samples, complex-side buffers
    # fast_len // 2 + 1.
    n_real = self.fast_len
    n_complex = n_real // 2 + 1
    self._rfft_in1 = pyfftw.empty_aligned(n_real, dtype="float64")
    self._rfft_in2 = pyfftw.empty_aligned(n_real, dtype="float64")
    self._rfft_out1 = pyfftw.empty_aligned(n_complex, dtype="complex128")
    self._rfft_out2 = pyfftw.empty_aligned(n_complex, dtype="complex128")
    self._irfft_in = pyfftw.empty_aligned(n_complex, dtype="complex128")
    self._irfft_out = pyfftw.empty_aligned(n_real, dtype="float64")

    # Flush so the message is emitted before the plans are built.
    print("Creating FFTW plans for convolution...", end="", flush=True)

    plan_options = dict(
        flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"),
        threads=fftw_threads,
    )
    self._fftw_rfft1 = pyfftw.FFTW(
        self._rfft_in1, self._rfft_out1, **plan_options)
    self._fftw_rfft2 = pyfftw.FFTW(
        self._rfft_in2, self._rfft_out2, **plan_options)
    self._fftw_irfft = pyfftw.FFTW(
        self._irfft_in,
        self._irfft_out,
        direction="FFTW_BACKWARD",
        **plan_options,
    )
    print(" done.")
def __init__(self, len_in1, len_in2):
    """Allocate buffers and FFTW plans for a 1-D 'valid' convolution.

    Parameters
    ----------
    len_in1, len_in2 : int
        Lengths of the two inputs. If ``len_in2`` is larger they are
        swapped and ``self.switch_inputs`` records that.
    """
    # 'valid' mode is set up with the longer input first.
    self.switch_inputs = (len_in2 > len_in1)
    if self.switch_inputs:
        len_in1, len_in2 = len_in2, len_in1
    self.len_in1 = len_in1
    self.len_in2 = len_in2

    # Zero-pad the full convolution length up to a size FFTW handles fast.
    self.shape = len_in1 + len_in2 - 1
    self.fshape = pyfftw.next_fast_len(self.shape)
    self.zero_pad_in1 = np.zeros(self.fshape - self.len_in1)
    self.zero_pad_in2 = np.zeros(self.fshape - self.len_in2)

    # Slice of the full convolution holding the 'valid' results;
    # (2 * len_in2 - 2) // 2 is simply len_in2 - 1.
    self.newshape = len_in1 - len_in2 + 1
    idx_start = len_in2 - 1
    self.valid_slice = slice(idx_start, idx_start + self.newshape)

    # Real-side buffers have fshape samples, complex-side buffers
    # fshape // 2 + 1.
    fshape_2 = self.fshape // 2 + 1
    self._rfft_in1 = pyfftw.empty_aligned(self.fshape, dtype='float64')
    self._rfft_out1 = pyfftw.empty_aligned(fshape_2, dtype='complex128')
    self._rfft_in2 = pyfftw.empty_aligned(self.fshape, dtype='float64')
    self._rfft_out2 = pyfftw.empty_aligned(fshape_2, dtype='complex128')
    self._irfft_in = pyfftw.empty_aligned(fshape_2, dtype='complex128')
    self._irfft_out = pyfftw.empty_aligned(self.fshape, dtype='float64')

    # The message has no trailing newline, so without an explicit flush a
    # line-buffered stdout would show it only after planning finished.
    print("Creating FFTW plans for convolution...", end="", flush=True)
    flags = ('FFTW_MEASURE', 'FFTW_DESTROY_INPUT')
    self._fftw_rfft1 = pyfftw.FFTW(self._rfft_in1, self._rfft_out1,
                                   flags=flags)
    self._fftw_rfft2 = pyfftw.FFTW(self._rfft_in2, self._rfft_out2,
                                   flags=flags)
    self._fftw_irfft = pyfftw.FFTW(self._irfft_in, self._irfft_out,
                                   direction='FFTW_BACKWARD', flags=flags)
    print(" done.")
def test_next_fast_len_strict(self):
    """Compare ``pyfftw.next_fast_len`` with a table of known answers."""
    # Lengths that are already fast and must come back unchanged.
    cases = [(n, n) for n in (1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16)]
    cases += [(17, 18), (1021, 1024)]

    # Fast lengths containing exactly one factor of 11 or 13; both the
    # value itself and its predecessor must map onto it:
    #   2310   = 2 * 3 * 5 * 7 * 11
    #   2730   = 2 * 3 * 5 * 7 * 13
    #   485100 = 2**2 * 3**2 * 5**2 * 7**2 * 11
    #   573300 = 2**2 * 3**2 * 5**2 * 7**2 * 13
    for exact in (2310, 2730, 485100, 573300):
        cases += [(exact, exact), (exact - 1, exact)]

    # More than one factor of 11 or 13 is not accepted:
    #   25410 = 2 * 3 * 5 * 7 * 11**2
    #   35490 = 2 * 3 * 5 * 7 * 13**2
    #   30030 = 2 * 3 * 5 * 7 * 11 * 13
    cases += [(25410, 25872), (35490, 35672), (30030, 30576)]

    for target, expected in cases:
        assert_equal(pyfftw.next_fast_len(target), expected)
# Small benchmark: time one 3-D complex FFT run through the pyfftw
# ``numpy_fft`` interface on an aligned random array.
from multiprocessing import cpu_count
import time

import numpy
import pyfftw
import scipy

# pyfftw.forget_wisdom()
pyfftw.interfaces.cache.disable()

mainDataSize = 1024
volumeSize = 20

# Round both requested sizes up to lengths FFTW handles efficiently.
fastLenghtT = pyfftw.next_fast_len(mainDataSize)
fastLenghtV = pyfftw.next_fast_len(volumeSize)

pyfftw.config.NUM_THREADS = cpu_count()
pyfftw.config.PLANNER_EFFORT = 'FFTW_ESTIMATE'

print(fastLenghtT)
print(fastLenghtV)

dataSize = (fastLenghtV, fastLenghtT, fastLenghtT)
f = pyfftw.empty_aligned(dataSize, dtype='complex128', n=16)
# Real part is drawn first, then the imaginary part.
f[:] = numpy.random.randn(*dataSize) + 1j * numpy.random.randn(*dataSize)

# NOTE(review): the original comment said only the plan is applied here;
# with the interface cache disabled the timed call presumably also
# includes planning - confirm.
_start = time.time()
fftf = pyfftw.interfaces.numpy_fft.fftn(f)
tas = time.time() - _start
print("3D FFT, pyfftw:", tas)
def _plan_fftw_convolve(loads: np.ndarray, im: np.ndarray,
                        domain: typing.Optional[np.ndarray],
                        circular: typing.Sequence[bool]):
    """Plans an FFT convolution, returns a function to carry out the convolution

    FFTW implementation

    Parameters
    ----------
    loads: np.ndarray
        An example of a loads array, this is not altered or stored
    im: np.ndarray
        The influence matrix component for the transformation, this is not
        altered but its FFT is stored to save time during convolution, this
        must be larger in every dimension than the loads array
    domain: np.ndarray, optional (None)
        Array with same shape as loads filled with boolean values. If
        supplied this function will return a function which first fills the
        supplied loads into the domain then computes the convolution. This is
        typically used for finding loads from set displacements as the
        displacements are often not set over the whole surface.
    circular: Sequence[bool]
        If True the circular convolution will be calculated, to be used for
        periodic simulations

    Returns
    -------
    function
        A function which takes a single input of loads and returns the result
        of the convolution with the original influence matrix. If a domain
        was not supplied the input to the returned function must be exactly
        the same shape as the loads array used in this function. If a domain
        was specified the length of the loads input to the returned function
        must be the same as the number of non zero elements in domain.

    Notes
    -----
    This function uses FFTW, if you want to use the CUDA implementation make
    sure that cupy is installed and importable. If cupy can be imported
    slippy will use the CUDA implementations by default

    Examples
    --------
    >>> import numpy as np
    >>> import slippy.contact as c
    >>> result = c.hertz_full([1,1], [np.inf, np.inf], [200e9, 200e9], [0.3, 0.3], 1e4)
    >>> X,Y = np.meshgrid(*[np.linspace(-0.005,0.005,256)]*2)
    >>> grid_spacing = X[1][1]-X[0][0]
    >>> loads = result['pressure_f'](X,Y)
    >>> disp_analytical = result['surface_displacement_b_f'][0](X,Y)['uz']
    >>> im = c.elastic_influence_matrix('zz', (512,512), (grid_spacing,grid_spacing), 200e9/(2*(1+0.3)), 0.3)
    >>> convolve_func = plan_convolve(loads, im, None, [False, False])
    >>> disp_numerical = convolve_func(loads)
    """
    loads = np.asarray(loads)
    im = np.asarray(im)
    im_shape_orig = im.shape
    if domain is not None:
        # Builtin ``bool`` instead of the ``np.bool`` alias, which does not
        # exist in NumPy 1.24 - 1.26.
        domain = np.asarray(domain, dtype=bool)

    # Size of the transform along each of the two axes.
    input_shape = []
    for i in range(2):
        if circular[i]:
            assert loads.shape[i] == im.shape[i], "For circular convolution loads and im must be same shape"
            input_shape.append(loads.shape[i])
        else:
            input_shape.append(2 * pyfftw.next_fast_len(max(loads.shape[i], im.shape[i])))
    input_shape = tuple(input_shape)

    # The real-to-complex transform keeps n // 2 + 1 terms on the last axis.
    fft_shape = [input_shape[0], input_shape[1] // 2 + 1]

    in_empty = pyfftw.empty_aligned(input_shape, dtype=loads.dtype)
    out_empty = pyfftw.empty_aligned(fft_shape, dtype='complex128')
    ret_empty = pyfftw.empty_aligned(input_shape, dtype=loads.dtype)

    forward_trans = pyfftw.FFTW(in_empty, out_empty, axes=(0, 1),
                                direction='FFTW_FORWARD', threads=slippy.CORES)
    backward_trans = pyfftw.FFTW(out_empty, ret_empty, axes=(0, 1),
                                 direction='FFTW_BACKWARD', threads=slippy.CORES)

    # The stored transform of im is scaled by 1 / sqrt(N); each result is
    # scaled back by sqrt(N).
    norm_inv = forward_trans.N ** 0.5
    norm = 1 / norm_inv

    # Zero-pad im up to the transform size, roll it by -((size - 1) // 2)
    # of its original size along each axis, and transform it once.
    shape_diff = [[0, (b - a)] for a, b in zip(im.shape, input_shape)]
    im = np.pad(im, shape_diff, 'constant')
    im = np.roll(im, tuple(-((sz - 1) // 2) for sz in im_shape_orig), (-2, -1))
    fft_im = forward_trans(im) * norm

    # Only the shape and dtype of the example array are kept; the closures
    # below deliberately do not reference ``loads`` itself, so the array is
    # not kept alive by the returned function.
    shape_diff_loads = [[0, (b - a)] for a, b in zip(loads.shape, input_shape)]
    shape = loads.shape
    dtype = loads.dtype

    def _convolve_full(full_loads):
        # Pad, multiply in the frequency domain, transform back and crop
        # the result to the shape of the loads array.
        loads_pad = np.pad(full_loads, shape_diff_loads, 'constant')
        full = backward_trans(forward_trans(loads_pad) * fft_im)
        return norm_inv * full[:shape[0], :shape[1]]

    def inner_no_domain(full_loads):
        # Accept input of the planned shape or anything that reshapes to
        # it; reshaped input gets a flattened result.
        if full_loads.shape == shape:
            flat = False
        else:
            full_loads = np.reshape(full_loads, shape)
            flat = True
        full = _convolve_full(full_loads)
        if flat:
            full = full.flatten()
        return full

    def inner_with_domain(sub_loads, ignore_domain=False):
        # Scatter the supplied values into a zero array at the positions
        # where domain is True, then convolve.
        full_loads = np.zeros(shape, dtype=dtype)
        full_loads[domain] = sub_loads
        same = _convolve_full(full_loads)
        if ignore_domain:
            return same
        return same[domain]

    if domain is None:
        return inner_no_domain
    else:
        return inner_with_domain
def from_function(cls, corr_func, shape, is_cyclic=True):
    """Build an instance from a distance-based correlation function.

    Parameters
    ----------
    corr_func: callable(dist) -> float
        Gives the correlation between the first element of the domain
        and an element at distance ``dist``.
    shape: tuple of int
        Layout on which the correlations depend. The state itself is
        formally a vector; this is the (usually physical) shape behind it.
    is_cyclic: bool
        Whether the domain is treated as periodic in every direction.
        If not, each axis of length ``dim`` is computed on a grid of
        length ``next_fast_len(2 * dim - 1)``.

    Returns
    -------
    HomogeneousIsotropicCorrelation
    """
    shape = np.atleast_1d(shape)
    computational_shape = (
        tuple(shape)
        if is_cyclic
        else tuple(next_fast_len(2 * dim - 1) for dim in shape)
    )

    self = cls(tuple(shape), computational_shape)
    shape = np.asarray(self._computational_shape)
    ndims = len(shape)

    # Axis lengths as a column followed by singleton axes, so that they
    # broadcast against the index arrays handed to corr_from_index.
    broadcastable_shape = shape[:, newaxis]
    while broadcastable_shape.ndim < ndims + 1:
        broadcastable_shape = broadcastable_shape[..., newaxis]

    def corr_from_index(*index):
        """Correlation of index with zero.

        Converts a correlation function of index distance into one of
        indices on a periodic domain.

        Parameters
        ----------
        index: tuple of int

        Returns
        -------
        float[-1, 1]
            The correlation of the given index with the origin.

        See Also
        --------
        DistanceCorrelationFunction.correlation_from_index
        """
        # Squared offsets from the origin and from the shifted origin.
        direct_sq = square(index)
        wrapped_sq = square(broadcastable_shape - index)
        # Per component, keep the smaller one: that is the distance to
        # the closest of the shifted origins.
        nearest_sq = fmin(direct_sq, wrapped_sq)
        return corr_func(sqrt(array_sum(nearest_sq, axis=0)))

    corr_struct = fromfunction(corr_from_index, shape=tuple(shape),
                               dtype=DTYPE)

    # Original author's notes: this sequence should also be obtainable
    # with a type-I DCT, and the layout produced here is meant to match
    # the one used in _matmat.
    corr_fourier = rfftn(corr_struct,
                         axes=arange(ndims, dtype=int),
                         threads=NUM_THREADS,
                         planner_effort=ADVANCE_PLANNER_EFFORT)
    self._corr_fourier = corr_fourier

    # Flag coefficients that are negligible next to the largest one;
    # this comparison is itself affected by round-off.
    magnitude = abs(corr_fourier)
    self._fourier_near_zero = (
        magnitude < FOURIER_NEAR_ZERO * magnitude.max()
    )
    return self