def test_FFT2(FFT2):
    """Round-trip check of ``FFT2.fft2``/``FFT2.ifft2`` against a global ``rfft2``.

    Rank 0 draws a random ``(N, N)`` array (``N`` comes from the enclosing
    scope) and broadcasts it from root 0, so every rank holds the same
    reference data.  Each rank transforms its own slice and compares the
    result with the matching slice of the transform of the full array, then
    transforms back and compares with the slice it started from.
    """
    global_shape = (N, N)
    is_root = FFT2.rank == 0
    if is_root:
        reference = random(global_shape).astype(FFT2.float)
    else:
        # Placeholder that the broadcast below overwrites.
        reference = zeros(global_shape, dtype=FFT2.float)
    FFT2.comm.Bcast(reference, root=0)

    local_real = zeros(FFT2.real_shape(), dtype=FFT2.float)
    local_hat = zeros(FFT2.complex_shape(), dtype=FFT2.complex)
    local_real[:] = reference[FFT2.real_local_slice()]

    # Forward: the distributed result must equal this rank's part of the
    # transform computed on the whole array.
    local_hat = FFT2.fft2(local_real, local_hat)
    expected_hat = rfft2(reference, axes=(0, 1))
    assert allclose(local_hat, expected_hat[FFT2.complex_local_slice()])

    # Backward: the inverse must reproduce the local input slice.  The same
    # tolerance is passed positionally as third and fourth argument.
    local_real = FFT2.ifft2(local_hat, local_real)
    tol = 5e-7
    assert allclose(local_real, reference[FFT2.real_local_slice()], tol, tol)
def fft(self, u, fu):
    """Fast Fourier transform of y and z.

    Applies ``rfft2`` over axes 1 and 2 of ``u``, regroups the result into
    one chunk per process and exchanges the chunks with ``Alltoall``.  The
    exchanged data is received directly into ``fu``, which is also returned.
    """
    nproc = self.num_processes

    # Intermediate work arrays: the Alltoall send buffer and the rfft2 output.
    send_buf = self.work_arrays[((nproc, self.Np[0], self.Np[1], self.Nf),
                                 self.complex, 0)]
    spectral = self.work_arrays[(self.complex_shape_T(), self.complex, 0)]

    spectral = rfft2(u, spectral, axes=(1, 2),
                     threads=self.threads,
                     planner_effort=self.planner_effort['rfft2'])

    # Split axis 0 into (Np[0], num_processes, ...) blocks and move the
    # process index to the front so the send buffer holds one chunk per rank.
    blocks = spectral.reshape(self.Np[0], nproc, self.Np[1], self.Nf)
    send_buf[:] = rollaxis(blocks, 1)

    self.comm.Alltoall([send_buf, self.mpitype], [fu, self.mpitype])
    return fu
def test_FFT2(FFT2):
    """Round-trip check of ``FFT2.fft2``/``FFT2.ifft2`` with dtype-aware tolerances.

    Rank 0 draws random data of shape ``FFT2.N`` and broadcasts it from
    root 0.  Each rank transforms its local slice and compares it with the
    corresponding slice of an ``rfft2`` of the full array, then transforms
    back and compares with the slice it started from.  Tighter tolerances
    are used when ``FFT2.float`` is ``float64``.
    """
    N = FFT2.N

    if FFT2.rank == 0:
        reference = random(N).astype(FFT2.float)
    else:
        # Placeholder that the broadcast below overwrites.
        reference = zeros(N, dtype=FFT2.float)

    if FFT2.float is float64:
        atol, rtol = 1e-10, 1e-8
    else:
        atol, rtol = 5e-7, 1e-4

    FFT2.comm.Bcast(reference, root=0)

    local_real = zeros(FFT2.real_shape(), dtype=FFT2.float)
    local_hat = zeros(FFT2.complex_shape(), dtype=FFT2.complex)
    local_real[:] = reference[FFT2.real_local_slice()]

    # Forward transform versus the transform of the whole array.
    local_hat = FFT2.fft2(local_real, local_hat)
    expected_hat = zeros(FFT2.global_complex_shape(), dtype=FFT2.complex)
    expected_hat = rfft2(reference, expected_hat, axes=(0, 1))
    assert allclose(local_hat, expected_hat[FFT2.complex_local_slice()],
                    rtol, atol)

    # Inverse transform must give back the local input slice.
    local_real = FFT2.ifft2(local_hat, local_real)
    assert allclose(local_real, reference[FFT2.real_local_slice()],
                    rtol, atol)
def _forward(self, u, fu, fun, dealias=None):
    """Forward transform of ``u`` into ``fu``, with optional 3/2-rule handling.

    The routine Fourier-transforms ``u`` along axes 1 and 2 (``rfft2``, or
    ``rfft`` on axis 2 followed by ``fft`` on axis 1 in the padded paths),
    redistributes the data between processes when more than one is in use,
    and finally hands the result to ``fun``.

    Parameters
    ----------
    u : array
        Input data.  Its shape is asserted to equal ``self.real_shape()`` in
        the serial non-dealiased path and ``self.real_shape_padded()`` in
        the parallel ``dealias_cheb`` path; the other paths do not check it.
    fu : array
        Output array.  Depending on the path it is either rebound to the
        return value of ``fun(..., fu)`` or filled in place via ``fu[:]``.
    fun : callable
        Called as ``fun(input, output)``; its return value is used as the
        result.  The existing comments describe it as the transform in the
        x-direction.
    dealias : str, optional
        Only the value ``'3/2-rule'`` is special-cased; anything else takes
        the unpadded path.

    Returns
    -------
    fu

    Notes
    -----
    * The padded paths divide by ``self.padsize**2`` before returning.
    * ``self.dealias_cheb`` selects the variant in which ``fun`` is applied
      to the ``complex_shape_padded_0`` work array and the result is then
      truncated to the first ``self.N[0]`` entries along axis 0.
    * In the parallel paths only ``self.communication`` values
      ``'Alltoall'`` and ``'Alltoallw'`` are handled.  There is no ``else``
      clause, so any other value skips the exchange and ``fun`` is applied to
      whatever the receive buffer currently holds.
    * NOTE(review): ``self.work_arrays`` is assumed to be a cache returning a
      reusable array for a ``(shape, dtype, index[, flag])`` key -- confirm
      against its definition, including the ``(fu, 0, False)`` key form.
    """
    # Intermediate work arrays
    Uc_hat = self.work_arrays[(self.complex_shape(), self.complex, 0)]

    # ------------------------------------------------------------------
    # Serial case: no communication needed, return from inside this branch.
    # ------------------------------------------------------------------
    if self.num_processes == 1:
        if not dealias == '3/2-rule':
            assert u.shape == self.real_shape()
            Uc_hat = rfft2(u, Uc_hat, axes=(1, 2), threads=self.threads,
                           planner_effort=self.planner_effort['rfft2'])
            fu = fun(Uc_hat, fu)
        else:
            if not self.dealias_cheb:
                Upad_hat = self.work_arrays[(self.complex_shape_padded(),
                                             self.complex, 0, False)]
                Upad_hat_z = self.work_arrays[((self.N[0],
                                                int(self.padsize * self.N[1]),
                                                self.Nf),
                                               self.complex, 0, False)]
                # Transform axis 2, copy out of the padded array along axis 2,
                # then repeat for axis 1.
                Upad_hat = rfft(u, Upad_hat, axis=2, threads=self.threads,
                                planner_effort=self.planner_effort['rfft'])
                Upad_hat_z = SlabShen_R2C.copy_from_padded(
                    Upad_hat, Upad_hat_z, self.N, 2)
                Upad_hat_z[:] = fft(
                    Upad_hat_z, axis=1, overwrite_input=True,
                    threads=self.threads,
                    planner_effort=self.planner_effort['fft'])
                Uc_hat = SlabShen_R2C.copy_from_padded(
                    Upad_hat_z, Uc_hat, self.N, 1)
                # The division creates a temporary; Uc_hat itself is unscaled.
                fu = fun(Uc_hat / self.padsize**2, fu)
            else:
                # Intermediate work arrays required for transform
                Upad_hat = self.work_arrays[(self.complex_shape_padded_0(),
                                             self.complex, 0, False)]
                Upad_hat0 = self.work_arrays[(self.complex_shape_padded_0(),
                                              self.complex, 1, False)]
                Upad_hat2 = self.work_arrays[(self.complex_shape_padded_2(),
                                              self.complex, 0, False)]
                Upad_hat3 = self.work_arrays[(self.complex_shape_padded_3(),
                                              self.complex, 0, False)]

                # Do ffts and truncation in the padded y and z directions
                Upad_hat3 = rfft(
                    u, Upad_hat3, axis=2, threads=self.threads,
                    planner_effort=self.planner_effort['rfft'])
                Upad_hat2 = SlabShen_R2C.copy_from_padded(
                    Upad_hat3, Upad_hat2, self.N, 2)
                Upad_hat2[:] = fft(
                    Upad_hat2, axis=1, threads=self.threads,
                    planner_effort=self.planner_effort['fft'])
                Upad_hat = SlabShen_R2C.copy_from_padded(
                    Upad_hat2, Upad_hat, self.N, 1)

                # Perform fst of data in x-direction
                Upad_hat0 = fun(Upad_hat, Upad_hat0)

                # Truncate to original complex shape
                fu[:] = Upad_hat0[:self.N[0]] / self.padsize**2
        return fu

    # ------------------------------------------------------------------
    # Parallel case
    # ------------------------------------------------------------------
    if not dealias == '3/2-rule':
        Uc_hatT = self.work_arrays[(self.complex_shape_T(),
                                    self.complex, 0, False)]
        # Receive buffer looked up with ``fu`` itself as part of the key.
        Uc_hat = self.work_arrays[(fu, 0, False)]

        if self.communication == 'Alltoall':
            # Disabled in-place alternative, kept for reference:
            #Uc_mpi = Uc_hat.reshape((self.num_processes, self.Np[0], self.Np[1], self.Nf))
            #Uc_hatT = rfft2(u, Uc_hatT, axes=(1,2), threads=self.threads, planner_effort=self.planner_effort['rfft2'])
            #Uc_mpi[:] = rollaxis(Uc_hatT.reshape(self.Np[0], self.num_processes, self.Np[1], self.Nf), 1)
            #self.comm.Alltoall(MPI.IN_PLACE, [Uc_hat, self.mpitype])

            # Intermediate work array required for transform
            U_mpi = self.work_arrays[((self.num_processes, self.Np[0],
                                       self.Np[1], self.Nf),
                                      self.complex, 0, False)]

            # Do 2 ffts in y-z directions on owned data
            Uc_hatT = rfft2(u, Uc_hatT, axes=(1, 2), threads=self.threads,
                            planner_effort=self.planner_effort['rfft2'])

            # Transform data to align with x-direction: move the process
            # index to the front so there is one chunk per rank.
            U_mpi[:] = rollaxis(
                Uc_hatT.reshape(self.Np[0], self.num_processes,
                                self.Np[1], self.Nf), 1)

            # Communicate all values
            self.comm.Alltoall([U_mpi, self.mpitype], [Uc_hat, self.mpitype])

        elif self.communication == 'Alltoallw':
            # Subarray descriptions are built on first use and cached on self.
            if not self._subarraysA:
                self._subarraysA, self._subarraysB, self._counts_displs = self.get_subarrays()

            # Do 2 ffts in y-z directions on owned data
            Uc_hatT = rfft2(u, Uc_hatT, axes=(1, 2), threads=self.threads,
                            planner_effort=self.planner_effort['rfft2'])

            self.comm.Alltoallw(
                [Uc_hatT, self._counts_displs, self._subarraysB],
                [Uc_hat, self._counts_displs, self._subarraysA])

        # Runs for either communication mode.
        fu = fun(Uc_hat, fu)

    else:
        Uc_hatT = self.work_arrays[(self.complex_shape_T(),
                                    self.complex, 0, False)]

        if not self.dealias_cheb:
            Upad_hatT = self.work_arrays[(self.complex_shape_padded_T(),
                                          self.complex, 0, False)]
            Upad_hat_z = self.work_arrays[((self.Np[0],
                                            int(self.padsize * self.N[1]),
                                            self.Nf),
                                           self.complex, 0, False)]

            # Transform axis 2, copy out of the padded array along axis 2,
            # then repeat for axis 1.
            Upad_hatT = rfft(u, Upad_hatT, axis=2, threads=self.threads,
                             planner_effort=self.planner_effort['rfft'])
            Upad_hat_z = SlabShen_R2C.copy_from_padded(
                Upad_hatT, Upad_hat_z, self.N, 2)
            Upad_hat_z[:] = fft(Upad_hat_z, axis=1, threads=self.threads,
                                planner_effort=self.planner_effort['fft'])
            Uc_hatT = SlabShen_R2C.copy_from_padded(
                Upad_hat_z, Uc_hatT, self.N, 1)

            if self.communication == 'Alltoall':
                # Disabled in-place alternative, kept for reference:
                #Uc_mpi = Uc_hat.reshape((self.num_processes, self.Np[0], self.Np[1], self.Nf))
                #Uc_mpi[:] = rollaxis(Uc_hatT.reshape(self.Np[0], self.num_processes, self.Np[1], self.Nf), 1)
                #self.comm.Alltoall(MPI.IN_PLACE, [Uc_hat, self.mpitype])

                # Send buffer uses work-array index 2 here (index 0 is used
                # for the same shape in the non-dealiased path).
                Uc_mpi = self.work_arrays[((self.num_processes, self.Np[0],
                                            self.Np[1], self.Nf),
                                           self.complex, 2, False)]
                Uc_mpi[:] = rollaxis(
                    Uc_hatT.reshape(self.Np[0], self.num_processes,
                                    self.Np[1], self.Nf), 1)
                # Receives into the Uc_hat fetched at the top of the function.
                self.comm.Alltoall([Uc_mpi, self.mpitype],
                                   [Uc_hat, self.mpitype])

            elif self.communication == 'Alltoallw':
                # Subarray descriptions are built on first use and cached.
                if not self._subarraysA:
                    self._subarraysA, self._subarraysB, self._counts_displs = self.get_subarrays()
                self.comm.Alltoallw(
                    [Uc_hatT, self._counts_displs, self._subarraysB],
                    [Uc_hat, self._counts_displs, self._subarraysA])

            # The division creates a temporary; Uc_hat itself is unscaled.
            fu = fun(Uc_hat / self.padsize**2, fu)

        else:
            assert self.num_processes <= self.N[0] / 2, "Number of processors cannot be larger than N[0]/2 for 3/2-rule"
            assert u.shape == self.real_shape_padded()

            # Intermediate work arrays required for transform
            Upad_hat = self.work_arrays[(self.complex_shape_padded_0(),
                                         self.complex, 0, False)]
            Upad_hat0 = self.work_arrays[(self.complex_shape_padded_0(),
                                          self.complex, 1, False)]
            Upad_hat1 = self.work_arrays[(self.complex_shape_padded_1(),
                                          self.complex, 0, False)]
            Upad_hat2 = self.work_arrays[(self.complex_shape_padded_2(),
                                          self.complex, 0, False)]
            Upad_hat3 = self.work_arrays[(self.complex_shape_padded_3(),
                                          self.complex, 0, False)]

            # Do ffts and truncation in the padded y and z directions
            Upad_hat3 = rfft(u, Upad_hat3, axis=2, threads=self.threads,
                             planner_effort=self.planner_effort['rfft'])
            Upad_hat2 = SlabShen_R2C.copy_from_padded(
                Upad_hat3, Upad_hat2, self.N, 2)
            Upad_hat2[:] = fft(Upad_hat2, axis=1, threads=self.threads,
                               planner_effort=self.planner_effort['fft'])
            Upad_hat1 = SlabShen_R2C.copy_from_padded(
                Upad_hat2, Upad_hat1, self.N, 1)

            if self.communication == 'Alltoall':
                # Transpose and communicate data.  U_mpi is a reshape of
                # Upad_hat, which is then exchanged with MPI.IN_PLACE.
                # NOTE(review): this relies on reshape returning a view of
                # Upad_hat rather than a copy -- confirm.
                U_mpi = Upad_hat.reshape(self.complex_shape_padded_0_I())
                U_mpi[:] = rollaxis(
                    Upad_hat1.reshape(self.complex_shape_padded_I()), 1)
                self.comm.Alltoall(MPI.IN_PLACE, [Upad_hat, self.mpitype])

            elif self.communication == 'Alltoallw':
                # Padded subarray descriptions are cached separately, but the
                # assignment below also overwrites self._counts_displs, which
                # the unpadded paths read as well.
                if not self._subarraysA_pad:
                    self._subarraysA_pad, self._subarraysB_pad, self._counts_displs = self.get_subarrays(
                        padsize=self.padsize)
                self.comm.Alltoallw(
                    [Upad_hat1, self._counts_displs, self._subarraysB_pad],
                    [Upad_hat, self._counts_displs, self._subarraysA_pad])

            # Perform fst of data in x-direction
            Upad_hat0 = fun(Upad_hat, Upad_hat0)

            # Truncate to original complex shape
            fu[:] = Upad_hat0[:self.N[0]] / self.padsize**2

    return fu