def __init__(self, ecut, gd, dtype=None, kd=None, fftwflags=fftw.ESTIMATE):
    """Set up FFT work arrays, FFT plans and the sphere-to-grid index maps.

    Parameters:

    ecut:
        Cutoff value.  A reciprocal-lattice vector G is kept for the
        k-vector K when ``|G + K|**2 <= 2 * ecut``.
    gd:
        Object supplying ``pbc_c``, ``comm``, ``N_c``, ``h_cv`` and
        ``icell_cv`` (presumably a grid descriptor -- confirm against
        the caller).  ``gd.pbc_c`` must be all true and
        ``gd.comm.size`` must be 1.
    dtype:
        ``float`` or ``complex``.  When None, ``float`` is picked if
        *kd* is None or ``kd.gamma`` is true, otherwise ``complex``.
    kd:
        Optional object supplying ``gamma``, ``ibzk_qc`` and ``comm``
        (presumably a k-point descriptor -- confirm).  When None a
        single zero k-vector is used.
    fftwflags:
        Flags passed to both ``fftw.FFTPlan`` objects; also stored as
        ``self.fftwflags``.

    Raises AssertionError if the requirements on *gd* are not met or
    if the grid spacing is too coarse for *ecut*.
    """
    assert gd.pbc_c.all()
    assert gd.comm.size == 1

    self.ecut = ecut
    self.gd = gd
    # Remember the planner flags on the instance, as the sibling
    # definition of this constructor does.
    self.fftwflags = fftwflags

    N_c = gd.N_c
    self.comm = gd.comm

    # Every grid-spacing vector h must satisfy |h|^2 <= pi^2 / (2 ecut).
    assert ((gd.h_cv**2).sum(1) <= 0.5 * pi**2 / ecut).all()

    if dtype is None:
        if kd is None or kd.gamma:
            dtype = float
        else:
            dtype = complex
    self.dtype = dtype

    if dtype == float:
        # Only N_c[2] // 2 + 1 points are kept along the last axis.
        Nr_c = N_c.copy()
        Nr_c[2] = N_c[2] // 2 + 1
        i_Qc = np.indices(Nr_c).transpose((1, 2, 3, 0))
        # Wrap the first two axes into the range [-N // 2, N - N // 2).
        i_Qc[..., :2] += N_c[:2] // 2
        i_Qc[..., :2] %= N_c[:2]
        i_Qc[..., :2] -= N_c[:2] // 2
        self.tmp_Q = fftw.empty(Nr_c, complex)
        # tmp_R is a float view of tmp_Q's memory, cut to N_c[2] points
        # along the last axis.
        self.tmp_R = self.tmp_Q.view(float)[:, :, :N_c[2]]
    else:
        i_Qc = np.indices(N_c).transpose((1, 2, 3, 0))
        # Wrap all three axes into the range [-N // 2, N - N // 2).
        i_Qc += N_c // 2
        i_Qc %= N_c
        i_Qc -= N_c // 2
        self.tmp_Q = fftw.empty(N_c, complex)
        self.tmp_R = self.tmp_Q

    self.nbytes = self.tmp_R.nbytes

    # One plan from tmp_R to tmp_Q (sign -1) and one back (sign +1).
    self.fftplan = fftw.FFTPlan(self.tmp_R, self.tmp_Q, -1, fftwflags)
    self.ifftplan = fftw.FFTPlan(self.tmp_Q, self.tmp_R, 1, fftwflags)

    # Calculate reciprocal lattice vectors:
    B_cv = 2.0 * pi * gd.icell_cv
    # reshape() instead of assigning to .shape (discouraged by NumPy).
    i_Qc = i_Qc.reshape((-1, 3))
    self.G_Qv = np.dot(i_Qc, B_cv)
    self.nbytes += self.G_Qv.nbytes

    self.kd = kd
    if kd is None:
        self.K_qv = np.zeros((1, 3))
    else:
        self.K_qv = np.dot(kd.ibzk_qc, B_cv)

    # For float dtype only part of the index space is kept: i_z > 0, or
    # i_y > 0, or (i_x >= 0 and i_y == 0).  Presumably this drops the
    # redundant +/-G partners of a real-valued function -- confirm.
    # The mask does not depend on the k-vector, so build it once.
    if dtype == float:
        half_Q = ((i_Qc[:, 2] > 0) | (i_Qc[:, 1] > 0) |
                  ((i_Qc[:, 0] >= 0) & (i_Qc[:, 1] == 0)))
    else:
        half_Q = None

    # Map from vectors inside sphere to fft grid:
    self.Q_qG = []
    self.G2_qG = []
    Q_Q = np.arange(len(i_Qc), dtype=np.int32)
    self.ngmin = 100000000
    self.ngmax = 0
    for q, K_v in enumerate(self.K_qv):
        G2_Q = ((self.G_Qv + K_v)**2).sum(axis=1)
        mask_Q = (G2_Q <= 2 * ecut)
        if half_Q is not None:
            mask_Q &= half_Q
        Q_G = Q_Q[mask_Q]
        self.Q_qG.append(Q_G)
        self.G2_qG.append(G2_Q[Q_G])
        ng = len(Q_G)
        self.ngmin = min(ng, self.ngmin)
        self.ngmax = max(ng, self.ngmax)
        self.nbytes += Q_G.nbytes + self.G2_qG[q].nbytes

    if kd is not None:
        # Reduce the extremes over the k-point communicator.
        self.ngmin = kd.comm.min(self.ngmin)
        self.ngmax = kd.comm.max(self.ngmax)

    self.n_c = np.array([self.ngmax])  # used by hs_operators.py XXX
def __init__(self, ecut, gd, dtype=None, kd=None, fftwflags=fftw.ESTIMATE):
    """Build FFT buffers, FFT plans and the lists of grid points in the sphere.

    *gd* must have ``pbc_c`` all true and a communicator of size 1, and
    its grid-spacing vectors must satisfy ``|h|**2 <= pi**2 / (2 * ecut)``.

    If *dtype* is None it becomes ``float`` when *kd* is None or
    ``kd.gamma`` is true, and ``complex`` otherwise.  Without *kd* a
    single zero k-vector is used.  *fftwflags* is stored and passed on
    to the two ``fftw.FFTPlan`` objects.

    For every k-vector K, the flat grid indices with
    ``|G + K|**2 <= 2 * ecut`` end up in ``self.Q_qG`` and the
    corresponding squared lengths in ``self.G2_qG``.
    """
    assert gd.pbc_c.all()
    assert gd.comm.size == 1

    self.ecut = ecut
    self.gd = gd
    self.fftwflags = fftwflags
    self.comm = gd.comm
    size_c = gd.N_c

    assert ((gd.h_cv**2).sum(1) <= 0.5 * pi**2 / ecut).all()

    if dtype is None:
        dtype = float if (kd is None or kd.gamma) else complex
    self.dtype = dtype
    real = dtype == float

    # Shape of the complex work array and number of leading axes whose
    # indices get wrapped around zero.
    if real:
        fftsize_c = size_c.copy()
        fftsize_c[2] = size_c[2] // 2 + 1
        nwrap = 2
    else:
        fftsize_c = size_c
        nwrap = 3

    idx_Qc = np.indices(fftsize_c).transpose((1, 2, 3, 0))
    half_c = size_c[:nwrap] // 2
    wrapped = idx_Qc[..., :nwrap]  # view: edits below land in idx_Qc
    wrapped += half_c
    wrapped %= size_c[:nwrap]
    wrapped -= half_c

    self.tmp_Q = fftw.empty(fftsize_c, complex)
    if real:
        # Float view of the same memory, trimmed along the last axis.
        self.tmp_R = self.tmp_Q.view(float)[:, :, :size_c[2]]
    else:
        self.tmp_R = self.tmp_Q
    self.nbytes = self.tmp_R.nbytes

    self.fftplan = fftw.FFTPlan(self.tmp_R, self.tmp_Q, -1, fftwflags)
    self.ifftplan = fftw.FFTPlan(self.tmp_Q, self.tmp_R, 1, fftwflags)

    # Reciprocal lattice vectors for every grid index:
    B_cv = 2.0 * pi * gd.icell_cv
    idx_Qc.shape = (-1, 3)
    self.G_Qv = np.dot(idx_Qc, B_cv)
    self.nbytes += self.G_Qv.nbytes

    self.kd = kd
    self.K_qv = np.zeros((1, 3)) if kd is None else np.dot(kd.ibzk_qc, B_cv)

    # Collect, per k-vector, the grid points that lie inside the sphere:
    all_Q = np.arange(len(idx_Qc), dtype=np.int32)
    self.Q_qG = []
    self.G2_qG = []
    self.ngmin = 100000000
    self.ngmax = 0
    for K_v in self.K_qv:
        G2_Q = ((self.G_Qv + K_v)**2).sum(axis=1)
        inside_Q = G2_Q <= 2 * ecut
        if real:
            # Keep i_z > 0, or i_y > 0, or (i_x >= 0 and i_y == 0).
            ix_Q, iy_Q, iz_Q = idx_Qc.T
            inside_Q &= (iz_Q > 0) | (iy_Q > 0) | ((ix_Q >= 0) & (iy_Q == 0))
        Q_G = all_Q[inside_Q]
        G2_G = G2_Q[Q_G]
        self.Q_qG.append(Q_G)
        self.G2_qG.append(G2_G)

        ng = len(Q_G)
        self.ngmin = min(ng, self.ngmin)
        self.ngmax = max(ng, self.ngmax)
        self.nbytes += Q_G.nbytes + G2_G.nbytes

    if kd is not None:
        self.ngmin = kd.comm.min(self.ngmin)
        self.ngmax = kd.comm.max(self.ngmax)

    self.n_c = np.array([self.ngmax])  # used by hs_operators.py XXX
def test(Plan, flags, input, output, sign): t0 = time.time() plan = Plan(input, output, sign, flags) t1 = time.time() t = 0.0 for i in range(100): input[:] = 1.3 t2 = time.time() plan.execute() t3 = time.time() t += t3 - t2 return t1 - t0, t / 100 if __name__ == '__main__': a1 = fftw.empty((32, 28, 128), complex) a2 = fftw.empty((32, 28, 128), complex) b = fftw.empty((32, 28, 65), complex) c1 = b.view(dtype=float)[:, :, :128] c2 = fftw.empty((32, 28, 64), complex).view(dtype=float) for input, output, sign in [ (a1, a1, -1), (a1, a2, -1), (b, c1, 1), (b, c2, 1), (c1, b, -1), (c2, b, -1)]: for Plan, flags in [(fftw.NumpyFFTPlan, 117), (fftw.FFTWPlan, fftw.ESTIMATE), (fftw.FFTWPlan, fftw.MEASURE), (fftw.FFTWPlan, fftw.PATIENT),
def test(Plan, flags, input, output, sign): t0 = time.time() plan = Plan(input, output, sign, flags) t1 = time.time() t = 0.0 for i in range(100): input[:] = 1.3 t2 = time.time() plan.execute() t3 = time.time() t += t3 - t2 return t1 - t0, t / 100 if __name__ == '__main__': a1 = fftw.empty((32, 28, 128), complex) a2 = fftw.empty((32, 28, 128), complex) b = fftw.empty((32, 28, 65), complex) c1 = b.view(dtype=float)[:, :, :128] c2 = fftw.empty((32, 28, 64), complex).view(dtype=float) for input, output, sign in [(a1, a1, -1), (a1, a2, -1), (b, c1, 1), (b, c2, 1), (c1, b, -1), (c2, b, -1)]: for Plan, flags in [(fftw.NumpyFFTPlan, 117), (fftw.FFTWPlan, fftw.ESTIMATE), (fftw.FFTWPlan, fftw.MEASURE), (fftw.FFTWPlan, fftw.PATIENT), (fftw.FFTWPlan, fftw.EXHAUSTIVE)]: tplan, tfft = test(Plan, flags, input, output, sign) print(('%-12s %3d %10.6f %10.6f' % (Plan.__name__, flags, tplan, tfft)))