def write(self, filename, idiotproof=True):
    if idiotproof and not filename.endswith('.ftd'):
        raise IOError('Filename must end with `.ftd`.')

    master = self.world.rank == 0

    # Open writer on master and set parameters/dimensions
    if master:
        tar = Writer(filename)
        tar['DataType'] = {float: 'Float', complex: 'Complex'}[self.dtype]
        tar['Time'] = self.time
        tar['TimeStep'] = self.timestep  # non-essential
        tar['Width'] = self.sigma

        tar.dimension('nw', self.nw)
        tar.dimension('nspins', self.nspins)

        # Create dimensions for various netCDF variables:
        ng = self.gd.get_size_of_global_array()
        tar.dimension('ngptsx', ng[0])
        tar.dimension('ngptsy', ng[1])
        tar.dimension('ngptsz', ng[2])

        # Write frequencies
        tar.add('Frequency', ('nw',), self.omega_w, dtype=float)

        # Write cumulative phase factors
        tar.add('PhaseFactor', ('nw',), self.gamma_w, dtype=self.dtype)

    # Collect average densities on master and write
    if master:
        tar.add('Average', ('nspins', 'ngptsx', 'ngptsy', 'ngptsz'),
                dtype=float)
    for s in range(self.nspins):
        big_Ant_G = self.gd.collect(self.Ant_sG[s])
        if master:
            tar.fill(big_Ant_G)

    # Collect Fourier transforms on master and write
    if master:
        tar.add('FourierTransform',
                ('nw', 'nspins', 'ngptsx', 'ngptsy', 'ngptsz'),
                dtype=self.dtype)
    for w in range(self.nw):
        for s in range(self.nspins):
            big_Fnt_G = self.gd.collect(self.Fnt_wsG[w, s])
            if master:
                tar.fill(big_Fnt_G)

    # Close to flush changes
    if master:
        tar.close()

    # Make sure slaves don't return before master is done
    self.world.barrier()
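# The write() method above follows a collect-on-master pattern: every rank
# contributes its slice of the grid data, only rank 0 touches the file, and a
# final barrier keeps the other ranks from returning before the file is
# complete.  Below is a minimal, self-contained sketch of that pattern using
# mpi4py and a plain .npy file instead of gpaw.mpi and the Writer object
# (both substitutions are assumptions made for illustration only).

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
master = comm.Get_rank() == 0

local_chunk = np.full(4, comm.Get_rank(), dtype=float)  # toy local grid data
chunks = comm.gather(local_chunk, root=0)               # plays the role of gd.collect()

if master:
    np.save('average.npy', np.concatenate(chunks))      # plays the role of tar.fill()

comm.Barrier()  # slaves wait until the master has finished writing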
def calculate_local_kernel(self):
    # Standard ALDA exchange kernel
    # Use with care. Results are very difficult to converge
    # Sensitive to density_cut
    ns = self.calc.wfs.nspins
    gd = self.gd
    pd = self.pd
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    fxc_sg = ns * self.get_fxc_g(ns * self.n_g)
    fxc_sg[np.where(self.n_g < self.density_cut)] = 0.0

    r_vg = gd.get_grid_point_coordinates()

    for iq in range(len(self.ibzq_qc)):
        Gvec_Gc = np.dot(pd.get_reciprocal_vectors(q=iq, add_q=False),
                         cell_cv / (2 * np.pi))
        npw = len(Gvec_Gc)
        l_pw_size = -(-npw // mpi.world.size)
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))
        fhxc_sGsG = np.zeros((ns * npw, ns * npw), dtype=complex)
        for s in range(ns):
            for iG in l_pw_range:
                for jG in range(npw):
                    fxc = fxc_sg[s].copy()
                    dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                    dG_v = np.dot(dG_c, icell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    ft_fxc = gd.integrate(np.exp(-1j * dGr_g) * fxc)
                    fhxc_sGsG[s * npw + iG, s * npw + jG] = ft_fxc
        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        Gq2_G = self.pd.G2_qG[iq]
        if (self.ibzq_qc[iq] == 0).all():
            Gq2_G[0] = 1.
        vq_G = 4 * np.pi / Gq2_G
        fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()
    prnt(file=self.fd)
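# In calculate_local_kernel() the triple loop evaluates the plane-wave matrix
# elements of a kernel that is local in real space,
#     f_xc(G, G') = (1/V) * \int dr  exp(-i (G - G') . r) f_xc[n(r)],
# so the result depends only on the difference G - G'.  The NumPy-only sketch
# below cross-checks one such matrix element against a single FFT on a toy
# cubic cell with made-up kernel values (grid size, cell edge and data are
# assumptions for illustration; this is not part of the GPAW routine above).

import numpy as np

n = 8                                    # grid points per axis (toy value)
L = 5.0                                  # cubic cell edge (toy value)
vol = L**3
dv = vol / n**3

rng = np.random.default_rng(0)
fxc_r = rng.random((n, n, n))            # stand-in for f_xc[n(r)] on the grid

# Direct evaluation for one reciprocal-vector difference dG = G - G'
dG_c = np.array([1, 0, 0])               # integer (reduced) coordinates
r = np.indices((n, n, n)) * (L / n)      # Cartesian grid point coordinates
phase = np.exp(-2j * np.pi * np.tensordot(dG_c / L, r, axes=1))
direct = (phase * fxc_r).sum() * dv / vol

# The same matrix element from a single FFT of the kernel
fft = np.fft.fftn(fxc_r) * dv / vol
print(np.allclose(direct, fft[tuple(dG_c)]))   # True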
def dscf_save_band(filename, paw, n):
    """Extract and save all information for band `n` to a tar file."""
    world, bd, gd, kd = paw.wfs.world, paw.wfs.bd, paw.wfs.gd, \
        KPointDescriptor(paw.wfs.nspins, paw.wfs.nibzkpts, paw.wfs.kpt_comm,
                         paw.wfs.gamma, paw.wfs.dtype)

    if world.rank == 0:
        # Minimal amount of information needed:
        w = Writer(filename)
        w.dimension('nspins', kd.nspins)
        w.dimension('nibzkpts', kd.nibzkpts)
        w.dimension('nproj', sum([setup.ni for setup in paw.wfs.setups]))
        ng = gd.get_size_of_global_array()
        w.dimension('ngptsx', ng[0])
        w.dimension('ngptsy', ng[1])
        w.dimension('ngptsz', ng[2])

    # Write projections:
    if world.rank == 0:
        w.add('Projection', ('nspins', 'nibzkpts', 'nproj'),
              dtype=kd.dtype)
    for s in range(kd.nspins):
        for k in range(kd.nibzkpts):
            all_P_ni = paw.wfs.collect_projections(k, s)  # gets all bands
            if world.rank == 0:
                w.fill(all_P_ni[n])

    # Write wave functions:
    if world.rank == 0:
        w.add('PseudoWaveFunction',
              ('nspins', 'nibzkpts', 'ngptsx', 'ngptsy', 'ngptsz'),
              dtype=kd.dtype)
    for s in range(kd.nspins):
        for k in range(kd.nibzkpts):
            psit_G = paw.wfs.get_wave_function_array(n, k, s)
            if world.rank == 0:
                w.fill(psit_G)

    if world.rank == 0:
        # Close the file here to ensure that the last wave function is
        # written to disk:
        w.close()

    # We don't want the slaves to start reading before the master has
    # finished writing:
    world.barrier()
def calculate_rkernel(self):
    gd = self.gd
    ng_c = gd.N_c
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    ns = self.calc.wfs.nspins
    n_g = self.n_g  # density on rough grid

    fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
    qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
    flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
    Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

    ng = np.prod(ng_c)  # number of grid points
    r_vg = gd.get_grid_point_coordinates()
    rx_g = r_vg[0].flatten()
    ry_g = r_vg[1].flatten()
    rz_g = r_vg[2].flatten()

    prnt('    %d grid points and %d plane waves at the Gamma point' %
         (ng, self.pd.ngmax), file=self.fd)

    # Unit cells
    R_Rv = []
    weight_R = []
    nR_v = self.unit_cells
    nR = np.prod(nR_v)
    for i in range(-nR_v[0] + 1, nR_v[0]):
        for j in range(-nR_v[1] + 1, nR_v[1]):
            for h in range(-nR_v[2] + 1, nR_v[2]):
                R_Rv.append(i * cell_cv[0] +
                            j * cell_cv[1] +
                            h * cell_cv[2])
                weight_R.append((nR_v[0] - abs(i)) *
                                (nR_v[1] - abs(j)) *
                                (nR_v[2] - abs(h)) / float(nR))
    if nR > 1:
        # with more than one unit cell only the exchange kernel is
        # calculated on the grid. The bare Coulomb kernel is added
        # in PW basis and Vlocal_g only the exchange part
        dv = self.calc.density.gd.dv
        gc = (3 * dv / 4 / np.pi)**(1 / 3.)
        Vlocal_g -= 2 * np.pi * gc**2 / dv
        prnt('    Lattice point sampling: ' +
             '(%s x %s x %s)^2 ' % (nR_v[0], nR_v[1], nR_v[2]) +
             ' Reduced to %s lattice points' % len(R_Rv), file=self.fd)

    l_g_size = -(-ng // mpi.world.size)
    l_g_range = range(mpi.world.rank * l_g_size,
                      min((mpi.world.rank + 1) * l_g_size, ng))

    fhxc_qsGr = {}
    for iq in range(len(self.ibzq_qc)):
        fhxc_qsGr[iq] = np.zeros((ns, len(self.pd.G2_qG[iq]),
                                  len(l_g_range)), dtype=complex)

    inv_error = np.seterr()
    np.seterr(invalid='ignore')
    np.seterr(divide='ignore')

    t0 = time()
    # Loop over Lattice points
    for i, R_v in enumerate(R_Rv):
        # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
        if i == 1:
            prnt('      Finished 1 cell in %s seconds' % int(time() - t0) +
                 ' - estimated %s seconds left' %
                 int((len(R_Rv) - 1) * (time() - t0)), file=self.fd)
            self.fd.flush()
        if len(R_Rv) > 5:
            if (i + 1) % (len(R_Rv) / 5 + 1) == 0:
                prnt('      Finished %s cells in %s seconds' %
                     (i, int(time() - t0)) +
                     ' - estimated %s seconds left' %
                     int((len(R_Rv) - i) * (time() - t0) / i), file=self.fd)
                self.fd.flush()
        for g in l_g_range:
            rx = rx_g[g] + R_v[0]
            ry = ry_g[g] + R_v[1]
            rz = rz_g[g] + R_v[2]

            # |r-r'-R_i|
            rr = ((r_vg[0] - rx)**2 +
                  (r_vg[1] - ry)**2 +
                  (r_vg[2] - rz)**2)**0.5

            n_av = (n_g + n_g.flatten()[g]) / 2.
            fx_g = ns * self.get_fxc_g(n_av, index=g)
            qc_g = (-4 * np.pi * ns / fx_g)**0.5
            x = qc_g * rr
            osc_x = np.sin(x) - x * np.cos(x)
            f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
            if nR > 1:  # include only exchange part of the kernel here
                V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
            else:  # include the full kernel (also hartree part)
                V_rr = (sici(x)[0] * 2 / np.pi) / rr

            # Terms with r = r'
            if (np.abs(R_v) < 0.001).all():
                tmp_flat = f_rr.flatten()
                tmp_flat[g] = flocal_g.flatten()[g]
                f_rr = tmp_flat.reshape(ng_c)

                tmp_flat = V_rr.flatten()
                tmp_flat[g] = Vlocal_g.flatten()[g]
                V_rr = tmp_flat.reshape(ng_c)

                del tmp_flat

            f_rr[np.where(n_av < self.density_cut)] = 0.0
            V_rr[np.where(n_av < self.density_cut)] = 0.0

            f_rr *= weight_R[i]
            V_rr *= weight_R[i]

            # r-r'-R_i
            r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

            # Fourier transform of r
            for iq, q in enumerate(self.ibzq_qc):
                q_v = np.dot(q, icell_cv)
                e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                if ns == 2:
                    f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

    mpi.world.barrier()

    np.seterr(**inv_error)

    for iq, q in enumerate(self.ibzq_qc):
        npw = len(self.pd.G2_qG[iq])
        fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
        l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))

        if mpi.world.size > 1:
            # redistribute grid and plane waves in fhxc_qsGr[iq]
            bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
            bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
            bd1 = bg1.new_descriptor(npw, ng, npw, -(-ng / mpi.world.size))
            bd2 = bg2.new_descriptor(npw, ng, -(-npw / mpi.world.size), ng)

            fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
            if ns == 2:
                Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

            r = Redistributor(bg1.comm, bd1, bd2)
            r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
            if ns == 2:
                r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
        else:
            fhxc_Glr = fhxc_qsGr[iq][0]
            if ns == 2:
                Koff_Glr = fhxc_qsGr[iq][1]

        # Fourier transform of r'
        for iG in range(len(l_pw_range)):
            f_g = fhxc_Glr[iG].reshape(ng_c)
            f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
            fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()

            if ns == 2:
                v_g = Koff_Glr[iG].reshape(ng_c)
                v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

        if ns == 2:  # f_00 = f_11 and f_01 = f_10
            fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
            fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            if nR > 1:  # add Hartree kernel evaluated in PW basis
                Gq2_G = self.pd.G2_qG[iq]
                if (q == 0).all():
                    Gq2_G[0] = 1.
                vq_G = 4 * np.pi / Gq2_G
                fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()

    prnt(file=self.fd)
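# In calculate_rkernel() the flocal_g / Vlocal_g values patched in at r = r'
# are the small-separation limits of the renormalized kernels: with
# x = q_c |r - r'| and sin x - x cos x ~ x^3 / 3, the exchange part
#     f(r, r') = f_xc (sin x - x cos x) / (2 pi^2 |r - r'|^3)
# tends to q_c^3 f_xc / (6 pi^2).  The NumPy-only sketch below checks this
# numerically for one made-up kernel value (fx and the separation rr are
# assumptions chosen for illustration, not values taken from the code above).

import numpy as np

fx = -0.7                        # toy local exchange kernel value f_xc[n]
qc = (-4 * np.pi / fx)**0.5      # corresponding cutoff wave vector (ns = 1)
flocal = qc**3 * fx / (6 * np.pi**2)

rr = 1e-3                        # small separation |r - r'|
x = qc * rr
f_rr = fx * (np.sin(x) - x * np.cos(x)) / (2 * np.pi**2 * rr**3)

print(np.isclose(f_rr, flocal, rtol=1e-5))   # True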
def get_phi_qaGp(self):
    N1_max = 0
    N2_max = 0
    natoms = len(self.calc.wfs.setups)
    for id in range(natoms):
        N1 = self.npw
        N2 = self.calc.wfs.setups[id].ni**2
        if N1 > N1_max:
            N1_max = N1
        if N2 > N2_max:
            N2_max = N2

    nbzq = self.kd.nbzkpts
    nbzq, nq_local, q_start, q_end = parallel_partition(
        nbzq, world.rank, world.size, reshape=False)
    phimax_qaGp = np.zeros((nq_local, natoms, N1_max, N2_max), dtype=complex)
    #phimax_qaGp = np.zeros((nbzq, natoms, N1_max, N2_max), dtype=complex)

    t0 = time()
    for iq in range(nq_local):
        q_c = self.bzq_qc[iq + q_start]
        tmp_aGp = self.get_phi_aGp(q_c, parallel=False)
        for id in range(natoms):
            N1, N2 = tmp_aGp[id].shape
            phimax_qaGp[iq, id, :N1, :N2] = tmp_aGp[id]
        self.timing(iq * world.size, t0, nq_local, 'iq')
    world.barrier()

    # Write to disk
    filename = 'phi_qaGp'
    if world.rank == 0:
        w = Writer(filename)
        w.dimension('nbzq', nbzq)
        w.dimension('natoms', natoms)
        w.dimension('nG', N1_max)
        w.dimension('nii', N2_max)
        w.add('phi_qaGp', ('nbzq', 'natoms', 'nG', 'nii'), dtype=complex)

    for q in range(nbzq):
        residual = nbzq % size
        N_local = nbzq // size
        if q < residual * (N_local + 1):
            qrank = q // (N_local + 1)
        else:
            qrank = (q - residual * (N_local + 1)) // N_local + residual

        if qrank == 0:
            if world.rank == 0:
                phi_aGp = phimax_qaGp[q - q_start]
        else:
            if world.rank == qrank:
                phi_aGp = phimax_qaGp[q - q_start]
                world.send(phi_aGp, 0, q)
            elif world.rank == 0:
                world.receive(phi_aGp, qrank, q)

        if world.rank == 0:
            w.fill(phi_aGp)
    world.barrier()

    if world.rank == 0:
        w.close()

    return
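# The q-loop that gathers phi_aGp onto rank 0 recovers the owner of each
# q-point arithmetically from the block distribution used by
# parallel_partition: the first nbzq % size ranks hold one extra point.
# The pure-Python sketch below restates that owner-rank formula and checks
# it against an explicit block distribution for toy sizes (nbzq = 10,
# size = 4 are made-up numbers for illustration).

def owner(q, nbzq, size):
    residual = nbzq % size
    N_local = nbzq // size
    if q < residual * (N_local + 1):
        return q // (N_local + 1)
    return (q - residual * (N_local + 1)) // N_local + residual

nbzq, size = 10, 4
blocks = []
for rank in range(size):
    n = nbzq // size + (1 if rank < nbzq % size else 0)
    blocks += [rank] * n

assert [owner(q, nbzq, size) for q in range(nbzq)] == blocks
print(blocks)   # [0, 0, 0, 1, 1, 1, 2, 2, 3, 3]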