def update(self, x):
    """Update progress-bar (0 <= x <= 1).

    Does nothing when ``x`` is 0 or when the bar is already marked as
    done.  Otherwise one of three renderings is written to ``self.fd``:

    * ``self.tty``: the line is redrawn (it starts with a carriage
      return) with elapsed time, estimated total time, percentage, a bar
      ending in a rotating symbol, and ``maxrss()`` scaled by 1024**2.
    * ``self.nobar``: a plain text line each time a new step is reached.
    * otherwise: ``'-'`` characters are appended as steps are reached.

    When ``x == 1`` the closing text is written and ``self.done`` is set.
    """
    if x == 0 or self.done:
        return

    # Number of steps (bar width in characters) for each output mode.
    if self.tty:
        N = 35
    elif self.nobar:
        N = 10
    else:
        N = 40

    n = int(N * x)  # steps completed so far
    t = time() - self.t  # elapsed time since self.t
    t_dt = self.format_time(t)
    est = self.format_time(t / x)  # linear extrapolation to x == 1
    p = functools.partial(print, file=self.fd)

    if self.tty:
        bar = '-' * (n - 1) + self.symbols[int(t % len(self.symbols))]
        p(('\r{0} / {1} ({2:.0f}%) |{3:' + str(N) + '}| ').format(
            t_dt, est, x * 100, bar), end='')
        # Write to self.fd like everything else; a bare print() would
        # send the memory figure to sys.stdout instead.
        p(' {0:.0f} MB/core'.format(maxrss() / 1024**2), end='')
        if x == 1:
            p()
            self.done = True
        self.fd.flush()
    elif self.nobar:
        if self.n is None:
            p(('Started: {0:.0f} MB/core').format(maxrss() / 1024**2))
            self.n = 0
        if n > self.n:
            p(('{0} of {1} ({2:.0f}%) {3:.0f} MB/core').format(
                t_dt, est, x * 100, maxrss() / 1024**2))
            self.fd.flush()
            self.n = n
        if x == 1:
            p('Finished in {0}'.format(t_dt))
            self.fd.flush()
            self.done = True
    else:
        if self.n is None:
            p('{0}s |'.format(t / x), end='')
            self.n = 0
        if n > self.n:
            # Only the newly completed steps are appended.
            p('-' * (n - self.n), end='')
            self.fd.flush()
            self.n = n
        if x == 1:
            p('| Time: {0:.3f}s'.format(t))
            self.fd.flush()
            self.done = True
def update(self, x):
    """Update progress-bar (0 <= x <= 1).

    Returns immediately when ``x`` is 0 or ``self.done`` is set.  All
    output goes to ``self.fd``.  Three modes exist:

    * ``self.tty``: redraw one line (carriage return first) showing
      elapsed / estimated time, percentage, the bar and ``maxrss()``
      divided by 1024**2.
    * ``self.nobar``: print a text line whenever a new step is reached.
    * otherwise: extend a bar of ``'-'`` characters.

    Reaching ``x == 1`` writes the final text and sets ``self.done``.
    """
    if x == 0 or self.done:
        return

    # Number of discrete steps used by each mode.
    if self.tty:
        N = 35
    elif self.nobar:
        N = 10
    else:
        N = 40

    n = int(N * x)
    t = time() - self.t
    t_dt = self.format_time(t)
    est = self.format_time(t / x)
    p = functools.partial(print, file=self.fd)

    if self.tty:
        bar = '-' * (n - 1) + self.symbols[int(t % len(self.symbols))]
        p(('\r{0} / {1} ({2:.0f}%) |{3:' + str(N) + '}| ')
          .format(t_dt, est, x * 100, bar), end='')
        # Use p() so the memory figure lands on self.fd together with the
        # rest of the line; plain print() would write to sys.stdout.
        p(' {0:.0f} MB/core'.format(maxrss() / 1024**2), end='')
        if x == 1:
            p()
            self.done = True
        self.fd.flush()
    elif self.nobar:
        if self.n is None:
            p(('Started: {0:.0f} MB/core')
              .format(maxrss() / 1024**2))
            self.n = 0
        if n > self.n:
            p(('{0} of {1} ({2:.0f}%) {3:.0f} MB/core')
              .format(t_dt, est, x * 100, maxrss() / 1024**2))
            self.fd.flush()
            self.n = n
        if x == 1:
            p('Finished in {0}'.format(t_dt))
            self.fd.flush()
            self.done = True
    else:
        if self.n is None:
            p('{0}s |'.format(t / x), end='')
            self.n = 0
        if n > self.n:
            # Append only the steps gained since the previous call.
            p('-' * (n - self.n), end='')
            self.fd.flush()
            self.n = n
        if x == 1:
            p('| Time: {0:.3f}s'.format(t))
            self.fd.flush()
            self.done = True
def print_memory_estimate(self, log=None, maxdepth=-1):
    """Log the estimated memory usage of this object and its components.

    ``log`` is the callable used for output (``self.log`` when omitted);
    its ``fd`` attribute receives the component breakdown.  ``maxdepth``
    is passed on to the tree writer as the maximum nesting level shown.

    The object must have been initialize()'d, but large arrays need not
    be allocated.
    """
    # NOTE. This should work with --dry-run=N
    #
    # However, the initial overhead estimate is wrong if this method
    # is called within a real mpirun/gpaw-python context.
    out = self.log if log is None else log

    out('Memory estimate:')
    # initial overhead includes part of Hamiltonian!
    rss_now = maxrss()
    out(' Process memory now: %.2f MiB' % (rss_now / 1024.0**2))

    root = MemNode('Calculator', 0)
    root.indent = ' '
    try:
        self.estimate_memory(root)
    except AttributeError as err:
        # Report the problem but still print whatever was collected.
        for message in ('Attribute error: %r' % err,
                        'Some object probably lacks estimate_memory() method',
                        'Memory breakdown may be incomplete'):
            out(message)

    root.calculate_size()
    root.write(out.fd, maxdepth=maxdepth, depth=1)
    out()
def print_memory_estimate(self, txt=None, maxdepth=-1):
    """Print estimated memory usage for PAW object and components.

    ``txt`` is the stream written to (``self.txt`` when omitted).
    maxdepth is the maximum nesting level of displayed components.

    The PAW object must be initialize()'d, but needs not have large
    arrays allocated.
    """
    # NOTE. This should work with --dry-run=N
    #
    # However, the initial overhead estimate is wrong if this method
    # is called within a real mpirun/gpaw-python context.
    if txt is None:
        txt = self.txt
    txt.write('Memory estimate\n')
    txt.write('---------------\n')

    mem_init = maxrss()  # initial overhead includes part of Hamiltonian!
    txt.write('Process memory now: %.2f MiB\n' % (mem_init / 1024.0**2))

    mem = MemNode('Calculator', 0)
    try:
        self.estimate_memory(mem)
    except AttributeError as m:
        # "except X as m" is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3.  Each message ends
        # with a newline so the three diagnostics do not run together.
        txt.write('Attribute error: %r\n' % m)
        txt.write('Some object probably lacks estimate_memory() method\n')
        txt.write('Memory breakdown may be incomplete\n')
    # NOTE(review): the visible block ends here; ``mem`` is not written
    # out and ``maxdepth`` is not used within it -- confirm against the
    # full file.
def estimate_memory(self, mem):
    """Fill ``mem`` with a memory estimate for this object.

    A subnode 'Initial overhead' receives the current ``maxrss()`` value;
    the density, Hamiltonian and wave-function objects then each report
    into a subnode of their own.
    """
    # XXX initial overhead includes part of Hamiltonian
    mem.subnode('Initial overhead', maxrss())

    components = (
        ('Density', self.density),
        ('Hamiltonian', self.hamiltonian),
        ('Wavefunctions', self.wfs),
    )
    for label, component in components:
        node = mem.subnode(label)
        component.estimate_memory(node)
def print_chi(self, pd):
    """Write the chi0 calculation parameters and memory estimates.

    Everything is printed to ``self.fd``.  When
    ``extra_parameters['df_dry_run']`` is set, a ``DryRunCommunicator``
    of that size stands in for ``self.world`` in the size figures.
    """
    calc = self.calc
    gd = calc.wfs.gd

    dry_size = extra_parameters.get('df_dry_run')
    if dry_size:
        from gpaw.mpi import DryRunCommunicator
        world = DryRunCommunicator(dry_size)
    else:
        world = self.world

    # Collect all figures first, then print them.
    q_c = pd.kd.bzk_kc[0]
    nw = len(self.omega_w)
    ecut = self.ecut * Hartree
    ns = calc.wfs.nspins
    nbands = self.nbands
    nk = calc.wfs.kd.nbzkpts
    nik = calc.wfs.kd.nibzkpts
    ngmax = pd.ngmax
    eta = self.eta * Hartree
    wsize = world.size
    knsize = self.kncomm.size
    nocc = self.nocc1
    npocc = self.nocc2

    ngridpoints = gd.N_c[0] * gd.N_c[1] * gd.N_c[2]
    # Ceiling division: occupied states handled per rank.
    nstat = (ns * npocc + world.size - 1) // world.size
    occsize = nstat * ngridpoints * 16. / 1024**2
    bsize = self.blockcomm.size
    chisize = nw * pd.ngmax**2 * 16. / 1024**2 / bsize

    def emit(*text):
        print(*text, file=self.fd)

    emit('%s' % ctime())
    parameter_lines = (
        'Called response.chi0.calculate with',
        ' q_c: [%f, %f, %f]' % (q_c[0], q_c[1], q_c[2]),
        ' Number of frequency points: %d' % nw,
        ' Planewave cutoff: %f' % ecut,
        ' Number of spins: %d' % ns,
        ' Number of bands: %d' % nbands,
        ' Number of kpoints: %d' % nk,
        ' Number of irredicible kpoints: %d' % nik,
        ' Number of planewaves: %d' % ngmax,
        ' Broadening (eta): %f' % eta,
        ' world.size: %d' % wsize,
        ' kncomm.size: %d' % knsize,
        ' blockcomm.size: %d' % bsize,
        ' Number of completely occupied states: %d' % nocc,
        ' Number of partially occupied states: %d' % npocc,
    )
    for line in parameter_lines:
        emit(line)
    emit()

    emit(' Memory estimate of potentially large arrays:')
    emit(' chi0_wGG: %f M / cpu' % chisize)
    emit(' Occupied states: %f M / cpu' % occsize)
    emit(' Memory usage before allocation: %f M / cpu' %
         (maxrss() / 1024**2))
    emit()
def __del__(self):
    """Destructor: Write timing output before closing.

    Outside dry-run mode this reports ``maxrss()`` (in MiB below
    1024**3, in GiB otherwise) through ``self.text`` and writes the
    timer summary to ``self.txt``.
    """
    # __del__ also runs on objects whose construction failed before the
    # output stream was set up; in that case there is nothing to report.
    if getattr(self, 'txt', None) is None:
        return

    if not dry_run:
        mr = maxrss()
        # A non-positive value is treated as "not available" and skipped.
        if mr > 0:
            if mr < 1024.0**3:
                self.text('Memory usage: %.2f MiB' % (mr / 1024.0**2))
            else:
                self.text('Memory usage: %.2f GiB' % (mr / 1024.0**3))

        self.timer.write(self.txt)
def __del__(self):
    """Destructor: report peak memory and timings before going away.

    Nothing happens when the object has no ``txt`` stream or when running
    in dry-run mode.
    """
    if getattr(self, 'txt', None) is None:
        return
    if dry_run:
        return

    peak = maxrss()
    if peak > 0:
        # Pick the unit label and divisor, then format once.
        if peak < 1024.0**3:
            unit, scale = 'MB', 1024.0**2
        else:
            unit, scale = 'GB', 1024.0**3
        self.text('Memory usage: %.2f %s' % (peak / scale, unit))

    self.timer.write(self.txt)
def print_chi(self, pd):
    """Print the chi0 calculation summary to ``self.fd``.

    Lists the input parameters, the parallelization figures and rough
    memory estimates.  With ``extra_parameters['df_dry_run']`` set, a
    ``DryRunCommunicator`` of that size replaces ``self.world``.
    """
    calc = self.calc
    gd = calc.wfs.gd
    ns = calc.wfs.nspins
    nk = calc.wfs.kd.nbzkpts
    nb = self.nocc2

    dry_size = extra_parameters.get('df_dry_run')
    if dry_size:
        from gpaw.mpi import DryRunCommunicator
        world = DryRunCommunicator(dry_size)
    else:
        world = self.world

    nw = len(self.omega_w)
    q_c = pd.kd.bzk_kc[0]
    # Ceiling division: terms of the chi-sum handled by each rank.
    nstat = (ns * nk * nb + world.size - 1) // world.size

    def emit(text):
        print(text, file=self.fd)

    emit('%s' % ctime())
    emit('Called response.chi0.calculate with')
    emit(' q_c: [%f, %f, %f]' % (q_c[0], q_c[1], q_c[2]))
    emit(' Number of frequency points : %d' % nw)
    emit(' Planewave cutoff: %f' % (self.ecut * Hartree))
    emit(' Number of spins: %d' % ns)
    emit(' Number of bands: %d' % self.nbands)
    emit(' Number of kpoints: %d' % nk)
    emit(' Number of planewaves: %d' % pd.ngmax)
    emit(' Broadening (eta): %f' % (self.eta * Hartree))
    emit(' Keep occupied states: %s' % self.keep_occupied_states)
    emit('')

    emit(' Related to parallelization')
    emit(' world.size: %d' % world.size)
    emit(' Number of completely occupied states: %d' % self.nocc1)
    emit(' Number of partially occupied states: %d' % self.nocc2)
    emit(' Number of terms handled in chi-sum by each rank: %d' % nstat)
    emit('')

    emit(' Memory estimate:')
    chi_mb = nw * pd.ngmax ** 2 * 16.0 / 1024 ** 2
    emit(' chi0_wGG: %f M / cpu' % chi_mb)
    occ_mb = nstat * gd.N_c[0] * gd.N_c[1] * gd.N_c[2] * 16.0 / 1024 ** 2
    emit(' Occupied states: %f M / cpu' % occ_mb)
    emit(' Max mem sofar : %f M / cpu' % (maxrss() / 1024 ** 2))
    emit('')
def __del__(self):
    """Destructor: log peak memory usage and the date.

    Skipped entirely in dry-run mode.
    """
    if dry_run:
        return

    try:
        peak = maxrss()
    except (LookupError, TypeError, NameError):
        # Thing can get weird during interpreter shutdown ...
        peak = 0

    if peak > 0:
        # Choose unit label and divisor, then format once.
        if peak < 1024**3:
            unit, scale = 'MiB', 1024**2
        else:
            unit, scale = 'GiB', 1024**3
        self('Memory usage: %.2f %s' % (peak / scale, unit))

    self('Date: ' + time.asctime())
for i in range(mno): a.append(np.ones((msize, msize), dtype=dtype)) # must return and store the matrix to allocate memory return a, mmemory, mno # max memory to be used for allocation matrices in MiB (2**20 B) maxmems = [256] * 5 # exceed 1GiB to get resource.getrusage problem msize = 256 # matrix dimensions a = [] # initial memory # Peak resident set size ("high water mark") (in bytes) mem0 = maxrss() mem = mem0 for n, maxmem in enumerate(maxmems): try: a1, mmemory, mno = allocate_matrix(maxmem, msize, dtype) # store the matrix a.append(a1) # memory used newmem = maxrss() memused = newmem - mem mset = 'Matrix set No ' + str(n) + ':' if verbose: print mset,
def initialize(self):
    """Prepare the response-function calculation.

    Prints a start banner, runs ``BASECHI.initialize``, sets up the
    frequency-grid attributes (``dw``, ``Nw``, ``NwS`` and, for the
    Hilbert transform, ``wmax`` and ``wcut``), chooses ``nvalbands``,
    calls ``parallel_init`` and ``print_chi``, and finally computes the
    PAW terms and the kernel ``Kxc_GG``.

    Raises:
        SystemExit: if ``extra_parameters['df_dry_run']`` is set, right
            after the calculation information has been printed.
    """
    self.printtxt('')
    self.printtxt('-----------------------------------------')
    self.printtxt('Response function calculation started at:')
    self.starttime = time()
    self.printtxt(ctime())

    BASECHI.initialize(self)

    # Frequency init
    self.dw = None
    if len(self.w_w) == 1:
        # A single frequency cannot define a grid spacing, so the Hilbert
        # transform is switched off.  (The attribute used to be
        # misspelled "HilberTrans", which left hilbert_trans untouched
        # and made the branch below index w_w[1].)
        self.hilbert_trans = False

    if self.hilbert_trans:
        self.dw = self.w_w[1] - self.w_w[0]
        # make sure its linear w grid
        assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
        assert self.w_w.max() == self.w_w[-1]

        self.dw /= Hartree
        self.w_w /= Hartree
        self.wmax = self.w_w[-1]
        self.wcut = self.wmax + 5. / Hartree
        self.Nw = int(self.wmax / self.dw) + 1
        self.NwS = int(self.wcut / self.dw) + 1
    else:
        self.Nw = len(self.w_w)
        self.NwS = 0
        if len(self.w_w) > 1:
            self.dw = self.w_w[1] - self.w_w[0]
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
            self.dw /= Hartree

    # By default all bands are used.  This is also the fallback when the
    # Hilbert transform is on but the search below finds no band, which
    # previously left nvalbands undefined.
    self.nvalbands = self.nbands
    if self.hilbert_trans:
        # for band parallelization: first band whose occupations are
        # below ftol in every row of f_kn.
        for n in range(self.nbands):
            if (self.f_kn[:, n] - self.ftol < 0).all():
                self.nvalbands = n
                break

    # Parallelization initialize
    self.parallel_init()

    # Printing calculation information
    self.print_chi()

    if extra_parameters.get('df_dry_run'):
        raise SystemExit

    calc = self.calc

    # For LCAO wfs
    if calc.input_parameters['mode'] == 'lcao':
        calc.initialize_positions()
    self.printtxt(' GS calculator : %f M / cpu' % (maxrss() / 1024**2))

    # PAW part init
    # calculate <phi_i | e**(-i(q+G).r) | phi_j>
    # G != 0 part
    self.get_phi_aGp()

    # Calculate ALDA kernel for EELS spectrum
    # Use RPA kernel for Optical spectrum and rpa correlation energy
    if not self.optical_limit and np.dtype(self.w_w[0]) == float:
        R_av = calc.atoms.positions / Bohr
        self.Kxc_GG = calculate_Kxc(
            self.gd,  # global grid
            calc.density.nt_sG,
            self.npw, self.Gvec_Gc,
            self.nG, self.vol,
            self.bcell_cv, R_av,
            calc.wfs.setups,
            calc.density.D_asp)
        self.printtxt('Finished ALDA kernel ! ')
    else:
        self.Kxc_GG = np.zeros((self.npw, self.npw))
        self.printtxt('Use RPA for optical spectrum ! ')
        self.printtxt('')

    return
for i in range(mno): a.append(np.ones((msize, msize), dtype=dtype)) # must return and store the matrix to allocate memory return a, mmemory, mno # max memory to be used for allocation matrices in MiB (2**20 B) maxmems = [256] * 5 # exceed 1GiB to get resource.getrusage problem msize = 256 # matrix dimensions a = [] # initial memory # Peak resident set size ("high water mark") (in bytes) mem0 = maxrss() mem = mem0 for n, maxmem in enumerate(maxmems): try: a1, mmemory, mno = allocate_matrix(maxmem, msize, dtype) # store the matrix a.append(a1) # memory used newmem = maxrss() memused = newmem - mem mset = 'Matrix set No ' + str(n) + ':' if verbose: print(mset, end=' ')
def initialize(self):
    """Prepare the response-function calculation.

    Steps, in order: print a start banner, call ``BASECHI.initialize``,
    derive the frequency-grid attributes (``dw``, ``Nw``, ``NwS``; also
    ``wmax`` and ``wcut`` for the Hilbert transform), choose
    ``nvalbands``, call ``parallel_init`` and ``print_chi``, then compute
    the PAW terms and the kernel ``Kxc_GG``.

    Raises:
        SystemExit: if ``extra_parameters['df_dry_run']`` is set, right
            after the calculation information has been printed.
    """
    self.printtxt('')
    self.printtxt('-----------------------------------------')
    self.printtxt('Response function calculation started at:')
    self.starttime = time()
    self.printtxt(ctime())

    BASECHI.initialize(self)

    # Frequency init
    self.dw = None
    if len(self.w_w) == 1:
        # With one frequency there is no grid spacing, so disable the
        # Hilbert transform.  The former spelling "HilberTrans" set an
        # attribute nobody reads, and the branch below then failed on
        # w_w[1].
        self.hilbert_trans = False

    if self.hilbert_trans:
        self.dw = self.w_w[1] - self.w_w[0]
        # make sure its linear w grid
        assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
        assert self.w_w.max() == self.w_w[-1]

        self.dw /= Hartree
        self.w_w /= Hartree
        self.wmax = self.w_w[-1]
        self.wcut = self.wmax + 5. / Hartree
        self.Nw = int(self.wmax / self.dw) + 1
        self.NwS = int(self.wcut / self.dw) + 1
    else:
        self.Nw = len(self.w_w)
        self.NwS = 0
        if len(self.w_w) > 1:
            self.dw = self.w_w[1] - self.w_w[0]
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
            self.dw /= Hartree

    # All bands are used unless the Hilbert-transform search below finds
    # an earlier cut-off; setting the default first guarantees that
    # nvalbands always exists.
    self.nvalbands = self.nbands
    if self.hilbert_trans:
        # for band parallelization: stop at the first band whose
        # occupations are below ftol in every row of f_kn.
        for n in range(self.nbands):
            if (self.f_kn[:, n] - self.ftol < 0).all():
                self.nvalbands = n
                break

    # Parallelization initialize
    self.parallel_init()

    # Printing calculation information
    self.print_chi()

    if extra_parameters.get('df_dry_run'):
        raise SystemExit

    calc = self.calc

    # For LCAO wfs
    if calc.input_parameters['mode'] == 'lcao':
        calc.initialize_positions()
    self.printtxt(' GS calculator : %f M / cpu' % (maxrss() / 1024**2))

    # PAW part init
    # calculate <phi_i | e**(-i(q+G).r) | phi_j>
    # G != 0 part
    self.get_phi_aGp()

    # Calculate ALDA kernel for EELS spectrum
    # Use RPA kernel for Optical spectrum and rpa correlation energy
    if not self.optical_limit and np.dtype(self.w_w[0]) == float:
        R_av = calc.atoms.positions / Bohr
        self.Kxc_GG = calculate_Kxc(self.gd,  # global grid
                                    calc.density.nt_sG,
                                    self.npw, self.Gvec_Gc,
                                    self.nG, self.vol,
                                    self.bcell_cv, R_av,
                                    calc.wfs.setups,
                                    calc.density.D_asp)
        self.printtxt('Finished ALDA kernel ! ')
    else:
        self.Kxc_GG = np.zeros((self.npw, self.npw))
        self.printtxt('Use RPA for optical spectrum ! ')
        self.printtxt('')

    return
def initialize(self, simple_version=False):
    """Prepare the response-function calculation.

    Prints a banner, runs ``BASECHI.initialize``, sets the frequency-grid
    attributes, determines ``nvalbands``, then calls ``parallel_init``
    and ``print_chi``.  With ``simple_version=True`` the method returns
    after the memory line; otherwise the PAW terms and, for the ALDA
    functionals, the Coulomb and xc kernels are computed as well.

    Raises SystemExit when ``extra_parameters['df_dry_run']`` is set.
    """
    for banner in ("",
                   "-----------------------------------------",
                   "Response function calculation started at:"):
        self.printtxt(banner)
    self.starttime = time()
    self.printtxt(ctime())

    BASECHI.initialize(self)

    # Frequency grid
    self.dw = None
    n_freq = len(self.w_w)
    if n_freq == 1:
        self.hilbert_trans = False

    if not self.hilbert_trans:
        self.Nw = n_freq
        self.NwS = 0
        if n_freq > 2:
            self.dw = self.w_w[1] - self.w_w[0]
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
            self.dw /= Hartree
    else:
        self.dw = self.w_w[1] - self.w_w[0]
        # The last grid point must be the largest frequency.
        assert self.w_w.max() == self.w_w[-1]
        self.dw /= Hartree
        self.w_w /= Hartree
        self.wmax = self.w_w[-1]
        self.wcut = self.wmax + 5.0 / Hartree
        self.Nw = n_freq
        self.NwS = int(self.wcut / self.dw) + 1

    # For each spin, find the first band whose occupations are below ftol
    # everywhere (0 when there is none); the largest such index, if
    # positive, replaces the default of "all bands".
    self.nvalbands = self.nbands
    first_empty = np.zeros(self.nspins, dtype=int)
    for spin in range(self.nspins):
        first_empty[spin] = next(
            (band for band in range(self.nbands)
             if (self.f_skn[spin][:, band] - self.ftol < 0).all()),
            0)
    highest = first_empty.max()
    if highest > 0:
        self.nvalbands = highest

    # Parallelization, then the printed summary
    self.parallel_init()
    self.print_chi()

    if extra_parameters.get("df_dry_run"):
        raise SystemExit

    calc = self.calc

    # LCAO wave functions need their positions initialized
    if calc.input_parameters["mode"] == "lcao":
        calc.initialize_positions()
    self.printtxt(" Max mem sofar : %f M / cpu" % (maxrss() / 1024 ** 2))

    if simple_version is True:
        return

    # PAW part: <phi_i | e**(-i(q+G).r) | phi_j>, G != 0 part
    self.phi_aGp, self.phiG0_avp = self.get_phi_aGp(alldir=True)
    self.printtxt("Finished phi_aGp !")
    sizes_mb = np.array([self.phi_aGp[a].size * 16 / 1024.0 ** 2
                         for a in range(len(self.phi_aGp))])
    self.printtxt(" Phi_aGp : %f M / cpu" % (sizes_mb.sum()))

    # Kernels (not used in chi0)
    R_av = calc.atoms.positions / Bohr
    if self.xc == "RPA":
        self.Kc_GG = None
        self.printtxt("RPA calculation.")
        return

    if self.xc in ("ALDA", "ALDA_X"):
        coulomb = CoulombKernel(vcut=self.vcut,
                                pbc=self.calc.atoms.pbc,
                                cell=self.acell_cv)
        self.Kc_GG = coulomb.calculate_Kc(self.q_c, self.Gvec_Gc,
                                          self.bcell_cv)

        padded_density = self.gd.zero_pad(calc.density.nt_sG)
        self.Kxc_sGG = calculate_Kxc(
            self.gd,  # global grid
            padded_density,
            self.npw,
            self.Gvec_Gc,
            self.gd.N_c,
            self.vol,
            self.bcell_cv,
            R_av,
            calc.wfs.setups,
            calc.density.D_asp,
            functional=self.xc,
            density_cut=self.density_cut,
        )
        self.printtxt("Finished %s kernel ! " % self.xc)
    return
def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii, m1, m2, spins):
    """Accumulate contributions into the supplied chi0 arrays.

    Loops over the entries of ``self.mysKn1n2``, skipping those whose
    spin is not in ``spins``, and for each band of ``kpt1`` adds the
    pair-density contribution to ``chi0_wGG`` (and, when ``q_c`` is
    close to zero, to ``chi0_wxvG`` and ``chi0_wvv``).  Afterwards the
    arrays are summed over the communicators, the missing triangle is
    filled in where applicable, the Hilbert transform is applied if
    ``self.hilbert`` is set, and the intraband term is added.

    Progress and ``maxrss()`` figures are printed to ``self.fd``.

    Returns the tuple ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``; the
    arrays are modified in place.
    """
    wfs = self.calc.wfs

    if self.keep_occupied_states:
        # Build all k-point objects up front so the loop can reuse them.
        self.mykpts = [self.get_k_point(s, K, n1, n2) for s, K, n1, n2 in self.mysKn1n2]

    numberofkpts = len(self.mysKn1n2)

    # Select the accumulation routine once, outside the loops.
    if self.eta == 0.0:
        update = self.update_hermitian
    elif self.hilbert:
        update = self.update_hilbert
    else:
        update = self.update

    q_c = pd.kd.bzk_kc[0]
    optical_limit = np.allclose(q_c, 0.0)
    print("\n Starting summation", file=self.fd)

    self.timer.start("Loop")
    # kpt1 occupied and kpt2 empty:
    for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
        if self.keep_occupied_states:
            kpt1 = self.mykpts[kn]
        else:
            kpt1 = self.get_k_point(s, K, n1, n2)

        if not kpt1.s in spins:
            continue

        with self.timer("k+q"):
            K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
        with self.timer("get k2"):
            kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
        with self.timer("fft-indices"):
            Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd, kpt1.shift_c - kpt2.shift_c)

        for n in range(kpt1.n2 - kpt1.n1):
            eps1 = kpt1.eps_n[n]

            # Only update if there exists deps <= omegamax
            if not self.omegamax is None:
                m = [m for m, d in enumerate(eps1 - kpt2.eps_n) if abs(d) <= self.omegamax]
            else:
                m = range(len(kpt2.eps_n))

            if not len(m):
                continue

            deps_m = (eps1 - kpt2.eps_n)[m]
            f1 = kpt1.f_n[n]
            with self.timer("conj"):
                ut1cc_R = kpt1.ut_nR[n].conj()
            with self.timer("paw"):
                C1_aGi = [np.dot(Q_Gii, P1_ni[n].conj()) for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)]
            n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2, pd, Q_G)[m]
            df_m = (f1 - kpt2.f_n)[m]

            # This is not quite right for degenerate partially occupied
            # bands, but good enough for now:
            df_m[df_m < 0] = 0.0

            if optical_limit:
                self.update_optical_limit(n, m, kpt1, kpt2, deps_m, df_m, n_mG, chi0_wxvG, chi0_wvv)

            update(n_mG, deps_m, df_m, chi0_wGG)

        if optical_limit and self.intraband:
            # Avoid that more ranks are summing up
            # the intraband contributions
            if kpt1.n1 == 0:
                self.update_intraband(kpt2, chi0_wvv)

        # Report progress roughly every tenth of the local k-points.
        if numberofkpts > 10 and kn % (numberofkpts // 10) == 0:
            print(
                " %s," % ctime()
                + " local Kpoint no: %d / %d," % (kn, numberofkpts)
                + "\n mem. used.: "
                + "%f M / cpu" % (maxrss() / 1024 ** 2),
                file=self.fd,
            )

    self.timer.stop("Loop")

    print(
        " %s, Finished kpoint sum" % ctime()
        + "\n mem. used.: "
        + "%f M / cpu" % (maxrss() / 1024 ** 2),
        file=self.fd,
    )

    with self.timer("Sum CHI_0"):
        # Sum one frequency slice at a time.
        for chi0_GG in chi0_wGG:
            self.kncomm.sum(chi0_GG)

        if optical_limit:
            self.world.sum(chi0_wxvG)
            self.world.sum(chi0_wvv)
            if self.intraband:
                self.world.sum(self.chi0_vv)

    print(
        " %s, Finished summation over ranks" % ctime()
        + "\n mem. used.: "
        + "%f M / cpu" % (maxrss() / 1024 ** 2),
        file=self.fd,
    )

    if self.eta == 0.0 or (self.hilbert and self.blockcomm.size == 1):
        # Fill in upper/lower triangle also:
        nG = pd.ngmax
        il = np.tril_indices(nG, -1)
        iu = il[::-1]  # swapped row/column indices: the mirrored triangle
        if self.hilbert:
            for chi0_GG in chi0_wGG:
                chi0_GG[il] = chi0_GG[iu].conj()
        else:
            for chi0_GG in chi0_wGG:
                chi0_GG[iu] = chi0_GG[il].conj()

    if self.hilbert:
        with self.timer("Hilbert transform"):
            ht = HilbertTransform(self.omega_w, self.eta, self.timeordered)
            ht(chi0_wGG)
            if optical_limit:
                ht(chi0_wvv)
                ht(chi0_wxvG)
        print("Hilbert transform done", file=self.fd)

    if optical_limit and self.intraband:  # Add intraband contribution
        omega_w = self.omega_w.copy()
        if omega_w[0] == 0.0:
            # Replace an exact zero so the division below is defined.
            omega_w[0] = 1e-14
        chi0_wvv += self.chi0_vv[np.newaxis] / (
            omega_w[:, np.newaxis, np.newaxis] * (omega_w[:, np.newaxis, np.newaxis] + 1j * self.eta)
        )

    return pd, chi0_wGG, chi0_wxvG, chi0_wvv
def initialize(self, simple_version=False):
    """Prepare the response-function calculation.

    After the banner and ``BASECHI.initialize`` the frequency-grid
    attributes and ``nvalbands`` are set, followed by ``parallel_init``
    and ``print_chi``.  ``simple_version=True`` stops after the memory
    line; otherwise the PAW terms are computed and, for 'ALDA' and
    'ALDA_X', the Coulomb and xc kernels.

    Raises SystemExit when ``extra_parameters['df_dry_run']`` is set.
    """
    for banner in ('',
                   '-----------------------------------------',
                   'Response function calculation started at:'):
        self.printtxt(banner)
    self.starttime = time()
    self.printtxt(ctime())

    BASECHI.initialize(self)

    # Frequency grid
    self.dw = None
    n_freq = len(self.w_w)
    if n_freq == 1:
        self.hilbert_trans = False

    if not self.hilbert_trans:
        self.Nw = n_freq
        self.NwS = 0
        if n_freq > 2:
            self.dw = self.w_w[1] - self.w_w[0]
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
            self.dw /= Hartree
    else:
        self.dw = self.w_w[1] - self.w_w[0]
        # The last grid point must be the largest frequency.
        assert self.w_w.max() == self.w_w[-1]
        self.dw /= Hartree
        self.w_w /= Hartree
        self.wmax = self.w_w[-1]
        self.wcut = self.wmax + 5. / Hartree
        self.Nw = n_freq
        self.NwS = int(self.wcut / self.dw) + 1

    # Per spin: index of the first band whose occupations are all below
    # ftol (0 if none).  A positive maximum overrides the default.
    self.nvalbands = self.nbands
    first_empty = np.zeros(self.nspins, dtype=int)
    for spin in range(self.nspins):
        first_empty[spin] = next(
            (band for band in range(self.nbands)
             if (self.f_skn[spin][:, band] - self.ftol < 0).all()),
            0)
    highest = first_empty.max()
    if highest > 0:
        self.nvalbands = highest

    # Parallelization, then the printed summary
    self.parallel_init()
    self.print_chi()

    if extra_parameters.get('df_dry_run'):
        raise SystemExit

    calc = self.calc

    # LCAO wave functions need their positions initialized
    if calc.input_parameters['mode'] == 'lcao':
        calc.initialize_positions()
    self.printtxt(' Max mem sofar : %f M / cpu' % (maxrss() / 1024**2))

    if simple_version is True:
        return

    # PAW part: <phi_i | e**(-i(q+G).r) | phi_j>, G != 0 part
    self.phi_aGp, self.phiG0_avp = self.get_phi_aGp(alldir=True)
    self.printtxt('Finished phi_aGp !')
    sizes_mb = np.array([self.phi_aGp[a].size * 16 / 1024.**2
                         for a in range(len(self.phi_aGp))])
    self.printtxt(' Phi_aGp : %f M / cpu' % (sizes_mb.sum()))

    # Kernels (not used in chi0)
    R_av = calc.atoms.positions / Bohr
    if self.xc == 'RPA':
        self.Kc_GG = None
        self.printtxt('RPA calculation.')
        return

    if self.xc in ('ALDA', 'ALDA_X'):
        coulomb = CoulombKernel(vcut=self.vcut,
                                pbc=self.calc.atoms.pbc,
                                cell=self.acell_cv)
        self.Kc_GG = coulomb.calculate_Kc(self.q_c, self.Gvec_Gc,
                                          self.bcell_cv)

        padded_density = self.gd.zero_pad(calc.density.nt_sG)
        self.Kxc_sGG = calculate_Kxc(self.gd,  # global grid
                                     padded_density,
                                     self.npw, self.Gvec_Gc,
                                     self.gd.N_c, self.vol,
                                     self.bcell_cv, R_av,
                                     calc.wfs.setups,
                                     calc.density.D_asp,
                                     functional=self.xc,
                                     density_cut=self.density_cut)
        self.printtxt('Finished %s kernel ! ' % self.xc)
    return
def print_chi(self, pd):
    """Print the chi0 calculation summary to ``self.fd``.

    Covers the input parameters, communicator sizes and estimates for
    the potentially large arrays.  With
    ``extra_parameters['df_dry_run']`` set, a ``DryRunCommunicator`` of
    that size replaces ``self.world``.
    """
    calc = self.calc
    gd = calc.wfs.gd

    dry_size = extra_parameters.get('df_dry_run')
    if dry_size:
        from gpaw.mpi import DryRunCommunicator
        world = DryRunCommunicator(dry_size)
    else:
        world = self.world

    def emit(text):
        print(text, file=self.fd)

    emit('%s' % ctime())
    emit('Called response.chi0.calculate with')

    q_c = pd.kd.bzk_kc[0]
    emit(' q_c: [%f, %f, %f]' % (q_c[0], q_c[1], q_c[2]))

    nw = len(self.omega_w)
    emit(' Number of frequency points: %d' % nw)

    emit(' Planewave cutoff: %f' % (self.ecut * Hartree))

    ns = calc.wfs.nspins
    emit(' Number of spins: %d' % ns)

    emit(' Number of bands: %d' % self.nbands)

    nk = calc.wfs.kd.nbzkpts
    emit(' Number of kpoints: %d' % nk)

    emit(' Number of irredicible kpoints: %d' % calc.wfs.kd.nibzkpts)
    emit(' Number of planewaves: %d' % pd.ngmax)
    emit(' Broadening (eta): %f' % (self.eta * Hartree))
    emit(' world.size: %d' % world.size)
    emit(' kncomm.size: %d' % self.kncomm.size)
    emit(' blockcomm.size: %d' % self.blockcomm.size)
    emit(' Number of completely occupied states: %d' % self.nocc1)

    npocc = self.nocc2
    emit(' Number of partially occupied states: %d' % npocc)

    emit(' Keep occupied states: %s' % self.keep_occupied_states)
    emit('')

    emit(' Memory estimate of potentially large arrays:')
    emit(' chi0_wGG: %f M / cpu' % (nw * pd.ngmax**2 * 16. / 1024**2))

    ngridpoints = gd.N_c[0] * gd.N_c[1] * gd.N_c[2]
    # Occupied states held per rank (ceiling division); every k-point
    # counts when the occupied states are kept.
    nstates = ns * nk * npocc if self.keep_occupied_states else ns * npocc
    nstat = (nstates + world.size - 1) // world.size
    emit(' Occupied states: %f M / cpu' %
         (nstat * ngridpoints * 16. / 1024**2))
    emit(' Memory usage before allocation: %f M / cpu' %
         (maxrss() / 1024**2))
    emit('')
def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, m1, m2, spins):
    """Accumulate contributions into the supplied chi0 arrays.

    Iterates over ``self.generate_pair_densities`` and feeds every
    non-None pair density to the selected update routine.  When the
    optical limit applies, ``chi0_wxvG`` and ``chi0_wvv`` (and, with
    ``self.intraband``, ``self.chi0_vv``) are updated as well.  The
    arrays are then summed over ``self.kncomm``, completed to full
    matrices where applicable, Hilbert-transformed if ``self.hilbert``
    is set, given the intraband term and, when ``self.unsymmetrized`` is
    set, symmetrized.

    Returns the tuple ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``; the
    arrays are modified in place.
    """
    # Choose which update method to use
    if self.eta == 0.0:
        update = self.update_hermitian
    elif self.hilbert:
        update = self.update_hilbert
    else:
        update = self.update

    q_c = pd.kd.bzk_kc[0]
    optical_limit = not self.no_optical_limit and np.allclose(q_c, 0.0)
    generator = self.generate_pair_densities

    # Use symmetries
    PWSA = PWSymmetryAnalyzer
    PWSA = PWSA(self.calc.wfs.kd, pd,
                disable_point_group=self.disable_point_group,
                disable_time_reversal=self.disable_time_reversal,
                timer=self.timer, txt=self.fd)

    # If chi's are supplied it
    # is assumed that they are symmetric
    # and we have to divide by the number of
    # symmetries if we are adding
    # the unsymmetric chi
    if self.unsymmetrized:
        nsym = PWSA.how_many_symmetries()

        if nsym > 1:
            chi0_wGG /= nsym
            if chi0_wxvG is not None:
                chi0_wxvG /= nsym
            if chi0_wvv is not None:
                chi0_wvv /= nsym

    # Calculate unsymmetrized chi or spectral function
    self.timer.start('Loop')
    for f2_m, df_m, deps_m, n_mG, n_mv, vel_mv in \
        generator(pd, m1, m2, spins, PWSA=PWSA,
                  disable_optical_limit=not optical_limit,
                  intraband=self.intraband,
                  use_more_memory=self.use_more_memory,
                  unsymmetrized=self.unsymmetrized):
        # If the generator returns None for a pair-density
        # then skip updating
        if n_mG is not None:
            update(np.ascontiguousarray(n_mG), deps_m, df_m, chi0_wGG)
        if optical_limit and n_mv is not None:
            self.update_optical_limit(n_mv, deps_m, df_m, n_mG,
                                      chi0_wxvG, chi0_wvv)
        if optical_limit and self.intraband and vel_mv is not None:
            self.update_intraband(f2_m, vel_mv, self.chi0_vv)
    self.timer.stop('Loop')

    # Sum chi
    with self.timer('Sum CHI_0'):
        # Sum one frequency slice at a time.
        for chi0_GG in chi0_wGG:
            self.kncomm.sum(chi0_GG)

        if optical_limit:
            self.kncomm.sum(chi0_wxvG)
            self.kncomm.sum(chi0_wvv)
            if self.intraband:
                self.kncomm.sum(self.chi0_vv)

    print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
          file=self.fd)

    if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
        # Fill in upper/lower triangle also:
        nG = pd.ngmax
        il = np.tril_indices(nG, -1)
        iu = il[::-1]  # swapped row/column indices: the mirrored triangle
        if self.hilbert:
            for chi0_GG in chi0_wGG:
                chi0_GG[il] = chi0_GG[iu].conj()
        else:
            for chi0_GG in chi0_wGG:
                chi0_GG[iu] = chi0_GG[il].conj()

    if self.hilbert:
        with self.timer('Hilbert transform'):
            ht = HilbertTransform(self.omega_w, self.eta,
                                  self.timeordered)
            ht(chi0_wGG)
            if optical_limit:
                ht(chi0_wvv)
                ht(chi0_wxvG)
        print('Hilbert transform done', file=self.fd)

    if optical_limit and self.intraband:  # Add intraband contribution
        omega_w = self.omega_w.copy()
        if omega_w[0] == 0.0:
            # Replace an exact zero frequency with a tiny positive value.
            omega_w[0] = 1e-14

        chi0_vv = self.chi0_vv
        self.world.broadcast(chi0_vv, 0)

        chi0_wvv += (chi0_vv[np.newaxis] /
                     (omega_w[:, np.newaxis, np.newaxis] +
                      1j * self.eta)**2)

    if self.unsymmetrized:
        # Carry out symmetrization
        # Redistribute if block par
        tmpchi0_wGG = self.redistribute(chi0_wGG)
        PWSA.symmetrize_wGG(tmpchi0_wGG)
        self.redistribute(tmpchi0_wGG, chi0_wGG)

        if optical_limit:
            PWSA.symmetrize_wxvG(chi0_wxvG)
            PWSA.symmetrize_wvv(chi0_wvv)

            # Since chi_wGG is nonanalytic in the head
            # and wings we have to take care that
            # these are handled correctly. Note that
            # it is important that the wings are overwritten first.
            chi0_wGG[:, :, 0] = chi0_wxvG[:, 1, 0, self.Ga:self.Gb]

            if self.blockcomm.rank == 0:
                chi0_wGG[:, 0] = chi0_wxvG[:, 0, 0]
                chi0_wGG[:, 0, 0] = chi0_wvv[:, 0, 0]

    return pd, chi0_wGG, chi0_wxvG, chi0_wvv
def calculate(self):
    """Build, save and return the local block ``H_sS`` of the Hamiltonian.

    Steps:

    1. Obtain the PAW terms (computed directly in 'RPA' mode, otherwise
       read from or written to the file 'phi_qaGp').
    2. Obtain the interaction kernel ``W_qGG``: in 'BSE' mode it is read
       from ``self.kernel_file + '.pckl'`` or, if that fails, computed
       from scratch; otherwise ``self.V_qGG`` is used.
    3. Fill the kernel matrix ``K_SS`` for the pair-orbital rows
       ``nS_start .. nS_end`` owned by this rank.
    4. Assemble ``H_sS`` from ``e_S``, ``focc_S`` and ``K_SS``; unless
       ``self.coupling`` is set it is made Hermitian.
    5. Store the result with ``self.par_save`` and return it.
    """
    calc = self.calc
    focc_S = self.focc_S
    e_S = self.e_S
    op_scc = calc.wfs.kd.symmetry.op_scc

    # Get phi_qaGp
    if self.mode == 'RPA':
        self.phi_aGp = self.get_phi_aGp()
    else:
        fd = opencew('phi_qaGp')
        if fd is None:
            self.reader = Reader('phi_qaGp')
            tmp = self.load_phi_aGp(self.reader, 0)[0]
            assert len(tmp) == self.npw
            self.printtxt('Finished reading phi_aGp')
        else:
            self.printtxt('Calculating phi_qaGp')
            self.get_phi_qaGp()
            world.barrier()
            self.reader = Reader('phi_qaGp')
        self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
        self.printtxt('')

    if self.optical_limit:
        iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
    else:
        iq = np.where(
            np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]

    # Diagonal of the bare kernel for the selected q-point.
    kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
    if self.optical_limit:
        kc_G[0] = 0.

    # Get screened Coulomb kernel
    if self.mode == 'BSE':
        try:
            # Read.  Pickle data must be opened in binary mode, and the
            # file is closed again by the with-block.
            # NOTE: unpickling executes code from the file -- only load
            # kernel files from a trusted source.
            with open(self.kernel_file + '.pckl', 'rb') as kernel_fd:
                data = pickle.load(kernel_fd)
            W_qGG = data['W_qGG']
            assert np.shape(W_qGG) == np.shape(self.V_qGG)
            self.printtxt('Finished reading screening interaction kernel')
        except Exception:
            # Any failure to read a usable kernel falls back to a fresh
            # calculation; KeyboardInterrupt/SystemExit still propagate.
            self.printtxt('Calculating screening interaction kernel.')
            W_qGG = self.full_static_screened_interaction()
        self.printtxt('')
    else:
        W_qGG = self.V_qGG

    t0 = time()
    self.printtxt('Calculating %s matrix elements' % self.mode)

    # Calculate full kernel
    K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
    self.rhoG0_S = np.zeros(self.nS, dtype=complex)

    for iS in range(self.nS_start, self.nS_end):
        row = iS - self.nS_start  # local row index on this rank
        k1, n1, m1 = self.Sindex_S3[iS]
        rho1_G = self.density_matrix(n1, m1, k1)
        rho1cc_G = rho1_G.conj()
        self.rhoG0_S[iS] = rho1_G[0]
        for jS in range(self.nS):
            k2, n2, m2 = self.Sindex_S3[jS]
            rho2_G = self.density_matrix(n2, m2, k2)
            K_SS[row, jS] = np.sum(rho1cc_G * rho2_G * kc_G)

            if not self.mode == 'RPA':
                rho3_G = self.density_matrix(n1, n2, k1, k2)
                rho4_G = self.density_matrix(m1, m2, self.kq_k[k1],
                                             self.kq_k[k2])

                # Fold the k-point difference back before looking it up.
                q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                q_c[np.where(q_c > 0.501)] -= 1.
                q_c[np.where(q_c < -0.499)] += 1.
                iq = self.kd.where_is_q(q_c, self.bzq_qc)

                if not self.qsymm:
                    W_GG = W_qGG[iq]
                else:
                    ibzq = self.ibzq_q[iq]
                    W_GG_tmp = W_qGG[ibzq]

                    iop = self.iop_q[iq]
                    timerev = self.timerev_q[iq]
                    diff_c = self.diff_qc[iq]
                    invop = np.linalg.inv(op_scc[iop])

                    # Gindex[iG]: position of the rotated G-vector in
                    # Gvec_Gc, or -1 when it is not in the list.
                    Gindex = np.zeros(self.npw, dtype=int)
                    for iG in range(self.npw):
                        G_c = self.Gvec_Gc[iG]
                        RotG_c = np.int8(
                            np.dot(invop, G_c + diff_c).round())
                        if timerev:
                            RotG_c = -RotG_c
                        tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                        try:
                            Gindex[iG] = np.where(tmp_G < 1e-5)[0][0]
                        except IndexError:
                            # No matching G-vector.
                            Gindex[iG] = -1

                    # Gather the mapped elements in one step; entries
                    # whose row or column has no mapping stay zero.
                    mapped = np.flatnonzero(Gindex != -1)
                    source = Gindex[mapped]
                    W_GG = np.zeros_like(W_GG_tmp)
                    W_GG[np.ix_(mapped, mapped)] = \
                        W_GG_tmp[np.ix_(source, source)]

                if self.mode == 'BSE':
                    tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                    K_SS[row, jS] -= 0.5 * np.sum(tmp_GG)
                else:
                    tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                    K_SS[row, jS] -= 0.5 * np.sum(tmp_G)
        self.timing(iS, t0, self.nS_local, 'pair orbital')
    K_SS /= self.vol

    world.sum(self.rhoG0_S)

    # Get and solve Hamiltonian
    H_sS = np.zeros_like(K_SS)
    for iS in range(self.nS_start, self.nS_end):
        row = iS - self.nS_start
        H_sS[row, iS] = e_S[iS]
        # Whole-row update instead of an element-by-element loop.
        H_sS[row] += focc_S[iS] * K_SS[row]

    # Force matrix to be Hermitian
    if not self.coupling:
        if world.size > 1:
            H_Ss = self.redistribute_H(H_sS)
        else:
            H_Ss = H_sS
        H_sS = ((np.real(H_sS) + np.real(H_Ss.T)) / 2. +
                1j * (np.imag(H_sS) - np.imag(H_Ss.T)) / 2.)

    # Save H_sS matrix
    self.par_save('H_SS', 'H_SS', H_sS)

    return H_sS
def calculate(self):
    """Build, symmetrize and save the Hamiltonian matrix ``H_sS``.

    Stages:

    1. Get ``phi_qaGp``: 'RPA' mode computes ``self.phi_aGp`` directly.
       Any other mode first tries to read the 'phi_qaGp' file through a
       ``Reader``; if that fails the file is generated and then opened.
    2. Get the screened Coulomb kernel ``W_qGG``: read from
       ``self.kernel_file + '.pckl'`` in 'BSE' mode (calculated from
       scratch when reading fails), ``self.V_qGG`` otherwise.
    3. Fill the rows ``nS_start <= iS < nS_end`` of the kernel ``K_SS``
       from the pair quantities returned by ``self.density_matrix``.
    4. Form ``H = diag(e_S) + focc_S * K``, force it to be Hermitian
       unless ``self.coupling`` is set, save it with ``self.par_save``
       and return it.

    Side effects: sets ``self.rhoG0_S`` (summed over ``world``) and, in
    the respective modes, ``self.phi_aGp`` / ``self.reader``.

    Returns
    -------
    H_sS : complex ndarray
        Allocated with shape ``(self.nS_local, self.nS)``.
    """
    focc_S = self.focc_S
    e_S = self.e_S
    op_scc = self.calc.wfs.symmetry.op_scc

    # Get phi_qaGp
    if self.mode == 'RPA':
        self.phi_aGp = self.get_phi_aGp()
    else:
        try:
            self.reader = Reader('phi_qaGp')
            tmp = self.load_phi_aGp(self.reader, 0)[0]
            # Explicit check rather than assert: it must still trigger
            # the recalculation below when Python runs with -O.
            if len(tmp) != self.npw:
                raise ValueError('phi_qaGp on file does not match npw')
            self.printtxt('Finished reading phi_aGp')
        except Exception:
            # Missing or unusable file: regenerate it.  Unlike a bare
            # except this lets KeyboardInterrupt / SystemExit through.
            self.printtxt('Calculating phi_qaGp')
            self.get_phi_qaGp()
            world.barrier()
            self.reader = Reader('phi_qaGp')
        self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
        self.printtxt('')

    # Index of the q-point whose diagonal of V_qGG enters the first
    # kernel term below.
    if self.optical_limit:
        iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
    else:
        iq = np.where(
            np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]
    kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
    if self.optical_limit:
        kc_G[0] = 0.

    # Get screened Coulomb kernel
    if self.mode == 'BSE':
        try:
            # Read.  Binary mode is required by pickle on Python 3 and
            # the with-statement closes the file again.
            # NOTE: unpickling can execute arbitrary code; kernel_file
            # must only ever point at trusted data.
            with open(self.kernel_file + '.pckl', 'rb') as kernel_fd:
                data = pickle.load(kernel_fd)
            W_qGG = data['W_qGG']
            if np.shape(W_qGG) != np.shape(self.V_qGG):
                raise ValueError('W_qGG on file does not match V_qGG')
            self.printtxt('Finished reading screening interaction kernel')
        except Exception:
            # Calculate from scratch
            self.printtxt('Calculating screening interaction kernel.')
            W_qGG = self.full_static_screened_interaction()
        self.printtxt('')
    else:
        W_qGG = self.V_qGG

    t0 = time()
    self.printtxt('Calculating %s matrix elements' % self.mode)

    # Calculate full kernel
    K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
    self.rhoG0_S = np.zeros(self.nS, dtype=complex)

    for iS in range(self.nS_start, self.nS_end):
        row = iS - self.nS_start  # local row index of pair iS
        k1, n1, m1 = self.Sindex_S3[iS]
        rho1_G = self.density_matrix(n1, m1, k1)
        self.rhoG0_S[iS] = rho1_G[0]
        for jS in range(self.nS):
            k2, n2, m2 = self.Sindex_S3[jS]
            rho2_G = self.density_matrix(n2, m2, k2)
            K_SS[row, jS] = np.sum(rho1_G.conj() * rho2_G * kc_G)

            if self.mode != 'RPA':
                rho3_G = self.density_matrix(n1, n2, k1, k2)
                rho4_G = self.density_matrix(m1, m2, self.kq_k[k1],
                                             self.kq_k[k2])

                # Difference of the two k-points, folded back before it
                # is looked up in bzq_qc.
                q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                q_c[np.where(q_c > 0.501)] -= 1.
                q_c[np.where(q_c < -0.499)] += 1.
                iq = self.kd.where_is_q(q_c, self.bzq_qc)

                if not self.qsymm:
                    W_GG = W_qGG[iq]
                else:
                    # Only W at ibzq is available; rebuild W at iq by
                    # permuting its G indices.
                    ibzq = self.ibzq_q[iq]
                    W_GG_tmp = W_qGG[ibzq]

                    iop = self.iop_q[iq]
                    timerev = self.timerev_q[iq]
                    diff_c = self.diff_qc[iq]
                    invop = np.linalg.inv(op_scc[iop])
                    Gindex = np.zeros(self.npw, dtype=int)
                    for iG in range(self.npw):
                        G_c = self.Gvec_Gc[iG]
                        RotG_c = np.int8(
                            np.dot(invop, G_c + diff_c).round())
                        if timerev:
                            RotG_c = -RotG_c
                        tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                        # -1 marks a vector with no partner in Gvec_Gc.
                        hits = np.flatnonzero(tmp_G < 1e-5)
                        Gindex[iG] = hits[0] if hits.size else -1

                    # Entries touching an unmapped G stay zero; all
                    # others are copied in a single indexed assignment.
                    W_GG = np.zeros_like(W_GG_tmp)
                    ok_G = np.flatnonzero(Gindex != -1)
                    W_GG[np.ix_(ok_G, ok_G)] = W_GG_tmp[
                        np.ix_(Gindex[ok_G], Gindex[ok_G])]

                if self.mode == 'BSE':
                    tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                    K_SS[row, jS] -= 0.5 * np.sum(tmp_GG)
                else:
                    # Other non-RPA modes use only the diagonal of W_GG.
                    tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                    K_SS[row, jS] -= 0.5 * np.sum(tmp_G)
        self.timing(iS, t0, self.nS_local, 'pair orbital')

    K_SS /= self.vol

    world.sum(self.rhoG0_S)

    # Get and solve Hamiltonian
    H_sS = np.zeros_like(K_SS)
    for iS in range(self.nS_start, self.nS_end):
        row = iS - self.nS_start
        H_sS[row, iS] = e_S[iS]
        H_sS[row] += focc_S[iS] * K_SS[row]

    # Force matrix to be Hermitian
    if not self.coupling:
        if world.size > 1:
            H_Ss = self.redistribute_H(H_sS)
        else:
            H_Ss = H_sS
        H_sS = (np.real(H_sS) + np.real(H_Ss.T)) / 2. + 1j * (
            np.imag(H_sS) - np.imag(H_Ss.T)) / 2.

    # Save H_sS matrix
    self.par_save('H_SS', 'H_SS', H_sS)

    return H_sS
def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii, m1, m2,
               spins):
    """Accumulate chi0 over this rank's (spin, k-point, band-range) list.

    For every entry of ``self.mysKn1n2`` the matching k+q point is
    looked up, pair densities between the bands of ``kpt1`` and the
    bands ``m1..m2`` of ``kpt2`` are computed, and the result is fed to
    one of ``update_hermitian`` / ``update_hilbert`` / ``update``
    (chosen from ``self.eta`` and ``self.hilbert``).  Afterwards the
    arrays are summed over ``self.kncomm``, the missing triangle is
    filled in where applicable, the Hilbert transform is applied if
    requested, and the intraband term is added to ``chi0_wvv``.

    Parameters
    ----------
    pd : object
        Provides ``kd.bzk_kc[0]`` (used as q-vector) and ``ngmax``.
    chi0_wGG, chi0_wxvG, chi0_wvv : ndarray
        Accumulators; they are handed to the update routines and to
        ``kncomm.sum`` and are returned at the end.
    Q_aGii : sequence
        One array per atom, contracted with the projections of ``kpt1``.
    m1, m2 : int
        Band range requested for ``kpt2``.
    spins : container
        k-points whose spin ``kpt1.s`` is not in here are skipped.

    Returns
    -------
    tuple
        ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``.

    Raises
    ------
    AssertionError
        If ``self.nocc2 > kpt2.nb`` when the intraband term is updated.
    """
    wfs = self.calc.wfs

    if self.keep_occupied_states:
        self.mykpts = [self.get_k_point(s, K, n1, n2)
                       for s, K, n1, n2 in self.mysKn1n2]

    if self.eta == 0.0:
        update = self.update_hermitian
    elif self.hilbert:
        update = self.update_hilbert
    else:
        update = self.update

    q_c = pd.kd.bzk_kc[0]
    optical_limit = not self.no_optical_limit and np.allclose(q_c, 0.0)

    pb = ProgressBar(self.fd)

    self.timer.start('Loop')
    # kpt1 occupied and kpt2 empty:
    for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
        pb.update(kn / len(self.mysKn1n2))
        if self.keep_occupied_states:
            kpt1 = self.mykpts[kn]
        else:
            kpt1 = self.get_k_point(s, K, n1, n2)

        if kpt1.s not in spins:
            continue

        with self.timer('k+q'):
            K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
        with self.timer('get k2'):
            kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
        with self.timer('fft-indices'):
            Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd,
                                       kpt1.shift_c - kpt2.shift_c)

        for n in range(kpt1.n2 - kpt1.n1):
            eps1 = kpt1.eps_n[n]
            # Energy differences to every band of kpt2; computed once
            # and reused for both the selection and deps_m.
            deps_all = eps1 - kpt2.eps_n

            # Only update if there exists deps <= omegamax
            if self.omegamax is not None:
                m = [i for i, d in enumerate(deps_all)
                     if abs(d) <= self.omegamax]
            else:
                m = range(0, kpt2.n2 - kpt2.n1)

            if not len(m):
                continue

            deps_m = deps_all[m]
            f1 = kpt1.f_n[n]
            with self.timer('conj'):
                ut1cc_R = kpt1.ut_nR[n].conj()
            with self.timer('paw'):
                C1_aGi = [np.dot(Q_Gii, P1_ni[n].conj())
                          for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)]
            n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2,
                                                 pd, Q_G)[m]
            df_m = (f1 - kpt2.f_n)[m]

            # This is not quite right for degenerate partially occupied
            # bands, but good enough for now:
            df_m[df_m <= 1e-20] = 0.0

            if optical_limit:
                self.update_optical_limit(
                    n, m, kpt1, kpt2, deps_m, df_m, n_mG,
                    chi0_wxvG, chi0_wvv)

            update(n_mG, deps_m, df_m, chi0_wGG)

        if optical_limit and self.intraband:
            # Avoid that more ranks are summing up
            # the intraband contributions
            if kpt1.n1 == 0 and self.blockcomm.rank == 0:
                # The message is a plain string so that it ends up in
                # the AssertionError itself (print() would return None).
                assert self.nocc2 <= kpt2.nb, \
                    'Error: Too few unoccupied bands'
                self.update_intraband(kpt2)

    self.timer.stop('Loop')

    pb.finish()

    with self.timer('Sum CHI_0'):
        for chi0_GG in chi0_wGG:
            self.kncomm.sum(chi0_GG)

        if optical_limit:
            self.kncomm.sum(chi0_wxvG)
            self.kncomm.sum(chi0_wvv)
            if self.intraband:
                self.kncomm.sum(self.chi0_vv)

    print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
          file=self.fd)

    if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
        # Fill in upper/lower triangle also:
        nG = pd.ngmax
        il = np.tril_indices(nG, -1)
        iu = il[::-1]
        if self.hilbert:
            for chi0_GG in chi0_wGG:
                chi0_GG[il] = chi0_GG[iu].conj()
        else:
            for chi0_GG in chi0_wGG:
                chi0_GG[iu] = chi0_GG[il].conj()

    if self.hilbert:
        with self.timer('Hilbert transform'):
            ht = HilbertTransform(self.omega_w, self.eta,
                                  self.timeordered)
            ht(chi0_wGG)
            if optical_limit:
                ht(chi0_wvv)
                ht(chi0_wxvG)
        print('Hilbert transform done', file=self.fd)

    if optical_limit and self.intraband:  # Add intraband contribution
        omega_w = self.omega_w.copy()
        # Nudge an exactly zero first frequency to avoid dividing by
        # zero below.
        if omega_w[0] == 0.0:
            omega_w[0] = 1e-14

        chi0_vv = self.chi0_vv
        self.world.broadcast(chi0_vv, 0)

        chi0_wvv += (chi0_vv[np.newaxis] /
                     (omega_w[:, np.newaxis, np.newaxis] *
                      (omega_w[:, np.newaxis, np.newaxis] +
                       1j * self.eta)))

    return pd, chi0_wGG, chi0_wxvG, chi0_wvv
def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii, m1, m2,
               spins):
    """Accumulate chi0 over this rank's (spin, k-point, band-range) list.

    Each entry of ``self.mysKn1n2`` yields a ``kpt1``; its k+q partner
    ``kpt2`` is fetched for bands ``m1..m2``, pair densities between the
    two are computed band by band, and the contributions are added via
    ``update_hermitian``, ``update_hilbert`` or ``update`` depending on
    ``self.eta`` and ``self.hilbert``.  The accumulated arrays are then
    summed over ``self.kncomm``, completed to full matrices where only
    one triangle was filled, optionally Hilbert transformed, and the
    intraband term is added to ``chi0_wvv``.

    Parameters
    ----------
    pd : object
        Provides ``kd.bzk_kc[0]`` (used as q-vector) and ``ngmax``.
    chi0_wGG, chi0_wxvG, chi0_wvv : ndarray
        Accumulators; they are handed to the update routines and to
        ``kncomm.sum`` and are returned at the end.
    Q_aGii : sequence
        One array per atom, contracted with the projections of ``kpt1``.
    m1, m2 : int
        Band range requested for ``kpt2``.
    spins : container
        k-points whose spin ``kpt1.s`` is not in here are skipped.

    Returns
    -------
    tuple
        ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``.

    Raises
    ------
    AssertionError
        If ``self.nocc2 > kpt2.nb`` when the intraband term is updated.
    """
    wfs = self.calc.wfs

    if self.keep_occupied_states:
        self.mykpts = [
            self.get_k_point(s, K, n1, n2)
            for s, K, n1, n2 in self.mysKn1n2
        ]

    if self.eta == 0.0:
        update = self.update_hermitian
    elif self.hilbert:
        update = self.update_hilbert
    else:
        update = self.update

    q_c = pd.kd.bzk_kc[0]
    optical_limit = not self.no_optical_limit and np.allclose(q_c, 0.0)

    pb = ProgressBar(self.fd)

    self.timer.start('Loop')
    # kpt1 occupied and kpt2 empty:
    for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
        pb.update(kn / len(self.mysKn1n2))
        if self.keep_occupied_states:
            kpt1 = self.mykpts[kn]
        else:
            kpt1 = self.get_k_point(s, K, n1, n2)

        if kpt1.s not in spins:
            continue

        with self.timer('k+q'):
            K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
        with self.timer('get k2'):
            kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
        with self.timer('fft-indices'):
            Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd,
                                       kpt1.shift_c - kpt2.shift_c)

        for n in range(kpt1.n2 - kpt1.n1):
            eps1 = kpt1.eps_n[n]
            # Differences to all bands of kpt2, evaluated a single time
            # and shared by the band selection and deps_m.
            deps_all = eps1 - kpt2.eps_n

            # Only update if there exists deps <= omegamax
            if self.omegamax is not None:
                m = [
                    i for i, d in enumerate(deps_all)
                    if abs(d) <= self.omegamax
                ]
            else:
                m = range(0, kpt2.n2 - kpt2.n1)

            if not len(m):
                continue

            deps_m = deps_all[m]
            f1 = kpt1.f_n[n]
            with self.timer('conj'):
                ut1cc_R = kpt1.ut_nR[n].conj()
            with self.timer('paw'):
                C1_aGi = [
                    np.dot(Q_Gii, P1_ni[n].conj())
                    for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)
                ]
            n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2,
                                                 pd, Q_G)[m]
            df_m = (f1 - kpt2.f_n)[m]

            # This is not quite right for degenerate partially occupied
            # bands, but good enough for now:
            df_m[df_m <= 1e-20] = 0.0

            if optical_limit:
                self.update_optical_limit(n, m, kpt1, kpt2, deps_m, df_m,
                                          n_mG, chi0_wxvG, chi0_wvv)

            update(n_mG, deps_m, df_m, chi0_wGG)

        if optical_limit and self.intraband:
            # Avoid that more ranks are summing up
            # the intraband contributions
            if kpt1.n1 == 0 and self.blockcomm.rank == 0:
                # A string literal, not print(...): print() returns None,
                # which left the AssertionError without any message.
                assert self.nocc2 <= kpt2.nb, \
                    'Error: Too few unoccupied bands'
                self.update_intraband(kpt2)

    self.timer.stop('Loop')

    pb.finish()

    with self.timer('Sum CHI_0'):
        for chi0_GG in chi0_wGG:
            self.kncomm.sum(chi0_GG)

        if optical_limit:
            self.kncomm.sum(chi0_wxvG)
            self.kncomm.sum(chi0_wvv)
            if self.intraband:
                self.kncomm.sum(self.chi0_vv)

    print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
          file=self.fd)

    if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
        # Fill in upper/lower triangle also:
        nG = pd.ngmax
        il = np.tril_indices(nG, -1)
        iu = il[::-1]
        if self.hilbert:
            for chi0_GG in chi0_wGG:
                chi0_GG[il] = chi0_GG[iu].conj()
        else:
            for chi0_GG in chi0_wGG:
                chi0_GG[iu] = chi0_GG[il].conj()

    if self.hilbert:
        with self.timer('Hilbert transform'):
            ht = HilbertTransform(self.omega_w, self.eta,
                                  self.timeordered)
            ht(chi0_wGG)
            if optical_limit:
                ht(chi0_wvv)
                ht(chi0_wxvG)
        print('Hilbert transform done', file=self.fd)

    if optical_limit and self.intraband:  # Add intraband contribution
        omega_w = self.omega_w.copy()
        # An exactly zero first frequency would divide by zero below.
        if omega_w[0] == 0.0:
            omega_w[0] = 1e-14

        chi0_vv = self.chi0_vv
        self.world.broadcast(chi0_vv, 0)

        chi0_wvv += (
            chi0_vv[np.newaxis] /
            (omega_w[:, np.newaxis, np.newaxis] *
             (omega_w[:, np.newaxis, np.newaxis] + 1j * self.eta)))

    return pd, chi0_wGG, chi0_wxvG, chi0_wvv