def update_hilbert(self, n_mG, deps_m, df_m, chi0_wGG):
    """Add a batch of transitions to the spectral function.

    For every transition a lower frequency index ``w`` is computed from
    ``abs(deps_m)`` and the grid parameters ``self.domega0`` and
    ``self.omega2``.  The weight ``self.prefactor * abs(df_m)`` is then
    shared between ``chi0_wGG[w]`` and ``chi0_wGG[w + 1]`` with factors
    proportional to the distance of the transition energy from the other
    grid point.  ``chi0_wGG`` is modified in place and holds the spectral
    function A_wGG for a later Hilbert transform.

    Parameters:

    n_mG: one row per transition; each row is handed to czher / gemm.
    deps_m: transition energies (only the absolute value is used).
    df_m: occupation differences (only the absolute value is used).
    chi0_wGG: output array, indexed by frequency along the first axis.
    """
    timer = self.timer
    timer.start('prep')
    grid_w = self.omega_w
    step0 = self.domega0
    beta = (2**0.5 - 1) * step0 / self.omega2
    energy_m = abs(deps_m)
    # Index of the lower of the two grid points used for each transition.
    lower_m = (energy_m / (step0 + beta * energy_m)).astype(int)
    left_m = grid_w[lower_m]
    right_m = grid_w[lower_m + 1]
    scale_m = self.prefactor * abs(df_m) / (right_m - left_m)**2
    wleft_m = scale_m * (right_m - energy_m)
    wright_m = scale_m * (energy_m - left_m)
    timer.stop('prep')

    transitions = zip(wleft_m, wright_m, n_mG, lower_m)
    if self.blockcomm.size > 1:
        # Block-distributed case: only the G-slice [Ga:Gb) owned by this
        # rank enters as second operand of gemm.
        for wleft, wright, n_G, w in transitions:
            column_G = n_G.reshape((-1, 1))
            mycolumn_G = n_G[self.Ga:self.Gb].reshape((-1, 1))
            gemm(wleft, column_G, mycolumn_G, 1.0, chi0_wGG[w], 'c')
            gemm(wright, column_G, mycolumn_G, 1.0, chi0_wGG[w + 1], 'c')
    else:
        # Serial-in-G case: rank-1 update through czher.
        for wleft, wright, n_G, w in transitions:
            czher(wleft, n_G.conj(), chi0_wGG[w])
            czher(wright, n_G.conj(), chi0_wGG[w + 1])
def update_hilbert(self, n_mG, deps_m, wd, chi0_wGG):
    """Update spectral function.

    Updates spectral function A_wGG and saves it to chi0_wGG for later
    hilbert-transform.

    Each transition energy is first shifted away from zero by
    ``self.eshift`` (sign preserving).  Its absolute value is then mapped
    to a frequency index ``w`` with ``wd.get_closest_index`` and the
    contribution is shared between ``chi0_wGG[w]`` and ``chi0_wGG[w + 1]``
    with weights proportional to the distance from the other grid point.
    Transitions with ``w + 1 >= wd.wmax`` are dropped.

    Parameters:

    n_mG: one row per transition; each row is handed to czher / gemm.
    deps_m: transition energies.  The array is NOT modified; the shift is
        applied to a copy.
    wd: frequency descriptor providing ``get_data()``,
        ``get_closest_index()`` and ``wmax``.
    chi0_wGG: output array, updated in place.
    """
    self.timer.start('prep')
    omega_w = wd.get_data()
    # Shift a copy instead of using ``+=``: the caller's array must not
    # change (a second call would otherwise shift twice, and integer
    # arrays would fail on the in-place float addition).
    deps_m = deps_m + self.eshift * np.sign(deps_m)
    o_m = abs(deps_m)
    w_m = wd.get_closest_index(o_m)
    o1_m = omega_w[w_m]
    o2_m = omega_w[w_m + 1]
    p_m = np.abs(1 / (o2_m - o1_m)**2)
    p1_m = p_m * (o2_m - o_m)
    p2_m = p_m * (o_m - o1_m)
    self.timer.stop('prep')

    distributed = self.blockcomm.size > 1
    for p1, p2, n_G, w in zip(p1_m, p2_m, n_mG, w_m):
        if not w + 1 < wd.wmax:
            # The last frequency is not reliable
            continue
        if distributed:
            # Only the G-slice [Ga:Gb) owned by this block rank is used
            # as second operand.
            myn_G = n_G[self.Ga:self.Gb].reshape((-1, 1))
            gemm(p1, n_G.reshape((-1, 1)), myn_G, 1.0, chi0_wGG[w], 'c')
            gemm(p2, n_G.reshape((-1, 1)), myn_G, 1.0, chi0_wGG[w + 1], 'c')
        else:
            czher(p1, n_G.conj(), chi0_wGG[w])
            czher(p2, n_G.conj(), chi0_wGG[w + 1])
def update_hilbert(self, n_mG, deps_m, df_m, chi0_wGG):
    """Distribute transition weights onto the frequency grid.

    A lower grid index is derived from ``abs(deps_m)`` together with
    ``self.domega0`` and ``self.omega2``.  The weight
    ``self.prefactor * abs(df_m)`` of each transition is divided between
    that index and the next one, and the two slices of ``chi0_wGG`` are
    updated in place through ``czher`` (one block) or ``gemm`` (several
    blocks).  Nothing is returned.
    """
    self.timer.start("prep")
    d0 = self.domega0
    beta = (2 ** 0.5 - 1) * d0 / self.omega2
    abs_eps_m = abs(deps_m)
    index_m = (abs_eps_m / (d0 + beta * abs_eps_m)).astype(int)
    omega_lo_m = self.omega_w[index_m]
    omega_hi_m = self.omega_w[index_m + 1]
    norm_m = self.prefactor * abs(df_m) / (omega_hi_m - omega_lo_m) ** 2
    share_lo_m = norm_m * (omega_hi_m - abs_eps_m)
    share_hi_m = norm_m * (abs_eps_m - omega_lo_m)
    self.timer.stop("prep")

    use_blocks = self.blockcomm.size > 1
    for share_lo, share_hi, n_G, w in zip(share_lo_m, share_hi_m, n_mG, index_m):
        if use_blocks:
            full_G = n_G.reshape((-1, 1))
            mine_G = n_G[self.Ga : self.Gb].reshape((-1, 1))
            gemm(share_lo, full_G, mine_G, 1.0, chi0_wGG[w], "c")
            # NOTE(review): a commented-out line in the previous revision
            # read ``chi0_wGG[w + 1] += p2 * np.outer(myn_G, n_G.conj())``,
            # which suggests what this gemm call is meant to compute;
            # confirm against the gemm wrapper.
            gemm(share_hi, full_G, mine_G, 1.0, chi0_wGG[w + 1], "c")
        else:
            czher(share_lo, n_G.conj(), chi0_wGG[w])
            czher(share_hi, n_G.conj(), chi0_wGG[w + 1])
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    Contributions are accumulated over the selected spins, the k-points
    ``range(self.kstart, self.kend)``, the bands ``range(self.nvalbands)``
    (index ``n``) and the bands in ``self.mlist`` (index ``m``).  A pair
    (n, m) contributes only when ``f_n - f_m > self.ftol``.

    ``self.hilbert_trans`` selects how a pair is accumulated:

    * False: the pair is added to ``chi0_wGG`` for every local frequency
      with the two-pole coefficient ``C`` computed in the loop.
    * True: the pair is added to a spectral function, split linearly
      between the two frequency bins around its transition energy; after
      the k-point sum the spectral function goes through
      ``hilbert_transform``.

    Parameters:

    seperate_spin: spin index to restrict the sum to.  With None
        (default) all of ``np.arange(self.nspins)`` are summed.

    Nothing is returned.  The result, divided by ``self.vol``, is stored
    in ``self.chi0_wGG``; with ``self.optical_limit`` set the arrays
    ``self.chi0G0_wGv`` and ``self.chi00G_wGv`` are filled as well.

    NOTE(review): the block nesting below was reconstructed from a copy of
    this file in which whitespace had been collapsed.  Confirm the
    indentation against version control, in particular the spots marked
    NOTE(review).
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    # ibzk_kc and bzk_kc are not referenced again in this method; the
    # k-point arrays are read through kd / self.kd below.
    ibzk_kc = kd.ibzk_kc
    bzk_kc = kd.bzk_kc
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw), dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)

        # Accumulators for the two "wing" arrays (direct mode) ...
        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)

        # ... and their spectral-function counterparts (Hilbert mode).
        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)

    # For a (nearly) vanishing broadening only C.real is used and the
    # update goes through czher; the matrix is symmetrised after the k-sum.
    use_zher = False
    if self.eta < 1e-5:
        use_zher = True

    rho_G = np.zeros(self.npw, dtype=complex)
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        # Skip a spin channel in which no state is occupied above ftol.
        if not (f_skn[spin] > self.ftol).any():
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices at or beyond nbzkpts are mapped to k = 0 and flagged;
            # their pair density is zeroed further down.  Presumably this
            # pads the k-range so every rank runs the same number of
            # iterations -- confirm.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                # Phase arrays handed to _get_wave_function_array as
                # ``phase`` for the two IBZ k-points.
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)

            # ``self.pbc == False`` reduces the grid size by one along
            # non-periodic directions (assumes self.pbc is an array).
            index1_g, phase1_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                # The early skip of unoccupied bands is only taken in
                # serial runs.
                if self.calc.wfs.world.size == 1:
                    if self.f_skn[spin][ibzkpt1, n] - self.ftol < 0:
                        continue

                t1 = time()  # not used further in this method
                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g, k, index1_g, phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(" ", k, n, m, time() - t0, file=self.txt)

                    check_focc = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) > self.ftol

                    # Called for every m, with check_focc passed as third
                    # argument (presumably so that ranks stay in step even
                    # when the pair is skipped -- confirm).
                    if not self.pwmode:
                        psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc, spin=spin)

                    if check_focc:
                        if self.pwmode:
                            u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                            psitold_g = calc.wfs._get_wave_function_array(u, m, realspace=True, phase=eikr2_R)

                        psit2_g = kd.transform_wave_function(psitold_g, kq_k[k], index2_g, phase2_g)

                        # zero padding is included through the FFT
                        rho_g = np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad) * self.vol / self.nG0rpad
                        # Here, planewave cutoff is applied
                        rho_G = rho_g.ravel()[self.Gindex_G]

                        if self.optical_limit:
                            # Replace the G = 0 element by a gradient
                            # matrix element projected on self.qq_v.
                            phase_cd = np.exp(2j * pi * sdisp_cd * kd.bzk_kc[kq_k[k], :, np.newaxis])
                            for ix in range(3):
                                d_c[ix](psit2_g, dpsit_g, phase_cd)
                                tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                            rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                            # Same matrix element for the three unit
                            # directions scaled by self.qopt.
                            for ix in range(3):
                                q2_c = np.diag((1, 1, 1))[ix] * self.qopt
                                qq2_v = np.dot(q2_c, self.bcell_cv)  # summation over c
                                rhoG0_v[ix] = -1j * np.dot(qq2_v, tmp)

                        P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u, ibzkpt2)

                        # Add the per-atom correction built from P1 and P2
                        # (gemv accumulates into rho_G / rhoG0_v).
                        # NOTE(review): ``id`` shadows the builtin and is
                        # not used.
                        for a, id in enumerate(calc.wfs.setups.id_a):
                            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                            gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                            if self.optical_limit:
                                gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                        if self.optical_limit:
                            # Divide the G = 0 terms by the unshifted
                            # energy difference; pairs closer than
                            # 0.1 / Hartree are dropped instead.
                            if (
                                np.abs(self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n])
                                > 0.1 / Hartree
                            ):
                                rho_G[0] /= (
                                    self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]
                                )
                                rhoG0_v /= self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]
                            else:
                                rho_G[0] = 0.0
                                rhoG0_v[:] = 0.0

                        # NOTE(review): rhoG0_v is not zeroed for padded
                        # k-points, so row 0 of the wing arrays built
                        # below can still be non-zero -- confirm intended.
                        if k_pad:
                            rho_G[:] = 0.0

                        if self.optical_limit:
                            rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                            rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                            rho0G_Gv[0, :] = rhoG0_v * rhoG0_v.conj()
                            rhoG0_Gv[0, :] = rhoG0_v * rhoG0_v.conj()

                        if not self.hilbert_trans:
                            # Direct accumulation on the local frequencies.
                            if not use_zher:
                                rho_GG = np.outer(rho_G, rho_G.conj())

                            for iw in range(self.Nw_local):
                                w = self.w_w[iw + self.wstart] / Hartree
                                coef = 1.0 / (
                                    w + e_skn[spin][ibzkpt1, n] - e_skn[spin][ibzkpt2, m] + 1j * self.eta
                                ) - 1.0 / (w - e_skn[spin][ibzkpt1, n] + e_skn[spin][ibzkpt2, m] + 1j * self.eta)
                                C = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) * coef

                                if use_zher:
                                    czher(C.real, rho_G.conj(), chi0_wGG[iw])
                                else:
                                    axpy(C, rho_GG, chi0_wGG[iw])

                                    # NOTE(review): nesting ambiguous in
                                    # the collapsed source; the wing update
                                    # is placed in the non-zher branch
                                    # here -- confirm.
                                    if self.optical_limit:
                                        axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                        axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])

                        else:
                            # Spectral-function accumulation.
                            rho_GG = np.outer(rho_G, rho_G.conj())
                            focc = f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]
                            w0 = e_skn[spin][ibzkpt2, m] - e_skn[spin][ibzkpt1, n]
                            scal(focc, rho_GG)
                            if self.optical_limit:
                                scal(focc, rhoG0_Gv)
                                scal(focc, rho0G_Gv)

                            # calculate delta function
                            # (linear split of the weight between bins
                            # w0_id and w0_id + 1; each bin is handled by
                            # the wScomm rank that owns it)
                            w0_id = int(w0 / self.dw)
                            if w0_id + 1 < self.NwS:
                                # rely on the self.NwS_local is equal in each node!
                                if self.wScomm.rank == w0_id // self.NwS_local:
                                    alpha = (w0_id + 1 - w0 / self.dw) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[w0_id % self.NwS_local])

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[w0_id % self.NwS_local])
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[w0_id % self.NwS_local])

                                if self.wScomm.rank == (w0_id + 1) // self.NwS_local:
                                    alpha = (w0 / self.dw - w0_id) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[(w0_id + 1) % self.NwS_local])

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[(w0_id + 1) % self.NwS_local])
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[(w0_id + 1) % self.NwS_local])

                            # deltaw = delta_function(w0, self.dw, self.NwS, self.sigma)
                            # for wi in range(self.NwS_local):
                            # if deltaw[wi + self.wS1] > 1e-8:
                            # specfunc_wGG[wi] += tmp_GG * deltaw[wi + self.wS1]

                # Progress report per band when there is a single k-point.
                # ``rank`` is a module-level name, not defined in this
                # method.  NOTE(review): ``totaltime`` is only bound once
                # the n == 0 branch has run.
                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt("Finished n 0 in %d seconds, estimate %d seconds left." % (dt, totaltime))
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt(
                                "Finished n %d in %d seconds, estimate %d seconds left." % (n, dt, totaltime - dt)
                            )
            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            # Progress report per k-point.  NOTE(review): ``totaltime`` is
            # only bound on a rank that has processed k == 0.
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt("Finished k 0 in %d seconds, estimate %d seconds left." % (dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt(
                        "Finished k %d in %d seconds, estimate %d seconds left. " % (k, dt, totaltime - dt)
                    )
    self.printtxt("Finished summation over k")

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        # Direct mode: sum the partial results over the k-communicator.
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # czher left part of the matrix untouched (the assert checks
            # that column 0 below the diagonal is still zero).  Add the
            # conjugate transpose and halve the doubled diagonal.
            assert (np.abs(chi0_wGG[0, 1:, 0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.0
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            # All spectral frequencies are local: transform and keep the
            # local frequency window.
            chi0_wGG = hilbert_transform(
                specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]
            self.printtxt("Finished hilbert transform !")
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of flattened (G, G') elements per wcomm rank; the
            # last rank takes the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw ** 2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw ** 2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords, self.wcomm)
            self.printtxt("Finished Slice Along Frequency !")
            chi0_Wg = hilbert_transform(specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[
                : self.Nw
            ]
            self.printtxt("Finished hilbert transform !")
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt("Finished Slice along orbitals !")
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                # Gather the wing spectral functions for all NwS
                # frequencies on every wScomm rank.
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(
                specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]

            self.chi0G0_wGv = hilbert_transform(
                specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]

    # Final normalisation by the volume.
    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt("")
    self.printtxt("Finished chi0 !")
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    Contributions are accumulated over the selected spins, the k-points
    ``range(self.kstart, self.kend)``, the bands ``range(self.nvalbands)``
    (index ``n``) and the bands in ``self.mlist`` (index ``m``).  A pair
    (n, m) contributes only when ``f_n - f_m > self.ftol``.

    ``self.hilbert_trans`` selects how a pair is accumulated:

    * False: the pair is added to ``chi0_wGG`` for every local frequency
      with the two-pole coefficient ``C`` computed in the loop.
    * True: the pair is added to a spectral function, split linearly
      between the two frequency bins around its transition energy; after
      the k-point sum the spectral function goes through
      ``hilbert_transform``.

    Parameters:

    seperate_spin: spin index to restrict the sum to.  With None
        (default) all of ``np.arange(self.nspins)`` are summed.

    Nothing is returned.  The result, divided by ``self.vol``, is stored
    in ``self.chi0_wGG``; with ``self.optical_limit`` set the arrays
    ``self.chi0G0_wGv`` and ``self.chi00G_wGv`` are filled as well.

    NOTE(review): the block nesting below was reconstructed from a copy of
    this file in which whitespace had been collapsed.  Confirm the
    indentation against version control, in particular the spots marked
    NOTE(review).
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    # ibzk_kc and bzk_kc are not referenced again in this method; the
    # k-point arrays are read through kd / self.kd below.
    ibzk_kc = kd.ibzk_kc
    bzk_kc = kd.bzk_kc
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw), dtype = complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)

        # Accumulators for the two "wing" arrays (direct mode) ...
        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)

        # ... and their spectral-function counterparts (Hilbert mode).
        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)

    # For a (nearly) vanishing broadening only C.real is used and the
    # update goes through czher; the matrix is symmetrised after the k-sum.
    use_zher = False
    if self.eta < 1e-5:
        use_zher = True

    rho_G = np.zeros(self.npw, dtype=complex)
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        # Skip a spin channel in which no state is occupied above ftol.
        if not (f_skn[spin] > self.ftol).any():
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices at or beyond nbzkpts are mapped to k = 0 and flagged;
            # their pair density is zeroed further down.  Presumably this
            # pads the k-range so every rank runs the same number of
            # iterations -- confirm.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                # Phase arrays handed to _get_wave_function_array as
                # ``phase`` for the two IBZ k-points.
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)

            # ``self.pbc == False`` reduces the grid size by one along
            # non-periodic directions (assumes self.pbc is an array).
            index1_g, phase1_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                # The early skip of unoccupied bands is only taken in
                # serial runs.
                if self.calc.wfs.world.size == 1:
                    if (self.f_skn[spin][ibzkpt1, n] - self.ftol < 0):
                        continue

                t1 = time()  # not used further in this method
                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g,k,index1_g,phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(' ', k, n, m, time() - t0, file=self.txt)

                    check_focc = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) > self.ftol

                    # Called for every m, with check_focc passed as third
                    # argument (presumably so that ranks stay in step even
                    # when the pair is skipped -- confirm).
                    if not self.pwmode:
                        psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc, spin=spin)

                    if check_focc:
                        if self.pwmode:
                            u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                            psitold_g = calc.wfs._get_wave_function_array(u, m, realspace=True, phase=eikr2_R)

                        psit2_g = kd.transform_wave_function(psitold_g, kq_k[k], index2_g, phase2_g)

                        # zero padding is included through the FFT
                        rho_g = np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad) * self.vol / self.nG0rpad
                        # Here, planewave cutoff is applied
                        rho_G = rho_g.ravel()[self.Gindex_G]

                        if self.optical_limit:
                            # Replace the G = 0 element by a gradient
                            # matrix element projected on self.qq_v.
                            phase_cd = np.exp(2j * pi * sdisp_cd * kd.bzk_kc[kq_k[k], :, np.newaxis])
                            for ix in range(3):
                                d_c[ix](psit2_g, dpsit_g, phase_cd)
                                tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                            rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                            # Same matrix element for the three unit
                            # directions scaled by self.qopt.
                            for ix in range(3):
                                q2_c = np.diag((1,1,1))[ix] * self.qopt
                                qq2_v = np.dot(q2_c, self.bcell_cv) # summation over c
                                rhoG0_v[ix] = -1j * np.dot(qq2_v, tmp)

                        P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u, ibzkpt2)

                        # Add the per-atom correction built from P1 and P2
                        # (gemv accumulates into rho_G / rhoG0_v).
                        # NOTE(review): ``id`` shadows the builtin and is
                        # not used.
                        for a, id in enumerate(calc.wfs.setups.id_a):
                            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                            gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                            if self.optical_limit:
                                gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                        if self.optical_limit:
                            # Divide the G = 0 terms by the unshifted
                            # energy difference; pairs closer than
                            # 0.1/Hartree are dropped instead.
                            if np.abs(self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]) > 0.1/Hartree:
                                rho_G[0] /= self.enoshift_skn[spin][ibzkpt2, m] \
                                            - self.enoshift_skn[spin][ibzkpt1, n]
                                rhoG0_v /= self.enoshift_skn[spin][ibzkpt2, m] \
                                            - self.enoshift_skn[spin][ibzkpt1, n]
                            else:
                                rho_G[0] = 0.
                                rhoG0_v[:] = 0.

                        # NOTE(review): rhoG0_v is not zeroed for padded
                        # k-points, so row 0 of the wing arrays built
                        # below can still be non-zero -- confirm intended.
                        if k_pad:
                            rho_G[:] = 0.

                        if self.optical_limit:
                            rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                            rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                            rho0G_Gv[0,:] = rhoG0_v * rhoG0_v.conj()
                            rhoG0_Gv[0,:] = rhoG0_v * rhoG0_v.conj()

                        if not self.hilbert_trans:
                            # Direct accumulation on the local frequencies.
                            if not use_zher:
                                rho_GG = np.outer(rho_G, rho_G.conj())

                            for iw in range(self.Nw_local):
                                w = self.w_w[iw + self.wstart] / Hartree
                                coef = ( 1. / (w + e_skn[spin][ibzkpt1, n] - e_skn[spin][ibzkpt2, m]
                                               + 1j * self.eta)
                                       - 1. / (w - e_skn[spin][ibzkpt1, n] + e_skn[spin][ibzkpt2, m]
                                               + 1j * self.eta) )
                                C = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) * coef

                                if use_zher:
                                    czher(C.real, rho_G.conj(), chi0_wGG[iw])
                                else:
                                    axpy(C, rho_GG, chi0_wGG[iw])

                                    # NOTE(review): nesting ambiguous in
                                    # the collapsed source; the wing update
                                    # is placed in the non-zher branch
                                    # here -- confirm.
                                    if self.optical_limit:
                                        axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                        axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])

                        else:
                            # Spectral-function accumulation.
                            rho_GG = np.outer(rho_G, rho_G.conj())
                            focc = f_skn[spin][ibzkpt1,n] - f_skn[spin][ibzkpt2,m]
                            w0 = e_skn[spin][ibzkpt2,m] - e_skn[spin][ibzkpt1,n]
                            scal(focc, rho_GG)
                            if self.optical_limit:
                                scal(focc, rhoG0_Gv)
                                scal(focc, rho0G_Gv)

                            # calculate delta function
                            # (linear split of the weight between bins
                            # w0_id and w0_id + 1; each bin is handled by
                            # the wScomm rank that owns it)
                            w0_id = int(w0 / self.dw)
                            if w0_id + 1 < self.NwS:
                                # rely on the self.NwS_local is equal in each node!
                                if self.wScomm.rank == w0_id // self.NwS_local:
                                    alpha = (w0_id + 1 - w0/self.dw) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[w0_id % self.NwS_local] )

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[w0_id % self.NwS_local] )
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[w0_id % self.NwS_local] )

                                if self.wScomm.rank == (w0_id+1) // self.NwS_local:
                                    alpha = (w0 / self.dw - w0_id) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[(w0_id+1) % self.NwS_local] )

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[(w0_id+1) % self.NwS_local] )
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[(w0_id+1) % self.NwS_local] )

                            # deltaw = delta_function(w0, self.dw, self.NwS, self.sigma)
                            # for wi in range(self.NwS_local):
                            # if deltaw[wi + self.wS1] > 1e-8:
                            # specfunc_wGG[wi] += tmp_GG * deltaw[wi + self.wS1]

                # Progress report per band when there is a single k-point.
                # ``rank`` is a module-level name, not defined in this
                # method.  NOTE(review): ``totaltime`` is only bound once
                # the n == 0 branch has run.
                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt('Finished n 0 in %d seconds, estimate %d seconds left.' %(dt, totaltime) )
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt('Finished n %d in %d seconds, estimate %d seconds left.'%(n, dt, totaltime-dt))
            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            # Progress report per k-point.  NOTE(review): ``totaltime`` is
            # only bound on a rank that has processed k == 0.
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt('Finished k 0 in %d seconds, estimate %d seconds left.' %(dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt('Finished k %d in %d seconds, estimate %d seconds left. '%(k, dt, totaltime - dt) )
    self.printtxt('Finished summation over k')

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        # Direct mode: sum the partial results over the k-communicator.
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # czher left part of the matrix untouched (the assert checks
            # that column 0 below the diagonal is still zero).  Add the
            # conjugate transpose and halve the doubled diagonal.
            assert (np.abs(chi0_wGG[0,1:,0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            # All spectral frequencies are local: transform and keep the
            # local frequency window.
            chi0_wGG = hilbert_transform(specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta,
                                         self.full_hilbert_trans)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of flattened (G, G') elements per wcomm rank; the
            # last rank takes the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords, self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            chi0_Wg = hilbert_transform(specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta,
                                        self.full_hilbert_trans)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                # Gather the wing spectral functions for all NwS
                # frequencies on every wScomm rank.
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta,
                                                self.full_hilbert_trans)[self.wstart:self.wend]

            self.chi0G0_wGv = hilbert_transform(specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta,
                                                self.full_hilbert_trans)[self.wstart:self.wend]

    # Final normalisation by the volume.
    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')
# Check czher against a NumPy reference on a random 3x3 Hermitian matrix,
# then time repeated czher calls.
from gpaw.utilities.blas import czher, axpy
import numpy as np
from time import time

alpha = 0.5
x = np.random.rand(3) + 1j * np.random.rand(3)
a = np.random.rand(9).reshape(3, 3) + np.random.rand(9).reshape(3, 3) * 1j

# make a hermitian
# (upper triangle := conjugate of the lower triangle, real diagonal)
lower = np.tril_indices(3, -1)
a[lower[::-1]] = a[lower].conj()
np.fill_diagonal(a, a.diagonal().real.copy())

# Reference result computed with plain NumPy before a is updated in place.
b = alpha * np.outer(x.conj(), x) + a
czher(alpha, x, a)

# Copy the conjugated upper triangle (diagonal included) into the lower
# one before comparing with the full reference matrix.
upper = np.triu_indices(3)
a[upper[::-1]] = a[upper].conj()

assert np.abs(b - a).sum() < 1e-14

# testing speed
t_czher = 0
t_axpy = 0  # not updated in this part of the script

for i in range(1000):
    t0 = time()
    czher(alpha, x, a)
    t_czher += time() - t0
# Compare czher with an explicit NumPy outer-product update on a random
# 3x3 Hermitian matrix and accumulate the time spent in czher.
from gpaw.utilities.blas import czher, axpy
import numpy as np
from time import time

alpha = 0.5
x = np.random.rand(3) + 1j * np.random.rand(3)
a = np.random.rand(9).reshape(3,3) + np.random.rand(9).reshape(3,3) * 1j

# make a hermitian
for i, j in zip(*np.tril_indices(3, -1)):
    # lower-triangle entry (i, j) defines its mirror image (j, i)
    a[j,i] = a[i,j].conj()
np.fill_diagonal(a, a.diagonal().real.copy())

# NumPy reference, evaluated before czher modifies a in place.
b = alpha * np.outer(x.conj(), x) + a
czher(alpha, x, a)

# Rebuild the lower triangle (and conjugate the diagonal) from the upper
# triangle so that a can be compared with the full matrix b.
rows, cols = np.triu_indices(3)
a[cols, rows] = a[rows, cols].conj()

assert np.abs(b-a).sum() < 1e-14

# testing speed
t_czher = 0
t_axpy = 0  # not updated in this part of the script

for i in range(1000):
    t0 = time()
    czher(alpha, x, a)
    t_czher += time() - t0
def spectral_function_integration(self, domain=None, integrand=None, x=None, kwargs=None, out_wxx=None):
    """Integrate response function.

    Assume that the integral has the form of a response function.  For
    the linear tetrahedron method it is possible calculate frequency
    dependent weights and do a point summation using these weights.

    Parameters:

    domain: sequence whose first item is passed to ``self.tesselate``;
        the remaining items are the term domains of the integrand.
    integrand: pair ``(get_matrix_element, get_eigenvalues)``.
    x: frequency descriptor providing ``get_data()`` and
        ``get_index_range()``.
    kwargs: optional pair of dicts bound as keyword arguments to the two
        integrand functions.
    out_wxx: output array, updated in place.  Passing None raises
        NotImplementedError.

    Side effects: sets ``self.Ga`` and ``self.Gb`` (this block rank's
    slice of the last axis) and sums ``out_wxx`` over ``self.kncomm``.
    """
    if out_wxx is None:
        raise NotImplementedError

    # Slice of the last axis owned by this block rank (ceil division, so
    # trailing ranks may get a shorter or empty slice).
    nG = out_wxx.shape[2]
    nblocks = self.blockcomm.size
    mynG = (nG + nblocks - 1) // nblocks
    self.Ga = min(self.blockcomm.rank * mynG, nG)
    self.Gb = min(self.Ga + mynG, nG)

    # Input domain
    td = self.tesselate(domain[0])
    args = domain[1:]
    get_matrix_element, get_eigenvalues = integrand

    # The kwargs contain any constant arguments provided by the user
    if kwargs is not None:
        get_matrix_element = partial(get_matrix_element, **kwargs[0])
        get_eigenvalues = partial(get_eigenvalues, **kwargs[1])

    # Relevant quantities
    bzk_kc = td.points
    nk = len(bzk_kc)

    with self.timer('pts'):
        # Point to simplex: for every point, the simplices it belongs to.
        members_k = [[] for _ in range(nk)]
        ones_column = np.ones(4)[:, np.newaxis]
        all_but_first = np.arange(5) != 0
        for s, K_k in enumerate(td.simplices):
            A_kv = np.append(td.points[K_k], ones_column, axis=1)
            squares = (A_kv[:, :-1]**2).sum(1)[:, np.newaxis]
            D_kv = np.append(squares, A_kv, axis=1)
            a = np.linalg.det(D_kv[:, all_but_first])
            # Simplices with a (numerically) vanishing determinant are
            # left out.
            if not np.abs(a) < 1e-10:
                for K in K_k:
                    members_k[K].append(s)

        # Change to numpy arrays:
        pts_k = [np.array(members, int) for members in members_k]

    with self.timer('neighbours'):
        # Nearest neighbours: all points of the simplices a point is in.
        neighbours_k = [np.unique(td.simplices[pts]) for pts in pts_k]

    # Distribute everything
    myterms_t = self.distribute_domain(list(args) + [list(range(nk))])

    with self.timer('eigenvalues'):
        # Store eigenvalues
        deps_tMk = None  # t for term
        shape = [len(domain_l) for domain_l in args]
        nterms = int(np.prod(shape))

        for t in range(nterms):
            arguments = np.unravel_index(t, shape) if shape else ()
            for K in range(nk):
                deps_M = -get_eigenvalues(bzk_kc[K], *arguments)
                if deps_tMk is None:
                    # Allocate lazily: the band dimension is only known
                    # after the first call.
                    deps_tMk = np.zeros([nterms] + list(deps_M.shape) + [nk], float)
                deps_tMk[t, :, K] = deps_M

    omega_w = x.get_data()

    # Calculate integrations weight
    pb = ProgressBar(self.fd)
    for _, arguments in pb.enumerate(myterms_t):
        K = arguments[-1]
        t = np.ravel_multi_index(arguments[:-1], shape) if shape else 0
        deps_Mk = deps_tMk[t]
        teteps_Mk = deps_Mk[:, neighbours_k[K]]
        # Frequency index window per band, from the smallest and largest
        # value among the neighbouring points.
        i0_M, i1_M = x.get_index_range(teteps_Mk.min(1), teteps_Mk.max(1))

        n_MG = get_matrix_element(bzk_kc[K], *arguments[:-1])
        for n_G, deps_k, i0, i1 in zip(n_MG, deps_Mk, i0_M, i1_M):
            if i0 == i1:
                continue

            W_w = self.get_kpoint_weight(K, deps_k, pts_k, omega_w[i0:i1], td)

            for iw, weight in enumerate(W_w):
                target_xx = out_wxx[i0 + iw]
                if self.blockcomm.size > 1:
                    myn_G = n_G[self.Ga:self.Gb].reshape((-1, 1))
                    gemm(weight, n_G.reshape((-1, 1)), myn_G, 1.0, target_xx, 'c')
                else:
                    czher(weight, n_G.conj(), target_xx)

    self.kncomm.sum(out_wxx)

    if self.blockcomm.size == 1:
        # Fill in upper/lower triangle also:
        nx = out_wxx.shape[1]
        below = np.tril_indices(nx, -1)
        above = below[::-1]
        for matrix_xx in out_wxx:
            matrix_xx[below] = matrix_xx[above].conj()