def calculate_residuals(self, kpt, wfs, hamiltonian, psit_xG, P_axi, eps_x,
                        R_xG, n_x=None, calculate_change=False):
    """Convert ``Ht*psit`` held in R_xG into the full residual, in place.

    On entry R_xG holds R=Ht*psit; on exit it holds R=H*psit-eps*S*psit.

    For every atom ``a`` in P_axi a coefficient array
    ``P_xi . dH_ii - (eps * P_xi) . dO_ii`` is built, handed to
    ``hamiltonian.xc.add_correction`` together with n_x and
    calculate_change, and finally added to R_xG through ``wfs.pt.add``.
    """
    # R <- R - eps * psit, vector by vector.
    for eps, psit_G, R_G in zip(eps_x, psit_xG, R_xG):
        axpy(-eps, psit_G, R_G)

    def atomic_coefficients(a, P_xi):
        dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
        dO_ii = hamiltonian.setups[a].dO_ii
        return (np.dot(P_xi, dH_ii) -
                np.dot(P_xi * eps_x[:, np.newaxis], dO_ii))

    c_axi = {a: atomic_coefficients(a, P_xi) for a, P_xi in P_axi.items()}

    hamiltonian.xc.add_correction(kpt, psit_xG, R_xG, P_axi, c_axi, n_x,
                                  calculate_change)
    wfs.pt.add(R_xG, c_axi, kpt.q)
def calculate_residuals(self, kpt, wfs, ham, psit, P, eps_n, R, C,
                        n_x=None, calculate_change=False):
    """Convert ``Ht*psit`` held in R into the full residual, in place.

    On entry R holds R=Ht*psit; on exit it holds R=H*psit-eps*S*psit.

    C is used as the output buffer of ``ham.dH(P, out=C)`` and is then
    reduced, atom block by atom block, by ``(eps * P) . dO_ii``.  The
    result is passed to ``ham.xc.add_correction`` and added to R via
    ``wfs.pt.add``.
    """
    # R <- R - eps * psit, band by band.
    for eps, psit_G, R_G in zip(eps_n, psit.array, R.array):
        axpy(-eps, psit_G, R_G)

    ham.dH(P, out=C)
    for a, I1, I2 in P.indices:
        block = slice(I1, I2)
        dS_ii = ham.setups[a].dO_ii
        scaled_P = (P.array[..., block].T * eps_n).T
        C.array[..., block] -= np.dot(scaled_P, dS_ii)

    # Plain dicts are rebuilt for each consumer, as separate objects.
    ham.xc.add_correction(kpt, psit.array, R.array,
                          dict(P.items()), dict(C.items()),
                          n_x, calculate_change)
    wfs.pt.add(R.array, dict(C.items()), kpt.q)
def __call__(self, residuals, kpt, ekin=None):
    """Apply the three-level restrict/correct/interpolate cycle.

    Works on the first ``len(residuals)`` slots of the scratch arrays and
    returns the level-0 correction buffer (a view into ``self.scratch0``).
    ``ekin`` is accepted but not used.
    """
    nbands = len(residuals)
    phase_cd = kpt.phase_cd
    step = self.step
    mid_step = 4 * step
    coarse_step = 16 * step

    corr0, work0 = self.scratch0[:, :nbands]
    res1, corr1, work1 = self.scratch1[:, :nbands]
    # The middle slot of scratch2 is unpacked but never written to: the
    # level-2 correction lives in a freshly allocated array.
    res2, _, work2 = self.scratch2[:, :nbands]

    # Level 0 -> level 1
    self.restrictor0(-residuals, res1, phase_cd)
    corr1[:] = mid_step * res1
    self.kin1.apply(corr1, work1, phase_cd)
    work1 -= res1

    # Level 1 -> level 2
    self.restrictor1(work1, res2, phase_cd)
    corr2 = coarse_step * res2
    self.kin2.apply(corr2, work2, phase_cd)
    work2 -= res2
    corr2 -= coarse_step * work2

    # Level 2 -> level 1
    self.interpolator2(corr2, work1, phase_cd)
    corr1 -= work1
    self.kin1.apply(corr1, work1, phase_cd)
    work1 -= res1
    corr1 -= mid_step * work1

    # Level 1 -> level 0
    self.interpolator1(-corr1, corr0, phase_cd)
    self.kin0.apply(corr0, work0, phase_cd)
    work0 -= residuals
    axpy(-step, work0, corr0)  # corr0 -= step * work0
    corr0 *= -1.0
    return corr0
def multi_zaxpy(self, a, x, y, nvec):
    """Perform ``y[i] += a * x[i]`` (complex axpy) for the first nvec vectors.

    ``a`` is either one scalar applied to every vector or an indexable
    sequence with one coefficient per vector.  The coefficient is promoted
    to complex before being handed to ``axpy``.

    Any real or complex number counts as a scalar, including ``int`` and
    NumPy scalar types; previously only ``float``/``complex`` did, so an
    integer coefficient fell through to ``a[i]`` and raised.
    """
    import numbers

    if isinstance(a, numbers.Number):
        for i in range(nvec):
            axpy(a * (1 + 0J), x[i], y[i])
    else:
        for i in range(nvec):
            axpy(a[i] * (1.0 + 0.0J), x[i], y[i])
def multi_zaxpy(self, a, x, y, nvec):
    """Accumulate ``a * x[i]`` into ``y[i]`` for ``i < nvec`` via ``axpy``.

    ``a`` may be a single number (shared by all vectors) or a sequence
    indexed by vector.  Coefficients are multiplied by ``1 + 0j`` so that
    ``axpy`` always receives a complex factor.

    Scalars are recognised through ``numbers.Number`` so that ``int`` and
    NumPy scalar coefficients take the scalar path instead of failing on
    ``a[i]`` as they did with the former ``(float, complex)`` check.
    """
    import numbers

    a_is_scalar = isinstance(a, numbers.Number)
    for i in range(nvec):
        if a_is_scalar:
            axpy(a * (1 + 0J), x[i], y[i])
        else:
            axpy(a[i] * (1.0 + 0.0J), x[i], y[i])
def calculate_gga(self, e_g, nt_sg, v_sg, sigma_xg, dedsigma_xg):
    """Evaluate the kernel with a kinetic energy density and set self.ekin.

    The coarse-grid kinetic energy density is taken from
    ``self.wfs.calculate_kinetic_energy_density()``.  If that raises
    RuntimeError, a von Weizsaecker estimate built from the gradient of
    nt_sg is used instead.  ``self.tauct_G / nspins`` is added per spin,
    the result is interpolated and passed to ``self.kernel.calculate``.

    Side effects: sets ``self.dedtaut_sG`` (restricted derivative returned
    by the kernel) and ``self.ekin``.
    """
    wfs = self.wfs
    try:
        taut_sG = wfs.calculate_kinetic_energy_density()
    except RuntimeError:
        nspins = wfs.nspins
        # Initialize with von Weizsaecker kinetic energy density
        taut_sG = wfs.gd.empty((nspins))
        gradn_g = self.gd.empty()
        for s in range(nspins):
            tauw_g = self.gd.zeros()
            for axis in range(3):
                self.grad_v[axis](nt_sg[s], gradn_g)
                axpy(0.125, gradn_g**2, tauw_g)
            # Divide by the density only where it is safely non-zero.
            inverse_nt_g = 0. * tauw_g
            dense = nt_sg[s] > 1e-7
            inverse_nt_g[dense] = 1.0 / nt_sg[s][dense]
            tauw_g *= inverse_nt_g
            self.restrict(tauw_g, taut_sG[s])

    taut_sg = np.empty_like(nt_sg)
    for coarse_G, fine_g in zip(taut_sG, taut_sg):
        coarse_G += 1.0 / self.wfs.nspins * self.tauct_G
        self.interpolate(coarse_G, fine_g)

    dedtaut_sg = np.empty_like(nt_sg)
    self.kernel.calculate(e_g, nt_sg, v_sg, sigma_xg, dedsigma_xg,
                          taut_sg, dedtaut_sg)

    self.dedtaut_sG = self.wfs.gd.empty(self.wfs.nspins)
    self.ekin = 0.0
    for s in range(self.wfs.nspins):
        self.restrict(dedtaut_sg[s], self.dedtaut_sG[s])
        valence_taut_G = taut_sG[s] - self.tauct_G / self.wfs.nspins
        self.ekin -= self.wfs.gd.integrate(
            self.dedtaut_sG[s] * valence_taut_G)
def apply_orbital_dependent_hamiltonian(self, kpt, psit_xG, Htpsit_xG,
                                        dH_asp):
    """Add the tau-dependent term to Htpsit_xG, in place.

    For each wave function and each of the three directions the gradient
    operator is applied, the result is multiplied by
    ``self.dedtaut_sG[kpt.s]``, the gradient is applied again, and -0.5
    times that is accumulated into the matching Htpsit_xG entry.
    ``dH_asp`` is not used.
    """
    phase_cd = kpt.phase_cd
    work_G = self.wfs.gd.empty(dtype=psit_xG.dtype)
    for psit_G, Htpsit_G in zip(psit_xG, Htpsit_xG):
        for axis in range(3):
            gradient = self.taugrad_v[axis]
            gradient(psit_G, work_G, phase_cd)
            gradient(self.dedtaut_sG[kpt.s] * work_G, work_G, phase_cd)
            axpy(-0.5, work_G, Htpsit_G)
def calculate_kinetic_energy_density(self):
    """Return the kinetic energy density summed over k-points and bands.

    Accumulates ``0.5 * f * |d psit / dx_v|**2`` for the three directions
    into the spin channel of each k-point, then sums the result over
    ``self.kd.comm`` and ``self.band_comm``.

    Raises RuntimeError when the first k-point has no wave functions or
    only a FileReference to them.
    """
    if self.taugrad_v is None:
        self.taugrad_v = [
            Gradient(self.gd, axis, n=3, dtype=self.dtype).apply
            for axis in range(3)
        ]

    first_kpt = self.kpt_u[0]
    assert not hasattr(first_kpt, 'c_on')
    if first_kpt.psit_nG is None:
        raise RuntimeError('No wavefunctions yet')
    if isinstance(first_kpt.psit_nG, FileReference):
        # XXX initialize
        raise RuntimeError('Wavefunctions have not been initialized.')

    taut_sG = self.gd.zeros(self.nspins)
    dpsit_G = self.gd.empty(dtype=self.dtype)
    for kpt in self.kpt_u:
        for occupation, psit_G in zip(kpt.f_n, kpt.psit_nG):
            for axis in range(3):
                self.taugrad_v[axis](psit_G, dpsit_G, kpt.phase_cd)
                axpy(0.5 * occupation, abs(dpsit_G)**2, taut_sG[kpt.s])

    self.kd.comm.sum(taut_sG)
    self.band_comm.sum(taut_sG)
    return taut_sG
def __call__(self, residuals, kpt, ekin=None):
    """Run one restrict / correct / interpolate sweep over three levels.

    Only the first ``len(residuals)`` entries of the scratch buffers are
    touched.  The returned array is the level-0 correction, which is a
    view into ``self.scratch0``.  ``ekin`` is ignored.
    """
    n = len(residuals)  # number of bands
    ph = kpt.phase_cd
    h = self.step
    h4 = 4 * h
    h16 = 16 * h

    x0, y0 = self.scratch0[:, :n]
    b1, x1, y1 = self.scratch1[:, :n]
    # scratch2's middle slot is unpacked for shape only; the coarsest
    # correction is held in a new array.
    b2, _, y2 = self.scratch2[:, :n]

    # Going down: 0 -> 1 -> 2.
    self.restrictor0(-residuals, b1, ph)
    x1[:] = h4 * b1
    self.kin1.apply(x1, y1, ph)
    y1 -= b1
    self.restrictor1(y1, b2, ph)
    x2 = h16 * b2
    self.kin2.apply(x2, y2, ph)
    y2 -= b2
    x2 -= h16 * y2

    # Coming back up: 2 -> 1 -> 0.
    self.interpolator2(x2, y1, ph)
    x1 -= y1
    self.kin1.apply(x1, y1, ph)
    y1 -= b1
    x1 -= h4 * y1
    self.interpolator1(-x1, x0, ph)
    self.kin0.apply(x0, y0, ph)
    y0 -= residuals
    axpy(-h, y0, x0)  # x0 -= h * y0
    x0 *= -1.0
    return x0
def calculate_sigma(gd, grad_v, n_sg):
    r"""Calculate sigma(r) and grad n(r).

    Returns ``(sigma_xg, gradn_svg)`` where ``gradn_svg[s, v]`` is the
    derivative of ``n_sg[s]`` along direction ``v`` as produced by
    ``grad_v[v]``.

    For a single spin, ``sigma_xg`` has one element::

        sigma(r) = |grad n(r)|^2

    With two spins, ``sigma_xg`` has the three elements::

        sigma_0(r) = |grad n_up(r)|^2
        sigma_1(r) = grad n_up(r) . grad n_dn(r)
        sigma_2(r) = |grad n_dn(r)|^2

    The docstring is a raw string with plain-text formulas, so it contains
    no backslash escape sequences.
    """
    nspins = len(n_sg)
    gradn_svg = gd.empty((nspins, 3))
    sigma_xg = gd.zeros(nspins * 2 - 1)
    for v in range(3):
        for s in range(nspins):
            grad_v[v](n_sg[s], gradn_svg[s, v])
            axpy(1.0, gradn_svg[s, v]**2, sigma_xg[2 * s])
        # The cross term needs both spin gradients for this direction, so
        # it is added only after the spin loop has filled them in.
        if nspins == 2:
            axpy(1.0, gradn_svg[0, v] * gradn_svg[1, v], sigma_xg[1])
    return sigma_xg, gradn_svg
def apply_mgga_orbital_dependent_hamiltonian(self, kpt, psit_xG, Htpsit_xG,
                                             dH_asp, dedtaut_G):
    """Add the tau-dependent term to Htpsit_xG using grid gradients.

    For each wave function and direction: apply ``self.taugrad_v[v]``,
    multiply by dedtaut_G, apply the gradient again and accumulate -0.5
    times the result into the corresponding Htpsit_xG entry.  ``dH_asp``
    is not used.
    """
    phase_cd = kpt.phase_cd
    buffer_G = self.gd.empty(dtype=psit_xG.dtype)
    for psit_G, Htpsit_G in zip(psit_xG, Htpsit_xG):
        for axis in range(3):
            gradient = self.taugrad_v[axis]
            gradient(psit_G, buffer_G, phase_cd)
            gradient(dedtaut_G * buffer_G, buffer_G, phase_cd)
            axpy(-0.5, buffer_G, Htpsit_G)
def apply_mgga_orbital_dependent_hamiltonian(self, kpt, psit_xG, Htpsit_xG,
                                             dH_asp, dedtaut_R):
    """Add the tau-dependent term to Htpsit_xG using FFTs.

    The vectors used are ``G_Qv[Q_qG[q]] + K_qv[q]`` for ``q = kpt.q``.
    For each direction the wave function is multiplied by ``1j * G_v``,
    inverse transformed, multiplied by dedtaut_R, transformed back and
    multiplied by ``1j * G_v`` again; -0.5 times that is accumulated into
    Htpsit_xG.  ``dH_asp`` is not used.
    """
    q = kpt.q
    pd = self.pd
    G_Gv = pd.G_Qv[pd.Q_qG[q]] + pd.K_qv[q]
    for psit_G, Htpsit_G in zip(psit_xG, Htpsit_xG):
        for axis in range(3):
            iG_G = 1j * G_Gv[:, axis]
            dpsit_R = pd.ifft(iG_G * psit_G, q)
            axpy(-0.5, iG_G * pd.fft(dedtaut_R * dpsit_R, q), Htpsit_G)
def apply_mgga_orbital_dependent_hamiltonian(self, kpt, psit_xG, Htpsit_xG,
                                             dH_asp, dedtaut_R):
    """Add the tau-dependent term to Htpsit_xG using FFTs.

    The vectors come from ``self.pd.get_reciprocal_vectors(q=kpt.q)``.
    Per direction: multiply by ``1j * G_v``, inverse transform, multiply
    by dedtaut_R, transform back, multiply by ``1j * G_v`` again and
    accumulate -0.5 times the result into Htpsit_xG.  ``dH_asp`` is not
    used.
    """
    q = kpt.q
    G_Gv = self.pd.get_reciprocal_vectors(q=q)
    for psit_G, Htpsit_G in zip(psit_xG, Htpsit_xG):
        for axis in range(3):
            iG_G = 1j * G_Gv[:, axis]
            dpsit_R = self.pd.ifft(iG_G * psit_G, q)
            term_G = iG_G * self.pd.fft(dedtaut_R * dpsit_R, q)
            axpy(-0.5, term_G, Htpsit_G)
def get_pbe_g(self, n_g, index=None):
    """Return fxc_g built from n_g and the stored density gradient.

    With ``index`` given, each gradient component is replaced by the
    average of ``self.gradn_vg[v]`` and its flattened values picked at
    ``index``.  Only the terms that come from varying the density are
    included; the gradient-variation terms exist only as commented-out
    code at the end of the function.
    """
    if index is not None:
        gradn_vg = self.calc.density.gd.empty(3)
        for v in range(3):
            shifted_g = self.gradn_vg[v].flatten()[index]
            gradn_vg[v] = (self.gradn_vg[v] + shifted_g) / 2
    else:
        gradn_vg = self.gradn_vg

    kf_g = (3. * np.pi**2 * n_g)**(1 / 3.)

    # Squared reduced gradient.
    s2_g = np.zeros_like(n_g)
    for v in range(3):
        axpy(1.0, gradn_vg[v]**2, s2_g)
    s2_g /= 4 * kf_g**2 * n_g**2

    # Density-only part and its first two derivatives w.r.t. n.
    e_g = self.A_x * n_g**(4 / 3.)
    v_g = (4 / 3.) * e_g / n_g
    f_g = (1 / 3.) * v_g / n_g

    # Enhancement factor and its derivatives w.r.t. n.
    kappa = 0.804
    mu = 0.2195149727645171
    denom_g = (1 + mu * s2_g / kappa)
    F_g = 1. + kappa - kappa / denom_g
    Fn_g = -mu / denom_g**2 * 8 * s2_g / (3 * n_g)
    Fnn_g = -11 * Fn_g / (3 * n_g) - 2 * Fn_g**2 / kappa

    fxc_g = f_g * F_g
    fxc_g += 2 * v_g * Fn_g
    fxc_g += e_g * Fnn_g

    # Contributions from varying the gradient
    # Fgrad_vg = np.zeros_like(gradn_vg)
    # Fngrad_vg = np.zeros_like(gradn_vg)
    # for v in range(3):
    #     axpy(1.0, mu / den_g**2 * gradn_vg[v] / (2 * kf_g**2 * n_g**2),
    #          Fgrad_vg[v])
    #     axpy(-8.0, Fgrad_vg[v] / (3 * n_g), Fngrad_vg[v])
    #     axpy(-2.0, Fgrad_vg[v] * Fn_g / kappa, Fngrad_vg[v])
    # tmp = np.zeros_like(fxc_g)
    # tmp1 = np.zeros_like(fxc_g)
    # for v in range(3):
    #     self.grad_v[v](Fgrad_vg[v], tmp)
    #     axpy(-2.0, tmp * v_g, fxc_g)
    #     for u in range(3):
    #         self.grad_v[u](Fgrad_vg[u] * tmp, tmp1)
    #         axpy(-4.0/kappa, tmp1 * e_g, fxc_g)
    #     self.grad_v[v](Fngrad_vg[v], tmp)
    #     axpy(-2.0, tmp * e_g, fxc_g)
    # self.laplace(mu / den_g**2 / (2 * kf_g**2 * n_g**2), tmp)
    # axpy(1.0, tmp * e_g, fxc_g)

    return fxc_g
def calculate_sigma(self, n_sg):
    """Return ``(sigma_xg, gradn_svg)`` for the densities n_sg.

    ``gradn_svg[s, v]`` is filled by ``self.grad_v[v]`` applied to
    ``n_sg[s]``.  ``sigma_xg[2 * s]`` is the squared gradient of spin
    ``s``; with two spins ``sigma_xg[1]`` holds the dot product of the
    two spin gradients.
    """
    nspins = len(n_sg)
    spin_polarized = nspins == 2
    gradn_svg = self.gd.empty((nspins, 3))
    sigma_xg = self.gd.zeros(nspins * 2 - 1)

    for axis in range(3):
        gradient = self.grad_v[axis]
        for spin in range(nspins):
            component_g = gradn_svg[spin, axis]
            gradient(n_sg[spin], component_g)
            axpy(1.0, component_g ** 2, sigma_xg[2 * spin])
        if spin_polarized:
            # Both spin components for this axis are available now.
            cross_g = gradn_svg[0, axis] * gradn_svg[1, axis]
            axpy(1.0, cross_g, sigma_xg[1])

    return sigma_xg, gradn_svg
def calculate_sigma(self, n_sg):
    """Compute density gradients and their squared-norm combinations.

    Returns ``(sigma_xg, gradn_svg)``.  ``gradn_svg[s, v]`` is written by
    ``self.grad_v[v]``; ``sigma_xg`` has ``2 * nspins - 1`` entries: the
    squared gradient of each spin at index ``2 * s`` and, for two spins,
    the mixed product at index 1.
    """
    nspins = len(n_sg)
    gradn_svg = self.gd.empty((nspins, 3))
    sigma_xg = self.gd.zeros(nspins * 2 - 1)

    for v in range(3):
        apply_gradient = self.grad_v[v]
        for s, n_g in enumerate(n_sg):
            apply_gradient(n_g, gradn_svg[s, v])
            axpy(1.0, gradn_svg[s, v]**2, sigma_xg[2 * s])
        if nspins != 2:
            continue
        # Mixed up/down term, added once per direction.
        axpy(1.0, gradn_svg[0, v] * gradn_svg[1, v], sigma_xg[1])

    return sigma_xg, gradn_svg
def calculate_lda(self, e_g, n_sg, v_sg):
    """Evaluate the functional and add the gradient correction to v_sg.

    Sigma and the density gradients come from ``self.calculate_sigma``;
    ``self.calculate_gga`` fills dedsigma_xg.  Afterwards v_sg is updated
    in place with ``-2 * grad_v(dedsigma[2s] * grad n_s)`` and, for two
    spins, ``-1 * grad_v(dedsigma[1] * grad n_s)`` on the opposite spin.
    """
    nspins = len(n_sg)
    spin_polarized = nspins == 2
    sigma_xg, gradn_svg = self.calculate_sigma(n_sg)
    dedsigma_xg = self.gd.empty(nspins * 2 - 1)
    self.calculate_gga(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)

    # sigma_xg[0] is no longer needed and is reused as a work buffer.
    work_g = sigma_xg[0]
    for axis in range(3):
        gradient = self.grad_v[axis]
        for spin in range(nspins):
            gradient(dedsigma_xg[2 * spin] * gradn_svg[spin, axis], work_g)
            axpy(-2.0, work_g, v_sg[spin])
            if spin_polarized:
                gradient(dedsigma_xg[1] * gradn_svg[spin, axis], work_g)
                axpy(-1.0, work_g, v_sg[1 - spin])
def calculate_kinetic_energy_density(self, tauct, grad_v):
    """Return the kinetic energy density summed over k-points and bands.

    ``grad_v[v]`` is applied to every wave function and
    ``0.5 * f * |result|**2`` is accumulated into the k-point's spin
    channel.  The array is then summed over ``self.kpt_comm`` and
    ``self.band_comm``.  ``tauct`` is not used.

    Raises RuntimeError if the first k-point only holds a
    TarFileReference.
    """
    first_kpt = self.kpt_u[0]
    assert not hasattr(first_kpt, 'c_on')
    if isinstance(first_kpt.psit_nG, TarFileReference):
        raise RuntimeError('Wavefunctions have not been initialized.')

    taut_sG = self.gd.zeros(self.nspins)
    dpsit_G = self.gd.empty(dtype=self.dtype)
    for kpt in self.kpt_u:
        for occupation, psit_G in zip(kpt.f_n, kpt.psit_nG):
            for axis in range(3):
                grad_v[axis](psit_G, dpsit_G, kpt.phase_cd)
                axpy(0.5 * occupation, abs(dpsit_G)**2, taut_sG[kpt.s])

    self.kpt_comm.sum(taut_sG)
    self.band_comm.sum(taut_sG)
    return taut_sG
def calculate_pseudo_density(self, wfs):
    """Calculate nt_sG from scratch.

    ``self.nt_sG`` is rebuilt from the squared wave functions, weighted by
    ``f_n - wocc_sn[s] + wunocc_sn[s]``, and ``self.nct_G`` is added to
    the first ``self.nspins`` channels.  When this object has more spin
    channels than ``wfs.kd.nspins``, every k-point contributes to every
    channel with its occupations halved.
    """
    nvspins = wfs.kd.nspins
    npspins = self.nspins
    more_spins_here = npspins > nvspins
    occupation_divisor = 1. + int(more_spins_here)

    self.nt_sG = self.gd.zeros(npspins)
    for s in range(npspins):
        for kpt in wfs.kpt_u:
            if s != kpt.s and not more_spins_here:
                continue
            f_n = kpt.f_n / occupation_divisor
            weight_n = f_n - self.wocc_sn[s] + self.wunocc_sn[s]
            for weight, psit_G in zip(weight_n, kpt.psit_nG):
                axpy(weight, psit_G**2, self.nt_sG[s])

    self.nt_sG[:self.nspins] += self.nct_G
def calculate_pseudo_density(self, wfs):
    """Calculate nt_sG from scratch.

    ``self.nt_sG`` is rebuilt from the squared wave functions, weighted by
    ``f_n - wocc_sn[s] + wunocc_sn[s]``, after which ``self.nct_G`` is
    added to the whole array.  When this object has more spin channels
    than ``wfs.kd.nspins``, every k-point contributes to every channel
    with its occupations halved.
    """
    nvspins = wfs.kd.nspins
    npspins = self.nspins
    split_spins = npspins > nvspins
    divisor = 1. + int(split_spins)

    self.nt_sG = self.gd.zeros(npspins)
    for s in range(npspins):
        for kpt in wfs.kpt_u:
            if not (s == kpt.s or split_spins):
                continue
            f_n = kpt.f_n / divisor
            effective_f_n = f_n - self.wocc_sn[s] + self.wunocc_sn[s]
            for f, psit_G in zip(effective_f_n, kpt.psit_nG):
                axpy(f, psit_G ** 2, self.nt_sG[s])

    self.nt_sG += self.nct_G
def calculate_kinetic_energy_density(self):
    """Return the kinetic energy density, or None without wave functions.

    Returns None when the first k-point's ``psit_nG`` is not a NumPy
    array.  Otherwise accumulates ``0.5 * f * |d psit / dx_v|**2`` for the
    three directions into each k-point's spin channel and sums the result
    over ``self.kptband_comm``.
    """
    if self.taugrad_v is None:
        self.taugrad_v = [
            Gradient(self.gd, axis, n=3, dtype=self.dtype).apply
            for axis in range(3)
        ]

    first_kpt = self.kpt_u[0]
    assert not hasattr(first_kpt, 'c_on')
    if not isinstance(first_kpt.psit_nG, np.ndarray):
        return None

    taut_sG = self.gd.zeros(self.nspins)
    dpsit_G = self.gd.empty(dtype=self.dtype)
    for kpt in self.kpt_u:
        for occupation, psit_G in zip(kpt.f_n, kpt.psit_nG):
            for axis in range(3):
                self.taugrad_v[axis](psit_G, dpsit_G, kpt.phase_cd)
                axpy(0.5 * occupation, abs(dpsit_G)**2, taut_sG[kpt.s])

    self.kptband_comm.sum(taut_sG)
    return taut_sG
def add_realspace_orbital_to_density(self, nt_G, psit_G):
    """Add ``|psit_G|**2`` to nt_G in place.

    Real orbitals are squared directly; complex ones contribute the
    squares of their real and imaginary parts.  Any other dtype trips the
    assertion.
    """
    if psit_G.dtype == float:
        axpy(1.0, psit_G**2, nt_G)
        return

    assert psit_G.dtype == complex
    for part_G in (psit_G.real, psit_G.imag):
        axpy(1.0, part_G**2, nt_G)
def add_gradient_correction(grad_v, gradn_svg, sigma_xg, dedsigma_xg, v_sg):
    r"""Add gradient correction to potential.

    ::

        v_xc(r) += -2 div( de(r)/dsigma(r) * grad n(r) )

    For each spin ``s`` the term built from ``dedsigma_xg[2 * s]`` is
    added to ``v_sg[s]`` with weight -2.  With two spins, the term built
    from the mixed derivative ``dedsigma_xg[1]`` is added to the opposite
    spin ``v_sg[1 - s]`` with weight -1.

    Adds arbitrary data to sigma_xg.  Be sure to pass a copy if you need
    sigma_xg after calling this function.

    The docstring is a raw string with a plain-text formula, so it
    contains no backslash escapes or line continuations.
    """
    nspins = len(v_sg)
    # vv_g is a calculation buffer.  Its contents will be corrupted.
    vv_g = sigma_xg[0]
    for v in range(3):
        for s in range(nspins):
            grad_v[v](dedsigma_xg[2 * s] * gradn_svg[s, v], vv_g)
            axpy(-2.0, vv_g, v_sg[s])
            if nspins == 2:
                grad_v[v](dedsigma_xg[1] * gradn_svg[s, v], vv_g)
                axpy(-1.0, vv_g, v_sg[1 - s])
def calculate_kinetic_energy_density(self):
    """Return the kinetic energy density summed over k-points and bands.

    Accumulates ``0.5 * f * |d psit / dx_v|**2`` for the three directions
    into the spin channel of each k-point, then sums the result over
    ``self.kpt_comm`` and ``self.band_comm``.

    Raises RuntimeError when the first k-point has no wave functions or
    only a FileReference to them.
    """
    if self.taugrad_v is None:
        self.taugrad_v = [
            Gradient(self.gd, direction, n=3, dtype=self.dtype).apply
            for direction in range(3)]

    reference_kpt = self.kpt_u[0]
    assert not hasattr(reference_kpt, 'c_on')
    if reference_kpt.psit_nG is None:
        raise RuntimeError('No wavefunctions yet')
    if isinstance(reference_kpt.psit_nG, FileReference):
        # XXX initialize
        raise RuntimeError('Wavefunctions have not been initialized.')

    taut_sG = self.gd.zeros(self.nspins)
    dpsit_G = self.gd.empty(dtype=self.dtype)
    for kpt in self.kpt_u:
        for f, psit_G in zip(kpt.f_n, kpt.psit_nG):
            for direction in range(3):
                self.taugrad_v[direction](psit_G, dpsit_G, kpt.phase_cd)
                axpy(0.5 * f, abs(dpsit_G)**2, taut_sG[kpt.s])

    for communicator in (self.kpt_comm, self.band_comm):
        communicator.sum(taut_sG)
    return taut_sG
def __call__(self, residuals, kpt, ekin=None, out=None):
    """Apply the three-level restrict/correct/interpolate cycle.

    A 3-dimensional ``residuals`` is treated as a single band: it is
    wrapped in a leading axis, processed, and unwrapped again.  When
    ``out`` is given the level-0 correction is written there; otherwise a
    view into ``self.scratch0`` is used.  The correction array is
    returned.  ``ekin`` is accepted but not used.
    """
    if residuals.ndim == 3:
        forwarded = {} if out is None else {'out': out[np.newaxis]}
        return self.__call__(residuals[np.newaxis], kpt, **forwarded)[0]

    nbands = len(residuals)
    phase_cd = kpt.phase_cd
    step = self.step
    mid_step = 4 * step
    coarse_step = 16 * step

    if out is not None:
        corr0 = out
        work0 = self.scratch0[0, :nbands]
    else:
        corr0, work0 = self.scratch0[:, :nbands]
    res1, corr1, work1 = self.scratch1[:, :nbands]
    # The middle slot of scratch2 is unpacked but never written to: the
    # level-2 correction lives in a freshly allocated array.
    res2, _, work2 = self.scratch2[:, :nbands]

    # Level 0 -> level 1
    self.restrictor0(-residuals, res1, phase_cd)
    corr1[:] = mid_step * res1
    self.kin1.apply(corr1, work1, phase_cd)
    work1 -= res1

    # Level 1 -> level 2
    self.restrictor1(work1, res2, phase_cd)
    corr2 = coarse_step * res2
    self.kin2.apply(corr2, work2, phase_cd)
    work2 -= res2
    corr2 -= coarse_step * work2

    # Level 2 -> level 1
    self.interpolator2(corr2, work1, phase_cd)
    corr1 -= work1
    self.kin1.apply(corr1, work1, phase_cd)
    work1 -= res1
    corr1 -= mid_step * work1

    # Level 1 -> level 0
    self.interpolator1(-corr1, corr0, phase_cd)
    self.kin0.apply(corr0, work0, phase_cd)
    work0 -= residuals
    axpy(-step, work0, corr0)  # corr0 -= step * work0
    corr0 *= -1.0
    return corr0
def calculate_lda(self, e_g, n_sg, v_sg):
    """Evaluate the functional and add the gradient correction to v_sg.

    First the density gradients and sigma are built with ``self.grad_v``,
    then ``self.calculate_gga`` fills dedsigma_xg, and finally v_sg is
    updated in place with ``-2 * grad_v(dedsigma[2s] * grad n_s)`` plus,
    for two spins, ``-1 * grad_v(dedsigma[1] * grad n_s)`` on the
    opposite spin.
    """
    nspins = len(n_sg)
    spin_polarized = nspins == 2
    gradn_svg = self.gd.empty((nspins, 3))
    sigma_xg = self.gd.zeros(nspins * 2 - 1)
    dedsigma_xg = self.gd.empty(nspins * 2 - 1)

    # Gradients and sigma.
    for axis in range(3):
        gradient = self.grad_v[axis]
        for spin in range(nspins):
            gradient(n_sg[spin], gradn_svg[spin, axis])
            axpy(1.0, gradn_svg[spin, axis]**2, sigma_xg[2 * spin])
        if spin_polarized:
            axpy(1.0, gradn_svg[0, axis] * gradn_svg[1, axis], sigma_xg[1])

    self.calculate_gga(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)

    # sigma_xg[0] is no longer needed and is reused as a work buffer.
    work_g = sigma_xg[0]
    for axis in range(3):
        gradient = self.grad_v[axis]
        for spin in range(nspins):
            gradient(dedsigma_xg[2 * spin] * gradn_svg[spin, axis], work_g)
            axpy(-2.0, work_g, v_sg[spin])
            if spin_polarized:
                gradient(dedsigma_xg[1] * gradn_svg[spin, axis], work_g)
                axpy(-1.0, work_g, v_sg[1 - spin])
def add_to_density_from_k_point_with_occupation(self, nt_sG, kpt, f_n):
    """Add ``sum_n f_n |psit_n|**2`` of one k-point to its spin density.

    Used in calculation of response part of GLLB-potential.  If the
    k-point carries a ``c_on`` attribute, the extra density built from
    ``kpt.ne_o`` and ``kpt.c_on`` is added as well (delta-scf hack).
    """
    nt_G = nt_sG[kpt.s]
    real_valued = self.dtype == float
    for f, psit_G in zip(f_n, kpt.psit_nG):
        if real_valued:
            axpy(f, psit_G**2, nt_G)
        else:
            axpy(f, psit_G.real**2, nt_G)
            axpy(f, psit_G.imag**2, nt_G)

    # Hack used in delta-scf calculations:
    if not hasattr(kpt, 'c_on'):
        return

    assert self.bd.comm.size == 1
    nbands = self.bd.mynbands
    d_nn = np.zeros((nbands, nbands), dtype=complex)
    for ne, c_n in zip(kpt.ne_o, kpt.c_on):
        d_nn += ne * np.outer(c_n.conj(), c_n)

    for d_n, bra_G in zip(d_nn, kpt.psit_nG):
        for d, ket_G in zip(d_n, kpt.psit_nG):
            if abs(d) > 1.e-12:
                nt_G += (bra_G.conj() * d * ket_G).real
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do a conjugate gradient iterations for the kpoint

    After a subspace diagonalization and a residual calculation, each
    band is improved by up to ``self.niter`` conjugate-gradient steps.
    The eigenvalue, wave function and projections of ``kpt`` are updated
    in place.  Returns the weighted sum of the squared residual norms.
    """
    niter = self.niter
    phi_G = self.phi_G
    phi_old_G = self.phi_old_G

    self.subspace_diagonalize(hamiltonian, wfs, kpt)

    R_nG = wfs.matrixoperator.suggest_temporary_buffer()
    # NOTE(review): Htphi_G aliases R_nG[0], which is also R_G for n == 0
    # in the band loop below -- confirm this sharing is intended.
    Htphi_G = R_nG[0]

    R_nG[:] = self.Htpsit_nG
    self.timer.start('Residuals')
    self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)
    self.timer.stop('Residuals')

    self.timer.start('CG')
    vt_G = hamiltonian.vt_sG[kpt.s]

    total_error = 0.0
    for n in range(self.nbands):
        R_G = R_nG[n]
        Htpsit_G = self.Htpsit_nG[n]
        gamma_old = 1.0
        phi_old_G[:] = 0.0
        # Squared residual norm, summed over the domain communicator.
        error = self.gd.comm.sum(np.vdot(R_G, R_G).real)
        for nit in range(niter):
            if error < self.tolerance / self.nbands:
                # print >> self.f, "cg:iters", n, nit
                break

            pR_G = self.preconditioner(R_G, kpt)

            # New search direction
            gamma = self.gd.comm.sum(np.vdot(pR_G, R_G).real)
            phi_G[:] = -pR_G - gamma/gamma_old * phi_old_G
            gamma_old = gamma
            phi_old_G[:] = phi_G[:]

            # Calculate projections
            P2_ai = wfs.pt.dict()
            wfs.pt.integrate(phi_G, P2_ai, kpt.q)

            # Orthonormalize phi_G to all bands
            self.timer.start('CG: orthonormalize')
            for nn in range(self.nbands):
                # Overlap = grid part plus the dO_ii-weighted projector
                # part.
                overlap = np.vdot(kpt.psit_nG[nn], phi_G) * self.gd.dv
                for a, P2_i in P2_ai.items():
                    P_i = kpt.P_ani[a][nn]
                    dO_ii = wfs.setups[a].dO_ii
                    overlap += np.vdot(P_i, np.inner(dO_ii, P2_i))
                overlap = self.gd.comm.sum(overlap)
                # phi_G -= overlap * kpt.psit_nG[nn]
                axpy(-overlap, kpt.psit_nG[nn], phi_G)
                for a, P2_i in P2_ai.items():
                    P_i = kpt.P_ani[a][nn]
                    P2_i -= P_i * overlap

            # Normalize phi_G and its projections with the same metric.
            norm = np.vdot(phi_G, phi_G) * self.gd.dv
            for a, P2_i in P2_ai.items():
                dO_ii = wfs.setups[a].dO_ii
                norm += np.vdot(P2_i, np.inner(dO_ii, P2_i))
            norm = self.gd.comm.sum(norm.real)
            phi_G /= sqrt(norm)
            for P2_i in P2_ai.values():
                P2_i /= sqrt(norm)
            self.timer.stop('CG: orthonormalize')

            #find optimum linear combination of psit_G and phi_G
            an = kpt.eps_n[n]
            wfs.kin.apply(phi_G, Htphi_G, kpt.phase_cd)
            Htphi_G += phi_G * vt_G
            # b = <phi|H|psit>, c = <phi|H|phi>, including the dH_ii
            # terms.
            b = np.vdot(phi_G, Htpsit_G) * self.gd.dv
            c = np.vdot(phi_G, Htphi_G) * self.gd.dv
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                b += dot(P2_i, dot(dH_ii, P_i.conj()))
                c += dot(P2_i, dot(dH_ii, P2_i.conj()))
            b = self.gd.comm.sum(b.real)
            c = self.gd.comm.sum(c.real)

            # Rotation angle giving a stationary value of the energy in
            # the two-dimensional space spanned by psit and phi.
            theta = 0.5 * atan2(2 * b, an - c)
            enew = (an * cos(theta)**2 +
                    c * sin(theta)**2 +
                    b * sin(2.0 * theta))
            # theta can correspond either minimum or maximum
            if ( enew - kpt.eps_n[n] ) > 0.0: #we were at maximum
                theta += pi / 2.0
                enew = (an * cos(theta)**2 +
                        c * sin(theta)**2 +
                        b * sin(2.0 * theta))

            # Rotate eigenvalue, wave function and projections.
            kpt.eps_n[n] = enew
            kpt.psit_nG[n] *= cos(theta)
            # kpt.psit_nG[n] += sin(theta) * phi_G
            axpy(sin(theta), phi_G, kpt.psit_nG[n])
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                P_i *= cos(theta)
                P_i += sin(theta) * P2_i

            # The residual is only refreshed if another iteration may
            # follow.
            if nit < niter - 1:
                Htpsit_G *= cos(theta)
                # Htpsit_G += sin(theta) * Htphi_G
                axpy(sin(theta), Htphi_G, Htpsit_G)
                #adjust residuals
                R_G[:] = Htpsit_G - kpt.eps_n[n] * kpt.psit_nG[n]

                coef_ai = wfs.pt.dict()
                for a, coef_i in coef_ai.items():
                    P_i = kpt.P_ani[a][n]
                    dO_ii = wfs.setups[a].dO_ii
                    dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                    coef_i[:] = (dot(P_i, dH_ii) -
                                 dot(P_i * kpt.eps_n[n], dO_ii))
                wfs.pt.add(R_G, coef_ai, kpt.q)
                error_new = self.gd.comm.sum(np.vdot(R_G, R_G).real)
                # Stop early once the error has dropped below 30 % of
                # its previous value.
                if error_new / error < 0.30:
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                if (self.nbands_converge == 'occupied' and
                    kpt.f_n is not None and kpt.f_n[n] == 0.0):
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                error = error_new

        # Weight of this band's error in the returned total.
        if kpt.f_n is None:
            weight = 1.0
        else:
            weight = kpt.f_n[n]
        if self.nbands_converge != 'occupied':
            weight = kpt.weight * float(n < self.nbands_converge)
        total_error += weight * error
        # if nit == 3:
        #     print >> self.f, "cg:iters", n, nit+1

    self.timer.stop('CG')
    return total_error
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    Loops over spins, the k-points ``self.kstart`` to ``self.kend``,
    the first ``self.nvalbands`` bands (n) and the bands in
    ``self.mlist`` (m), builds the pair density ``rho_G`` for each
    transition and accumulates it either directly into ``chi0_wGG`` or
    into a spectral function that is Hilbert transformed afterwards
    (``self.hilbert_trans``).

    seperate_spin: if given, only this spin index is processed;
    otherwise all ``self.nspins`` spins are.

    The result is stored in ``self.chi0_wGG`` (divided by ``self.vol``);
    with ``self.optical_limit`` also ``self.chi00G_wGv`` and
    ``self.chi0G0_wGv`` are set.  Nothing is returned.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    ibzk_kc = kd.ibzk_kc
    bzk_kc = kd.bzk_kc
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw), dtype = complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)

        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)

        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)

    # For a very small eta the rank-1 update routine czher is used
    # instead of a full outer product.
    use_zher = False
    if self.eta < 1e-5:
        use_zher = True

    rho_G = np.zeros(self.npw, dtype=complex)
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        # Skip spins without any occupation above the threshold.
        if not (f_skn[spin] > self.ftol).any():
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices beyond the last k-point are padding: they are
            # mapped to k = 0 and their pair density is zeroed below.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)

            index1_g, phase1_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                # In serial runs, bands with occupation below the
                # threshold are skipped.
                if self.calc.wfs.world.size == 1:
                    if (self.f_skn[spin][ibzkpt1, n] - self.ftol < 0):
                        continue

                t1 = time()
                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g,k,index1_g,phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(' ', k, n, m, time() - t0, file=self.txt)

                    # Only transitions with a sufficiently large
                    # occupation difference contribute.
                    check_focc = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) > self.ftol

                    if not self.pwmode:
                        psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc, spin=spin)

                    if check_focc:
                        if self.pwmode:
                            u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                            psitold_g = calc.wfs._get_wave_function_array(u, m, realspace=True, phase=eikr2_R)

                        psit2_g = kd.transform_wave_function(psitold_g, kq_k[k], index2_g, phase2_g)

                        # zero padding is included through the FFT
                        rho_g = np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad) * self.vol / self.nG0rpad
                        # Here, planewave cutoff is applied
                        rho_G = rho_g.ravel()[self.Gindex_G]

                        if self.optical_limit:
                            phase_cd = np.exp(2j * pi * sdisp_cd * kd.bzk_kc[kq_k[k], :, np.newaxis])
                            for ix in range(3):
                                d_c[ix](psit2_g, dpsit_g, phase_cd)
                                tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                            rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                            for ix in range(3):
                                q2_c = np.diag((1,1,1))[ix] * self.qopt
                                qq2_v = np.dot(q2_c, self.bcell_cv) # summation over c
                                rhoG0_v[ix] = -1j * np.dot(qq2_v, tmp)

                        # Atom-centered corrections to the pair density.
                        P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u, ibzkpt2)
                        for a, id in enumerate(calc.wfs.setups.id_a):
                            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                            gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                            if self.optical_limit:
                                gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                        if self.optical_limit:
                            # Divide by the energy difference unless the
                            # two levels are closer than 0.1/Hartree, in
                            # which case the element is dropped.
                            if np.abs(self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]) > 0.1/Hartree:
                                rho_G[0] /= self.enoshift_skn[spin][ibzkpt2, m] \
                                    - self.enoshift_skn[spin][ibzkpt1, n]
                                rhoG0_v /= self.enoshift_skn[spin][ibzkpt2, m] \
                                    - self.enoshift_skn[spin][ibzkpt1, n]
                            else:
                                rho_G[0] = 0.
                                rhoG0_v[:] = 0.

                        if k_pad:
                            rho_G[:] = 0.

                        if self.optical_limit:
                            rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                            rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                            rho0G_Gv[0,:] = rhoG0_v * rhoG0_v.conj()
                            rhoG0_Gv[0,:] = rhoG0_v * rhoG0_v.conj()

                        if not self.hilbert_trans:
                            # Direct accumulation for every local
                            # frequency.
                            if not use_zher:
                                rho_GG = np.outer(rho_G, rho_G.conj())

                            for iw in range(self.Nw_local):
                                w = self.w_w[iw + self.wstart] / Hartree
                                coef = ( 1. / (w + e_skn[spin][ibzkpt1, n] - e_skn[spin][ibzkpt2, m] + 1j * self.eta) - 1. / (w - e_skn[spin][ibzkpt1, n] + e_skn[spin][ibzkpt2, m] + 1j * self.eta) )
                                C = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) * coef

                                if use_zher:
                                    czher(C.real, rho_G.conj(), chi0_wGG[iw])
                                else:
                                    axpy(C, rho_GG, chi0_wGG[iw])

                                    if self.optical_limit:
                                        axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                        axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])

                        else:
                            # Spectral-function route: the transition is
                            # split linearly between the two nearest
                            # frequency grid points.
                            rho_GG = np.outer(rho_G, rho_G.conj())
                            focc = f_skn[spin][ibzkpt1,n] - f_skn[spin][ibzkpt2,m]
                            w0 = e_skn[spin][ibzkpt2,m] - e_skn[spin][ibzkpt1,n]
                            scal(focc, rho_GG)
                            if self.optical_limit:
                                scal(focc, rhoG0_Gv)
                                scal(focc, rho0G_Gv)

                            # calculate delta function
                            w0_id = int(w0 / self.dw)
                            if w0_id + 1 < self.NwS:
                                # rely on the self.NwS_local is equal in each node!
                                if self.wScomm.rank == w0_id // self.NwS_local:
                                    alpha = (w0_id + 1 - w0/self.dw) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[w0_id % self.NwS_local] )

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[w0_id % self.NwS_local] )
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[w0_id % self.NwS_local] )

                                if self.wScomm.rank == (w0_id+1) // self.NwS_local:
                                    alpha = (w0 / self.dw - w0_id) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[(w0_id+1) % self.NwS_local] )

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[(w0_id+1) % self.NwS_local] )
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[(w0_id+1) % self.NwS_local] )

                            # deltaw = delta_function(w0, self.dw, self.NwS, self.sigma)
                            # for wi in range(self.NwS_local):
                            #     if deltaw[wi + self.wS1] > 1e-8:
                            #         specfunc_wGG[wi] += tmp_GG * deltaw[wi + self.wS1]

                # Progress report per band when there is one k-point.
                # NOTE(review): totaltime is only assigned on the n == 0
                # pass, and `rank` is not defined in this function --
                # presumably a module-level name; confirm.
                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt('Finished n 0 in %d seconds, estimate %d seconds left.' %(dt, totaltime) )
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt('Finished n %d in %d seconds, estimate %d seconds left.'%(n, dt, totaltime-dt))

            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            # Progress report per k-point; totaltime is set on k == 0.
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt('Finished k 0 in %d seconds, estimate %d seconds left.' %(dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt('Finished k %d in %d seconds, estimate %d seconds left. '%(k, dt, totaltime - dt) )
    self.printtxt('Finished summation over k')

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        # Sum the directly accumulated response over the k communicator.
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # Rebuild the full matrix by adding the conjugate transpose
            # and halving the doubled diagonal.
            assert (np.abs(chi0_wGG[0,1:,0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            chi0_wGG = hilbert_transform(specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of (G, G') elements per rank; the last rank takes
            # the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords, self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            chi0_Wg = hilbert_transform(specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                # Gather the full-frequency optical-limit spectral
                # functions on every rank.
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[self.wstart:self.wend]

            self.chi0G0_wGv = hilbert_transform(specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[self.wstart:self.wend]

    # Final normalization by the volume.
    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')
# Build the same density three different ways and record the largest
# deviations between them.

# 1) nt_G: density constructed by the basis functions from the density
#    matrix.
nt_G = calc.density.gd.zeros()
bfs = wfs.basis_functions
nao = wfs.setups.nao
f_n = kpt.f_n
rho_MM = np.zeros((nao, nao))
wfs.calculate_density_matrix(kpt.f_n, kpt.C_nM, rho_MM)
bfs.construct_density(rho_MM, nt_G, -1)

# 2) nt2_G: wave functions put on the grid, squared and weighted by f_n.
nbands = wfs.bd.nbands
psit_nG = wfs.gd.zeros(nbands)
bfs.lcao_to_grid(kpt.C_nM, psit_nG, -1)
nt2_G = calc.density.gd.zeros()
for f, psit_G in zip(f_n, psit_nG):
    axpy(f, psit_G**2, nt2_G)

# 3) nt3_G: explicit double sum over pairs of basis functions on the
#    grid, obtained by transforming the identity matrix.
identity_MM = np.identity(nao)
Phit_MG = calc.wfs.gd.zeros(nao)
bfs.lcao_to_grid(identity_MM, Phit_MG, -1)
nt3_G = calc.density.gd.zeros()
for M1, Phit1_G in enumerate(Phit_MG):
    for M2, Phit2_G in enumerate(Phit_MG):
        nt3_G += rho_MM[M1, M2] * Phit1_G * Phit2_G

# Largest absolute deviation of methods 2 and 3 from method 1.
err1_G = nt2_G - nt_G
err2_G = nt3_G - nt_G
maxerr1 = np.abs(err1_G).max()
maxerr2 = np.abs(err2_G).max()
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do a single RMM-DIIS iteration for the kpoint.

    The bands are processed in blocks of at most ``self.blocksize``.
    For every block the residuals are obtained, preconditioned, a step
    length ``lam_x`` is chosen (Rayleigh-quotient based when
    ``self.use_rayleigh`` is set, otherwise by minimising the residual
    norm), and ``self.niter - 1`` DIIS sub-steps are performed before a
    final preconditioned trial step.

    Returns the tuple ``(error, psit_nG)``: the sum of the weighted
    residual norms of all blocks, and the wave-function array obtained
    from ``self.subspace_diagonalize`` whose block slices were updated.
    """
    psit_nG, R_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    self.timer.start('RMM-DIIS')
    self.timer.start('Calculate residuals')
    if self.keep_htpsit:
        # R_nG is turned into residuals for all bands at once; the
        # per-block code below then only slices it.
        self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                 kpt.P_ani, kpt.eps_n, R_nG)
    self.timer.stop('Calculate residuals')

    def integrate(a_G, b_G):
        # Local (not yet summed over comm) real part of the integral.
        return np.real(wfs.integrate(a_G, b_G, global_integral=False))

    comm = wfs.gd.comm
    B = self.blocksize
    dR_xG = wfs.empty(B, q=kpt.q)
    P_axi = wfs.pt.dict(B)
    errors_x = np.zeros(B)
    state_done = np.zeros(B, dtype=bool)
    errors_n = np.zeros(wfs.bd.mynbands)
    # Arrays needed for DIIS step
    if self.niter > 1:
        psit_diis_nxG = wfs.empty(B * self.niter, q=kpt.q)
        R_diis_nxG = wfs.empty(B * self.niter, q=kpt.q)
        # P_diis_anxi = wfs.pt.dict(B * self.niter)
        eig_n = np.zeros(self.niter) # eigenvalues for diagonalization
        # not needed in any step
    error = 0.0
    for n1 in range(0, wfs.bd.mynbands, B):
        state_done[:] = False
        n2 = n1 + B
        if n2 > wfs.bd.mynbands:
            # Last, shorter block: shrink B and the work arrays with it.
            n2 = wfs.bd.mynbands
            B = n2 - n1
            P_axi = dict((a, P_xi[:B]) for a, P_xi in P_axi.items())
            dR_xG = dR_xG[:B]
        n_x = range(n1, n2)
        psit_xG = psit_nG[n1:n2]
        self.timer.start('Calculate residuals')
        if self.keep_htpsit:
            R_xG = R_nG[n1:n2]
        else:
            R_xG = wfs.empty(B, q=kpt.q)
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_xG, R_xG)
            wfs.pt.integrate(psit_xG, P_axi, kpt.q)
            self.calculate_residuals(kpt, wfs, hamiltonian, psit_xG,
                                     P_axi, kpt.eps_n[n_x], R_xG, n_x)
        self.timer.stop('Calculate residuals')
        errors_x[:] = 0.0
        for n in range(n1, n2):
            # Weight: k-point weight when no occupations are available,
            # otherwise the occupation; overridden when a fixed number
            # of bands is to be converged.
            if kpt.f_n is None:
                weight = kpt.weight
            else:
                weight = kpt.f_n[n]
            if self.nbands_converge != 'occupied':
                if wfs.bd.global_index(n) < self.nbands_converge:
                    weight = kpt.weight
                else:
                    weight = 0.0
            errors_x[n - n1] = weight * integrate(R_xG[n - n1],
                                                  R_xG[n - n1])
            errors_n[n] = errors_x[n - n1]
        comm.sum(errors_x)
        error += np.sum(errors_x)
        # Insert first vectors and residuals for DIIS step
        if self.niter > 1:
            # Save the previous vectors contiguously for each band
            # in the block
            psit_diis_nxG[:B * self.niter:self.niter] = psit_xG
            R_diis_nxG[:B * self.niter:self.niter] = R_xG
        # Precondition the residual:
        self.timer.start('precondition')
        # ekin_x = self.preconditioner.calculate_kinetic_energy(
        #     R_xG, kpt)
        ekin_x = self.preconditioner.calculate_kinetic_energy(
            psit_xG, kpt)
        dpsit_xG = self.preconditioner(R_xG, kpt, ekin_x)
        self.timer.stop('precondition')
        # Calculate the residual of dpsit_G, dR_G = (H - e S) dpsit_G:
        # self.timer.start('Apply Hamiltonian')
        wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, dpsit_xG, dR_xG)
        # self.timer.stop('Apply Hamiltonian')
        self.timer.start('projections')
        wfs.pt.integrate(dpsit_xG, P_axi, kpt.q)
        self.timer.stop('projections')
        if self.use_rayleigh:
            # NOTE(review): the .item() calls and the use of index n1 /
            # dP_xi[0] only make sense for a block of one band --
            # presumably use_rayleigh requires blocksize == 1; confirm.
            self.timer.start('Minimize Rayleigh')
            i1 = wfs.integrate(psit_xG, dR_xG,
                               global_integral=False).item()
            i2 = wfs.integrate(dpsit_xG, dR_xG,
                               global_integral=False).item()
            i3 = wfs.integrate(dpsit_xG, psit_xG,
                               global_integral=False).item()
            i4 = wfs.integrate(dpsit_xG, dpsit_xG,
                               global_integral=False).item()
            for a, dP_xi in P_axi.items():
                P_i = kpt.P_ani[a][n1]
                dP_i = dP_xi[0]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                dO_ii = wfs.setups[a].dO_ii
                i1 += np.dot(P_i, np.dot(dH_ii, dP_i.conj())).item()
                i2 += np.dot(dP_i, np.dot(dH_ii, dP_i.conj())).item()
                i3 += np.dot(dP_i, np.dot(dO_ii, P_i.conj())).item()
                i4 += np.dot(dP_i, np.dot(dO_ii, dP_i.conj())).item()
            i1 = comm.sum(i1)
            i2 = comm.sum(i2)
            i3 = comm.sum(i3)
            i4 = comm.sum(i4)
            # Coefficients of the quadratic a*lam**2 + b*lam + c whose
            # root is taken as the step length below.
            a = np.real(i2 * i3 - i1 * i4)
            b = np.real(i2 - kpt.eps_n[n1] * i4)
            c = np.real(i1 - kpt.eps_n[n1] * i3)
            # print "A,B,C", a,b,c
            lam_x = np.array((-2.0 * c / (b + np.sqrt(b**2 - 4.0 * a * c)),))
            self.timer.stop('Minimize Rayleigh')
            self.timer.start('Calculate residuals')
            self.calculate_residuals(kpt, wfs, hamiltonian, dpsit_xG,
                                     P_axi, kpt.eps_n[n_x], dR_xG, n_x,
                                     calculate_change=True)
            self.timer.stop('Calculate residuals')
        else:
            self.timer.start('Calculate residuals')
            self.calculate_residuals(kpt, wfs, hamiltonian, dpsit_xG,
                                     P_axi, kpt.eps_n[n_x], dR_xG, n_x,
                                     calculate_change=True)
            self.timer.stop('Calculate residuals')
            # Find lam that minimizes the norm of R'_G = R_G + lam dR_G
            self.timer.start('Find lambda')
            RdR_x = np.array([integrate(dR_G, R_G)
                              for R_G, dR_G in zip(R_xG, dR_xG)])
            dRdR_x = np.array([integrate(dR_G, dR_G) for dR_G in dR_xG])
            comm.sum(RdR_x)
            comm.sum(dRdR_x)
            lam_x = -RdR_x / dRdR_x
            self.timer.stop('Find lambda')
        # print "Lam_x:", lam_x
        # Limit abs(lam) to [0.15, 1.0]
        if self.limit_lambda:
            upper = self.limit_lambda['upper']
            lower = self.limit_lambda['lower']
            if self.limit_lambda.get('absolute', False):
                # Clamp the magnitude and keep the sign.
                lam_x = np.where(np.abs(lam_x) < lower,
                                 lower * np.sign(lam_x), lam_x)
                lam_x = np.where(np.abs(lam_x) > upper,
                                 upper * np.sign(lam_x), lam_x)
            else:
                # Clamp the signed value into [lower, upper].
                lam_x = np.where(lam_x < lower, lower, lam_x)
                lam_x = np.where(lam_x > upper, upper, lam_x)
        # lam_x[:] = 0.1
        # New trial wavefunction and residual
        self.timer.start('Update psi')
        for lam, psit_G, dpsit_G, R_G, dR_G in zip(lam_x, psit_xG,
                                                   dpsit_xG, R_xG, dR_xG):
            axpy(lam, dpsit_G, psit_G) # psit_G += lam * dpsit_G
            axpy(lam, dR_G, R_G) # R_G += lam** dR_G
        self.timer.stop('Update psi')
        self.timer.start('DIIS step')
        # DIIS step
        for nit in range(1, self.niter):
            # Do not perform DIIS if error is small
            # if abs(error_block / B) < self.rtol:
            #     break
            # Update the subspace
            psit_diis_nxG[nit:B * self.niter:self.niter] = psit_xG
            R_diis_nxG[nit:B * self.niter:self.niter] = R_xG
            # XXX Only integrals of nit old psits would be needed
            # self.timer.start('projections')
            # wfs.pt.integrate(psit_diis_nxG, P_diis_anxi, kpt.q)
            # self.timer.stop('projections')
            if nit > 1 or self.limit_lambda:
                for ib in range(B):
                    if state_done[ib]:
                        continue
                    # Slots istart..iend-1 hold the stored vectors of
                    # band ib for sub-steps 0..nit.
                    istart = ib * self.niter
                    iend = istart + nit + 1
                    # Residual matrix
                    self.timer.start('Construct matrix')
                    R_nn = wfs.integrate(R_diis_nxG[istart:iend],
                                         R_diis_nxG[istart:iend],
                                         global_integral=True)
                    # Full matrix
                    # Bordered system: residual overlaps plus one extra
                    # row/column of -1 with a zero corner; the last
                    # solution component is discarded below.
                    A_nn = -np.ones((nit + 2, nit + 2), wfs.dtype)
                    A_nn[:nit+1, :nit+1] = R_nn[:]
                    A_nn[-1,-1] = 0.0
                    x_n = np.zeros(nit + 2, wfs.dtype)
                    x_n[-1] = -1.0
                    self.timer.stop('Construct matrix')
                    self.timer.start('Linear solve')
                    alpha_i = np.linalg.solve(A_nn, x_n)[:-1]
                    self.timer.stop('Linear solve')
                    self.timer.start('Update trial vectors')
                    # New vector/residual = linear combination of the
                    # stored ones with coefficients alpha_i.
                    psit_xG[ib] = alpha_i[nit] * psit_diis_nxG[istart + nit]
                    R_xG[ib] = alpha_i[nit] * R_diis_nxG[istart + nit]
                    for i in range(nit):
                        # axpy(alpha_i[i], psit_diis_nxG[istart + i],
                        #      psit_diis_nxG[istart + nit])
                        # axpy(alpha_i[i], R_diis_nxG[istart + i],
                        #      R_diis_nxG[istart + nit])
                        axpy(alpha_i[i], psit_diis_nxG[istart + i],
                             psit_xG[ib])
                        axpy(alpha_i[i], R_diis_nxG[istart + i],
                             R_xG[ib])
                    self.timer.stop('Update trial vectors')
            if nit < self.niter - 1:
                # Another sub-step follows: take a preconditioned step
                # and recompute the residuals for it.
                self.timer.start('precondition')
                # ekin_x = self.preconditioner.calculate_kinetic_energy(
                #     R_xG, kpt)
                dpsit_xG = self.preconditioner(R_xG, kpt, ekin_x)
                self.timer.stop('precondition')
                for psit_G, lam, dpsit_G in zip(psit_xG, lam_x, dpsit_xG):
                    axpy(lam, dpsit_G, psit_G)
                # Calculate the new residuals
                self.timer.start('Calculate residuals')
                wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_xG,
                                             R_xG)
                wfs.pt.integrate(psit_xG, P_axi, kpt.q)
                self.calculate_residuals(kpt, wfs, hamiltonian, psit_xG,
                                         P_axi, kpt.eps_n[n_x], R_xG, n_x,
                                         calculate_change=True)
                self.timer.stop('Calculate residuals')
                self.timer.start('Calculate errors')
                errors_new_x = np.zeros(B)
                # errors_x[:] = 0.0
                for n in range(n1, n2):
                    if kpt.f_n is None:
                        weight = kpt.weight
                    else:
                        weight = kpt.f_n[n]
                    if self.nbands_converge != 'occupied':
                        if wfs.bd.global_index(n) < self.nbands_converge:
                            weight = kpt.weight
                        else:
                            weight = 0.0
                    errors_new_x[n-n1] += weight * integrate(R_xG[n - n1],
                                                             R_xG[n - n1])
                # NOTE(review): this reduces errors_x, not the
                # errors_new_x filled just above; neither array is read
                # again in this method, so the result is unaffected --
                # confirm whether errors_new_x was intended.
                comm.sum(errors_x)
                self.timer.stop('Calculate errors')
        self.timer.stop('DIIS step')
        # Final trial step
        self.timer.start('precondition')
        # ekin_x = self.preconditioner.calculate_kinetic_energy(
        #     R_xG, kpt)
        dpsit_xG = self.preconditioner(R_xG, kpt, ekin_x)
        self.timer.stop('precondition')
        self.timer.start('Update psi')
        if self.trial_step is not None:
            # A fixed trial step overrides the computed step lengths.
            lam_x[:] = self.trial_step
        for lam, psit_G, dpsit_G in zip(lam_x, psit_xG, dpsit_xG):
            axpy(lam, dpsit_G, psit_G) # psit_G += lam * dpsit_G
        self.timer.stop('Update psi')
        # norm = wfs.integrate(psit_xG[0], psit_xG[0])
        # wfs.pt.integrate(psit_xG, P_axi, kpt.q)
        # for a, P_xi in P_axi.items():
        #     dO_ii = wfs.setups[a].dO_ii
        #     norm += np.vdot(P_xi[0], np.inner(dO_ii, P_xi[0]))
        # norm = comm.sum(np.real(norm).item())
        # psit_xG /= np.sqrt(norm)
    self.timer.stop('RMM-DIIS')
    return error, psit_nG
def solve(self, phi, rho, charge=None, eps=None, maxcharge=1e-6,
          zero_initial_phi=False):
    """Solve the Poisson equation for ``rho``, writing the result to ``phi``.

    Dispatches on the total charge and the boundary conditions:

    * ``abs(charge) <= maxcharge``: the homogeneous background is
      subtracted and ``self.solve_neutral`` is used directly.
    * charged and fully periodic: the background is subtracted and
      ``phi`` is shifted by ``charge * self.charged_periodic_correction``
      (computed once with ``madelung`` and cached on the instance).
    * charged and fully non-periodic: the monopole is removed with the
      Gaussian density/potential pair prepared by ``self.load_gauss``.
    * charged with mixed boundary conditions: not supported.

    Parameters:
        phi: potential array; used as initial guess and updated in place.
        rho: density array.
        charge: total charge to assume; defaults to the integral of
            ``rho``.
        eps: convergence criterion forwarded to ``solve_neutral``;
            defaults to ``self.eps``.
        maxcharge: threshold below which the system counts as neutral.
        zero_initial_phi: if true, ``phi`` is reset to zero instead of
            being shifted to form the initial guess.

    Returns:
        Whatever ``self.solve_neutral`` returns (the iteration count,
        judging by the local names ``iters``/``niter``).

    Raises:
        NotImplementedError: charged system with mixed boundary
            conditions.
    """
    if eps is None:
        eps = self.eps
    actual_charge = self.gd.integrate(rho)
    # Mean density: total charge divided by the cell volume
    # (dv times the number of global grid points).
    background = (actual_charge / self.gd.dv /
                  self.gd.get_size_of_global_array().prod())

    if charge is None:
        charge = actual_charge
    if abs(charge) <= maxcharge:
        # System is charge neutral. Use standard solver
        return self.solve_neutral(phi, rho - background, eps=eps)
    elif abs(charge) > maxcharge and self.gd.pbc_c.all():
        # System is charged and periodic. Subtract a homogeneous
        # background charge
        if self.charged_periodic_correction is None:
            # Single-argument print(...) calls behave identically as a
            # Python 2 statement and a Python 3 function call.
            print("+-----------------------------------------------------+")
            print("| Calculating charged periodic correction using the |")
            print("| Ewald potential from a lattice of probe charges in |")
            print("| a homogenous background density |")
            print("+-----------------------------------------------------+")
            self.charged_periodic_correction = madelung(self.gd.cell_cv)
            print("Potential shift will be  %s Ha."
                  % (self.charged_periodic_correction,))

        # Set initial guess for potential
        if zero_initial_phi:
            phi[:] = 0.0
        else:
            phi -= charge * self.charged_periodic_correction

        iters = self.solve_neutral(phi, rho - background, eps=eps)
        phi += charge * self.charged_periodic_correction
        return iters
    elif abs(charge) > maxcharge and not self.gd.pbc_c.any():
        # The system is charged and in a non-periodic unit cell.
        # Determine the potential by 1) subtract a gaussian from the
        # density, 2) determine potential from the neutralized density
        # and 3) add the potential from the gaussian density.

        # Load necessary attributes
        self.load_gauss()

        # Remove monopole moment
        q = actual_charge / np.sqrt(4 * pi)  # Monopole moment
        rho_neutral = rho - q * self.rho_gauss  # neutralized density

        # Set initial guess for potential
        if zero_initial_phi:
            phi[:] = 0.0
        else:
            axpy(-q, self.phi_gauss, phi)  # phi -= q * self.phi_gauss

        # Determine potential from neutral density using standard solver
        niter = self.solve_neutral(phi, rho_neutral, eps=eps)

        # correct error introduced by removing monopole
        axpy(q, self.phi_gauss, phi)  # phi += q * self.phi_gauss

        return niter
    else:
        # System is charged with mixed boundaryconditions
        raise NotImplementedError
def solve(self, phi, rho, charge=None, eps=None, maxcharge=1e-6,
          zero_initial_phi=False):
    """Solve the Poisson equation for ``rho``, writing the result to ``phi``.

    Strategy, in order of precedence:

    * ``self.remove_moment`` set: the first ``self.remove_moment``
      multipoles are removed from a copy of ``rho`` with
      ``self.gauss.remove_moment`` (non-periodic cells only) and the
      returned corrections are added back to ``phi`` after the solve.
    * ``abs(charge) <= maxcharge``: neutral solve on
      ``rho - background``.
    * charged, fully periodic: neutral solve on ``rho - background``.
    * charged, fully non-periodic: the monopole is removed with the
      Gaussian prepared by ``self.load_gauss`` (optionally centred on
      the majority charge when ``self.use_charge_center`` is set).
    * charged with mixed boundary conditions: not supported.

    Parameters:
        phi: potential array; initial guess, updated in place.
        rho: density array.
        charge: total charge to assume; defaults to the integral of
            ``rho``.
        eps: convergence criterion forwarded to ``solve_neutral``;
            defaults to ``self.eps``.
        maxcharge: threshold below which the system counts as neutral.
        zero_initial_phi: if true, ``phi`` is zeroed instead of being
            shifted to form the initial guess (not consulted in the
            ``remove_moment`` branch).

    Returns:
        Whatever ``self.solve_neutral`` returns.

    Raises:
        RuntimeError: ``use_charge_center`` is set and the centre of
            charge is too close to (or outside) the cell border.
        NotImplementedError: charged system with mixed boundary
            conditions.
    """
    assert np.all(phi.shape == self.gd.n_c)
    assert np.all(rho.shape == self.gd.n_c)

    if eps is None:
        eps = self.eps
    actual_charge = self.gd.integrate(rho)
    # Mean density: total charge divided by the cell volume
    # (dv times the number of global grid points).
    background = (actual_charge / self.gd.dv /
                  self.gd.get_size_of_global_array().prod())

    if self.remove_moment:
        assert not self.gd.pbc_c.any()
        if not hasattr(self, 'gauss'):
            self.gauss = Gaussian(self.gd)
        rho_neutral = rho.copy()
        # remove_moment is called in increasing L on the same array, so
        # the order of evaluation is kept exactly as a sequential loop.
        phi_cor_L = []
        for L in range(self.remove_moment):
            phi_cor_L.append(self.gauss.remove_moment(rho_neutral, L))
        # Remove multipoles for better initial guess
        for phi_cor in phi_cor_L:
            phi -= phi_cor

        niter = self.solve_neutral(phi, rho_neutral, eps=eps)
        # correct error introduced by removing multipoles
        for phi_cor in phi_cor_L:
            phi += phi_cor

        return niter

    if charge is None:
        charge = actual_charge
    if abs(charge) <= maxcharge:
        # System is charge neutral. Use standard solver
        return self.solve_neutral(phi, rho - background, eps=eps)
    elif abs(charge) > maxcharge and self.gd.pbc_c.all():
        # System is charged and periodic. Subtract a homogeneous
        # background charge

        # Set initial guess for potential
        if zero_initial_phi:
            phi[:] = 0.0

        iters = self.solve_neutral(phi, rho - background, eps=eps)
        return iters
    elif abs(charge) > maxcharge and not self.gd.pbc_c.any():
        # The system is charged and in a non-periodic unit cell.
        # Determine the potential by 1) subtract a gaussian from the
        # density, 2) determine potential from the neutralized density
        # and 3) add the potential from the gaussian density.

        # Load necessary attributes

        # use_charge_center: The monopole will be removed at the
        # center of the majority charge, which prevents artificial
        # dipoles.
        # Due to the shape of the Gaussian and its Fourier-Transform,
        # the Gaussian representing the charge should stay at least
        # 7 gpts from the borders - see:
        # https://listserv.fysik.dtu.dk/pipermail/gpaw-developers/2015-July/005806.html
        if self.use_charge_center:
            charge_sign = actual_charge / abs(actual_charge)
            # Keep only the part of the density with the majority sign.
            rho_sign = rho * charge_sign
            rho_sign[np.where(rho_sign < 0)] = 0
            absolute_charge = self.gd.integrate(rho_sign)
            center = - self.gd.calculate_dipole_moment(rho_sign) \
                / absolute_charge
            border_offset = np.inner(self.gd.h_cv, np.array((7, 7, 7)))
            borders = np.inner(self.gd.h_cv, self.gd.N_c)
            borders -= border_offset
            if np.any(center > borders) or np.any(center < border_offset):
                raise RuntimeError(
                    'Poisson solver: center of charge outside' + \
                    ' borders - please increase box')
            # A former clamp "center[center > borders] = borders" was
            # dropped here: the check above already raises whenever any
            # component exceeds the border, so it could never clamp
            # anything.
            self.load_gauss(center=center)
        else:
            self.load_gauss()

        # Remove monopole moment
        q = actual_charge / np.sqrt(4 * pi)  # Monopole moment
        rho_neutral = rho - q * self.rho_gauss  # neutralized density

        # Set initial guess for potential
        if zero_initial_phi:
            phi[:] = 0.0
        else:
            axpy(-q, self.phi_gauss, phi)  # phi -= q * self.phi_gauss

        # Determine potential from neutral density using standard solver
        niter = self.solve_neutral(phi, rho_neutral, eps=eps)

        # correct error introduced by removing monopole
        axpy(q, self.phi_gauss, phi)  # phi += q * self.phi_gauss

        return niter
    else:
        # System is charged with mixed boundaryconditions
        msg = ('Charged systems with mixed periodic/zero'
               ' boundary conditions')
        raise NotImplementedError(msg)
# Consistency check: build the pseudo density of one k-point in three
# different ways on the grid and compare them.
# NOTE(review): calc, wfs and kpt are defined outside this excerpt.

# (1) Reference: density built from the density matrix by the
#     basis-function object.
nt_G = calc.density.gd.zeros()
bfs = wfs.basis_functions
nao = wfs.setups.nao
f_n = kpt.f_n
rho_MM = np.zeros((nao, nao))
wfs.calculate_density_matrix(kpt.f_n, kpt.C_nM, rho_MM)
bfs.construct_density(rho_MM, nt_G, -1)

# (2) Put the LCAO coefficients on the grid and accumulate
#     f * psit**2 band by band (axpy(a, x, y) performs y += a * x).
nbands = wfs.bd.nbands
psit_nG = wfs.gd.zeros(nbands)
bfs.lcao_to_grid(kpt.C_nM, psit_nG, -1)
nt2_G = calc.density.gd.zeros()
for f, psit_G in zip(f_n, psit_nG):
    axpy(f, psit_G**2, nt2_G)

# (3) Put every basis function on the grid (identity coefficients) and
#     contract explicitly with the density matrix.
identity_MM = np.identity(nao)
Phit_MG = calc.wfs.gd.zeros(nao)
bfs.lcao_to_grid(identity_MM, Phit_MG, -1)
nt3_G = calc.density.gd.zeros()
for M1, Phit1_G in enumerate(Phit_MG):
    for M2, Phit2_G in enumerate(Phit_MG):
        nt3_G += rho_MM[M1, M2] * Phit1_G * Phit2_G

# Deviations of (2) and (3) from the reference (1).
err1_G = nt2_G - nt_G
err2_G = nt3_G - nt_G
maxerr1 = np.abs(err1_G).max()
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    Loops over the requested spins, the local k-points
    (``self.kstart`` .. ``self.kend``), the first ``self.nvalbands``
    bands ``n`` and the bands ``m`` in ``self.mlist``; for every pair
    whose occupation difference exceeds ``self.ftol`` the pair density
    ``rho_G`` is built (FFT plus a PAW term added via ``gemv``) and
    accumulated either directly into ``chi0_wGG`` or, when
    ``self.hilbert_trans`` is set, into a spectral function that is
    Hilbert-transformed afterwards.

    The result is stored on ``self.chi0_wGG`` (divided by ``self.vol``);
    with ``self.optical_limit`` the arrays ``self.chi00G_wGv`` and
    ``self.chi0G0_wGv`` are filled as well. Nothing is returned.

    ``seperate_spin``: if given, only this spin index is processed;
    otherwise all ``self.nspins`` spins are.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    ibzk_kc = kd.ibzk_kc
    bzk_kc = kd.bzk_kc
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw), dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)

        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3), dtype=complex)

        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3), dtype=complex)

    # For a very small broadening the rank-1 update routine czher is
    # used and chi0 is completed from its conjugate transpose afterwards.
    use_zher = False
    if self.eta < 1e-5:
        use_zher = True

    rho_G = np.zeros(self.npw, dtype=complex)
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        if not (f_skn[spin] > self.ftol).any():
            # No occupied state for this spin: nothing to accumulate.
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices beyond the number of BZ k-points are padding:
            # they are mapped to k = 0 and their rho_G is zeroed below.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T, k_c / N_c).T)

            index1_g, phase1_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                if self.calc.wfs.world.size == 1:
                    # Serial run: skip bands whose occupation is below
                    # the tolerance.
                    if self.f_skn[spin][ibzkpt1, n] - self.ftol < 0:
                        continue

                t1 = time()
                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g, k, index1_g, phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(" ", k, n, m, time() - t0, file=self.txt)

                    check_focc = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) > self.ftol

                    if not self.pwmode:
                        # Called for every m, with check_focc passed as
                        # third argument. NOTE(review): presumably a
                        # collective fetch in parallel runs -- confirm.
                        psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc, spin=spin)

                    if check_focc:
                        if self.pwmode:
                            u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                            psitold_g = calc.wfs._get_wave_function_array(u, m, realspace=True, phase=eikr2_R)

                        psit2_g = kd.transform_wave_function(psitold_g, kq_k[k], index2_g, phase2_g)

                        # zero padding is included through the FFT
                        rho_g = np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad) * self.vol / self.nG0rpad

                        # Here, planewave cutoff is applied
                        rho_G = rho_g.ravel()[self.Gindex_G]

                        if self.optical_limit:
                            phase_cd = np.exp(2j * pi * sdisp_cd * kd.bzk_kc[kq_k[k], :, np.newaxis])
                            for ix in range(3):
                                d_c[ix](psit2_g, dpsit_g, phase_cd)
                                tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                            rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                            for ix in range(3):
                                q2_c = np.diag((1, 1, 1))[ix] * self.qopt
                                qq2_v = np.dot(q2_c, self.bcell_cv)  # summation over c
                                rhoG0_v[ix] = -1j * np.dot(qq2_v, tmp)

                        P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u, ibzkpt2)

                        # PAW term: added to rho_G (and rhoG0_v) per atom.
                        for a, id in enumerate(calc.wfs.setups.id_a):
                            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                            gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                            if self.optical_limit:
                                gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                        if self.optical_limit:
                            # Divide by the (unshifted) energy difference
                            # unless it is smaller than 0.1 / Hartree, in
                            # which case the head elements are dropped.
                            if (
                                np.abs(self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n])
                                > 0.1 / Hartree
                            ):
                                rho_G[0] /= (
                                    self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]
                                )
                                rhoG0_v /= self.enoshift_skn[spin][ibzkpt2, m] - self.enoshift_skn[spin][ibzkpt1, n]
                            else:
                                rho_G[0] = 0.0
                                rhoG0_v[:] = 0.0

                        if k_pad:
                            rho_G[:] = 0.0

                        if self.optical_limit:
                            rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                            rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                            rho0G_Gv[0, :] = rhoG0_v * rhoG0_v.conj()
                            rhoG0_Gv[0, :] = rhoG0_v * rhoG0_v.conj()

                        if not self.hilbert_trans:
                            if not use_zher:
                                rho_GG = np.outer(rho_G, rho_G.conj())

                            for iw in range(self.Nw_local):
                                w = self.w_w[iw + self.wstart] / Hartree
                                coef = 1.0 / (
                                    w + e_skn[spin][ibzkpt1, n] - e_skn[spin][ibzkpt2, m] + 1j * self.eta
                                ) - 1.0 / (w - e_skn[spin][ibzkpt1, n] + e_skn[spin][ibzkpt2, m] + 1j * self.eta)
                                C = (f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]) * coef

                                if use_zher:
                                    czher(C.real, rho_G.conj(), chi0_wGG[iw])
                                else:
                                    # axpy(a, x, y) performs y += a * x.
                                    axpy(C, rho_GG, chi0_wGG[iw])

                                    if self.optical_limit:
                                        axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                        axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])

                        else:
                            rho_GG = np.outer(rho_G, rho_G.conj())
                            focc = f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]
                            w0 = e_skn[spin][ibzkpt2, m] - e_skn[spin][ibzkpt1, n]
                            scal(focc, rho_GG)
                            if self.optical_limit:
                                scal(focc, rhoG0_Gv)
                                scal(focc, rho0G_Gv)

                            # calculate delta function
                            # The transition energy w0 is split linearly
                            # between the two neighbouring frequency
                            # points w0_id and w0_id + 1.
                            w0_id = int(w0 / self.dw)
                            if w0_id + 1 < self.NwS:
                                # rely on the self.NwS_local is equal in each node!
                                if self.wScomm.rank == w0_id // self.NwS_local:
                                    alpha = (w0_id + 1 - w0 / self.dw) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[w0_id % self.NwS_local])

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[w0_id % self.NwS_local])
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[w0_id % self.NwS_local])

                                if self.wScomm.rank == (w0_id + 1) // self.NwS_local:
                                    alpha = (w0 / self.dw - w0_id) / self.dw
                                    axpy(alpha, rho_GG, specfunc_wGG[(w0_id + 1) % self.NwS_local])

                                    if self.optical_limit:
                                        axpy(alpha, rho0G_Gv, specfunc0G_wGv[(w0_id + 1) % self.NwS_local])
                                        axpy(alpha, rhoG0_Gv, specfuncG0_wGv[(w0_id + 1) % self.NwS_local])

                        # deltaw = delta_function(w0, self.dw, self.NwS, self.sigma)
                        # for wi in range(self.NwS_local):
                        #     if deltaw[wi + self.wS1] > 1e-8:
                        #         specfunc_wGG[wi] += tmp_GG * deltaw[wi + self.wS1]

                # Progress report per band when there is a single k-point.
                # NOTE(review): totaltime is only bound in the n == 0
                # branch; if band 0 is skipped by the occupation check
                # above, the later report would fail -- confirm.
                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt("Finished n 0 in %d seconds, estimate %d seconds left." % (dt, totaltime))
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt(
                                "Finished n %d in %d seconds, estimate %d seconds left." % (n, dt, totaltime - dt)
                            )
            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            # Progress report per k-point.
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt("Finished k 0 in %d seconds, estimate %d seconds left." % (dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt(
                        "Finished k %d in %d seconds, estimate %d seconds left. " % (k, dt, totaltime - dt)
                    )
    self.printtxt("Finished summation over k")

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        # Sum the k-point contributions frequency by frequency.
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # czher filled one triangle only: add the conjugate
            # transpose and halve the doubled diagonal.
            assert (np.abs(chi0_wGG[0, 1:, 0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.0
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            chi0_wGG = hilbert_transform(
                specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]
            self.printtxt("Finished hilbert transform !")
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of G,G' elements held per rank of wcomm; the last
            # rank takes the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw ** 2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw ** 2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords, self.wcomm)
            self.printtxt("Finished Slice Along Frequency !")
            chi0_Wg = hilbert_transform(specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans)[
                : self.Nw
            ]
            self.printtxt("Finished hilbert transform !")
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt("Finished Slice along orbitals !")
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                # Gather the full frequency range of the wing arrays.
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3), dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(
                specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]

            self.chi0G0_wGv = hilbert_transform(
                specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta, self.full_hilbert_trans
            )[self.wstart : self.wend]

    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt("")
    self.printtxt("Finished chi0 !")
# Check gemm for transa='n' a2 = np.arange(7 * 5 * 1 * 3).reshape(7, 5, 1, 3) * (-1. + 4.j) + 3. c = np.tensordot(a, a2, [1, 0]) gemm(1., a2, a, -1., c, 'n') assert not c.any() # Check gemm for transa='c' a = np.arange(4 * 5 * 1 * 3).reshape(4, 5, 1, 3) * (3. - 2.j) + 4. c = np.tensordot(a, a2.conj(), [[1, 2, 3], [1, 2, 3]]) gemm(1., a2, a, -1., c, 'c') assert not c.any() # Check axpy c = 5.j * a axpy(-5.j, a, c) assert not c.any() # Check rk c = np.tensordot(a, a.conj(), [[1, 2, 3], [1, 2, 3]]) rk(1., a, -1., c) tri2full(c) assert not c.any() # Check gemmdot for transa='c' c = np.tensordot(a, a2.conj(), [-1, -1]) gemmdot(a, a2, beta=-1., out=c, trans='c') assert not c.any() # Check gemmdot for transa='n' a2.shape = 3, 7, 5, 1
def calculate(self, spin=0):
    """Calculate the non-interacting density response function.

    Loops over the local k-points (``self.kstart`` .. ``self.kend``),
    the local bands ``n`` (``self.nstart`` .. ``self.nend``) and all
    ``self.nbands`` bands ``m``. For every pair whose occupation
    difference exceeds ``self.ftol`` the pair density ``rho_G`` is built
    (FFT plus a PAW term added via ``gemv``) and its outer product is
    accumulated either directly into ``chi0_wGG`` or, when
    ``self.hilbert_trans`` is set, into a spectral function that is
    Hilbert-transformed afterwards.

    The result, divided by ``self.vol``, is stored on ``self.chi0_wGG``.
    If no state has an occupation above ``self.ftol`` an all-zero array
    is stored and the method returns immediately. Returns ``None``.

    ``spin``: spin index forwarded to ``self.get_wavefunction``.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    ibzk_kc = self.ibzk_kc
    bzk_kc = self.bzk_kc
    kq_k = self.kq_k
    pt = self.pt
    f_kn = self.f_kn
    e_kn = self.e_kn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if not (f_kn > self.ftol).any():
        self.chi0_wGG = chi0_wGG
        return

    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw), dtype = complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)

    rho_G = np.zeros(self.npw, dtype=complex)
    # rho_GG is only assigned inside the check_focc branch below. Bind it
    # up front so the "del rho_GG, rho_G" after the loops cannot raise
    # NameError when no (n, m) pair qualifies on this rank or the local
    # loop ranges are empty.
    rho_GG = None
    t0 = time()
    t_get_wfs = 0
    for k in range(self.kstart, self.kend):

        # Find corresponding kpoint in IBZ
        ibzkpt1 = kd.kibz_k[k]
        if self.optical_limit:
            ibzkpt2 = ibzkpt1
        else:
            ibzkpt2 = kd.kibz_k[kq_k[k]]

        for n in range(self.nstart, self.nend):
            # print >> self.txt, k, n, t_get_wfs, time() - t0
            t1 = time()
            psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)
            t_get_wfs += time() - t1
            psit1new_g = kd.transform_wave_function(psitold_g, k)
            P1_ai = pt.dict()
            pt.integrate(psit1new_g, P1_ai, k)

            psit1_g = psit1new_g.conj() * self.expqr_g

            for m in range(self.nbands):
                # With the Hilbert transform only f_n > f_m transitions
                # are used; otherwise both signs contribute.
                if self.hilbert_trans:
                    check_focc = (f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]) > self.ftol
                else:
                    check_focc = np.abs(f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]) > self.ftol

                t1 = time()
                # Called for every m, with check_focc passed as third
                # argument. NOTE(review): presumably a collective fetch in
                # parallel runs -- confirm.
                psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc, spin=spin)
                t_get_wfs += time() - t1

                if check_focc:
                    psit2_g = kd.transform_wave_function(psitold_g, kq_k[k])
                    P2_ai = pt.dict()
                    pt.integrate(psit2_g, P2_ai, kq_k[k])

                    # fft
                    tmp_g = np.fft.fftn(psit2_g*psit1_g) * self.vol / self.nG0

                    # Pick the plane-wave components listed in Gindex_G.
                    for iG in range(self.npw):
                        index = self.Gindex_G[iG]
                        rho_G[iG] = tmp_g[index[0], index[1], index[2]]

                    if self.optical_limit:
                        phase_cd = np.exp(2j * pi * sdisp_cd * bzk_kc[kq_k[k], :, np.newaxis])
                        for ix in range(3):
                            d_c[ix](psit2_g, dpsit_g, phase_cd)
                            tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                    # PAW correction
                    for a, id in enumerate(calc.wfs.setups.id_a):
                        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                    if self.optical_limit:
                        rho_G[0] /= e_kn[ibzkpt2, m] - e_kn[ibzkpt1, n]

                    rho_GG = np.outer(rho_G, rho_G.conj())

                    if not self.hilbert_trans:
                        for iw in range(self.Nw_local):
                            w = self.w_w[iw + self.wstart] / Hartree
                            C = (f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]) / (
                                w + e_kn[ibzkpt1, n] - e_kn[ibzkpt2, m] + 1j * self.eta)
                            # axpy(a, x, y) performs y += a * x.
                            axpy(C, rho_GG, chi0_wGG[iw])
                    else:
                        focc = f_kn[ibzkpt1,n] - f_kn[ibzkpt2,m]
                        w0 = e_kn[ibzkpt2,m] - e_kn[ibzkpt1,n]
                        scal(focc, rho_GG)

                        # calculate delta function
                        # The transition energy w0 is split linearly
                        # between the neighbouring frequency points
                        # w0_id and w0_id + 1.
                        w0_id = int(w0 / self.dw)
                        if w0_id + 1 < self.NwS:
                            # rely on the self.NwS_local is equal in each node!
                            if self.wScomm.rank == w0_id // self.NwS_local:
                                alpha = (w0_id + 1 - w0/self.dw) / self.dw
                                axpy(alpha, rho_GG, specfunc_wGG[w0_id % self.NwS_local] )

                            if self.wScomm.rank == (w0_id+1) // self.NwS_local:
                                alpha = (w0 / self.dw - w0_id) / self.dw
                                axpy(alpha, rho_GG, specfunc_wGG[(w0_id+1) % self.NwS_local] )

                    # deltaw = delta_function(w0, self.dw, self.NwS, self.sigma)
                    # for wi in range(self.NwS_local):
                    #     if deltaw[wi + self.wS1] > 1e-8:
                    #         specfunc_wGG[wi] += tmp_GG * deltaw[wi + self.wS1]

            # Progress report per band when there is a single k-point.
            # NOTE(review): totaltime is only bound when n == 0 is part
            # of the local band range -- confirm.
            if self.nkpt == 1:
                if n == 0:
                    dt = time() - t0
                    totaltime = dt * self.nband_local
                    self.printtxt('Finished n 0 in %f seconds, estimated %f seconds left.' %(dt, totaltime) )
                if rank == 0 and self.nband_local // 5 > 0:
                    if n > 0 and n % (self.nband_local // 5) == 0:
                        dt = time() - t0
                        self.printtxt('Finished n %d in %f seconds, estimated %f seconds left.'%(n, dt, totaltime-dt))
        if calc.wfs.world.size != 1:
            self.kcomm.barrier()
        # Progress report per k-point.
        if k == 0:
            dt = time() - t0
            totaltime = dt * self.nkpt_local
            self.printtxt('Finished k 0 in %f seconds, estimated %f seconds left.' %(dt, totaltime))

        if rank == 0 and self.nkpt_local // 5 > 0:
            if k > 0 and k % (self.nkpt_local // 5) == 0:
                dt = time() - t0
                self.printtxt('Finished k %d in %f seconds, estimated %f seconds left. '%(k, dt, totaltime - dt) )
    self.printtxt('Finished summation over k')

    self.kcomm.barrier()
    # Drop the work arrays before the transform allocates its own.
    del rho_GG, rho_G

    # Hilbert Transform
    if not self.hilbert_trans:
        self.kcomm.sum(chi0_wGG)
    else:
        self.kcomm.sum(specfunc_wGG)
        if self.wScomm.size == 1:
            if not self.full_hilbert_trans:
                chi0_wGG = hilbert_transform(specfunc_wGG, self.Nw, self.dw, self.eta)[self.wstart:self.wend]
            else:
                chi0_wGG = full_hilbert_transform(specfunc_wGG, self.Nw, self.dw, self.eta)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of G,G' elements held per rank of wcomm; the last
            # rank takes the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords, self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            if not self.full_hilbert_trans:
                chi0_Wg = hilbert_transform(specfunc_Wg, self.Nw, self.dw, self.eta)[:self.Nw]
            else:
                chi0_Wg = full_hilbert_transform(specfunc_Wg, self.Nw, self.dw, self.eta)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

    self.chi0_wGG = chi0_wGG / self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')

    return
def mix(self, nt_G, D_ap):
    """Mix a new density into the stored history (Broyden-type update).

    ``nt_G`` and every array in ``D_ap`` are modified in place.  On the
    first call (``self.step == 0``) only the per-entry history lists are
    created and the input is recorded unchanged.  On later calls the
    change with respect to the last stored input is computed, the
    ``u``/``v`` update vectors are extended, and the correction ``eta``
    built from them replaces that change.

    Parameters:
        nt_G: density array of the current iteration; overwritten with
            the mixed density.
        D_ap: sequence of arrays that are mixed with the same
            coefficients as ``nt_G``; each one is overwritten in place.

    Side effects:
        Sets ``self.dNt`` (integral of the absolute density change) on
        every call after the first and increments ``self.step``.
    """
    # Keep at most two stored differences per quantity.
    if self.step > 2:
        del self.d_nt_G[0]
        for d_Dp in self.d_D_ap:
            del d_Dp[0]

    if self.step > 0:
        # Difference between this input and the previously stored one.
        self.d_nt_G.append(nt_G - self.nt_iG[-1])
        for d_Dp, D_p, D_ip in zip(self.d_D_ap, D_ap, self.D_iap):
            d_Dp.append(D_p - D_ip[-1])
        fmin_G = self.gd.integrate(self.d_nt_G[-1] * self.d_nt_G[-1])
        self.dNt = self.gd.integrate(np.fabs(self.d_nt_G[-1]))
        if self.verbose:
            # Only fmin_G is computed here; the old message had a second
            # "%f" without a value and raised TypeError when verbose.
            print('Mixer: broydn: fmin_G = %f' % fmin_G)

    if self.step == 0:
        # First call: create one empty history slot per entry of D_ap.
        self.eta_G = np.empty(nt_G.shape)
        self.eta_D = []
        for D_p in D_ap:
            self.eta_D.append(0)
            self.u_D.append([])
            self.D_iap.append([])
            self.d_D_ap.append([])
    else:
        if self.step >= 2:
            del self.c_G[:]
            if len(self.v_G) >= self.nmaxold:
                # History is full: drop the oldest update vectors.
                del self.u_G[0]
                del self.v_G[0]
                for u_D in self.u_D:
                    del u_D[0]
            temp_nt_G = self.d_nt_G[1] - self.d_nt_G[0]
            self.v_G.append(temp_nt_G /
                            self.gd.integrate(temp_nt_G * temp_nt_G))
            if len(self.v_G) < self.nmaxold:
                nstep = self.step - 1
            else:
                nstep = self.nmaxold
            for i in range(nstep):
                self.c_G.append(
                    self.gd.integrate(self.v_G[i] * self.d_nt_G[1]))
            self.u_G.append(self.beta * temp_nt_G +
                            self.nt_iG[1] - self.nt_iG[0])
            for d_Dp, u_D, D_ip in zip(self.d_D_ap, self.u_D, self.D_iap):
                temp_D_ap = d_Dp[1] - d_Dp[0]
                u_D.append(self.beta * temp_D_ap + D_ip[1] - D_ip[0])
            # Remove the components along the older v vectors from the
            # newest u vector.
            usize = len(self.u_G)
            for i in range(usize - 1):
                a_G = self.gd.integrate(self.v_G[i] * temp_nt_G)
                axpy(-a_G, self.u_G[i], self.u_G[usize - 1])
                for u_D in self.u_D:
                    axpy(-a_G, u_D[i], u_D[usize - 1])

        # Correction: eta = beta * d - sum_i c_i * u_i
        self.eta_G = self.beta * self.d_nt_G[-1]
        for i, d_Dp in enumerate(self.d_D_ap):
            self.eta_D[i] = self.beta * d_Dp[-1]
        usize = len(self.u_G)
        for i in range(usize):
            axpy(-self.c_G[i], self.u_G[i], self.eta_G)
            for eta_D, u_D in zip(self.eta_D, self.u_D):
                axpy(-self.c_G[i], u_D[i], eta_D)

        # Replace the raw change by the correction, in place.
        axpy(-1.0, self.d_nt_G[-1], nt_G)
        axpy(1.0, self.eta_G, nt_G)
        for D_p, d_Dp, eta_D in zip(D_ap, self.d_D_ap, self.eta_D):
            axpy(-1.0, d_Dp[-1], D_p)
            axpy(1.0, eta_D, D_p)

        if self.step >= 2:
            del self.nt_iG[0]
            for D_ip in self.D_iap:
                del D_ip[0]

    # Record the (mixed) input for the next call.
    self.nt_iG.append(np.copy(nt_G))
    for D_ip, D_p in zip(self.D_iap, D_ap):
        D_ip.append(np.copy(D_p))
    self.step += 1
def mix(self, nt_G, D_ap, phase_cd=None):
    """Mix the new density with the stored history, in place.

    On the first call (empty history) the input is only recorded.  On
    later calls the residual with respect to the last stored input is
    appended to the history, the matrix of residual inner products is
    extended, and ``nt_G`` and every array in ``D_ap`` are overwritten
    with ``sum_i alpha_i * (old_i + beta * residual_i)``, where the
    weights ``alpha_i`` are the normalized row sums of the inverse
    matrix.

    Parameters:
        nt_G: density array of the current iteration; overwritten with
            the mixed density.
        D_ap: sequence of arrays mixed with the same weights as
            ``nt_G``; each one is overwritten in place.
        phase_cd: forwarded to ``self.metric`` when a metric is set.

    Side effects:
        Sets ``self.dNt`` (integral of the absolute residual) and
        ``self.A_ii`` and extends the history lists.
    """
    iold = len(self.nt_iG)
    if iold > 0:
        if iold > self.nmaxold:
            # Throw away too old stuff:
            del self.nt_iG[0]
            del self.R_iG[0]
            del self.D_iap[0]
            del self.dD_iap[0]
            iold = self.nmaxold

        # Calculate new residual (difference between input and output)
        R_G = nt_G - self.nt_iG[-1]
        # np.absolute is used (rather than np.fabs) per the original note.
        self.dNt = self.gd.integrate(np.absolute(R_G))
        self.R_iG.append(R_G)
        self.dD_iap.append([])
        for D_p, D_ip in zip(D_ap, self.D_iap[-1]):
            self.dD_iap[-1].append(D_p - D_ip)

        # Update matrix: only the last row/column is new, the rest is
        # copied from the matrix of the previous call.
        A_ii = np.zeros((iold, iold))
        i2 = iold - 1

        if self.metric is None:
            mR_G = R_G
        else:
            mR_G = self.mR_G
            self.metric(R_G, mR_G, phase_cd=phase_cd)

        for i1, R_1G in enumerate(self.R_iG):
            # Inner product between new and old residues
            # XXX For now, use only real part of residues
            # For complex quantities a .conjugate should be added ??
            a = self.gd.comm.sum(np.vdot(R_1G.real, mR_G.real))
            if self.dtype == complex:
                a += self.gd.comm.sum(np.vdot(R_1G.imag, mR_G.imag))

            A_ii[i1, i2] = a
            A_ii[i2, i1] = a
        A_ii[:i2, :i2] = self.A_ii[-i2:, -i2:]
        self.A_ii = A_ii

        try:
            B_ii = np.linalg.inv(A_ii)
        except np.linalg.LinAlgError:
            # Singular matrix: use the latest density only.
            alpha_i = np.zeros(iold)
            alpha_i[-1] = 1.0
        else:
            alpha_i = B_ii.sum(1)
            # NumPy division never raises ZeroDivisionError (it yields
            # inf/nan), so the zero-sum case is tested explicitly.
            weight_sum = alpha_i.sum()
            if weight_sum == 0.0:
                alpha_i[:] = 0.0
                alpha_i[-1] = 1.0
            else:
                # Normalize:
                alpha_i /= weight_sum

        # Calculate new input density:
        nt_G[:] = 0.0
        for D in D_ap:
            D[:] = 0.0
        beta = self.beta
        for i, alpha in enumerate(alpha_i):
            axpy(alpha, self.nt_iG[i], nt_G)
            axpy(alpha * beta, self.R_iG[i], nt_G)
            for D_p, D_ip, dD_ip in zip(D_ap, self.D_iap[i],
                                        self.dD_iap[i]):
                axpy(alpha, D_ip, D_p)
                axpy(alpha * beta, dD_ip, D_p)

    # Store new input density (and new atomic density matrices):
    self.nt_iG.append(nt_G.copy())
    self.D_iap.append([])
    for D_p in D_ap:
        self.D_iap[-1].append(D_p.copy())
def add_orbital_density(self, nt_G, kpt, n):
    """Accumulate the density of band ``n`` into ``nt_G`` in place.

    The coefficients ``kpt.psit_nG[n]`` are passed through
    ``self.pd.ifft`` (with ``kpt.q``) and the squared magnitude of the
    result is added to ``nt_G``.
    """
    transformed = self.pd.ifft(kpt.psit_nG[n], kpt.q)
    band_density = abs(transformed)**2
    axpy(1.0, band_density, nt_G)
def multi_zaxpy(a, x, y, nvec):
    """Apply ``axpy`` to the first ``nvec`` vector pairs of ``x`` and ``y``.

    Parameters:
        a: either a sequence with one coefficient per vector, or a single
            number that is then used for every vector.
        x: indexable collection of input vectors.
        y: indexable collection of vectors passed to ``axpy`` as the
            accumulation target.
        nvec: number of leading vector pairs to process.

    Every coefficient is multiplied by ``1 + 0J`` so that ``axpy`` always
    receives a complex factor.
    """
    if isinstance(a, (int, float, complex)):
        # Scalar coefficient: the same factor for every vector.
        factor = a * (1 + 0J)
        for i in range(nvec):
            axpy(factor, x[i], y[i])
    else:
        for i in range(nvec):
            axpy(a[i] * (1 + 0J), x[i], y[i])
def multi_zaxpy(a, x, y, nvec):
    """Call ``axpy`` once for each of the first ``nvec`` pairs in ``x``/``y``.

    Parameters:
        a: per-vector coefficients (indexable), or one plain number that
            is shared by all vectors.
        x: indexable collection of input vectors.
        y: indexable collection of vectors handed to ``axpy`` as the
            accumulation target.
        nvec: how many leading vector pairs are processed.

    Coefficients are promoted with ``* (1 + 0J)`` so ``axpy`` is always
    given a complex factor.
    """
    if isinstance(a, (int, float, complex)):
        # A single number: reuse one promoted factor for all vectors.
        shared = a * (1 + 0J)
        for i in range(nvec):
            axpy(shared, x[i], y[i])
        return
    for i in range(nvec):
        axpy(a[i] * (1 + 0J), x[i], y[i])
def mix(self, nt_G, D_ap):
    """Mix a new density into the stored history (Broyden-type update).

    ``nt_G`` and every array in ``D_ap`` are modified in place.  The
    first call (``self.step == 0``) only creates the per-entry history
    lists and records the input unchanged.  Later calls compute the
    change relative to the last stored input, extend the ``u``/``v``
    update vectors and substitute the correction ``eta`` built from them
    for that change.

    Parameters:
        nt_G: density array of the current iteration; overwritten with
            the mixed density.
        D_ap: sequence of arrays mixed with the same coefficients as
            ``nt_G``; each one is overwritten in place.

    Side effects:
        Sets ``self.dNt`` (integral of the absolute density change) on
        every call after the first and increments ``self.step``.
    """
    # Keep at most two stored differences per quantity.
    if self.step > 2:
        del self.d_nt_G[0]
        for d_Dp in self.d_D_ap:
            del d_Dp[0]

    if self.step > 0:
        # Difference between this input and the previously stored one.
        self.d_nt_G.append(nt_G - self.nt_iG[-1])
        for d_Dp, D_p, D_ip in zip(self.d_D_ap, D_ap, self.D_iap):
            d_Dp.append(D_p - D_ip[-1])
        fmin_G = self.gd.integrate(self.d_nt_G[-1] * self.d_nt_G[-1])
        self.dNt = self.gd.integrate(np.fabs(self.d_nt_G[-1]))
        if self.verbose:
            # The previous format string had two "%f" fields but a single
            # value, which raised TypeError; only fmin_G exists here.
            print('Mixer: broydn: fmin_G = %f' % fmin_G)

    if self.step == 0:
        # First call: create one empty history slot per entry of D_ap.
        self.eta_G = np.empty(nt_G.shape)
        self.eta_D = []
        for D_p in D_ap:
            self.eta_D.append(0)
            self.u_D.append([])
            self.D_iap.append([])
            self.d_D_ap.append([])
    else:
        if self.step >= 2:
            del self.c_G[:]
            if len(self.v_G) >= self.nmaxold:
                # History is full: drop the oldest update vectors.
                del self.u_G[0]
                del self.v_G[0]
                for u_D in self.u_D:
                    del u_D[0]
            temp_nt_G = self.d_nt_G[1] - self.d_nt_G[0]
            self.v_G.append(temp_nt_G /
                            self.gd.integrate(temp_nt_G * temp_nt_G))
            if len(self.v_G) < self.nmaxold:
                nstep = self.step - 1
            else:
                nstep = self.nmaxold
            for i in range(nstep):
                self.c_G.append(
                    self.gd.integrate(self.v_G[i] * self.d_nt_G[1]))
            self.u_G.append(self.beta * temp_nt_G +
                            self.nt_iG[1] - self.nt_iG[0])
            for d_Dp, u_D, D_ip in zip(self.d_D_ap, self.u_D, self.D_iap):
                temp_D_ap = d_Dp[1] - d_Dp[0]
                u_D.append(self.beta * temp_D_ap + D_ip[1] - D_ip[0])
            # Remove the components along the older v vectors from the
            # newest u vector.
            usize = len(self.u_G)
            for i in range(usize - 1):
                a_G = self.gd.integrate(self.v_G[i] * temp_nt_G)
                axpy(-a_G, self.u_G[i], self.u_G[usize - 1])
                for u_D in self.u_D:
                    axpy(-a_G, u_D[i], u_D[usize - 1])

        # Correction: eta = beta * d - sum_i c_i * u_i
        self.eta_G = self.beta * self.d_nt_G[-1]
        for i, d_Dp in enumerate(self.d_D_ap):
            self.eta_D[i] = self.beta * d_Dp[-1]
        usize = len(self.u_G)
        for i in range(usize):
            axpy(-self.c_G[i], self.u_G[i], self.eta_G)
            for eta_D, u_D in zip(self.eta_D, self.u_D):
                axpy(-self.c_G[i], u_D[i], eta_D)

        # Replace the raw change by the correction, in place.
        axpy(-1.0, self.d_nt_G[-1], nt_G)
        axpy(1.0, self.eta_G, nt_G)
        for D_p, d_Dp, eta_D in zip(D_ap, self.d_D_ap, self.eta_D):
            axpy(-1.0, d_Dp[-1], D_p)
            axpy(1.0, eta_D, D_p)

        if self.step >= 2:
            del self.nt_iG[0]
            for D_ip in self.D_iap:
                del D_ip[0]

    # Record the (mixed) input for the next call.
    self.nt_iG.append(np.copy(nt_G))
    for D_ip, D_p in zip(self.D_iap, D_ap):
        D_ip.append(np.copy(D_p))
    self.step += 1
# Overwrite entries of ``a`` in place with the conjugate of their
# transposed partner and force a real diagonal, so that ``a`` is Hermitian.
for i in range(3):
    for j in range(3):
        a[i, j] = a[j, i].conj()
    a[i, i] = np.real(a[i, i])

# Reference result, computed with plain NumPy before czher modifies ``a``.
b = alpha * np.outer(x.conj(), x) + a
czher(alpha, x, a)

# Copy the conjugate of a[i, j] (j >= i) into a[j, i] before comparing.
# NOTE(review): presumably czher updates only one triangle of ``a`` --
# confirm against the czher documentation.
for i in range(3):
    for j in range(i, 3):
        a[j, i] = a[i, j].conj()

assert np.abs(b - a).sum() < 1e-14

# testing speed: accumulate wall-clock time of 1000 czher calls and of
# the equivalent outer product followed by axpy.
t_czher = 0
t_axpy = 0
for i in np.arange(1000):
    t0 = time()
    czher(alpha, x, a)
    t_czher += time() - t0

    t0 = time()
    xx = np.outer(x.conj(), x)
    axpy(alpha, xx, a)
    t_axpy += time() - t0

print("t_czher:", t_czher)
print("t_axpy:", t_axpy)
def iterate_one_k_point(self, ham, wfs, kpt):
    """Do a single RMM-DIIS iteration for the kpoint.

    After a subspace diagonalization the bands of ``kpt`` are processed
    in blocks of ``self.blocksize``.  For every block the residuals are
    obtained, a preconditioned step with a residual-minimizing step
    length is taken, ``self.niter - 1`` DIIS steps follow, and a final
    preconditioned trial step updates the wave functions in place.

    Returns the sum over the bands of ``weight * <R|R>``, evaluated for
    the residuals at the start of each block.
    """

    self.subspace_diagonalize(ham, wfs, kpt)

    psit = kpt.psit
    # psit2 = psit.new(buf=wfs.work_array)
    P = kpt.projections
    P2 = P.new()
    # dMP = P.new()
    # M_nn = wfs.work_matrix_nn
    # dS = wfs.setups.dS
    # Residuals are stored in the self.Htpsit_nG buffer.
    R = psit.new(buf=self.Htpsit_nG)

    self.timer.start('RMM-DIIS')
    if self.keep_htpsit:
        # Residuals of all bands are computed at once here; the
        # per-block computation below is skipped in this case.
        with self.timer('Calculate residuals'):
            self.calculate_residuals(kpt, wfs, ham, psit, P, kpt.eps_n, R,
                                     P2)

    def integrate(a_G, b_G):
        # Real part of the local (not yet communicated) integral.
        return np.real(wfs.integrate(a_G, b_G, global_integral=False))

    comm = wfs.gd.comm

    # Work objects sized for one block of B bands.
    B = self.blocksize
    dR = R.new(dist=None, nbands=B)
    dpsit = dR.new()
    P = P.new(bcomm=None, nbands=B)
    P2 = P.new()
    errors_x = np.zeros(B)

    # Arrays needed for DIIS step
    if self.niter > 1:
        psit_diis_nxG = wfs.empty(B * self.niter, q=kpt.q)
        R_diis_nxG = wfs.empty(B * self.niter, q=kpt.q)

    weights = self.weights(kpt)

    # Ht(in, out) applies the pseudo Hamiltonian for this k-point.
    Ht = partial(wfs.apply_pseudo_hamiltonian, kpt, ham)

    error = 0.0
    for n1 in range(0, wfs.bd.mynbands, B):
        n2 = n1 + B
        if n2 > wfs.bd.mynbands:
            # Last, shorter block: shrink B and the work objects.
            n2 = wfs.bd.mynbands
            B = n2 - n1
            P = P.new(nbands=B)
            P2 = P.new()
            dR = dR.new(nbands=B, dist=None)
            dpsit = dR.new()

        n_x = np.arange(n1, n2)
        psitb = psit.view(n1, n2)

        with self.timer('Calculate residuals'):
            Rb = R.view(n1, n2)
            if not self.keep_htpsit:
                psitb.apply(Ht, out=Rb)
                psitb.matrix_elements(wfs.pt, out=P)
                self.calculate_residuals(kpt, wfs, ham, psitb,
                                         P, kpt.eps_n[n_x], Rb, P2, n_x)

        # Weighted squared residual norms of this block.
        errors_x[:] = 0.0
        for n in range(n1, n2):
            weight = weights[n]
            errors_x[n - n1] = weight * integrate(Rb.array[n - n1],
                                                  Rb.array[n - n1])
        comm.sum(errors_x)
        error += np.sum(errors_x)

        # Insert first vectors and residuals for DIIS step
        if self.niter > 1:
            # Save the previous vectors contiguously for each band
            # in the block
            psit_diis_nxG[:B * self.niter:self.niter] = psitb.array
            R_diis_nxG[:B * self.niter:self.niter] = Rb.array

        # Precondition the residual:
        with self.timer('precondition'):
            ekin_x = self.preconditioner.calculate_kinetic_energy(
                psitb.array, kpt)
            self.preconditioner(Rb.array, kpt, ekin_x, out=dpsit.array)

        # Calculate the residual of dpsit_G, dR_G = (H - e S) dpsit_G:
        # self.timer.start('Apply Hamiltonian')
        dpsit.apply(Ht, out=dR)
        # self.timer.stop('Apply Hamiltonian')
        with self.timer('projections'):
            dpsit.matrix_elements(wfs.pt, out=P)

        with self.timer('Calculate residuals'):
            self.calculate_residuals(kpt, wfs, ham, dpsit,
                                     P, kpt.eps_n[n_x], dR, P2, n_x,
                                     calculate_change=True)

        # Find lam that minimizes the norm of R'_G = R_G + lam dR_G
        with self.timer('Find lambda'):
            RdR_x = np.array([
                integrate(dR_G, R_G)
                for R_G, dR_G in zip(Rb.array, dR.array)
            ])
            dRdR_x = np.array([integrate(dR_G, dR_G) for dR_G in dR.array])
            comm.sum(RdR_x)
            comm.sum(dRdR_x)
            lam_x = -RdR_x / dRdR_x
            # Clamp lam to the [lower, upper] window from
            # self.limit_lambda; with 'absolute' set, the window is
            # applied to abs(lam) and the sign is kept.
            if self.limit_lambda:
                upper = self.limit_lambda['upper']
                lower = self.limit_lambda['lower']
                if self.limit_lambda.get('absolute', False):
                    lam_x = np.where(
                        np.abs(lam_x) < lower,
                        lower * np.sign(lam_x), lam_x)
                    lam_x = np.where(
                        np.abs(lam_x) > upper,
                        upper * np.sign(lam_x), lam_x)
                else:
                    lam_x = np.where(lam_x < lower, lower, lam_x)
                    lam_x = np.where(lam_x > upper, upper, lam_x)

        # lam_x[:] = 0.1

        # New trial wavefunction and residual
        with self.timer('Update psi'):
            for lam, psit_G, dpsit_G, R_G, dR_G in zip(
                    lam_x, psitb.array, dpsit.array, Rb.array, dR.array):
                axpy(lam, dpsit_G, psit_G)  # psit_G += lam * dpsit_G
                axpy(lam, dR_G, R_G)  # R_G += lam * dR_G

        self.timer.start('DIIS step')
        # DIIS step
        for nit in range(1, self.niter):
            # Do not perform DIIS if error is small
            # if abs(error_block / B) < self.rtol:
            #     break

            # Update the subspace: slot ``nit`` of every band's history
            # receives the current trial vector and residual.
            psit_diis_nxG[nit:B * self.niter:self.niter] = psitb.array
            R_diis_nxG[nit:B * self.niter:self.niter] = Rb.array

            # XXX Only integrals of nit old psits would be needed
            # self.timer.start('projections')
            # wfs.pt.integrate(psit_diis_nxG, P_diis_anxi, kpt.q)
            # self.timer.stop('projections')
            if nit > 1 or self.limit_lambda:
                for ib in range(B):
                    # History slots of band ``ib``: [istart, iend)
                    istart = ib * self.niter
                    iend = istart + nit + 1

                    # Residual matrix
                    self.timer.start('Construct matrix')
                    R_nn = wfs.integrate(R_diis_nxG[istart:iend],
                                         R_diis_nxG[istart:iend],
                                         global_integral=True)

                    # Full matrix: residual overlaps bordered by a row
                    # and column of -1 with a zero corner.
                    A_nn = -np.ones((nit + 2, nit + 2), wfs.dtype)
                    A_nn[:nit + 1, :nit + 1] = R_nn[:]
                    A_nn[-1, -1] = 0.0
                    x_n = np.zeros(nit + 2, wfs.dtype)
                    x_n[-1] = -1.0
                    self.timer.stop('Construct matrix')
                    with self.timer('Linear solve'):
                        # The last solution component is discarded.
                        alpha_i = np.linalg.solve(A_nn, x_n)[:-1]

                    # New trial vector/residual: linear combination of
                    # the stored history with coefficients alpha_i.
                    self.timer.start('Update trial vectors')
                    psitb.array[ib] = alpha_i[nit] * psit_diis_nxG[istart +
                                                                   nit]
                    Rb.array[ib] = alpha_i[nit] * R_diis_nxG[istart + nit]
                    for i in range(nit):
                        # axpy(alpha_i[i], psit_diis_nxG[istart + i],
                        #      psit_diis_nxG[istart + nit])
                        # axpy(alpha_i[i], R_diis_nxG[istart + i],
                        #      R_diis_nxG[istart + nit])
                        axpy(alpha_i[i], psit_diis_nxG[istart + i],
                             psitb.array[ib])
                        axpy(alpha_i[i], R_diis_nxG[istart + i],
                             Rb.array[ib])
                    self.timer.stop('Update trial vectors')

            if nit < self.niter - 1:
                # Not the last DIIS step: take another preconditioned
                # step and recompute the residuals for the next round.
                with self.timer('precondition'):
                    self.preconditioner(Rb.array, kpt, ekin_x,
                                        out=dpsit.array)

                for psit_G, lam, dpsit_G in zip(psitb.array, lam_x,
                                                dpsit.array):
                    axpy(lam, dpsit_G, psit_G)

                # Calculate the new residuals
                self.timer.start('Calculate residuals')
                psitb.apply(Ht, out=Rb)
                psitb.matrix_elements(wfs.pt, out=P)
                self.calculate_residuals(kpt, wfs, ham, psitb,
                                         P, kpt.eps_n[n_x], Rb, P2, n_x,
                                         calculate_change=True)
                self.timer.stop('Calculate residuals')

        self.timer.stop('DIIS step')
        # Final trial step
        with self.timer('precondition'):
            self.preconditioner(Rb.array, kpt, ekin_x, out=dpsit.array)

        self.timer.start('Update psi')

        # A configured trial_step overrides the computed step lengths.
        if self.trial_step is not None:
            lam_x[:] = self.trial_step
        for lam, psit_G, dpsit_G in zip(lam_x, psitb.array, dpsit.array):
            axpy(lam, dpsit_G, psit_G)  # psit_G += lam * dpsit_G
        self.timer.stop('Update psi')

    self.timer.stop('RMM-DIIS')
    return error
def add_orbital_density(self, nt_G, kpt, n):
    """Accumulate the density of band ``n`` into ``nt_G`` in place.

    For ``self.dtype == float`` the square of ``kpt.psit_nG[n]`` is
    added; otherwise the squares of its real and imaginary parts are
    added one after the other.
    """
    if self.dtype != float:
        # Complex case: |psi|^2 = Re(psi)^2 + Im(psi)^2
        axpy(1.0, kpt.psit_nG[n].real**2, nt_G)
        axpy(1.0, kpt.psit_nG[n].imag**2, nt_G)
        return
    axpy(1.0, kpt.psit_nG[n]**2, nt_G)
def mix(self, nt_G, D_ap):
    """Mix the new density with the stored history, in place.

    On the first call (empty history) the input is only recorded.  On
    later calls the residual with respect to the last stored input is
    appended to the history, the matrix of residual inner products
    (``self.dotprod``) is extended, and ``nt_G`` and every array in
    ``D_ap`` are overwritten with
    ``sum_i alpha_i * (old_i + beta * residual_i)``, where the weights
    ``alpha_i`` are the normalized row sums of the inverse matrix.

    Parameters:
        nt_G: density array of the current iteration; overwritten with
            the mixed density.
        D_ap: sequence of arrays mixed with the same weights as
            ``nt_G``; each one is overwritten in place.

    Side effects:
        Sets ``self.dNt`` (from ``self.calculate_charge_sloshing``) and
        ``self.A_ii`` and extends the history lists.
    """
    iold = len(self.nt_iG)
    if iold > 0:
        if iold > self.nmaxold:
            # Throw away too old stuff:
            del self.nt_iG[0]
            del self.R_iG[0]
            del self.D_iap[0]
            del self.dD_iap[0]
            iold = self.nmaxold

        # Calculate new residual (difference between input and
        # output density):
        R_G = nt_G - self.nt_iG[-1]
        self.dNt = self.calculate_charge_sloshing(R_G)
        self.R_iG.append(R_G)
        self.dD_iap.append([])
        for D_p, D_ip in zip(D_ap, self.D_iap[-1]):
            self.dD_iap[-1].append(D_p - D_ip)

        # Update matrix: only the last row/column is new, the rest is
        # copied from the matrix of the previous call.
        A_ii = np.zeros((iold, iold))
        i2 = iold - 1

        if self.metric is None:
            mR_G = R_G
        else:
            mR_G = self.mR_G
            self.metric(R_G, mR_G)

        for i1, R_1G in enumerate(self.R_iG):
            a = self.gd.comm.sum(self.dotprod(R_1G, mR_G,
                                              self.dD_iap[i1],
                                              self.dD_iap[-1]))
            A_ii[i1, i2] = a
            A_ii[i2, i1] = a
        A_ii[:i2, :i2] = self.A_ii[-i2:, -i2:]
        self.A_ii = A_ii

        try:
            B_ii = np.linalg.inv(A_ii)
        except np.linalg.LinAlgError:
            # Singular matrix: use the latest density only.
            alpha_i = np.zeros(iold)
            alpha_i[-1] = 1.0
        else:
            alpha_i = B_ii.sum(1)
            # NumPy division never raises ZeroDivisionError (it yields
            # inf/nan), so the zero-sum case is tested explicitly.
            weight_sum = alpha_i.sum()
            if weight_sum == 0.0:
                alpha_i[:] = 0.0
                alpha_i[-1] = 1.0
            else:
                # Normalize:
                alpha_i /= weight_sum

        # Calculate new input density:
        nt_G[:] = 0.0
        for D in D_ap:
            D[:] = 0.0
        beta = self.beta
        for i, alpha in enumerate(alpha_i):
            axpy(alpha, self.nt_iG[i], nt_G)
            axpy(alpha * beta, self.R_iG[i], nt_G)
            for D_p, D_ip, dD_ip in zip(D_ap, self.D_iap[i],
                                        self.dD_iap[i]):
                axpy(alpha, D_ip, D_p)
                axpy(alpha * beta, dD_ip, D_p)

    # Store new input density (and new atomic density matrices):
    self.nt_iG.append(nt_G.copy())
    self.D_iap.append([])
    for D_p in D_ap:
        self.D_iap[-1].append(D_p.copy())
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do conjugate gradient iterations for the k-point.

    After a subspace diagonalization, every band is improved by up to
    ``self.niter`` conjugate-gradient steps.  Each step builds a
    preconditioned search direction, orthonormalizes it against the
    bands, and mixes it into the band with the rotation angle that
    lowers the band energy.  ``kpt.eps_n`` and ``kpt.P_ani`` are updated
    in place.

    Returns the tuple ``(total_error, psit_nG)``: the weighted sum of
    the squared residual norms and the updated wave functions.
    """

    niter = self.niter

    # Work arrays for the current and the previous search direction.
    phi_G = wfs.empty(q=kpt.q)
    phi_old_G = wfs.empty(q=kpt.q)

    comm = wfs.gd.comm

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    R_nG = reshape(self.Htpsit_nG, psit_nG.shape)
    # Htphi_G is a view of R_nG[0], not separate storage.
    Htphi_G = R_nG[0]

    R_nG[:] = Htpsit_nG
    self.timer.start('Residuals')
    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)
    self.timer.stop('Residuals')

    self.timer.start('CG')

    total_error = 0.0
    for n in range(self.nbands):
        # N is the upper slice bound of the bands used for the
        # orthogonalization: bands 0..n with the 'PK' option, otherwise
        # all of them.
        if extra_parameters.get('PK', False):
            N = n+1
        else:
            N = psit_nG.shape[0]+1
        R_G = R_nG[n]
        Htpsit_G = Htpsit_nG[n]
        gamma_old = 1.0
        phi_old_G[:] = 0.0
        error = np.real(wfs.integrate(R_G, R_G))
        for nit in range(niter):
            # Stop as soon as this band meets its share of the tolerance.
            if (error * Hartree**2 < self.tolerance / self.nbands):
                break

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n:n + 1], kpt)

            pR_G = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

            # New search direction
            gamma = comm.sum(np.vdot(pR_G, R_G).real)
            phi_G[:] = -pR_G - gamma / gamma_old * phi_old_G
            gamma_old = gamma
            phi_old_G[:] = phi_G[:]

            # Calculate projections
            P2_ai = wfs.pt.dict()
            wfs.pt.integrate(phi_G, P2_ai, kpt.q)

            # Orthonormalize phi_G to all bands
            self.timer.start('CG: orthonormalize')
            self.timer.start('CG: overlap')
            overlap_n = wfs.integrate(psit_nG[:N], phi_G,
                                      global_integral=False)
            self.timer.stop('CG: overlap')
            self.timer.start('CG: overlap2')
            # Add the dO_ii contributions of the projections to the
            # overlaps.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                gemv(1.0, P_ni[:N].conjugate(), np.inner(dO_ii, P2_i),
                     1.0, overlap_n)
            self.timer.stop('CG: overlap2')
            comm.sum(overlap_n)

            # phi_G -= overlap_n * kpt.psit_nG
            wfs.matrixoperator.gd.gemv(-1.0, psit_nG[:N], overlap_n,
                                       1.0, phi_G, 'n')

            # Same projection removal for the projections of phi_G.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                gemv(-1.0, P_ni[:N], overlap_n, 1.0, P2_i, 'n')

            # Normalize phi_G, including the dO_ii terms in the norm.
            norm = wfs.integrate(phi_G, phi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                dO_ii = wfs.setups[a].dO_ii
                norm += np.vdot(P2_i, np.inner(dO_ii, P2_i))
            norm = comm.sum(np.real(norm).item())
            phi_G /= sqrt(norm)
            for P2_i in P2_ai.values():
                P2_i /= sqrt(norm)
            self.timer.stop('CG: orthonormalize')

            # find optimum linear combination of psit_G and phi_G
            an = kpt.eps_n[n]
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian,
                                         phi_G.reshape((1,) + phi_G.shape),
                                         Htphi_G.reshape((1,) +
                                                         Htphi_G.shape))
            # b and c: integrals of phi_G with Htpsit_G and with Htphi_G,
            # each extended by the dH_ii terms of the projections.
            b = wfs.integrate(phi_G, Htpsit_G, global_integral=False)
            c = wfs.integrate(phi_G, Htphi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                b += dot(P2_i, dot(dH_ii, P_i.conj()))
                c += dot(P2_i, dot(dH_ii, P2_i.conj()))
            b = comm.sum(np.real(b).item())
            c = comm.sum(np.real(c).item())

            # Rotation angle that makes the energy of
            # cos(theta) * psit + sin(theta) * phi stationary.
            theta = 0.5 * atan2(2 * b, an - c)
            enew = (an * cos(theta)**2 +
                    c * sin(theta)**2 +
                    b * sin(2.0 * theta))
            # theta can correspond either minimum or maximum
            if (enew - kpt.eps_n[n]) > 0.0:  # we were at maximum
                theta += pi / 2.0
                enew = (an * cos(theta)**2 +
                        c * sin(theta)**2 +
                        b * sin(2.0 * theta))

            kpt.eps_n[n] = enew
            psit_nG[n] *= cos(theta)
            # kpt.psit_nG[n] += sin(theta) * phi_G
            axpy(sin(theta), phi_G, psit_nG[n])
            # Apply the same rotation to the stored projections.
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                P_i *= cos(theta)
                P_i += sin(theta) * P2_i

            if nit < niter - 1:
                # Another iteration may follow: update H|psit> and the
                # residual for the rotated band.
                Htpsit_G *= cos(theta)
                # Htpsit_G += sin(theta) * Htphi_G
                axpy(sin(theta), Htphi_G, Htpsit_G)
                # adjust residuals
                R_G[:] = Htpsit_G - kpt.eps_n[n] * psit_nG[n]

                coef_ai = wfs.pt.dict()
                for a, coef_i in coef_ai.items():
                    P_i = kpt.P_ani[a][n]
                    dO_ii = wfs.setups[a].dO_ii
                    dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                    coef_i[:] = (dot(P_i, dH_ii) -
                                 dot(P_i * kpt.eps_n[n], dO_ii))
                wfs.pt.add(R_G, coef_ai, kpt.q)
                error_new = np.real(wfs.integrate(R_G, R_G))
                # Stop once the residual dropped by the factor self.rtol.
                if error_new / error < self.rtol:
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                # Unoccupied bands get a single step when only occupied
                # bands have to converge.
                if (self.nbands_converge == 'occupied' and
                        kpt.f_n is not None and kpt.f_n[n] == 0.0):
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                error = error_new

        # Weight of this band in the total error.
        if kpt.f_n is None:
            weight = 1.0
        else:
            weight = kpt.f_n[n]
        if self.nbands_converge != 'occupied':
            weight = kpt.weight * float(n < self.nbands_converge)
        total_error += weight * error
        # if nit == 3:
        #     print >> self.f, "cg:iters", n, nit+1

    self.timer.stop('CG')
    return total_error, psit_nG