def check(self, l):
    """Diagonalize the PAW Hamiltonian for angular momentum ``l``.

    A Gaussian basis is built with the same ``alpha_B`` and ``eps`` as the
    basis of the first all-electron channel.  The potential matrix for
    ``self.vtr_g`` plus the kinetic matrix ``T_bb`` form the Hamiltonian;
    if partial waves exist for this ``l`` their projector contributions
    (``dH_nn`` and ``dS_nn``) are added to the Hamiltonian and overlap.

    Returns:
        Tuple ``(e_b, n0)``: the eigenvalue array filled in by
        ``general_diagonalize`` and an integer state offset ``n0``.
    """
    reference = self.aea.channels[0].basis
    eps = reference.eps
    alpha_B = reference.alpha_B

    basis = GaussianBasis(l, alpha_B, self.rgd, eps)
    H_bb = basis.calculate_potential_matrix(self.vtr_g)
    H_bb += basis.T_bb
    S_bb = np.eye(len(basis))

    def count_occupied():
        # Number of all-electron states of this channel with f_n > 0.
        return (self.aea.channels[l].f_n > 0).sum()

    n0 = 0
    if l >= len(self.waves_l):
        # No partial waves for this l: fall back on the all-electron
        # channel, if there is one.
        if l < len(self.aea.channels):
            n0 = count_occupied()
    else:
        waves = self.waves_l[l]
        if len(waves) > 0:
            overlap_bng = basis.basis_bg[:, None] * waves.pt_ng
            P_bn = self.rgd.integrate(overlap_bng) / (4 * pi)
            H_bb += np.dot(np.dot(P_bn, waves.dH_nn), P_bn.T)
            S_bb += np.dot(np.dot(P_bn, waves.dS_nn), P_bn.T)
            n0 = waves.n_n[0] - l - 1
            if n0 < 0 and l < len(self.aea.channels):
                n0 = count_occupied()

    e_b = np.empty(len(basis))
    general_diagonalize(H_bb, e_b, S_bb)
    return e_b, n0
def check(self, l):
    """Return PAW eigenvalues and a state offset for angular momentum ``l``.

    The Hamiltonian is assembled in a ``GaussianBasis`` that reuses the
    ``alpha_B`` and ``eps`` of the first all-electron channel's basis:
    potential matrix of ``self.vtr_g`` plus ``T_bb``, with the projector
    terms of the partial waves added when such waves exist.

    Returns:
        ``(e_b, n0)`` where ``e_b`` is the eigenvalue array written by
        ``general_diagonalize`` and ``n0`` is an integer offset.
    """
    template = self.aea.channels[0].basis
    eps = template.eps
    alpha_B = template.alpha_B

    basis = GaussianBasis(l, alpha_B, self.rgd, eps)
    H_bb = basis.calculate_potential_matrix(self.vtr_g)
    H_bb += basis.T_bb
    S_bb = np.eye(len(basis))

    def solve(n0):
        # Generalized eigenproblem H c = e S c; eigenvalues land in e_b.
        e_b = np.empty(len(basis))
        general_diagonalize(H_bb, e_b, S_bb)
        return e_b, n0

    if l >= len(self.waves_l):
        if l < len(self.aea.channels):
            return solve((self.aea.channels[l].f_n > 0).sum())
        return solve(0)

    waves = self.waves_l[l]
    if not len(waves) > 0:
        return solve(0)

    P_bn = self.rgd.integrate(
        basis.basis_bg[:, None] * waves.pt_ng) / (4 * pi)
    H_bb += np.dot(np.dot(P_bn, waves.dH_nn), P_bn.T)
    S_bb += np.dot(np.dot(P_bn, waves.dS_nn), P_bn.T)

    n0 = waves.n_n[0] - l - 1
    if n0 < 0 and l < len(self.aea.channels):
        # A negative offset is replaced by the number of occupied
        # all-electron states in this channel.
        n0 = (self.aea.channels[l].f_n > 0).sum()
    return solve(n0)
def diagonalize(self, h):
    """Diagonalize on a uniform radial grid and report ghost states.

    For ``l = 0..3`` a finite-difference Hamiltonian with ``ng = 350``
    points of spacing ``h`` is built from the interpolated ``self.vt``
    and, for ``l <= self.lmax``, the interpolated projectors
    ``self.q_ln[l]``.  The lowest eigenvalue is printed through
    ``self.text``; ``self.ghost`` is set to True when it deviates from the
    all-electron reference by more than 0.014, or lies below ``self.emax``
    when there is no reference.

    Args:
        h: Grid spacing of the uniform grid.
    """
    ng = 350
    log = self.text
    log()
    log('Diagonalizing with gridspacing h=%.3f' % h)

    # Uniform grid points and, for each, the index of the central point
    # of the three source-grid points used for interpolation.
    R = h * np.arange(1, ng + 1)
    G = (self.N * R / (self.beta + R) + 0.5).astype(int)
    G = np.clip(G, 1, self.N - 2)
    R1, R2, R3 = (np.take(self.r, G + shift) for shift in (-1, 0, 1))

    # Three-point (quadratic) Lagrange interpolation weights.
    x1 = (R - R2) * (R - R3) / (R1 - R2) / (R1 - R3)
    x2 = (R - R1) * (R - R3) / (R2 - R1) / (R2 - R3)
    x3 = (R - R1) * (R - R2) / (R3 - R1) / (R3 - R2)

    def interpolate(f):
        return (np.take(f, G - 1) * x1 + np.take(f, G) * x2 +
                np.take(f, G + 1) * x3)

    vt = interpolate(self.vt)
    log()
    log('state all-electron PAW')
    log('-------------------------------')

    stride = ng + 1  # step between diagonal elements of a raveled matrix
    for l in range(4):
        has_projectors = l <= self.lmax
        if has_projectors:
            q_n = np.array([interpolate(q) for q in self.q_ln[l]])
            H = np.dot(np.transpose(q_n), np.dot(self.dH_lnn[l], q_n)) * h
            S = np.dot(np.transpose(q_n), np.dot(self.dO_lnn[l], q_n)) * h
        else:
            H = np.zeros((ng, ng))
            S = np.zeros((ng, ng))

        # Local potential, finite-difference kinetic energy and
        # centrifugal term on the diagonal; off-diagonals couple
        # neighbouring grid points.
        H.ravel()[::stride] += vt + 1.0 / h**2 + l * (l + 1) / 2.0 / R**2
        H.ravel()[1::stride] -= 0.5 / h**2
        H.ravel()[ng::stride] -= 0.5 / h**2
        S.ravel()[::stride] += 1.0

        e_n = np.zeros(ng)
        general_diagonalize(H, e_n, S)
        ePAW = e_n[0]

        if has_projectors and self.n_ln[l][0] > 0:
            eAE = self.e_ln[l][0]
            log('%d%s: %12.6f %12.6f' % (self.n_ln[l][0], 'spdf'[l],
                                         eAE, ePAW), end='')
            is_ghost = abs(eAE - ePAW) > 0.014
        else:
            log('*%s: %12.6f' % ('spdf'[l], ePAW), end='')
            is_ghost = ePAW < self.emax

        if is_ghost:
            log(' GHOST-STATE!')
            self.ghost = True
        else:
            log()
    log('-------------------------------')
def iterate(self, hamiltonian, wfs):
    """Solve the radial eigenproblem for every spin and angular momentum.

    For each spin ``s`` and each ``l`` the matrix ``T_l[l] + diag(vt)`` is
    augmented with the projector terms of ``dH_asp[0][s]`` and diagonalized
    against ``S_l[l]``.  Eigenvalues, wave functions and projections are
    written into ``wfs.kpt_u[s]`` (``eps_n``, ``psit_nG``, ``P_ani[0]``),
    each radial state filling ``2 * l + 1`` consecutive band slots.
    """
    if not self.initialized:
        self.initialize(wfs)

    r = self.gd.r_g
    h = r[0]
    lmax = len(self.f_sln[0]) - 1
    setup = wfs.setups[0]

    e_n = np.zeros(len(r))

    # First row/column of every projector in the unpacked dH / P arrays.
    projectors = []
    offset = 0
    for pt, lj in zip(self.pt_j, setup.l_j):
        projectors.append((pt, lj, offset))
        offset += 2 * lj + 1

    for s in range(wfs.nspins):
        dH_ii = unpack(hamiltonian.dH_asp[0][s])
        kpt = wfs.kpt_u[s]
        N1 = 0
        for l in range(lmax + 1):
            degeneracy = 2 * l + 1
            matching = [(pt, start) for pt, lj, start in projectors
                        if lj == l]

            H = self.T_l[l] + np.diag(hamiltonian.vt_sg[s])
            for pt1, i1 in matching:
                for pt2, i2 in matching:
                    H += (h * dH_ii[i1, i2] *
                          np.outer(pt1 * r, pt2 * r))

            # H is read back below as the eigenvector matrix (row n is
            # state n); S_l[l] is copied so the stored overlap survives.
            general_diagonalize(H, e_n, self.S_l[l].copy())

            for n in range(len(self.f_sln[s][l])):
                N2 = N1 + degeneracy
                kpt.eps_n[N1:N2] = e_n[n]
                kpt.psit_nG[N1:N2] = H[n] / r / sqrt(h)
                for pt, i1 in matching:
                    P = np.dot(kpt.psit_nG[N1], pt * r**2) * h
                    kpt.P_ani[0][N1:N2, i1:i1 + degeneracy] = (
                        P * np.eye(degeneracy))
                N1 = N2
def iterate_one_k_point(self, ham, wfs, kpt, Vt_xdMM):
    """Diagonalize the doubled (two-component) LCAO problem for one k-point.

    Four Hamiltonian matrices are computed (``kpt.s`` is set to 0..3 in
    turn and reset to None), only the first including the kinetic term.
    Components 0 and 3 are combined into the diagonal blocks of a
    ``2M x 2M`` complex Hamiltonian, the overlap is placed on both diagonal
    blocks, and the generalized eigenproblem is solved.  Results are stored
    in ``kpt.eps_n`` and ``kpt.C_nM``.

    Raises:
        NotImplementedError: for strided band parallelization.
    """
    assert wfs.gd.comm.size == 1, 'No quite sure this works!'
    if wfs.bd.comm.size > 1 and wfs.bd.strided:
        raise NotImplementedError

    H_xMM = []
    for component in range(4):
        kpt.s = component
        H_xMM.append(
            self.calculate_hamiltonian_matrix(
                ham, wfs, kpt, Vt_xdMM[component],
                root=0, add_kinetic=(component == 0)))
    kpt.s = None

    S_MM = wfs.S_qMM[kpt.q]
    M = len(S_MM)
    doubled = (2 * M, 2 * M)
    S2_MM = np.zeros(doubled, complex)
    H2_MM = np.zeros(doubled, complex)

    upper = (slice(None, M), slice(None, M))
    lower = (slice(M, None), slice(M, None))
    S2_MM[upper] = S_MM
    S2_MM[lower] = S_MM
    # NOTE(review): H_xMM[1] and H_xMM[2] are computed above but never
    # enter H2_MM; the off-diagonal blocks stay zero -- confirm intended.
    H2_MM[upper] = H_xMM[0] + H_xMM[3]
    H2_MM[lower] = H_xMM[0] - H_xMM[3]

    kpt.eps_n = np.empty(2 * wfs.bd.mynbands)

    timer_label = repr(self.diagonalizer)
    wfs.timer.start(timer_label)
    from gpaw.utilities.lapack import general_diagonalize
    general_diagonalize(H2_MM, kpt.eps_n, S2_MM)
    # The diagonalized matrix itself is kept as the coefficient array.
    kpt.C_nM = H2_MM
    wfs.timer.stop(timer_label)

    kpt.C_nM.shape = (wfs.bd.mynbands * 4, M)
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do Davidson iterations for the kpoint

    Each of the ``self.niter`` iterations preconditions the residuals into
    a second set of vectors (``psit2_nG``), builds the ``2n x 2n``
    Hamiltonian and overlap matrices in the combined space, diagonalizes
    them on rank 0 of the domain communicator, broadcasts the result and
    rotates the wave functions and projections with the first ``nbands``
    rows of the result.

    Returns:
        Tuple ``(error, psit_nG)``: the weighted residual norm of the last
        iteration, summed over the domain communicator, and the array that
        holds the rotated wave functions.
    """
    niter = self.niter
    nbands = self.nbands

    gd = wfs.matrixoperator.gd

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    H_2n2n = self.H_2n2n
    S_2n2n = self.S_2n2n
    eps_2n = self.eps_2n
    psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)

    self.timer.start("Davidson")
    # R_nG aliases Htpsit_nG: the residuals are written into that buffer.
    R_nG = Htpsit_nG
    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)

    def integrate(a_G, b_G):
        # Local (not yet communicator-summed) real part of <a|b>.
        return np.real(wfs.integrate(a_G, b_G, global_integral=False))

    for nit in range(niter):
        H_2n2n[:] = 0.0
        S_2n2n[:] = 0.0

        norm_n = np.zeros(nbands)
        error = 0.0
        for n in range(nbands):
            # Weight of band n in the error: occupation (or k-point
            # weight), unless a fixed number of bands is to be converged.
            if kpt.f_n is None:
                weight = kpt.weight
            else:
                weight = kpt.f_n[n]
            if self.nbands_converge != "occupied":
                if n < self.nbands_converge:
                    weight = kpt.weight
                else:
                    weight = 0.0
            error += weight * integrate(R_nG[n], R_nG[n])

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n : n + 1], kpt)
            psit2_nG[n] = self.preconditioner(R_nG[n : n + 1], kpt, ekin)

            if self.normalize:
                norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

            # Upper-left block: diagonal in the current states.
            H_2n2n[n, n] = kpt.eps_n[n]
            S_2n2n[n, n] = 1.0

        if self.normalize:
            gd.comm.sum(norm_n)
            for norm, psit2_G in zip(norm_n, psit2_nG):
                psit2_G *= norm ** -0.5

        # Calculate projections
        P2_ani = wfs.pt.dict(nbands)
        wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

        # Hamiltonian matrix
        # <psi2 | H | psi>
        # Htpsit_nG (and hence R_nG) is overwritten with H|psi2> here.
        wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit2_nG, Htpsit_nG)
        gd.integrate(psit_nG, Htpsit_nG, global_integral=False,
                     _transposed_result=self.H_nn)
        # gemm(1.0, psit_nG, Htpsit_nG, 0.0, self.H_nn, 'c')

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

        gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, :nbands] = self.H_nn

        # <psi2 | H | psi2>
        gd.integrate(psit2_nG, Htpsit_nG, global_integral=False,
                     _transposed_result=self.H_nn)
        # r2k(0.5 * gd.dv, psit2_nG, Htpsit_nG, 0.0, self.H_nn)
        for a, P2_ni in P2_ani.items():
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

        gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, nbands:] = self.H_nn

        # Overlap matrix
        # <psi2 | S | psi>
        gd.integrate(psit_nG, psit2_nG, global_integral=False,
                     _transposed_result=self.S_nn)
        # gemm(1.0, psit_nG, psit2_nG, 0.0, self.S_nn, 'c')

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

        gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, :nbands] = self.S_nn

        # <psi2 | S | psi2>
        gd.integrate(psit2_nG, psit2_nG, global_integral=False,
                     _transposed_result=self.S_nn)
        # rk(gd.dv, psit2_nG, 0.0, self.S_nn)
        for a, P2_ni in P2_ani.items():
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

        gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, nbands:] = self.S_nn

        if gd.comm.rank == 0:
            # m = number of overlap eigenvalues below self.smin; those
            # directions are dropped from the subspace when m > 0.
            m = 0
            if self.smin:
                s_N, U_NN = np.linalg.eigh(S_2n2n)
                m = int((s_N < self.smin).sum())

            if m == 0:
                # Only the diagonal and lower blocks were filled above;
                # presumably general_diagonalize reads just the lower
                # triangle and returns eigenvectors in H_2n2n -- confirm.
                general_diagonalize(H_2n2n, eps_2n, S_2n2n)
            else:
                T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:] ** -0.5))
                # np.linalg.eigh path works on the full matrix product,
                # so the upper-right block is filled in explicitly.
                H_2n2n[:nbands, nbands:] = H_2n2n[nbands:, :nbands].conj().T
                eps_2n[:-m], P_nn = np.linalg.eigh(
                    np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

        gd.comm.broadcast(H_2n2n, 0)
        gd.comm.broadcast(eps_2n, 0)

        kpt.eps_n[:] = eps_2n[:nbands]

        # Rotate psit_nG
        gd.gemm(1.0, psit_nG, H_2n2n[:nbands, :nbands], 0.0, Htpsit_nG)
        gd.gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:], 1.0, Htpsit_nG)
        # The rotated states were accumulated in Htpsit_nG; swap the names
        # so psit_nG refers to them and the old buffer becomes scratch.
        psit_nG, Htpsit_nG = Htpsit_nG, psit_nG

        # Rotate P_uni:
        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
            gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

        if nit < niter - 1:
            # Residuals are only needed if another iteration follows.
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG,
                                         Htpsit_nG)
            R_nG = Htpsit_nG
            self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                     kpt.P_ani, kpt.eps_n, R_nG)

    self.timer.stop("Davidson")
    error = gd.comm.sum(error)
    return error, psit_nG
def _diagonalize(self, H_MM, S_MM, eps_M):
    """Solve the generalized eigenproblem serially through LAPACK.

    Thin wrapper that forwards to ``general_diagonalize`` with the
    argument order it expects: Hamiltonian, eigenvalue array, overlap.
    """
    # Every caller repeats this computation; the duplicated work is the
    # price for not having to communicate the result afterwards.
    general_diagonalize(H_MM, eps_M, S_MM)
def main(N=73, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare ScaLAPACK drivers against serial LAPACK reference results.

    Random symmetric/Hermitian ``N x N`` matrices are built on the master
    rank, solved serially (standard and generalized eigenproblem, inverse
    Cholesky), redistributed onto an ``mprocs x nprocs`` BLACS grid with
    8 x 8 blocks and solved again with the divide-and-conquer ScaLAPACK
    drivers.  The maximum absolute deviations are printed on rank 0 and
    asserted on all ranks to be below the module-level ``tol``.

    Args:
        N: Matrix dimension.
        seed: Seed for the random number generator.
        mprocs: Number of process rows in the BLACS grid.
        nprocs: Number of process columns in the BLACS grid.
        dtype: ``float`` or ``complex``.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.
    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W_dc = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten

    # We don't test the expert drivers anymore since there might be a
    # buffer overflow error; this disables scalapack_diagonalize_ex/_mr3
    # and scalapack_general_diagonalize_ex/_mr3.
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc,
                                     'L')
    scalapack_inverse_cholesky(dist, C, 'L')

    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)

    if rank == 0:
        diag_dc_err = abs(W_dc - W0).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()
        # print() calls instead of Python 2 print statements, which are a
        # SyntaxError on Python 3.
        print('diagonalize dc err', diag_dc_err)
        print('general diagonalize dc err', general_diag_dc_err)
        print('inverse chol err', inverse_chol_err)
    else:
        diag_dc_err = 0.0
        general_diag_dc_err = 0.0
        inverse_chol_err = 0.0

    # We don't like exceptions on only one cpu
    diag_dc_err = world.sum(diag_dc_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    assert diag_dc_err < tol
    assert general_diag_dc_err < tol
    assert inverse_chol_err < tol
def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M, UL='L', iu=None):
    """Solve the generalized eigenproblem and copy the result into ``C_mm``.

    ``general_diagonalize`` is called on ``H_mm`` and ``S_mm`` with the
    eigenvalue array ``eps_M``; ``iu`` is forwarded as a keyword.  The
    contents of ``H_mm`` after the call are then copied into ``C_mm``.
    ``UL`` is accepted but not used here.
    """
    general_diagonalize(H_mm, eps_M, S_mm, iu=iu)
    # Slice assignment keeps the caller's C_mm object and fills it.
    C_mm[...] = H_mm
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do Davidson iterations for the kpoint

    After a subspace diagonalization, each of the ``self.niter``
    iterations preconditions the residuals into ``psit2_nG``, builds the
    ``2n x 2n`` Hamiltonian and overlap matrices, diagonalizes them on
    rank 0 of ``self.gd.comm``, broadcasts the result and rotates
    ``kpt.psit_nG`` and ``kpt.P_ani`` with it.

    Returns:
        The weighted residual norm of the last iteration, summed over
        ``self.gd.comm``.
    """
    niter = self.niter
    nbands = self.nbands

    self.subspace_diagonalize(hamiltonian, wfs, kpt)

    H_2n2n = self.H_2n2n
    S_2n2n = self.S_2n2n
    eps_2n = self.eps_2n
    psit2_nG = wfs.matrixoperator.suggest_temporary_buffer()

    self.timer.start('Davidson')
    # R_nG aliases self.Htpsit_nG: residuals are written into that buffer.
    R_nG = self.Htpsit_nG
    self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)

    for nit in range(niter):
        H_2n2n[:] = 0.0
        S_2n2n[:] = 0.0

        error = 0.0
        for n in range(nbands):
            # Weight of band n in the error: occupation (or k-point
            # weight), unless a fixed number of bands is to be converged.
            if kpt.f_n is None:
                weight = kpt.weight
            else:
                weight = kpt.f_n[n]
            if self.nbands_converge != 'occupied':
                if n < self.nbands_converge:
                    weight = kpt.weight
                else:
                    weight = 0.0
            error += weight * np.vdot(R_nG[n], R_nG[n]).real

            # Upper-left block: diagonal in the current states.
            H_2n2n[n,n] = kpt.eps_n[n]
            S_2n2n[n,n] = 1.0
            psit2_nG[n] = self.preconditioner(R_nG[n], kpt)

        # Calculate projections
        P2_ani = wfs.pt.dict(nbands)
        wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

        # Hamiltonian matrix
        # <psi2 | H | psi>
        # self.Htpsit_nG (and hence R_nG) is overwritten with the kinetic
        # and local-potential parts applied to psi2.
        wfs.kin.apply(psit2_nG, self.Htpsit_nG, kpt.phase_cd)
        hamiltonian.apply_local_potential(psit2_nG, self.Htpsit_nG, kpt.s)
        gemm(self.gd.dv, kpt.psit_nG, self.Htpsit_nG, 0.0, self.H_nn, 'c')

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

        self.gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, :nbands] = self.H_nn

        # <psi2 | H | psi2>
        r2k(0.5 * self.gd.dv, psit2_nG, self.Htpsit_nG, 0.0, self.H_nn)
        for a, P2_ni in P2_ani.items():
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

        self.gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, nbands:] = self.H_nn

        # Overlap matrix
        # <psi2 | S | psi>
        gemm(self.gd.dv, kpt.psit_nG, psit2_nG, 0.0, self.S_nn, "c")

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

        self.gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, :nbands] = self.S_nn

        # <psi2 | S | psi2>
        rk(self.gd.dv, psit2_nG, 0.0, self.S_nn)
        for a, P2_ni in P2_ani.items():
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

        self.gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, nbands:] = self.S_nn

        if self.gd.comm.rank == 0:
            # Only the diagonal and lower blocks were filled above;
            # presumably general_diagonalize reads just the lower triangle
            # and returns eigenvectors in H_2n2n -- confirm.
            general_diagonalize(H_2n2n, eps_2n, S_2n2n)

        self.gd.comm.broadcast(H_2n2n, 0)
        self.gd.comm.broadcast(eps_2n, 0)

        kpt.eps_n[:] = eps_2n[:nbands]

        # Rotate psit_nG
        gemm(1.0, kpt.psit_nG, H_2n2n[:nbands, :nbands],
             0.0, self.Htpsit_nG)
        gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:],
             1.0, self.Htpsit_nG)
        # The rotated states were accumulated in self.Htpsit_nG; swap the
        # two attributes so kpt.psit_nG refers to them.
        kpt.psit_nG, self.Htpsit_nG = self.Htpsit_nG, kpt.psit_nG

        # Rotate P_uni:
        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
            gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

        if nit < niter - 1 :
            # Residuals are only needed if another iteration follows.
            wfs.kin.apply(kpt.psit_nG, self.Htpsit_nG, kpt.phase_cd)
            hamiltonian.apply_local_potential(kpt.psit_nG, self.Htpsit_nG,
                                              kpt.s)
            R_nG = self.Htpsit_nG
            self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                                     kpt.P_ani, kpt.eps_n, R_nG)

    self.timer.stop('Davidson')
    error = self.gd.comm.sum(error)
    return error
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do Davidson iterations for the kpoint

    Band-parallel variant: each rank handles ``self.mynbands`` local bands
    while the ``2n x 2n`` subspace matrices are indexed with global band
    indices.  Each of the ``self.niter`` iterations preconditions the
    residuals into ``psit2_nG``, builds the subspace Hamiltonian and
    overlap via ``self.operator.calculate_matrix_elements``, diagonalizes
    on the rank that is 0 in both communicators, broadcasts the result and
    rotates the wave functions through ``self.operator.matrix_multiply``.

    Returns:
        Tuple ``(error, psit_nG)``: the weighted residual norm of the last
        iteration, summed over the domain communicator, and the array that
        holds the rotated wave functions.
    """
    niter = self.niter
    nbands = self.nbands
    mynbands = self.mynbands

    gd = wfs.matrixoperator.gd
    bd = self.operator.bd

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    H_2n2n = self.H_2n2n
    S_2n2n = self.S_2n2n
    eps_2n = self.eps_2n

    self.timer.start('Davidson')

    if self.keep_htpsit:
        # Reuse the buffers left behind by the subspace diagonalization.
        R_nG = Htpsit_nG
        psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)
    else:
        # Allocate fresh work arrays and recompute H|psi> and the
        # projections.
        R_nG = wfs.empty(mynbands, q=kpt.q)
        psit2_nG = wfs.empty(mynbands, q=kpt.q)
        wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG, R_nG)
        wfs.pt.integrate(psit_nG, kpt.P_ani, kpt.q)

    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)

    def integrate(a_G, b_G):
        # Local (not yet communicator-summed) real part of <a|b>.
        return np.real(wfs.integrate(a_G, b_G, global_integral=False))

    # Note on band parallelization
    # The "large" H_2n2n and S_2n2n matrices are at the moment
    # global and replicated over band communicator, and the
    # general diagonalization is performed in serial i.e. without
    # scalapack

    for nit in range(niter):
        H_2n2n[:] = 0.0
        S_2n2n[:] = 0.0

        norm_n = np.zeros(mynbands)
        error = 0.0
        for n in range(mynbands):
            # NOTE(review): n is the local band index here; f_n[n] and the
            # comparison with nbands_converge use it directly -- confirm
            # this is intended under band parallelization.
            if kpt.f_n is None:
                weight = kpt.weight
            else:
                weight = kpt.f_n[n]
            if self.nbands_converge != 'occupied':
                if n < self.nbands_converge:
                    weight = kpt.weight
                else:
                    weight = 0.0
            error += weight * integrate(R_nG[n], R_nG[n])

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n:n + 1], kpt)
            psit2_nG[n] = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

            if self.normalize:
                norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

            # Diagonal of the upper-left block, at the global band index.
            N = bd.global_index(n)
            H_2n2n[N, N] = kpt.eps_n[n]
            S_2n2n[N, N] = 1.0

        # Each rank filled only its own diagonal entries; combine them.
        bd.comm.sum(H_2n2n)
        bd.comm.sum(S_2n2n)

        if self.normalize:
            gd.comm.sum(norm_n)
            for norm, psit2_G in zip(norm_n, psit2_nG):
                psit2_G *= norm**-0.5

        # Calculate projections
        P2_ani = wfs.pt.dict(mynbands)
        wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

        self.timer.start('calc. matrices')

        # Hamiltonian matrix
        # <psi2 | H | psi>
        def H(psit_xG):
            # Writes H|psit_xG> into R_nG, overwriting the residuals.
            result_xG = R_nG
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_xG,
                                         result_xG)
            return result_xG

        def dH(a, P_ni):
            return np.dot(P_ni, unpack(hamiltonian.dH_asp[a][kpt.s]))

        H_nn = self.operator.calculate_matrix_elements(psit_nG, kpt.P_ani,
                                                       H, dH, psit2_nG,
                                                       P2_ani)
        H_2n2n[nbands:, :nbands] = H_nn

        # <psi2 | H | psi2>
        def H(psit_xG):
            # H | psi2 > already calculated in previous step
            result_xG = R_nG
            return result_xG

        def dH(a, P_ni):
            return np.dot(P_ni, unpack(hamiltonian.dH_asp[a][kpt.s]))

        H_nn = self.operator.calculate_matrix_elements(psit2_nG, P2_ani,
                                                       H, dH)
        H_2n2n[nbands:, nbands:] = H_nn

        # Overlap matrix
        # <psi2 | S | psi>
        def S(psit_G):
            return psit_G

        def dS(a, P_ni):
            return np.dot(P_ni, wfs.setups[a].dO_ii)

        S_nn = self.operator.calculate_matrix_elements(psit_nG, kpt.P_ani,
                                                       S, dS, psit2_nG,
                                                       P2_ani)
        S_2n2n[nbands:, :nbands] = S_nn

        # <psi2 | S | psi2>
        S_nn = self.operator.calculate_matrix_elements(psit2_nG, P2_ani,
                                                       S, dS)
        S_2n2n[nbands:, nbands:] = S_nn

        self.timer.stop('calc. matrices')

        self.timer.start('diagonalize')
        if gd.comm.rank == 0 and bd.comm.rank == 0:
            # m = number of overlap eigenvalues below self.smin; those
            # directions are dropped from the subspace when m > 0.
            m = 0
            if self.smin:
                s_N, U_NN = np.linalg.eigh(S_2n2n)
                m = int((s_N < self.smin).sum())

            if m == 0:
                # Only the diagonal and lower blocks were filled above;
                # presumably general_diagonalize reads just the lower
                # triangle and returns eigenvectors in H_2n2n -- confirm.
                general_diagonalize(H_2n2n, eps_2n, S_2n2n)
            else:
                T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:]**-0.5))
                H_2n2n[:nbands, nbands:] = \
                    H_2n2n[nbands:, :nbands].conj().T
                eps_2n[:-m], P_nn = np.linalg.eigh(
                    np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

        # Send the result first across domains, then across band groups.
        gd.comm.broadcast(H_2n2n, 0)
        gd.comm.broadcast(eps_2n, 0)
        bd.comm.broadcast(H_2n2n, 0)
        bd.comm.broadcast(eps_2n, 0)

        self.operator.bd.distribute(eps_2n[:nbands], kpt.eps_n[:])

        self.timer.stop('diagonalize')

        self.timer.start('rotate_psi')
        # Rotate psit_nG
        # Memory references during rotate:
        # Case 1, no band parallelization:
        #   Before 1. matrix multiply: psit_nG -> operator.work1_xG
        #   After  1. matrix multiply: psit_nG -> R_nG
        #   After  2. matrix multiply: tmp_nG -> work1_xG
        #
        # Case 2, band parallelization
        # Work arrays used only in send/recv buffers,
        #   psit_nG -> psit_nG
        #   tmp_nG  -> psit2_nG
        psit_nG = self.operator.matrix_multiply(H_2n2n[:nbands, :nbands],
                                                psit_nG, kpt.P_ani,
                                                out_nG=R_nG)

        tmp_nG = self.operator.matrix_multiply(H_2n2n[:nbands, nbands:],
                                               psit2_nG, P2_ani)

        if bd.comm.size > 1:
            psit_nG += tmp_nG
        else:
            tmp_nG += psit_nG
            psit_nG, R_nG = tmp_nG, psit_nG

        # Presumably matrix_multiply has already rotated both projection
        # dictionaries in place, so only the sum remains -- confirm.
        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            P_ni += P2_ni

        self.timer.stop('rotate_psi')

        if nit < niter - 1:
            # Residuals are only needed if another iteration follows.
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG, R_nG)
            self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                     kpt.P_ani, kpt.eps_n, R_nG)

    self.timer.stop('Davidson')
    error = gd.comm.sum(error)
    return error, psit_nG
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do Davidson iterations for the kpoint

    Each of the ``self.niter`` iterations preconditions the residuals into
    a second set of vectors (``psit2_nG``), builds the ``2n x 2n``
    Hamiltonian and overlap matrices in the combined space, diagonalizes
    them on rank 0 of the domain communicator, broadcasts the result and
    rotates the wave functions and projections with the first ``nbands``
    rows of the result.

    Returns:
        Tuple ``(error, psit_nG)``: the weighted residual norm of the last
        iteration, summed over the domain communicator, and the array that
        holds the rotated wave functions.
    """
    niter = self.niter
    nbands = self.nbands

    gd = wfs.matrixoperator.gd

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    H_2n2n = self.H_2n2n
    S_2n2n = self.S_2n2n
    eps_2n = self.eps_2n
    psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)

    self.timer.start('Davidson')
    # R_nG aliases Htpsit_nG: the residuals are written into that buffer.
    R_nG = Htpsit_nG
    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)

    def integrate(a_G, b_G):
        # Local (not yet communicator-summed) real part of <a|b>.
        return np.real(wfs.integrate(a_G, b_G, global_integral=False))

    for nit in range(niter):
        H_2n2n[:] = 0.0
        S_2n2n[:] = 0.0

        norm_n = np.zeros(nbands)
        error = 0.0
        for n in range(nbands):
            # Weight of band n in the error: occupation (or k-point
            # weight), unless a fixed number of bands is to be converged.
            if kpt.f_n is None:
                weight = kpt.weight
            else:
                weight = kpt.f_n[n]
            if self.nbands_converge != 'occupied':
                if n < self.nbands_converge:
                    weight = kpt.weight
                else:
                    weight = 0.0
            error += weight * integrate(R_nG[n], R_nG[n])

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n:n + 1], kpt)
            psit2_nG[n] = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

            if self.normalize:
                norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

            # Upper-left block: diagonal in the current states.
            H_2n2n[n, n] = kpt.eps_n[n]
            S_2n2n[n, n] = 1.0

        if self.normalize:
            gd.comm.sum(norm_n)
            for norm, psit2_G in zip(norm_n, psit2_nG):
                psit2_G *= norm**-0.5

        # Calculate projections
        P2_ani = wfs.pt.dict(nbands)
        wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

        # Hamiltonian matrix
        # <psi2 | H | psi>
        # Htpsit_nG (and hence R_nG) is overwritten with H|psi2> here.
        wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit2_nG, Htpsit_nG)
        gd.integrate(psit_nG, Htpsit_nG, global_integral=False,
                     _transposed_result=self.H_nn)
        # gemm(1.0, psit_nG, Htpsit_nG, 0.0, self.H_nn, 'c')

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

        gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, :nbands] = self.H_nn

        # <psi2 | H | psi2>
        gd.integrate(psit2_nG, Htpsit_nG, global_integral=False,
                     _transposed_result=self.H_nn)
        # r2k(0.5 * gd.dv, psit2_nG, Htpsit_nG, 0.0, self.H_nn)
        for a, P2_ni in P2_ani.items():
            dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
            self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

        gd.comm.sum(self.H_nn, 0)
        H_2n2n[nbands:, nbands:] = self.H_nn

        # Overlap matrix
        # <psi2 | S | psi>
        gd.integrate(psit_nG, psit2_nG, global_integral=False,
                     _transposed_result=self.S_nn)
        # gemm(1.0, psit_nG, psit2_nG, 0.0, self.S_nn, 'c')

        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

        gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, :nbands] = self.S_nn

        # <psi2 | S | psi2>
        gd.integrate(psit2_nG, psit2_nG, global_integral=False,
                     _transposed_result=self.S_nn)
        # rk(gd.dv, psit2_nG, 0.0, self.S_nn)
        for a, P2_ni in P2_ani.items():
            dO_ii = wfs.setups[a].dO_ii
            self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

        gd.comm.sum(self.S_nn, 0)
        S_2n2n[nbands:, nbands:] = self.S_nn

        if gd.comm.rank == 0:
            # m = number of overlap eigenvalues below self.smin; those
            # directions are dropped from the subspace when m > 0.
            m = 0
            if self.smin:
                s_N, U_NN = np.linalg.eigh(S_2n2n)
                m = int((s_N < self.smin).sum())

            if m == 0:
                # Only the diagonal and lower blocks were filled above;
                # presumably general_diagonalize reads just the lower
                # triangle and returns eigenvectors in H_2n2n -- confirm.
                general_diagonalize(H_2n2n, eps_2n, S_2n2n)
            else:
                T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:]**-0.5))
                # np.linalg.eigh path works on the full matrix product,
                # so the upper-right block is filled in explicitly.
                H_2n2n[:nbands, nbands:] = \
                    H_2n2n[nbands:, :nbands].conj().T
                eps_2n[:-m], P_nn = np.linalg.eigh(
                    np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

        gd.comm.broadcast(H_2n2n, 0)
        gd.comm.broadcast(eps_2n, 0)

        kpt.eps_n[:] = eps_2n[:nbands]

        # Rotate psit_nG
        gd.gemm(1.0, psit_nG, H_2n2n[:nbands, :nbands], 0.0, Htpsit_nG)
        gd.gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:], 1.0, Htpsit_nG)
        # The rotated states were accumulated in Htpsit_nG; swap the names
        # so psit_nG refers to them and the old buffer becomes scratch.
        psit_nG, Htpsit_nG = Htpsit_nG, psit_nG

        # Rotate P_uni:
        for a, P_ni in kpt.P_ani.items():
            P2_ni = P2_ani[a]
            gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
            gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

        if nit < niter - 1:
            # Residuals are only needed if another iteration follows.
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG,
                                         Htpsit_nG)
            R_nG = Htpsit_nG
            self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                     kpt.P_ani, kpt.eps_n, R_nG)

    self.timer.stop('Davidson')
    error = gd.comm.sum(error)
    return error, psit_nG
def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M, UL='L'):
    """Solve the generalized eigenproblem and copy the result into ``C_mm``.

    ``general_diagonalize`` is called on ``H_mm`` and ``S_mm`` with the
    eigenvalue array ``eps_M``; the contents of ``H_mm`` after the call
    are then copied into ``C_mm``.  ``UL`` is accepted but not used here.
    """
    general_diagonalize(H_mm, eps_M, S_mm)
    # Slice assignment keeps the caller's C_mm object and fills it.
    C_mm[...] = H_mm
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare ScaLAPACK drivers against serial LAPACK reference results.

    Random symmetric/Hermitian ``N x N`` matrices are built on the master
    rank, solved serially (standard and generalized eigenproblem, inverse
    Cholesky, matrix inverse), redistributed onto an ``mprocs x nprocs``
    BLACS grid with 8 x 8 blocks and solved again with the ScaLAPACK
    drivers.  The maximum absolute deviations are printed on rank 0 and
    asserted on all ranks to be below the module-level ``tol``.  The
    matrix inverse is only exercised for ``dtype == complex``.

    Args:
        N: Matrix dimension.
        seed: Seed for the random number generator.
        mprocs: Number of process rows in the BLACS grid.
        nprocs: Number of process columns in the BLACS grid.
        dtype: ``float`` or ``complex``.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.
    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)  # allocated once (was duplicated)

    # Eigenvalues are non-BLACS matrices
    W_dc = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # We don't test the expert drivers anymore since there might be a
    # buffer overflow error; this disables scalapack_diagonalize_ex/_mr3
    # and scalapack_general_diagonalize_ex/_mr3.
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc,
                                     'L')
    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')

    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        diag_dc_err = abs(W_dc - W0).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()
        print('diagonalize dc err', diag_dc_err)
        print('general diagonalize dc err', general_diag_dc_err)
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            # Sinv was only inverted in the complex case; for real dtype
            # the comparison would be against a never-inverted matrix.
            tri2full(Sinv_test, 'L')
            inverse_err = abs(Sinv_test - S0_inv).max()
            print('inverse err', inverse_err)
        else:
            inverse_err = 0.0
    else:
        diag_dc_err = 0.0
        general_diag_dc_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    diag_dc_err = world.sum(diag_dc_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    assert diag_dc_err < tol
    assert general_diag_dc_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol