def get_phi_aGp(self):
    """Compute and store the phase-corrected PAW plane-wave integrals.

    For every atom index ``a`` the array returned by
    ``two_phi_planewave_integrals`` is multiplied, row by row (one row per
    plane wave ``G``), with ``exp(-2j * pi * (q_c + G_c) . spos_ac[a])``.
    When ``self.optical_limit`` is true, row 0 is afterwards replaced by
    ``-1j * dot(nabla_iiv, self.qq_v)`` flattened.

    The resulting dictionary is stored as ``self.phi_aGp``; the method
    itself returns ``None``.
    """
    wfs_setups = self.calc.wfs.setups
    scaled_pos_ac = self.calc.atoms.get_scaled_positions()
    # (q + G) expressed through the reciprocal cell vectors.
    qG_Gv = gemmdot(self.q_c + self.Gvec_Gc, self.bcell_cv.copy(), beta=0.0)

    integrals_aGp = {}
    for a, _ in enumerate(wfs_setups.id_a):
        phi_Gp = two_phi_planewave_integrals(qG_Gv, wfs_setups[a])
        for iG in range(self.npw):
            qG_dot_s = np.dot(self.q_c + self.Gvec_Gc[iG], scaled_pos_ac[a])
            phi_Gp[iG] *= np.exp(-1j * 2. * pi * qG_dot_s)
        integrals_aGp[a] = phi_Gp

    # For optical limit, G == 0 part should change
    if self.optical_limit:
        for a, _ in enumerate(wfs_setups.id_a):
            grad_iiv = wfs_setups[a].nabla_iiv
            integrals_aGp[a][0] = -1j * (np.dot(grad_iiv, self.qq_v)).ravel()

    self.phi_aGp = integrals_aGp
    self.printtxt('')
    self.printtxt('Finished phi_Gp !')
    return
def get_phi_aGp(self, q_c=None, parallel=True, alldir=False):
    """Return the phase-corrected PAW plane-wave integrals per atom.

    Parameters
    ----------
    q_c : array-like or None
        Momentum transfer in reciprocal-cell coordinates.  ``None`` means
        "use ``self.q_c`` / ``self.qq_v`` / ``self.optical_limit``".  An
        explicit ``q_c`` whose absolute components sum to less than 1e-8
        is replaced by ``[0.0001, 0, 0]`` and handled as the optical limit.
    parallel : bool
        If true, only the plane-wave range assigned to this rank by
        ``parallel_partition`` is phase-corrected locally and the per-atom
        arrays are then summed over ``self.comm``.
    alldir : bool
        If true, additionally return ``phiG0_avp``.

    Returns
    -------
    phi_aGp : dict
        Atom index -> integrals array (one row per plane wave).
    phiG0_avp : dict, only when ``alldir`` is true
        Atom index -> ``(3, n)`` complex array holding the ``G = 0`` row for
        a q vector of length ``self.qopt`` along each of the three
        reciprocal axes.  Empty unless the optical limit is active.
    """
    if q_c is None:
        q_c = self.q_c
        qq_v = self.qq_v
        optical_limit = self.optical_limit
    else:
        optical_limit = False
        if np.abs(q_c).sum() < 1e-8:
            q_c = np.array([0.0001, 0, 0])
            optical_limit = True
        qq_v = np.dot(q_c, self.bcell_cv)

    setups = self.calc.wfs.setups
    spos_ac = self.calc.atoms.get_scaled_positions()
    kk_Gv = gemmdot(q_c + self.Gvec_Gc, self.bcell_cv.copy(), beta=0.0)

    phi_aGp = {}
    phiG0_avp = {}

    if parallel:
        from gpaw.response.parallel import parallel_partition
        # Only the local [Gstart, Gend) window is needed here.
        _, _, Gstart, Gend = parallel_partition(
            self.npw, self.comm.rank, self.comm.size, reshape=False)
    else:
        Gstart = 0
        Gend = self.npw

    for a, _ in enumerate(setups.id_a):
        phi_Gp = two_phi_planewave_integrals(kk_Gv, setups[a], Gstart, Gend)
        for iG in range(Gstart, Gend):
            phi_Gp[iG] *= np.exp(
                -1j * 2. * pi * np.dot(q_c + self.Gvec_Gc[iG], spos_ac[a]))
        if parallel:
            self.comm.sum(phi_Gp)
        phi_aGp[a] = phi_Gp

    # For optical limit, G == 0 part should change
    if optical_limit:
        # The three axis-aligned q vectors do not depend on the atom, so
        # build them once instead of once per atom.
        qq2_dv = [np.dot(np.diag((1, 1, 1))[axis] * self.qopt,
                         self.bcell_cv)  # summation over c
                  for axis in range(3)]
        for a, _ in enumerate(setups.id_a):
            nabla_iiv = setups[a].nabla_iiv
            phi_aGp[a][0] = -1j * (np.dot(nabla_iiv, qq_v)).ravel()

            phiG0_avp[a] = np.zeros((3, len(phi_aGp[a][0])), complex)
            for axis, qq2_v in enumerate(qq2_dv):
                phiG0_avp[a][axis] = -1j * (np.dot(nabla_iiv, qq2_v)).ravel()

    if alldir:
        return phi_aGp, phiG0_avp
    return phi_aGp
def calculate_local_kernel(self):
    """Compute the Hartree + local xc kernel per q-point and write it out.

    For every entry of ``self.ibzq_qc`` the Fourier components
    ``fxc(G - G')`` are evaluated by direct real-space integration, the rows
    being distributed over the ranks of ``mpi.world`` and summed afterwards.
    The bare Coulomb term ``4 * pi / |q + G|^2`` is added on the diagonal
    and rank 0 writes the matrix to
    ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw``.  Nothing is returned.
    """
    # Standard ALDA exchange kernel
    # Use with care. Results are very difficult to converge
    # Sensitive to density_cut
    ns = self.calc.wfs.nspins
    gd = self.gd
    pd = self.pd
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    fxc_sg = ns * self.get_fxc_g(ns * self.n_g)
    fxc_sg[np.where(self.n_g < self.density_cut)] = 0.0

    r_vg = gd.get_grid_point_coordinates()

    for iq in range(len(self.ibzq_qc)):
        Gvec_Gc = np.dot(pd.get_reciprocal_vectors(q=iq, add_q=False),
                         cell_cv / (2 * np.pi))
        npw = len(Gvec_Gc)
        # Ceil-divide the plane waves over the ranks; the last rank may get
        # a shorter (possibly empty) range.
        l_pw_size = -(-npw // mpi.world.size)
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))

        fhxc_sGsG = np.zeros((ns * npw, ns * npw), dtype=complex)
        for s in range(ns):
            # The kernel is only read below, so one reference per spin is
            # enough; copying it for every (iG, jG) pair was pure overhead.
            fxc_g = fxc_sg[s]
            for iG in l_pw_range:
                for jG in range(npw):
                    dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                    dG_v = np.dot(dG_c, icell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    ft_fxc = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)
                    fhxc_sGsG[s * npw + iG, s * npw + jG] = ft_fxc

        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        # NOTE(review): no copy is taken here, so the assignment below
        # writes through to self.pd.G2_qG[iq] -- confirm this is intended.
        Gq2_G = self.pd.G2_qG[iq]
        if (self.ibzq_qc[iq] == 0).all():
            # Avoid dividing by zero for the q = 0, G = 0 component.
            Gq2_G[0] = 1.
        vq_G = 4 * np.pi / Gq2_G
        fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()
    prnt(file=self.fd)
def get_phi_aGp(self, q_c=None, parallel=True, alldir=False):
    """Return the phase-corrected PAW plane-wave integrals for each atom.

    With ``q_c=None`` the instance values ``self.q_c``, ``self.qq_v`` and
    ``self.optical_limit`` are used.  An explicit ``q_c`` with
    ``sum(|q_c|) < 1e-8`` is replaced by ``[0.0001, 0, 0]`` and treated as
    the optical limit.

    When ``parallel`` is true each rank phase-corrects only its own
    plane-wave window (from ``parallel_partition``) and the arrays are
    summed over ``self.comm``.

    Returns ``phi_aGp`` (dict: atom index -> array with one row per plane
    wave).  With ``alldir=True`` the tuple ``(phi_aGp, phiG0_avp)`` is
    returned instead, where ``phiG0_avp[a][v]`` is the ``G = 0`` row for a q
    vector of length ``self.qopt`` along reciprocal axis ``v``; that dict
    stays empty outside the optical limit.
    """
    if q_c is None:
        q_c = self.q_c
        qq_v = self.qq_v
        optical_limit = self.optical_limit
    else:
        optical_limit = False
        if np.abs(q_c).sum() < 1e-8:
            q_c = np.array([0.0001, 0, 0])
            optical_limit = True
        qq_v = np.dot(q_c, self.bcell_cv)

    setups = self.calc.wfs.setups
    spos_ac = self.calc.atoms.get_scaled_positions()
    kk_Gv = gemmdot(q_c + self.Gvec_Gc, self.bcell_cv.copy(), beta=0.0)

    phi_aGp = {}
    phiG0_avp = {}

    if parallel:
        from gpaw.response.parallel import parallel_partition
        partition = parallel_partition(
            self.npw, self.comm.rank, self.comm.size, reshape=False)
        # partition is (npw, npw_local, Gstart, Gend); only the window
        # bounds are used here.
        Gstart, Gend = partition[2], partition[3]
    else:
        Gstart = 0
        Gend = self.npw

    atom_indices = [a for a, _ in enumerate(setups.id_a)]

    for a in atom_indices:
        phi_aGp[a] = two_phi_planewave_integrals(kk_Gv, setups[a],
                                                 Gstart, Gend)
        for iG in range(Gstart, Gend):
            phi_aGp[a][iG] *= np.exp(
                -1j * 2. * pi * np.dot(q_c + self.Gvec_Gc[iG], spos_ac[a]))
        if parallel:
            self.comm.sum(phi_aGp[a])

    # For optical limit, G == 0 part should change
    if optical_limit:
        # Atom-independent: Cartesian q vectors of length qopt along the
        # three reciprocal axes (summation over c).  Computed once here
        # rather than inside the atom loop.
        qq2_vv = [np.dot(unit_c * self.qopt, self.bcell_cv)
                  for unit_c in np.diag((1, 1, 1))]
        for a in atom_indices:
            nabla_iiv = setups[a].nabla_iiv
            phi_aGp[a][0] = -1j * (np.dot(nabla_iiv, qq_v)).ravel()

            phiG0_avp[a] = np.zeros((3, len(phi_aGp[a][0])), complex)
            for v in range(3):  # 3 dimension
                phiG0_avp[a][v] = -1j * (np.dot(nabla_iiv,
                                                qq2_vv[v])).ravel()

    if alldir:
        return phi_aGp, phiG0_avp
    return phi_aGp
def full_hilbert_transform(specfunc_wGG, Nw, dw, eta):
    """Hilbert-transform a spectral function onto ``Nw`` frequencies.

    The kernel

        1 / (w - w' - i*eta) - 1 / (w + w' + i*eta)

    is evaluated on the grids ``w = iw * dw`` (``iw < Nw``) and
    ``w' = jw * dw`` (``jw < specfunc_wGG.shape[0]``), contracted with
    ``specfunc_wGG`` over ``w'`` via ``gemmdot`` and multiplied by ``dw``.

    ``eta`` should be non-zero; otherwise the kernel is singular at
    ``w == w'``.

    The kernel is built with one broadcast expression instead of an
    element-by-element Python double loop.
    """
    NwS = specfunc_wGG.shape[0]
    w_w = np.arange(Nw)[:, np.newaxis] * dw     # column: output grid
    ww_w = np.arange(NwS)[np.newaxis, :] * dw   # row: spectral grid
    tmp_ww = 1. / (w_w - ww_w - 1j * eta) - 1. / (w_w + ww_w + 1j * eta)

    chi0_wGG = gemmdot(tmp_ww, specfunc_wGG, beta=0.)
    return chi0_wGG * dw
def full_hilbert_transform(specfunc_wGG, Nw, dw, eta):
    """Apply the time-ordered Hilbert kernel to ``specfunc_wGG``.

    Parameters
    ----------
    specfunc_wGG : ndarray
        Spectral function; its first axis is the frequency axis with
        spacing ``dw`` starting at zero.
    Nw : int
        Number of output frequencies ``0, dw, ..., (Nw - 1) * dw``.
    dw : float
        Frequency spacing.
    eta : float
        Broadening; should be non-zero because the kernel has a pole at
        ``w == w'`` otherwise.

    Returns
    -------
    ``dw * sum_w' K(w, w') * specfunc[w']`` with
    ``K = 1 / (w - w' - i*eta) - 1 / (w + w' + i*eta)``.
    """
    NwS = specfunc_wGG.shape[0]
    out_w = np.arange(Nw) * dw
    spec_w = np.arange(NwS) * dw
    # Broadcasting (Nw, 1) against (NwS,) yields the full (Nw, NwS) kernel
    # in one step; no per-element Python loop is needed.
    diff_ww = out_w[:, None] - spec_w
    sum_ww = out_w[:, None] + spec_w
    tmp_ww = 1. / (diff_ww - 1j * eta) - 1. / (sum_ww + 1j * eta)

    chi0_wGG = gemmdot(tmp_ww, specfunc_wGG, beta=0.)
    return chi0_wGG * dw
def hilbert_transform(specfunc_wGG, w_w, Nw, dw, eta, fullresponse=False):
    """Hilbert-transform ``specfunc_wGG`` onto the first ``Nw`` points of w_w.

    For each output frequency ``w = w_w[iw]`` the kernel row is

        1 / (w - w' + i*eta) - 1 / (w + w' + i*eta)   if fullresponse is False
        1 / (w - w' + i*eta) - 1 / (w + w' - i*eta)   otherwise

    with ``w'`` on an evenly spaced grid ``0 ... (NwS - 1) * dw``.  The
    kernel is contracted with ``specfunc_wGG`` via ``gemmdot`` and the
    result is multiplied by ``dw``.
    """
    n_spec = specfunc_wGG.shape[0]
    grid_w = np.linspace(0., (n_spec - 1) * dw, n_spec)

    kernel_ww = np.zeros((Nw, n_spec), dtype=complex)
    for iw in range(Nw):
        w = w_w[iw]
        first = 1. / (w - grid_w + 1j * eta)
        if fullresponse is False:
            second = 1. / (w + grid_w + 1j * eta)
        else:
            second = 1. / (w + grid_w - 1j * eta)
        kernel_ww[iw] = first - second

    return gemmdot(kernel_ww, specfunc_wGG, beta=0.) * dw
def hilbert_transform(specfunc_wGG, w_w, Nw, dw, eta, fullresponse=False):
    """Contract a Hilbert kernel with the spectral function.

    ``specfunc_wGG`` is sampled on ``NwS`` frequencies spaced ``dw`` apart
    starting at zero.  For each of the first ``Nw`` entries of ``w_w`` one
    kernel row is built; the sign of ``i*eta`` in the second denominator is
    ``+`` when ``fullresponse is False`` and ``-`` otherwise.  Returns the
    ``gemmdot`` product of kernel and spectral function, times ``dw``.
    """
    NwS = specfunc_wGG.shape[0]
    rows = np.zeros((Nw, NwS), dtype=complex)
    spectral_w = np.linspace(0., (NwS - 1) * dw, NwS)

    for iw in range(Nw):
        minus_w = w_w[iw] - spectral_w
        plus_w = w_w[iw] + spectral_w
        if fullresponse is False:
            denominator_w = plus_w + 1j * eta
        else:
            denominator_w = plus_w - 1j * eta
        rows[iw] = 1. / (minus_w + 1j * eta) - 1. / denominator_w

    transformed_wGG = gemmdot(rows, specfunc_wGG, beta=0.)
    return transformed_wGG * dw
def get_all_electron_IPR(paw):
    """Print a table of inverse participation ratios for every state.

    For each k-point/band pair the orbital density is accumulated on the
    coarse grid, interpolated to the fine grid and integrated once as is
    (``norm``) and once squared (``ipr``).  For each atom in ``kpt.P_ani``
    the difference between the expanded ``n_qg`` and ``nt_qg`` radial
    densities is added to both quantities.  The printed value is
    ``ipr / norm**2``.  Nothing is returned.
    """
    density = paw.density
    wfs = paw.wfs
    coarse_G = wfs.gd.empty()
    fine_g = density.finegd.empty()

    rule = '-' * 35
    print()
    print('inverse participation function')
    print(rule)
    print('%5s %5s %10s %10s' % ('k', 'band', 'eps', 'ipr'))
    print(rule)

    for k, kpt in enumerate(paw.wfs.kpt_u):
        # psit_G itself is unused; zipping keeps the band count bounded by
        # the shorter of eps_n and psit_nG.
        for n, (eps, psit_G) in enumerate(zip(kpt.eps_n, kpt.psit_nG)):
            coarse_G[:] = 0.0
            wfs.add_orbital_density(coarse_G, kpt, n)
            density.interpolator.apply(coarse_G, fine_g)
            norm = density.finegd.integrate(fine_g)
            fine_g = fine_g**2
            ipr = density.finegd.integrate(fine_g)

            for a in kpt.P_ani:
                # Get xccorr for atom a
                xccorr = paw.density.setups[a].xc_correction

                # Get D_sp for atom a
                D_sp = np.array(wfs.get_orbital_density_matrix(a, kpt, n))

                # density a function of L and partial wave radial pair
                # density coefficient
                D_sLq = gemmdot(D_sp, xccorr.B_Lqp, trans='t')

                # Create pseudo/ae density iterators for integration
                ae_iter = xccorr.expand_density(D_sLq, xccorr.n_qg, None)
                ps_iter = xccorr.expand_density(D_sLq, xccorr.nt_qg, None)

                # Accumulate the weighted radial integrals of the
                # (all-electron - pseudo) difference for ipr and norm.
                slices = zip(ae_iter, ps_iter, xccorr.get_integrator(None))
                for n_sg, nt_sg, integrator in slices:
                    ipr += integrator.weight * np.sum(
                        (n_sg[0]**2 - nt_sg[0]**2) * xccorr.rgd.dv_g)
                    norm += integrator.weight * np.sum(
                        (n_sg[0] - nt_sg[0]) * xccorr.rgd.dv_g)

            print('%5i %5i %10.5f %10.5f' % (k, n, eps, ipr / norm**2))
    print(rule)
def get_all_electron_IPR(paw):
    """Print the inverse participation ratio (IPR) of every state.

    For each k-point and band the orbital density is built on the coarse
    grid, interpolated to the fine grid and integrated once directly
    (``norm``) and once squared (``ipr``).  For every atom in ``kpt.P_ani``
    the difference between the expanded ``n_qg`` and ``nt_qg`` radial
    densities is added to both.  One table row with ``ipr / norm**2`` is
    printed per state; nothing is returned.

    The function uses only constructs valid on both Python 2 and Python 3:
    single-argument parenthesised ``print`` and a lazy zip that is resolved
    locally.
    """
    try:
        # Python 2: the built-in zip is eager, so keep the lazy variant.
        from itertools import izip as lazy_zip
    except ImportError:
        # Python 3: the built-in zip is already lazy.
        lazy_zip = zip

    density = paw.density
    wfs = paw.wfs
    n_G = wfs.gd.empty()
    n_g = density.finegd.empty()

    print('')
    print("inverse participation function")
    print("-" * 35)
    print("%5s %5s %10s %10s" % ("k", "band", "eps", "ipr"))
    print("-" * 35)
    for k, kpt in enumerate(paw.wfs.kpt_u):
        for n, (eps, psit_G) in enumerate(zip(kpt.eps_n, kpt.psit_nG)):
            n_G[:] = 0.0
            wfs.add_orbital_density(n_G, kpt, n)
            density.interpolator.apply(n_G, n_g)
            norm = density.finegd.integrate(n_g)
            n_g = n_g ** 2
            ipr = density.finegd.integrate(n_g)
            for a in kpt.P_ani:
                # Get xccorr for atom a
                setup = paw.density.setups[a]
                xccorr = setup.xc_correction

                # Get D_sp for atom a
                D_sp = np.array(wfs.get_orbital_density_matrix(a, kpt, n))

                # density a function of L and partial wave radial pair
                # density coefficient
                D_sLq = gemmdot(D_sp, xccorr.B_Lqp, trans='t')

                # Create pseudo/ae density iterators for integration
                n_iter = xccorr.expand_density(D_sLq, xccorr.n_qg, None)
                nt_iter = xccorr.expand_density(D_sLq, xccorr.nt_qg, None)

                # Accumulate the weighted radial integrals of the
                # (all-electron - pseudo) difference for ipr and norm.
                for n_sg, nt_sg, integrator in lazy_zip(
                        n_iter, nt_iter, xccorr.get_integrator(None)):
                    ipr += integrator.weight * np.sum(
                        (n_sg[0]**2 - nt_sg[0]**2) * xccorr.rgd.dv_g)
                    norm += integrator.weight * np.sum(
                        (n_sg[0] - nt_sg[0]) * xccorr.rgd.dv_g)

            print("%5i %5i %10.5f %10.5f" % (k, n, eps, ipr / norm**2))
    print("-" * 35)
def get_phi_aGp(self):
    """Build ``self.phi_aGp``: per-atom plane-wave integrals with phases.

    Each row ``iG`` of the array returned by
    ``two_phi_planewave_integrals`` for atom ``a`` is scaled by
    ``exp(-1j * 2 * pi * dot(q_c + Gvec_Gc[iG], spos_ac[a]))``.  In the
    optical limit row 0 is overwritten with the flattened
    ``-1j * dot(nabla_iiv, qq_v)``.  Returns ``None``.
    """
    calc = self.calc
    setups = calc.wfs.setups
    spos_ac = calc.atoms.get_scaled_positions()
    qpG_Gc = self.q_c + self.Gvec_Gc
    kk_Gv = gemmdot(qpG_Gc, self.bcell_cv.copy(), beta=0.0)

    result = {}
    for a, _unused in enumerate(setups.id_a):
        result[a] = two_phi_planewave_integrals(kk_Gv, setups[a])
        position_c = spos_ac[a]
        for iG in range(self.npw):
            phase = np.exp(
                -1j * 2. * pi * np.dot(self.q_c + self.Gvec_Gc[iG],
                                       position_c))
            result[a][iG] *= phase

    # For optical limit, G == 0 part should change
    if self.optical_limit:
        for a, _unused in enumerate(setups.id_a):
            head = -1j * (np.dot(setups[a].nabla_iiv, self.qq_v)).ravel()
            result[a][0] = head

    self.phi_aGp = result
    self.printtxt('')
    self.printtxt('Finished phi_Gp !')
    return
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol, bcell_cv, R_av, setups,
                  D_asp):
    """Return the LDA xc kernel ``Kxc_GG`` in the plane-wave basis.

    The smooth part is taken from the FFT of ``fxc(r)`` whenever
    ``G - G'`` lies inside the FFT box and from an explicit real-space
    integral otherwise.  The PAW part is accumulated atom by atom (atoms
    are distributed round-robin over ranks via ``rank == a % size``),
    summed over ``world`` and added.  The sum is divided by ``vol``.

    Only one spin channel is supported (asserted per atom).
    """
    # The soft part
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    lda = XC('LDA')
    fxc_sg = np.zeros_like(nt_sG)
    lda.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    fxc_fft_g = np.fft.fftn(fxc_g) * vol / nG0

    r_vg = gd.get_grid_point_coordinates()
    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    # G - G' in Cartesian form; filled here, used by the PAW part below.
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_v = np.dot(dG_c, bcell_cv)
            dG_GGv[iG, jG] = dG_v
            inside_fft_box = (nG / 2 - np.abs(dG_c) > 0).all()
            if inside_fft_box:
                index = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = fxc_fft_g[index[0], index[1], index[2]]
            else:  # not in the fft index
                dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)

    # The PAW part
    KxcPAW_GG = np.zeros_like(Kxc_GG)
    for a, setup in enumerate(setups):
        if rank != a % size:
            continue

        xcc = setup.xc_correction
        rgd = xcc.rgd
        dv_g = rgd.dv_g

        D_sp = D_asp[a]
        D_sLq = np.inner(D_sp, xcc.B_pqL.T)
        nspins = len(D_sp)
        assert nspins == 1

        f_sg = rgd.empty(nspins)
        ft_sg = rgd.empty(nspins)

        n_sLg = np.dot(D_sLq, xcc.n_qg)
        nt_sLg = np.dot(D_sLq, xcc.nt_qg)

        # Add core density
        core_factor = sqrt(4 * pi) / nspins
        n_sLg[:, 0] += core_factor * xcc.nc_g
        nt_sLg[:, 0] += core_factor * xcc.nct_g

        coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))

        for n, Y_L in enumerate(xcc.Y_nL):
            w = weight_n[n]

            f_sg[:] = 0.0
            n_sg = np.dot(Y_L, n_sLg)
            lda.calculate_fxc(rgd, n_sg, f_sg)

            ft_sg[:] = 0.0
            nt_sg = np.dot(Y_L, nt_sLg)
            lda.calculate_fxc(rgd, nt_sg, ft_sg)

            dGR_GG = np.inner(dG_GGv, R_nv[n])
            coef_GGg = np.exp(-1j * np.outer(dGR_GG, rgd.r_g)).reshape(
                npw, npw, rgd.ng)
            dfxc_g = (f_sg[0] - ft_sg[0]) * dv_g
            KxcPAW_GG += w * np.dot(coef_GGg, dfxc_g) * coefatoms_GG

    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG

    return Kxc_GG / vol
def parallel_transport(calc, direction=0, spinors=True, name=None, scale=1.0,
                       bands=None, theta=0.0, phi=0.0):
    """Parallel-transport spin-orbit states along one k-grid direction.

    For every k-point string along ``direction`` the spinorial states are
    aligned from one k-point to the next with the unitary part of the SVD
    of their overlap matrix.  The eigen-phases of the overlap between the
    first point and its transported image are stored in ``phi_km``, and a
    spin-like expectation value (difference of the even and odd rows of
    the rotated coefficient vectors) in ``S_km``.  Both arrays are summed
    over ``world`` and saved to ``'phases_%s.npz' % name``.

    Parameters
    ----------
    calc : GPAW calculator or str
        A string is passed to ``GPAW(calc, txt=None,
        communicator=serial_comm)``.
    direction : int
        Index (0, 1 or 2) of the k-grid axis to transport along.  At least
        one of the two remaining grid dimensions must be 1 (asserted).
    spinors : bool
        Doubles the flattened grid size and the ``dO_ii`` matrices.
    name : str or None
        Inserted into the output file name.
    scale, theta, phi :
        Forwarded to ``get_spinorbit_eigenvalues``.
    bands : sequence of int or None
        Band indices; defaults to ``range(int(number of electrons))``.

    Returns
    -------
    None
    """
    if isinstance(calc, str):
        calc = GPAW(calc, txt=None, communicator=serial_comm)

    if bands is None:
        nv = int(calc.get_number_of_electrons())
        bands = range(nv)

    cell_cv = calc.wfs.gd.cell_cv
    icell_cv = (2 * np.pi) * np.linalg.inv(cell_cv).T
    r_g = calc.wfs.gd.get_grid_point_coordinates()
    # Flattened grid size per band; doubled when spinors is true.
    Ng = np.prod(np.shape(r_g)[1:]) * (spinors + 1)

    dO_aii = []
    for ia in calc.wfs.kpt_u[0].P_ani.keys():
        dO_ii = calc.wfs.setups[ia].dO_ii
        if spinors:
            # Spinor projections require doubling of the (identical) orbitals
            dO_jj = np.zeros((2 * len(dO_ii), 2 * len(dO_ii)), complex)
            dO_jj[::2, ::2] = dO_ii
            dO_jj[1::2, 1::2] = dO_ii
            dO_aii.append(dO_jj)
        else:
            dO_aii.append(dO_ii)

    N_c = calc.wfs.kd.N_c
    assert 1 in np.delete(N_c, direction)
    Nkx = N_c[0]
    Nky = N_c[1]
    Nkz = N_c[2]

    Nk = Nkx * Nky * Nkz
    Nloc = N_c[direction]  # points along the transport direction
    Npar = Nk // Nloc      # number of independent k-point strings

    # Parallelization stuff
    # Ceil-divide the strings over the ranks.
    myKsize = -(-Npar // (world.size))
    myKrange = range(rank * myKsize, min((rank + 1) * myKsize, Npar))
    myKsize = len(myKrange)

    # Get array of k-point indices of the path. q index is loc direction
    kpts_kq = []
    for k in range(Npar):
        if direction == 0:
            kpts_kq.append(list(range(k, Nkx * Nky, Nky)))
        if direction == 1:
            if Nkz == 1:
                kpts_kq.append(list(range(k * Nky, (k + 1) * Nky)))
            else:
                kpts_kq.append(list(range(k, Nkz * Nky, Nkz)))
        if direction == 2:
            kpts_kq.append(list(range(k * Nloc, (k + 1) * Nloc)))

    # Reciprocal lattice vector along the transport direction.
    G_c = np.array([0, 0, 0])
    G_c[direction] = 1
    G_v = np.dot(G_c, icell_cv)

    kpts_kc = calc.get_bz_k_points()
    kpts_kv = np.dot(kpts_kc, icell_cv)
    # Step between neighbouring k-points on a string; with a single point
    # the step is the full reciprocal lattice vector.
    if Nloc > 1:
        b_c = kpts_kc[kpts_kq[0][1]] - kpts_kc[kpts_kq[0][0]]
        b_v = np.dot(b_c, icell_cv)
    else:
        b_v = G_v

    e_mk, v_knm = get_spinorbit_eigenvalues(calc, return_wfs=True,
                                            scale=scale, theta=theta, phi=phi)

    phi_km = np.zeros((Npar, len(bands)), float)
    S_km = np.zeros((Npar, len(bands)), float)
    # Loop over the direction parallel components
    for k in myKrange:
        # Rotation applied at each point of the string; identity at q = 0.
        U_qmm = [np.eye(len(bands))]
        print(k)
        qpts_q = kpts_kq[k]
        # Loop over kpoints in the phase direction
        for q in range(Nloc - 1):
            iq1 = qpts_q[q]
            iq2 = qpts_q[q + 1]
            # print(kpts_kc[iq1], kpts_kc[iq2])
            if q == 0:
                u1_nsG = get_spinorbit_wavefunctions(calc, iq1,
                                                     v_knm[iq1])[bands]
                # Transform from psi-like to u-like
                u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq1], r_g,
                                                    beta=0.0))
                P1_ani = get_spinorbit_projections(calc, iq1, v_knm[iq1])

            u2_nsG = get_spinorbit_wavefunctions(calc, iq2, v_knm[iq2])[bands]
            u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq2], r_g, beta=0.0))
            P2_ani = get_spinorbit_projections(calc, iq2, v_knm[iq2])

            M_mm = get_overlap(calc, bands,
                               np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                               np.reshape(u2_nsG, (len(u2_nsG), Ng)),
                               P1_ani, P2_ani, dO_aii, b_v)
            # Unitary part of the overlap (product of the SVD factors),
            # used to rotate the states at the second point.
            V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
            U_mm = np.dot(V_mm, W_mm).conj()
            # Apply U_mm to the band axis; the swapaxes calls restore the
            # original axis order afterwards.
            u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
            u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
            u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
            u2_nsG = u_nsxyz
            for a in range(len(calc.atoms)):
                P2_ni = P2_ani[a][bands]
                P2_ni = np.dot(U_mm, P2_ni)
                P2_ani[a][bands] = P2_ni
            U_qmm.append(U_mm)
            # The rotated second point becomes the first point of the
            # next step.
            u1_nsG = u2_nsG
            P1_ani = P2_ani
        U_qmm = np.array(U_qmm)

        # Fix phases for last point
        iq0 = qpts_q[0]
        if Nloc == 1:
            # The loop above did not run, so u1/P1 have to be set up here.
            u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
            u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
            P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        # Image of the first point shifted by the reciprocal vector G_v.
        u2_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(G_v, r_g, beta=0.0))
        P2_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        for a in range(len(calc.atoms)):
            # With the phase factor commented out this loop only reads the
            # selected rows and writes them back.
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(-1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        M_mm = get_overlap(calc, bands,
                           np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)),
                           P1_ani, P2_ani, dO_aii, b_v)
        V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
        U_mm = np.dot(V_mm, W_mm).conj()
        u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
        u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
        u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
        u2_nsG = u_nsxyz
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            P2_ni = np.dot(U_mm, P2_ni)
            P2_ani[a][bands] = P2_ni

        # Get overlap between first kpts and its smoothly translated image
        u2_nsG[:] *= np.exp(1.0j * gemmdot(G_v, r_g, beta=0.0))
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        M_mm = get_overlap(calc, bands,
                           np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)),
                           P1_ani, P2_ani, dO_aii,
                           np.array([0.0, 0.0, 0.0]))
        # The complex arguments of the eigenvalues are the phases stored
        # for this string.
        l_m, l_mm = np.linalg.eig(M_mm)
        phi_km[k] = np.angle(l_m)
        print(phi_km[k] / 2 / np.pi)
        # Average over the string of (even rows) - (odd rows) of the
        # rotated coefficient vectors.
        A_mm = np.zeros_like(l_mm, complex)
        for q in range(Nloc):
            iq = qpts_q[q]
            U_mm = U_qmm[q]
            v_nm = U_mm.dot(v_knm[iq][:, bands].T).T
            A_mm += np.dot(v_nm[::2].T.conj(), v_nm[::2])
            A_mm -= np.dot(v_nm[1::2].T.conj(), v_nm[1::2])
        A_mm /= Nloc
        S_km[k] = np.diag(l_mm.T.conj().dot(A_mm).dot(l_mm)).real

    # Each rank filled only its own rows; combine them.
    world.sum(phi_km)
    world.sum(S_km)

    np.savez('phases_%s.npz' % name, phi_km=phi_km, S_km=S_km)
def initialize(self):
    """Set up k-points, cell, grid, eigenvalues, plane waves and projectors.

    Reads the calculator in ``self.calc`` and fills in the attributes the
    rest of the class relies on (``kd``, ``acell_cv``/``bcell_cv``, ``gd``,
    ``e_skn``/``f_skn``, ``kq_k``, ``npw``/``Gvec_Gc``/``Gindex_G``,
    ``pt``, ...).  ``self.eta`` and ``self.ecut`` are divided by
    ``Hartree`` and ``self.ftol`` by the number of BZ k-points, all in
    place, so the method must not be called twice on the same instance.

    Eigenvalues already present as ``self.e_skn`` are kept; otherwise they
    are read from the calculator together with the occupations.
    """
    self.eta /= Hartree
    self.ecut /= Hartree

    calc = self.calc
    self.nspins = self.calc.wfs.nspins

    # kpoint init
    self.kd = kd = calc.wfs.kd
    self.nikpt = kd.nibzkpts
    self.ftol /= kd.nbzkpts

    # cell init
    self.acell_cv = calc.wfs.gd.cell_cv
    self.acell_cv, self.bcell_cv, self.vol, self.BZvol = \
        get_primitive_cell(self.acell_cv, rpad=self.rpad)

    # grid init
    gd = calc.wfs.gd.new_descriptor(comm=serial_comm)
    self.pbc = gd.pbc_c
    self.gd = gd
    self.nG0 = np.prod(gd.N_c)
    # Number of grid points and volume including zero padding
    self.nGrpad = gd.N_c * self.rpad
    self.nG0rpad = np.prod(self.nGrpad)
    self.d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]

    # obtain eigenvalues, occupations
    nibzkpt = kd.nibzkpts
    kweight_k = kd.weight_k

    self.eFermi = self.calc.occupations.get_fermi_level()

    # Only a missing attribute means "no user-supplied eigenvalues"; a
    # bare except here would also hide unrelated failures.
    try:
        self.e_skn
    except AttributeError:
        self.printtxt('Use eigenvalues from the calculator.')
        self.e_skn = {}
        self.f_skn = {}
        for ispin in range(self.nspins):
            self.e_skn[ispin] = np.array(
                [calc.get_eigenvalues(kpt=k, spin=ispin)
                 for k in range(nibzkpt)]) / Hartree
            self.f_skn[ispin] = np.array(
                [calc.get_occupation_numbers(kpt=k, spin=ispin)
                 / kweight_k[k]
                 for k in range(nibzkpt)]) / kd.nbzkpts
    else:
        self.printtxt('Use eigenvalues from user.')

    # Keep an unshifted copy of the eigenvalues.
    self.enoshift_skn = {}
    for ispin in range(self.nspins):
        self.enoshift_skn[ispin] = self.e_skn[ispin].copy()
    if self.eshift is not None:
        self.add_discontinuity(self.eshift)
        self.printtxt('Shift unoccupied bands by %f eV' % (self.eshift))

    # k + q init
    if self.q_c is not None:
        self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

        if self.optical_limit:
            kq_k = np.arange(kd.nbzkpts)
            self.expqr_g = 1.
        else:
            r_vg = gd.get_grid_point_coordinates()  # (3, nG)
            qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
            self.expqr_g = np.exp(-1j * qr_g)
            del r_vg, qr_g
            kq_k = kd.find_k_plus_q(self.q_c)
        self.kq_k = kq_k

    # Plane wave init
    if self.G_plus_q:
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
            self.acell_cv, self.bcell_cv, self.gd.N_c, self.ecut,
            q=self.q_c)
    else:
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
            self.acell_cv, self.bcell_cv, self.gd.N_c, self.ecut)

    # band init
    if self.nbands is None:
        self.nbands = calc.wfs.bd.nbands
    self.nvalence = calc.wfs.nvalence

    # Projectors init
    setups = calc.wfs.setups
    self.spos_ac = calc.atoms.get_scaled_positions()

    if self.pwmode:
        self.pt = PWLFC([setup.pt_j for setup in setups], self.calc.wfs.pd)
        self.pt.set_positions(self.spos_ac)
    else:
        self.pt = LFC(gd, [setup.pt_j for setup in setups],
                      KPointDescriptor(self.kd.bzk_kc),
                      dtype=complex, forces=True)
        self.pt.set_positions(self.spos_ac)

    # Printing calculation information
    self.print_stuff()

    return
# Time numpy.dot and check it against the reference result BY0_pq.
t = time.time()
for n in range(numreps):
    BY1_pq = np.dot(B_pqL, Y_L)
t = time.time() - t
performance = numflop * numreps / t
# Single-argument parenthesised print: same output on Python 2 and 3.
print('dot : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY1_pq).max() < 5e-12
del BY1_pq

# Optionally time gemmdot writing into a preallocated output array.
if test_gemmdot:
    BY2_pq = np.empty((P, Q), dtype)
    t = time.time()
    for n in range(numreps):
        BY2_pq.fill(0.0)
        gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
    t = time.time() - t
    performance = numflop * numreps / t
    print('gemmdot: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
    assert np.abs(BY0_pq - BY2_pq).max() < 5e-12
    del BY2_pq

# Time gemv called with the 't' flag on the same operands.
BY3_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY3_pq).max() < 5e-12
def get_self_energy(self, df, W_wGG):
    """Accumulate the self-energy and its frequency derivative.

    Loops over spins, the k-points in ``self.gwkpt_k``, the bands in
    ``self.bands`` and the intermediate bands ``m_start <= m < m_end`` and
    contracts the screened interaction ``W_wGG`` with the pair densities
    returned by ``df.density_matrix``.

    Two evaluation paths exist:

    * ``not self.hilbert_trans`` ("method 1"): the frequency integral is
      done per state, with dedicated branches for the plasmon-pole
      approximation (``self.ppa``) and the static approximation
      (``self.static``).
    * ``self.hilbert_trans`` ("method 2"): ``W`` is transformed once onto a
      regular frequency grid of spacing ``self.dw`` and each state
      interpolates linearly between the two neighbouring grid points.

    Parameters
    ----------
    df :
        Response-function object supplying ``wcomm``, ``kd``, ``q_c``,
        ``kq_k``, ``optical_limit``, ``Nw``, ``npw`` and
        ``density_matrix``.
    W_wGG : ndarray
        Screened interaction; wrapped in a length-1 leading axis when
        ``self.static`` is true.  In method 1 its first row and column are
        overwritten from backups on every (n, m) iteration.

    Returns
    -------
    (Sigma_skn, dSigma_skn) : tuple of real ndarrays
        Shape ``(nspins, gwnkpt, gwnband)``, summed over ``self.ncomm``.
    """
    Sigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband),
                         dtype=float)
    dSigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband),
                          dtype=float)

    wcomm = df.wcomm

    if self.static:
        W_wGG = np.array([W_wGG])

    if not self.hilbert_trans:  #method 1
        # Backups of the first column/row of W, restored before each use.
        Wbackup_wG0 = W_wGG[:, :, 0].copy()
        Wbackup_w0G = W_wGG[:, 0, :].copy()

    else:  #method 2, perform Hilbert transform
        nG = np.shape(W_wGG)[1]
        # Number of flattened (G, G') elements held by each rank of wcomm;
        # the last rank takes the remainder.
        coords = np.zeros(wcomm.size, dtype=int)
        nG_local = nG**2 // wcomm.size
        if wcomm.rank == wcomm.size - 1:
            nG_local = nG**2 - (wcomm.size - 1) * nG_local
        wcomm.all_gather(np.array([nG_local]), coords)
        W_Wg = SliceAlongFrequency(W_wGG, coords, wcomm)

        ng = np.shape(W_Wg)[1]
        Nw = int(self.w_w[-1] / self.dw)

        # Transform kernel from the grid self.w_w to the regular grid
        # iw * self.dw.
        w1_ww = np.zeros((Nw, df.Nw), dtype=complex)
        for iw in range(Nw):
            w1 = iw * self.dw
            w1_ww[iw] = 1. / (w1 + self.w_w + 1j * self.eta_w) + 1. / (
                w1 - self.w_w + 1j * self.eta_w)
            w1_ww[iw, 0] -= 1. / (w1 + 1j * self.eta_w[0])  # correct w'=0
            w1_ww[iw] *= self.dw_w

        Cplus_Wg = np.zeros((Nw, ng), dtype=complex)
        Cminus_Wg = np.zeros((Nw, ng), dtype=complex)
        Cplus_Wg = gemmdot(w1_ww, W_Wg, beta=0.0)
        Cminus_Wg = gemmdot(w1_ww.conj(), W_Wg, beta=0.0)

    for s in range(self.nspins):
        for i, k in enumerate(self.gwkpt_k):  # k is bzk index
            if df.optical_limit:
                kq_c = df.kd.bzk_kc[k]
            else:
                kq_c = df.kd.bzk_kc[k] - df.q_c  # k - q

            kq = df.kd.where_is_q(kq_c, df.kd.bzk_kc)
            assert df.kq_k[kq] == k
            ibzkpt1 = df.kd.bz2ibz_k[k]
            ibzkpt2 = df.kd.bz2ibz_k[kq]

            for j, n in enumerate(self.bands):
                for m in range(self.m_start, self.m_end):

                    # +1 for states above the Fermi level, -1 otherwise.
                    if self.e_skn[s][ibzkpt2, m] > self.eFermi:
                        sign = 1.
                    else:
                        sign = -1.

                    rho_G = df.density_matrix(m, n, kq, spin1=s, spin2=s)

                    if not self.hilbert_trans:  #method 1
                        W_wGG[:, :, 0] = Wbackup_wG0
                        W_wGG[:, 0, :] = Wbackup_w0G

                        # w1 = w - epsilon_m,k-q
                        w1 = self.e_skn[s][ibzkpt1, n] - self.e_skn[s][
                            ibzkpt2, m]

                        if self.ppa:
                            # analytical expression for Plasmon Pole Approximation
                            W_GG = sign * W_wGG[0] * (
                                1. / (w1 + self.wt_GG - 1j * self.eta) -
                                1. / (w1 - self.wt_GG + 1j * self.eta))
                            W_GG -= W_wGG[0] * (
                                1. / (w1 + self.wt_GG + 1j * self.eta * sign) +
                                1. / (w1 - self.wt_GG + 1j * self.eta * sign))
                            W_G = gemmdot(W_GG, rho_G, beta=0.0)
                            Sigma_skn[s, i, j] += np.real(
                                gemmdot(W_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))

                            # Same poles squared: contribution to the
                            # frequency derivative.
                            W_GG = sign * W_wGG[0] * (
                                1. / (w1 - self.wt_GG + 1j * self.eta)**2 -
                                1. / (w1 + self.wt_GG - 1j * self.eta)**2)
                            W_GG += W_wGG[0] * (
                                1. / (w1 - self.wt_GG +
                                      1j * self.eta * sign)**2 +
                                1. / (w1 + self.wt_GG +
                                      1j * self.eta * sign)**2)
                            W_G = gemmdot(W_GG, rho_G, beta=0.0)
                            dSigma_skn[s, i, j] += np.real(
                                gemmdot(W_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))

                        elif self.static:
                            W1_GG = W_wGG[0] - np.eye(df.npw) * self.Kc_GG
                            W2_GG = W_wGG[0]

                            # perform W_GG * np.outer(rho_G.conj(), rho_G).sum(GG)
                            W_G = gemmdot(W1_GG, rho_G, beta=0.0)

                            # Coulomb Hole
                            Sigma_skn[s, i, j] += np.real(
                                gemmdot(W_G, rho_G,
                                        alpha=self.alpha * pi / 1j,
                                        beta=0.0, trans='c'))
                            if sign == -1:
                                W_G = gemmdot(
                                    W2_GG, rho_G,
                                    beta=0.0)  # Screened Exchange
                                Sigma_skn[s, i, j] -= np.real(
                                    gemmdot(W_G, rho_G,
                                            alpha=2 * self.alpha * pi / 1j,
                                            beta=0.0, trans='c'))
                            del W1_GG, W2_GG, W_G, rho_G

                        else:
                            # perform W_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)
                            W_wG = gemmdot(W_wGG, rho_G, beta=0.0)
                            C_wlocal = gemmdot(W_wG, rho_G,
                                               alpha=self.alpha,
                                               beta=0.0, trans='c')
                            del W_wG, rho_G

                            # Collect the frequency points held by the
                            # other ranks of wcomm.
                            C_w = np.zeros(df.Nw, dtype=complex)
                            wcomm.all_gather(C_wlocal, C_w)
                            del C_wlocal

                            # calculate self energy
                            w1_w = 1. / (
                                w1 - self.w_w + 1j * self.eta_w * sign
                            ) + 1. / (w1 + self.w_w + 1j * self.eta_w * sign)
                            w1_w[0] -= 1. / (w1 + 1j * self.eta_w[0] * sign
                                             )  # correct w'=0
                            w1_w *= self.dw_w
                            Sigma_skn[s, i, j] += np.real(
                                gemmdot(C_w, w1_w, beta=0.0))

                            # calculate derivative of self energy with respect to w
                            w1_w = 1. / (
                                w1 - self.w_w + 1j * self.eta_w * sign
                            )**2 + 1. / (
                                w1 + self.w_w + 1j * self.eta_w * sign)**2
                            w1_w[0] -= 1. / (w1 + 1j * self.eta_w[0] * sign
                                             )**2  # correct w'=0
                            w1_w *= self.dw_w
                            dSigma_skn[s, i, j] -= np.real(
                                gemmdot(C_w, w1_w, beta=0.0))

                    else:  #method 2
                        # Fold the sign of the energy difference into sign,
                        # except for (numerically) degenerate states.
                        if not np.abs(self.e_skn[s][ibzkpt2, m] -
                                      self.e_skn[s][ibzkpt1, n]) < 1e-10:
                            sign *= np.sign(self.e_skn[s][ibzkpt1, n] -
                                            self.e_skn[s][ibzkpt2, m])

                        # find points on frequency grid
                        w0 = self.e_skn[s][ibzkpt1, n] - self.e_skn[s][
                            ibzkpt2, m]
                        w0_id = np.abs(int(w0 / self.dw))
                        w1 = w0_id * self.dw
                        w2 = (w0_id + 1) * self.dw

                        # choose plus or minus, treat optical limit:
                        if sign == 1:
                            C_Wg = Cplus_Wg[
                                w0_id:w0_id +
                                2]  # only two grid points needed for each w0
                        if sign == -1:
                            C_Wg = Cminus_Wg[
                                w0_id:w0_id +
                                2]  # only two grid points needed for each w0

                        C_wGG = GatherOrbitals(C_Wg, coords, wcomm).copy()
                        del C_Wg

                        # special treat of w0 = 0 (degenerate states):
                        if w0_id == 0:
                            Cplustmp_GG = GatherOrbitals(
                                Cplus_Wg[1], coords, wcomm).copy()
                            Cminustmp_GG = GatherOrbitals(
                                Cminus_Wg[1], coords, wcomm).copy()

                        # perform C_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)

                        if w0_id == 0:
                            Sw0_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                            Sw0 = np.real(
                                gemmdot(Sw0_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))
                            Sw1_G = gemmdot(Cplustmp_GG, rho_G, beta=0.0)
                            Sw1 = np.real(
                                gemmdot(Sw1_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))
                            Sw2_G = gemmdot(Cminustmp_GG, rho_G, beta=0.0)
                            Sw2 = np.real(
                                gemmdot(Sw2_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))

                            Sigma_skn[s, i, j] += Sw0
                            dSigma_skn[s, i, j] += (Sw1 + Sw2) / (2 *
                                                                  self.dw)

                        else:
                            Sw1_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                            Sw1 = np.real(
                                gemmdot(Sw1_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))
                            Sw2_G = gemmdot(C_wGG[1], rho_G, beta=0.0)
                            Sw2 = np.real(
                                gemmdot(Sw2_G, rho_G, alpha=self.alpha,
                                        beta=0.0, trans='c'))

                            # Linear interpolation between the grid points
                            # w1 and w2 that bracket |w0|.
                            Sw0 = (w2 - np.abs(w0)) / self.dw * Sw1 + (
                                np.abs(w0) - w1) / self.dw * Sw2
                            Sigma_skn[s, i, j] += np.sign(
                                self.e_skn[s][ibzkpt1, n] -
                                self.e_skn[s][ibzkpt2, m]) * Sw0
                            # Finite-difference slope between the two points.
                            dSigma_skn[s, i, j] += (Sw2 - Sw1) / self.dw

    self.ncomm.barrier()
    self.ncomm.sum(Sigma_skn)
    self.ncomm.sum(dSigma_skn)

    return Sigma_skn, dSigma_skn
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol, bcell_cv, R_av, setups,
                  D_asp, functional='ALDA', density_cut=None):
    """Return the adiabatic local xc kernel ``Kxc_sGG`` in plane waves.

    Parameters
    ----------
    gd :
        Grid descriptor used for coordinates and real-space integration.
    nt_sG : ndarray
        Pseudo density per spin on the grid.
    npw, Gvec_Gc :
        Number of plane waves and their integer reciprocal coordinates.
    nG : ndarray
        Grid dimensions (the FFT box).
    vol, bcell_cv :
        Cell volume and reciprocal cell vectors.
    R_av, setups, D_asp :
        Atomic positions, PAW setups and atomic density matrices, used by
        the PAW part.
    functional : str
        ``'ALDA_X'`` selects the analytic exchange-only kernel (one or two
        spins).  Any other value is passed to ``XC`` with its first
        character stripped (``'ALDA'`` -> ``'LDA'``) and requires a single
        spin channel.
    density_cut : float or None
        If given, the kernel is zeroed where
        ``nt_sG * len(nt_sG) < density_cut``.

    Returns
    -------
    ndarray of shape ``(nspins, npw, npw)``: smooth + PAW parts divided by
    ``vol``.
    """
    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0], index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(
                        np.exp(-1j * dGr_g) * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        # Atoms are distributed round-robin over the ranks.
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]

                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (
                        A_x * (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)

                # (G - G') . R_n does not depend on the radial index, so
                # evaluate it once per angular point rather than once per
                # radial grid point.
                phase_GG = -1j * np.inner(dG_GGv, R_nv[n])
                for i, r in enumerate(rgd.r_g):
                    coef_GG = np.exp(phase_GG * r)
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(
                            coef_GG,
                            (f_sg[s, i] - ft_sg[s, i]) * dv_g[i]
                        ) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol
def initialize(self):
    """Prepare all derived quantities needed before a response calculation.

    Using the calculator in ``self.calc`` this sets up, in order: spin and
    k-point data, the (padded) primitive cell, a serial grid descriptor and
    gradient operators, eigenvalues and occupations, the k -> k+q map, the
    plane-wave basis and the projector functions.  Finally
    ``self.print_stuff()`` is called.

    ``self.eta``, ``self.ecut`` (divided by ``Hartree``) and ``self.ftol``
    (divided by the number of BZ k-points) are rescaled in place, so a
    second call on the same instance would rescale them again.

    If the instance already carries ``e_skn`` those eigenvalues are used;
    otherwise eigenvalues and occupations are fetched from the calculator.
    """
    self.eta /= Hartree
    self.ecut /= Hartree

    calc = self.calc
    self.nspins = self.calc.wfs.nspins

    # kpoint init
    self.kd = kd = calc.wfs.kd
    self.nikpt = kd.nibzkpts
    self.ftol /= kd.nbzkpts

    # cell init
    self.acell_cv = calc.wfs.gd.cell_cv
    self.acell_cv, self.bcell_cv, self.vol, self.BZvol = \
        get_primitive_cell(self.acell_cv, rpad=self.rpad)

    # grid init
    gd = calc.wfs.gd.new_descriptor(comm=serial_comm)
    self.pbc = gd.pbc_c
    self.gd = gd
    self.nG0 = np.prod(gd.N_c)
    # Number of grid points and volume including zero padding
    self.nGrpad = gd.N_c * self.rpad
    self.nG0rpad = np.prod(self.nGrpad)
    self.d_c = [
        Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)
    ]

    # obtain eigenvalues, occupations
    nibzkpt = kd.nibzkpts
    kweight_k = kd.weight_k

    self.eFermi = self.calc.occupations.get_fermi_level()

    # Probe for user-supplied eigenvalues.  Catch only AttributeError so
    # that unrelated errors are not silently turned into "read from the
    # calculator".
    try:
        self.e_skn
    except AttributeError:
        user_eigenvalues = False
    else:
        user_eigenvalues = True

    if user_eigenvalues:
        self.printtxt('Use eigenvalues from user.')
    else:
        self.printtxt('Use eigenvalues from the calculator.')
        self.e_skn = {}
        self.f_skn = {}
        for ispin in range(self.nspins):
            self.e_skn[ispin] = np.array([
                calc.get_eigenvalues(kpt=k, spin=ispin)
                for k in range(nibzkpt)
            ]) / Hartree
            self.f_skn[ispin] = np.array([
                calc.get_occupation_numbers(kpt=k, spin=ispin) /
                kweight_k[k] for k in range(nibzkpt)
            ]) / kd.nbzkpts

    # Unshifted copy of the eigenvalues, taken before any eshift is
    # applied.
    self.enoshift_skn = {}
    for ispin in range(self.nspins):
        self.enoshift_skn[ispin] = self.e_skn[ispin].copy()
    if self.eshift is not None:
        self.add_discontinuity(self.eshift)
        self.printtxt('Shift unoccupied bands by %f eV' % (self.eshift))

    # k + q init
    if self.q_c is not None:
        self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

        if self.optical_limit:
            kq_k = np.arange(kd.nbzkpts)
            self.expqr_g = 1.
        else:
            r_vg = gd.get_grid_point_coordinates()  # (3, nG)
            qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
            self.expqr_g = np.exp(-1j * qr_g)
            del r_vg, qr_g
            kq_k = kd.find_k_plus_q(self.q_c)
        self.kq_k = kq_k

    # Plane wave init
    if self.G_plus_q:
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
            self.acell_cv, self.bcell_cv, self.gd.N_c, self.ecut,
            q=self.q_c)
    else:
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
            self.acell_cv, self.bcell_cv, self.gd.N_c, self.ecut)

    # band init
    if self.nbands is None:
        self.nbands = calc.wfs.bd.nbands
    self.nvalence = calc.wfs.nvalence

    # Projectors init
    setups = calc.wfs.setups
    self.spos_ac = calc.atoms.get_scaled_positions()

    if self.pwmode:
        self.pt = PWLFC([setup.pt_j for setup in setups], self.calc.wfs.pd)
        self.pt.set_positions(self.spos_ac)
    else:
        self.pt = LFC(gd, [setup.pt_j for setup in setups],
                      KPointDescriptor(self.kd.bzk_kc),
                      dtype=complex, forces=True)
        self.pt.set_positions(self.spos_ac)

    # Printing calculation information
    self.print_stuff()

    return
import numpy as np
from gpaw.utilities.blas import \
    gemm, axpy, r2k, rk, gemmdot, dotc
from gpaw.utilities.tools import tri2full

# Real-valued operands: a 5x7 matrix, a 3x7 matrix and a length-7 vector.
a = np.arange(5 * 7).reshape(5, 7) + 4.
a2 = np.arange(3 * 7).reshape(3, 7) + 3.
b = np.arange(7) - 2.

# Check gemmdot with floats
matvec_ref = np.dot(a, b)
assert (matvec_ref == gemmdot(a, b)).all()
outer_ref = np.dot(a, a2.T)
assert (outer_ref == gemmdot(a, a2, trans='t')).all()
# For real input, 'c' (conjugate transpose) must equal 't'.
assert (outer_ref == gemmdot(a, a2, trans='c')).all()
assert np.dot(b, b) == gemmdot(b, b)

# Check gemmdot with complex arrays
a = a * (2 + 1.j)
a2 = a2 * (-1 + 3.j)
b = b * (3 - 2.j)
matvec_ref = np.dot(a, b)
assert (matvec_ref == gemmdot(a, b)).all()
assert (np.dot(a, a2.T) == gemmdot(a, a2, trans='t')).all()
assert (np.dot(a, a2.T.conj()) == gemmdot(a, a2, trans='c')).all()
assert np.dot(b, b) == gemmdot(b, b, trans='n')
assert np.dot(b, b.conj()) == gemmdot(b, b, trans='c')
scaled_a = 5.j * a
assert np.vdot(a, scaled_a) == dotc(a, scaled_a)

# Check gemm for transa='n'
# c starts as the reference product; gemm then computes a.a2 - c in
# place, so every entry must come out zero.
a2 = np.arange(7 * 5 * 1 * 3).reshape(7, 5, 1, 3) * (-1. + 4.j) + 3.
c = np.tensordot(a, a2, [1, 0])
gemm(1., a2, a, -1., c, 'n')
assert not c.any()
def initialize(self):
    """Set up k-point, band, cell, grid, eigenvalue and projector data.

    ``self.eta`` and ``self.ecut`` are divided by ``Hartree`` in place and
    ``self.ftol`` by the number of BZ k-points, so calling this method a
    second time rescales them again.  ``self.q_c`` must be set.

    If ``self.e_kn`` already exists on the instance it is kept; otherwise
    the eigenvalues are read from the calculator.  Occupations
    (``self.f_kn``) are always read from the calculator.
    """
    self.eta /= Hartree
    self.ecut /= Hartree

    calc = self.calc

    # kpoint init
    self.kd = kd = calc.wfs.kd
    self.bzk_kc = kd.bzk_kc
    self.ibzk_kc = kd.ibzk_kc
    self.nkpt = kd.nbzkpts
    self.ftol /= self.nkpt

    # band init
    if self.nbands is None:
        self.nbands = calc.wfs.nbands
    self.nvalence = calc.wfs.nvalence

    # cell init
    self.acell_cv = calc.atoms.cell / Bohr
    self.bcell_cv, self.vol, self.BZvol = get_primitive_cell(self.acell_cv)

    # grid init
    self.nG = calc.get_number_of_grid_points()
    self.nG0 = self.nG[0] * self.nG[1] * self.nG[2]
    gd = GridDescriptor(self.nG, calc.wfs.gd.cell_cv,
                        pbc_c=True, comm=serial_comm)
    self.gd = gd
    self.h_cv = gd.h_cv

    # obtain eigenvalues, occupations
    nibzkpt = kd.nibzkpts
    kweight_k = kd.weight_k

    # Only a missing attribute means "no user eigenvalues"; any other
    # exception is a real error and must propagate.
    try:
        self.e_kn
    except AttributeError:
        self.printtxt('Use eigenvalues from the calculator.')
        self.e_kn = np.array([calc.get_eigenvalues(kpt=k)
                              for k in range(nibzkpt)]) / Hartree
        self.printtxt('Eigenvalues(k=0) are:')
        # NOTE(review): Python 2 print-to-file statement, kept as is.
        print >> self.txt, self.e_kn[0] * Hartree
    self.f_kn = np.array([calc.get_occupation_numbers(kpt=k) / kweight_k[k]
                          for k in range(nibzkpt)]) / self.nkpt

    # k + q init
    assert self.q_c is not None
    self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

    if self.optical_limit:
        kq_k = np.arange(self.nkpt)
        self.expqr_g = 1.
    else:
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
        self.expqr_g = np.exp(-1j * qr_g)
        del r_vg, qr_g
        kq_k = kd.find_k_plus_q(self.q_c)
    self.kq_k = kq_k

    # Plane wave init
    self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
        self.acell_cv, self.bcell_cv, self.nG, self.ecut)

    # Projectors init
    setups = calc.wfs.setups
    pt = LFC(gd, [setup.pt_j for setup in setups],
             dtype=calc.wfs.dtype, forces=True)
    spos_ac = calc.atoms.get_scaled_positions()
    pt.set_k_points(self.bzk_kc)
    pt.set_positions(spos_ac)
    self.pt = pt

    # Printing calculation information
    self.print_stuff()
def calculate_forces(self, hamiltonian, F_av):
    """Add the LCAO force contributions to ``F_av`` in place.

    Five contributions are accumulated (kinetic, potential, basis
    overlap, PAW density-matrix correction and atomic Hamiltonian), added
    to ``F_av`` and then summed over ``ksl.orbital_comm`` and, on band
    rank 0, over ``self.kpt_comm``.

    Two code paths exist, selected by whether ``self.ksl`` is a
    ``BlacsOrbitalLayouts`` instance: the non-BLACS path works with full
    (mynao, nao) matrices, the BLACS path loops over atom pairs and
    block-distributed density matrices.
    """
    self.timer.start('LCAO forces')
    spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
    ksl = self.ksl
    nao = ksl.nao
    mynao = ksl.mynao
    nq = len(self.kd.ibzk_qc)
    dtype = self.dtype
    tci = self.tci
    gd = self.gd
    bfs = self.basis_functions

    Mstart = ksl.Mstart
    Mstop = ksl.Mstop

    from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
    isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

    if not isblacs:
        # Derivatives of the two-center integrals with respect to atomic
        # positions; the overlap and kinetic parts are summed over the
        # domain communicator.
        self.timer.start('TCI derivative')
        dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dPdR_aqvMi = {}
        for a in self.basis_functions.my_atom_indices:
            ni = self.setups[a].ni
            dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
        tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                 dPdR_aqvMi)
        gd.comm.sum(dThetadR_qvMM)
        gd.comm.sum(dTdR_qvMM)
        self.timer.stop('TCI derivative')

    my_atom_indices = bfs.my_atom_indices
    atom_indices = bfs.atom_indices

    def _slices(indices):
        # Yield (atom, M1, M2): the orbital range of each atom relative
        # to Mstart, clipped at 0.  Atoms whose range ends at or before
        # Mstart are skipped.
        for a in indices:
            M1 = bfs.M_a[a] - Mstart
            M2 = M1 + self.setups[a].nao
            if M2 > 0:
                yield a, max(0, M1), M2

    def slices():
        return _slices(atom_indices)

    def my_slices():
        return _slices(my_atom_indices)

    # Energy-weighted density matrix:
    #
    #   E_{mu nu} = sum_{x z} S^-1_{mu x} H_{x z} rho_{z nu}
    #             = sum_n c*_{n mu} eps_n f_n c_{n nu}
    #
    # We use the transpose of that matrix.  The first form is used
    # if rho is given, otherwise the coefficients are used.
    self.timer.start('Initial')

    rhoT_uMM = []
    ET_uMM = []

    if not isblacs:
        if self.kpt_u[0].rho_MM is None:
            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                rhoT_MM = ksl.get_transposed_density_matrix(kpt.f_n,
                                                            kpt.C_nM)
                rhoT_uMM.append(rhoT_MM)
                ET_MM = ksl.get_transposed_density_matrix(kpt.f_n *
                                                          kpt.eps_n,
                                                          kpt.C_nM)
                ET_uMM.append(ET_MM)

                if hasattr(kpt, 'c_on'):
                    # XXX does this work with BLACS/non-BLACS/etc.?
                    assert self.bd.comm.size == 1
                    d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                    dtype=kpt.C_nM.dtype)
                    for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                        d_nn += ne * np.outer(c_n.conj(), c_n)
                    # In-place updates: the arrays already appended to
                    # rhoT_uMM / ET_uMM are modified as well.
                    rhoT_MM += ksl.get_transposed_density_matrix_delta(d_nn, kpt.C_nM)
                    ET_MM += ksl.get_transposed_density_matrix_delta(d_nn * kpt.eps_n, kpt.C_nM)
            self.timer.stop('Get density matrix')
        else:
            rhoT_uMM = []
            ET_uMM = []
            for kpt in self.kpt_u:
                H_MM = self.eigensolver.calculate_hamiltonian_matrix(hamiltonian, self, kpt)
                tri2full(H_MM)
                S_MM = kpt.S_MM.copy()
                tri2full(S_MM)
                # Solve S X = H rho instead of forming S^-1 explicitly.
                ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM,
                                                      kpt.rho_MM)).T.copy()
                del S_MM, H_MM
                rhoT_MM = kpt.rho_MM.T.copy()
                rhoT_uMM.append(rhoT_MM)
                ET_uMM.append(ET_MM)
    self.timer.stop('Initial')

    if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
        from gpaw.blacs import BlacsGrid, Redistributor

        def get_density_matrix(f_n, C_nM, redistributor):
            # Blocked density matrix, conjugated, then redistributed
            # to the layout given by the redistributor.
            rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                           C_nM).conj()
            rho_mm = redistributor.redistribute(rho1_mm)
            return rho_mm

        # Largest cutoff per atom of the projectors and of the
        # basis functions.
        pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j])
                     for setup in self.setups]
        phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j])
                       for setup in self.setups]

        # XXX should probably use bdsize x gdsize instead
        # That would be consistent with some existing grids
        grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                         self.bd.comm.size)

        # Ceiling division: block sizes large enough to cover nao.
        blocksize1 = -(-nao // grid.nprow)
        blocksize2 = -(-nao // grid.npcol)
        # XXX what are rows and columns actually?
        desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

        rhoT_umm = []
        ET_umm = []
        redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            self.timer.start('Get density matrix')
            rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
            rhoT_umm.append(rhoT_mm)
            self.timer.stop('Get density matrix')

            self.timer.start('Potential')
            rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_mM,
                                                        kpt.q)
            del rhoT_mM
            self.timer.stop('Potential')

        self.timer.start('Get density matrix')
        for kpt in self.kpt_u:
            ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                       redistributor)
            ET_umm.append(ET_mm)
        self.timer.stop('Get density matrix')

        # Global orbital range [M1start, M1stop) x [M2start, M2stop) of
        # the block owned by this process, and its local extent.
        M1start = blocksize1 * grid.myrow
        M2start = blocksize2 * grid.mycol

        M1stop = min(M1start + blocksize1, nao)
        M2stop = min(M2start + blocksize2, nao)

        m1max = M1stop - M1start
        m2max = M2stop - M2start

    if not isblacs:
        # Kinetic energy contribution
        #
        #   F^a += 2 Re sum_{mu in a; nu}
        #              (dT_{mu nu} / dR_{mu nu}) rho_{nu mu}
        #
        Fkin_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dEdTrhoT_vMM = (dTdR_qvMM[kpt.q]
                            * rhoT_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Fkin_av[a, :] += 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
        del dEdTrhoT_vMM

        # Density matrix contribution due to basis overlap
        #
        #   F^a += -2 Re sum_{mu in a; nu}
        #               (dTheta_{mu nu} / dR_{mu nu}) E_{nu mu}
        #
        Ftheta_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dThetadRE_vMM = (dThetadR_qvMM[kpt.q]
                             * ET_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Ftheta_av[a, :] += -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
        del dThetadRE_vMM

    if isblacs:
        from gpaw.lcao.overlap import TwoCenterIntegralCalculator
        self.timer.start('Prepare TCI loop')
        M_a = bfs.M_a

        Fkin2_av = np.zeros_like(F_av)
        Ftheta2_av = np.zeros_like(F_av)

        cell_cv = tci.atoms.cell
        spos_ac = tci.atoms.get_scaled_positions() % 1.0

        overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                  derivative=False)

        # NOTE(review): get_phases is not called anywhere in this
        # function.
        def get_phases(offset):
            return overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)

        # XXX this is not parallel *AT ALL*.
        self.timer.start('Get neighbors')
        nl = tci.atompairs.pairs.neighbors
        r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
        # NOTE(review): relies on keys() returning a list (Python 2).
        atompairs = r_and_offset_aao.keys()
        atompairs.sort()
        self.timer.stop('Get neighbors')

        T_expansions = tci.T_expansions
        Theta_expansions = tci.Theta_expansions
        P_expansions = tci.P_expansions
        # NOTE(review): nq was taken from self.kd.ibzk_qc at the top of
        # this function but from self.ibzk_qc here - confirm both agree.
        nq = len(self.ibzk_qc)

        dH_asp = hamiltonian.dH_asp

        # Give every domain rank a copy of dH_sp for every atom: the
        # rank given by bfs.sphere_a[a].rank broadcasts, the others
        # receive into an empty buffer of the packed size.
        self.timer.start('broadcast dH')
        alldH_asp = {}
        for a in range(len(self.setups)):
            gdrank = bfs.sphere_a[a].rank
            if gdrank == gd.rank:
                dH_sp = dH_asp[a]
            else:
                ni = self.setups[a].ni
                dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
            gd.comm.broadcast(dH_sp, gdrank)
            # okay, now everyone gets copies of dH_sp
            alldH_asp[a] = dH_sp
        self.timer.stop('broadcast dH')

        # This will get sort of hairy.  We need to account for some
        # three-center overlaps, such as:
        #
        #   < dPhi^a1/dR | p~^a3 > dH^a3 < p~^a3 | Phi^a2 > rho^{a2,a1}
        #
        # To this end we will loop over all pairs of atoms (a1, a3),
        # and then a sub-loop over (a3, a2).
        from gpaw.lcao.overlap import DerivativeAtomicDisplacement

        class Displacement(DerivativeAtomicDisplacement):
            def __init__(self, a1, a2, R_c, offset):
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                offset)
                DerivativeAtomicDisplacement.__init__(self, None, a1, a2,
                                                      R_c, offset, phases)

        # Cache of Displacement objects with evaluated spherical
        # harmonics, keyed by atom pair.
        disp_aao = {}

        def get_displacements(a1, a2, maxdistance):
            # XXX the way maxdistance is handled it can lead to
            # bad caching when different maxdistances are passed
            # to subsequent calls with same pair of atoms
            disp_o = disp_aao.get((a1, a2))
            if disp_o is None:
                disp_o = []
                for r, offset in r_and_offset_aao[(a1, a2)]:
                    if np.linalg.norm(r) > maxdistance:
                        continue
                    disp = Displacement(a1, a2, r, offset)
                    disp_o.append(disp)
                disp_aao[(a1, a2)] = disp_o
            return [disp for disp in disp_o if disp.r < maxdistance]

        self.timer.stop('Prepare TCI loop')
        self.timer.start('Not so complicated loop')

        # Two-center (kinetic and overlap) contributions, pair by pair.
        for (a1, a2) in atompairs:
            if a1 >= a2:
                # Actually this leads to bad load balance.
                # We should take a1 > a2 or a1 < a2 equally many times.
                # Maybe decide which of these choices
                # depending on whether a2 % 1 == 0
                continue

            # Skip pairs whose orbitals start beyond the local block.
            m1start = M_a[a1] - M1start
            m2start = M_a[a2] - M2start
            if m1start >= blocksize1 or m2start >= blocksize2:
                continue

            T_expansion = T_expansions.get(a1, a2)
            Theta_expansion = Theta_expansions.get(a1, a2)
            # NOTE(review): P_expansion is not used in this loop.
            P_expansion = P_expansions.get(a1, a2)
            nm1, nm2 = T_expansion.shape

            m1stop = min(m1start + nm1, m1max)
            m2stop = min(m2start + nm2, m2max)

            # Skip pairs whose orbitals end before the local block.
            if m1stop <= 0 or m2stop <= 0:
                continue

            # m* index the local block, J* index the pair's own matrix.
            m1start = max(m1start, 0)
            m2start = max(m2start, 0)
            J1start = max(0, M1start - M_a[a1])
            J2start = max(0, M2start - M_a[a2])
            # NOTE(review): this rebinds M1stop (the block end computed
            # above) and uses it as the end of the J1 slice below; the
            # parallel loop further down calls the same quantity J1stop.
            M1stop = J1start + m1stop - m1start
            J2stop = J2start + m2stop - m2start

            dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
            dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

            disp_o = get_displacements(a1, a2,
                                       phicutoff_a[a1] + phicutoff_a[a2])
            for disp in disp_o:
                disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

            for u, kpt in enumerate(self.kpt_u):
                rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                Fkin_v = 2.0 * (dTdR_qvmm[kpt.q][:, J1start:M1stop,
                                                 J2start:J2stop]
                                * rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop,
                                                       J2start:J2stop]
                                  * ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                # Equal and opposite contributions on the two atoms.
                Fkin2_av[a1] += Fkin_v
                Fkin2_av[a2] -= Fkin_v
                Ftheta2_av[a1] -= Ftheta_v
                Ftheta2_av[a2] += Ftheta_v

        Fkin_av = Fkin2_av
        Ftheta_av = Ftheta2_av
        self.timer.stop('Not so complicated loop')

        # Cache of (dH P, dS P) per (a2, a3) pair, one entry per kpt.
        dHP_and_dSP_aauim = {}

        # For each a3, the list of atoms a2 that form a pair with it.
        a2values = {}
        for (a2, a3) in atompairs:
            if not a3 in a2values:
                a2values[a3] = []
            a2values[a3].append(a2)

        Fatom_av = np.zeros_like(F_av)
        Frho_av = np.zeros_like(F_av)
        self.timer.start('Complicated loop')
        for a1, a3 in atompairs:
            if a1 == a3:
                continue

            m1start = M_a[a1] - M1start
            if m1start >= blocksize1:
                continue

            P_expansion = P_expansions.get(a1, a3)
            nm1 = P_expansion.shape[0]
            m1stop = min(m1start + nm1, m1max)
            if m1stop <= 0:
                continue

            m1start = max(m1start, 0)
            J1start = max(0, M1start - M_a[a1])
            J1stop = J1start + m1stop - m1start

            disp_o = get_displacements(a1, a3,
                                       phicutoff_a[a1] + pcutoff_a[a3])
            if len(disp_o) == 0:
                continue

            dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
            for disp in disp_o:
                disp.evaluate_overlap(P_expansion, dPdR_qvmi)

            # Keep only the rows belonging to the local block.
            dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
            for a2 in a2values[a3]:
                m2start = M_a[a2] - M2start
                if m2start >= blocksize2:
                    continue

                P_expansion2 = P_expansions.get(a2, a3)
                nm2 = P_expansion2.shape[0]
                m2stop = min(m2start + nm2, m2max)
                if m2stop <= 0:
                    continue

                disp_o = get_displacements(a2, a3,
                                           phicutoff_a[a2] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                m2start = max(m2start, 0)
                J2start = max(0, M2start - M_a[a2])
                J2stop = J2start + m2stop - m2start

                if (a2, a3) in dHP_and_dSP_aauim:
                    dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                else:
                    P_qmi = P_expansion2.zeros((nq,), dtype=dtype)
                    for disp in disp_o:
                        disp.evaluate_direct(P_expansion2, P_qmi)
                    P_qmi = P_qmi[:, J2start:J2stop].copy()
                    dH_sp = alldH_asp[a3]
                    dS_ii = self.setups[a3].dO_ii

                    dHP_uim = []
                    dSP_uim = []
                    for u, kpt in enumerate(self.kpt_u):
                        dH_ii = unpack(dH_sp[kpt.s])
                        dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                        # XXX only need nq of these
                        dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                        dHP_uim.append(dHP_im)
                        dSP_uim.append(dSP_im)
                    dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                    dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                    Fatom_c = 2.0 * (dPdRdHP_vmm
                                     * rhoT_mm).real.sum(-1).sum(-1)
                    Frho_c = 2.0 * (dPdRdSP_vmm
                                    * ET_mm).real.sum(-1).sum(-1)
                    Fatom_av[a1] += Fatom_c
                    Fatom_av[a3] -= Fatom_c

                    Frho_av[a1] -= Frho_c
                    Frho_av[a3] += Frho_c

        self.timer.stop('Complicated loop')

    if not isblacs:
        # Potential contribution
        #
        #   F^a += -2 Re sum_{mu in a; nu}
        #       integral (dPhi_mu(r) / dR_a) v~(r) Phi_nu(r) dr rho_{nu mu}
        #
        self.timer.start('Potential')
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_uMM[u],
                                                        kpt.q)
        self.timer.stop('Potential')

        # Density matrix contribution from PAW correction
        #
        #   F^a +=  2 Re sum_{mu nu}           Z^a_{mu nu} E_{nu mu}
        #         - 2 Re sum_{b; mu in a; nu}  Z^b_{mu nu} E_{nu mu}
        #
        # with
        #
        #   Z^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu})
        #                          dS^b_{ij} P^b_{j nu}
        #
        self.timer.start('Paw correction')
        Frho_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            work_MM = np.zeros((mynao, nao), dtype)
            ZE_MM = None
            for b in my_atom_indices:
                setup = self.setups[b]
                dO_ii = np.asarray(setup.dO_ii, dtype)
                dOP_iM = np.zeros((setup.ni, nao), dtype)
                gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                for v in range(3):
                    gemm(1.0, dOP_iM, dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop],
                         0.0, work_MM, 'n')
                    ZE_MM = (work_MM * ET_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ZE_MM[M1:M2].sum()
                        Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                        Frho_av[b, v] += dE  # the "mu nu" term
        del work_MM, ZE_MM
        self.timer.stop('Paw correction')

        # Atomic density contribution
        #
        #   F^a += -2 Re sum_{mu nu}           A^a_{mu nu} rho_{nu mu}
        #         + 2 Re sum_{b; mu in a; nu}  A^b_{mu nu} rho_{nu mu}
        #
        #   A^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu})
        #                          dH^b_{ij} P^b_{j nu}
        #
        self.timer.start('Atomic Hamiltonian force')
        Fatom_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            for b in my_atom_indices:
                H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]), dtype)
                HP_iM = gemmdot(H_ii,
                                np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                for v in range(3):
                    dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                    ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ArhoT_MM[M1:M2].sum()
                        Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                        Fatom_av[b, v] -= dE  # the "mu nu" term
        self.timer.stop('Atomic Hamiltonian force')

    # Both paths have defined all five contributions by now.
    F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
    self.timer.start('Wait for sum')
    ksl.orbital_comm.sum(F_av)
    if self.bd.comm.rank == 0:
        self.kpt_comm.sum(F_av, 0)
    self.timer.stop('Wait for sum')
    self.timer.stop('LCAO forces')
def density_matrix(self, n, m, k, kq=None,
                   spin1=0, spin2=0, phi_aGp=None, Gspace=True):
    """Return the pair density between band n at k and band m at kq.

    With ``Gspace=False`` the real-space product
    ``conj(psit1) * psit2 * exp(-i q.r)`` is returned.  Otherwise the
    product is Fourier transformed (zero padded to ``self.nGrpad``),
    restricted to the plane waves in ``self.Gindex_G``, and the PAW
    corrections built from ``phi_aGp`` are added.

    If ``kq`` is None, the q-vector data stored on the instance is used;
    otherwise q is the difference between BZ k-points ``kq`` and ``k``.
    """
    gd = self.gd
    kd = self.kd

    if kq is not None:
        optical_limit = False
        q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
        # Fold the difference back towards the first zone.
        q_c[np.where(q_c > 0.501)] -= 1
        q_c[np.where(q_c < -0.499)] += 1

        if (np.abs(q_c) < self.ftol).all():
            optical_limit = True
            q_c = self.q_c
        q_v = np.dot(q_c, self.bcell_cv)
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        qr_g = gemmdot(q_v, r_vg, beta=0.0)
        expqr_g = np.exp(-1j * qr_g)
        if optical_limit:
            expqr_g = 1
    else:
        kq = self.kq_k[k]
        expqr_g = self.expqr_g
        q_v = self.qq_v
        optical_limit = self.optical_limit
        q_c = self.q_c

    ibzkpt1 = kd.bz2ibz_k[k]
    ibzkpt2 = kd.bz2ibz_k[kq]

    # Wave functions are fetched at the IBZ k-point and mapped to the
    # requested BZ k-point.
    psit1_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt1, n, True, spin=spin1), k)
    psit2_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt2, m, True, spin=spin2), kq)

    pair_g = psit1_g.conj() * psit2_g * expqr_g
    if Gspace is False:
        return pair_g

    # zero padding is included through the FFT
    rho_g = np.fft.fftn(pair_g, s=self.nGrpad) * self.vol / self.nG0rpad
    # Here, planewave cutoff is applied
    rho_G = rho_g.ravel()[self.Gindex_G]

    if optical_limit:
        # Replace the G = 0 element by -i q . <psit1| grad |psit2>.
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                          kd.bzk_kc[kq, :, np.newaxis])
        for ix in range(3):
            self.d_c[ix](psit2_g, dpsit_g, phase_cd)
            tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
        rho_G[0] = -1j * np.dot(q_v, tmp)

    calc = self.calc
    pt = self.pt

    def rotated_projections(ibzkpt, bzkpt, band, spin):
        # first calculate P_ai at ibzkpt, then rotate to k
        u = self.kd.get_rank_and_index(spin, ibzkpt)[1]
        Ptmp_ai = pt.dict()
        kpt = calc.wfs.kpt_u[u]
        pt.integrate(kpt.psit_nG[band], Ptmp_ai, ibzkpt)
        return self.get_P_ai(bzkpt, band, spin, Ptmp_ai)

    if self.pwmode:
        P1_ai = rotated_projections(ibzkpt1, k, n, spin1)
        P2_ai = rotated_projections(ibzkpt2, kq, m, spin2)
    elif calc.wfs.world.size > 1 or kd.nbzkpts == 1:
        P1_ai = pt.dict()
        pt.integrate(psit1_g, P1_ai, k)
        P2_ai = pt.dict()
        pt.integrate(psit2_g, P2_ai, kq)
    else:
        P1_ai = self.get_P_ai(k, n, spin1)
        P2_ai = self.get_P_ai(kq, m, spin2)

    if phi_aGp is None:
        # Load the integrals for this q from the reader; an instance
        # lacking one of the needed attributes falls back to the
        # integrals stored on the instance.
        try:
            if not self.mode == 'RPA':
                if optical_limit:
                    iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                else:
                    iq = kd.where_is_q(q_c, self.bzq_qc)
                    assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8
                phi_aGp = self.load_phi_aGp(self.reader, iq)  # phi_qaGp[iq]
        except AttributeError:
            phi_aGp = self.phi_aGp

    # PAW correction: rho_G += phi_Gp . vec(conj(P1) (x) P2) per atom.
    for a, _ in enumerate(self.calc.wfs.setups.id_a):
        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
        phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
        gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

    if optical_limit:
        if n == m:
            rho_G[0] = 1.
        elif np.abs(self.e_skn[spin2][ibzkpt2, m] -
                    self.e_skn[spin1][ibzkpt1, n]) < 1e-5:
            # (Near-)degenerate states: no contribution.
            rho_G[0] = 0.
        else:
            rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                         self.enoshift_skn[spin1][ibzkpt1, n])

    return rho_G
def density_matrix(self, n, m, k, kq=None,
                   spin1=0, spin2=0, phi_aGp=None, Gspace=True):
    """Return the pair density between band n at k and band m at kq.

    With ``Gspace=False`` the real-space product
    ``conj(psit1) * psit2 * exp(-i q.r)`` is returned.  Otherwise the
    product is Fourier transformed (zero padded to ``self.nGrpad``),
    restricted to the plane waves in ``self.Gindex_G``, and the PAW
    corrections built from ``phi_aGp`` are added.

    If ``kq`` is None, the q-vector data stored on the instance is used;
    otherwise q is the difference between BZ k-points ``kq`` and ``k``.
    """
    gd = self.gd
    kd = self.kd

    if kq is not None:
        optical_limit = False
        q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
        # Fold the difference back towards the first zone.
        q_c[np.where(q_c > 0.501)] -= 1
        q_c[np.where(q_c < -0.499)] += 1

        if (np.abs(q_c) < self.ftol).all():
            optical_limit = True
            q_c = self.q_c
        q_v = np.dot(q_c, self.bcell_cv)
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        qr_g = gemmdot(q_v, r_vg, beta=0.0)
        expqr_g = np.exp(-1j * qr_g)
        if optical_limit:
            expqr_g = 1
    else:
        kq = self.kq_k[k]
        expqr_g = self.expqr_g
        q_v = self.qq_v
        optical_limit = self.optical_limit
        q_c = self.q_c

    ibzkpt1 = kd.bz2ibz_k[k]
    ibzkpt2 = kd.bz2ibz_k[kq]

    # Wave functions are fetched at the IBZ k-point and mapped to the
    # requested BZ k-point.
    psit1_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt1, n, True, spin=spin1), k)
    psit2_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt2, m, True, spin=spin2), kq)

    pair_g = psit1_g.conj() * psit2_g * expqr_g
    if Gspace is False:
        return pair_g

    # zero padding is included through the FFT
    rho_g = np.fft.fftn(pair_g, s=self.nGrpad) * self.vol / self.nG0rpad
    # Here, planewave cutoff is applied
    rho_G = rho_g.ravel()[self.Gindex_G]

    if optical_limit:
        # Replace the G = 0 element by -i q . <psit1| grad |psit2>.
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)
        phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                          kd.bzk_kc[kq, :, np.newaxis])
        for ix in range(3):
            self.d_c[ix](psit2_g, dpsit_g, phase_cd)
            tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
        rho_G[0] = -1j * np.dot(q_v, tmp)

    calc = self.calc
    pt = self.pt

    def rotated_projections(ibzkpt, bzkpt, band, spin):
        # first calculate P_ai at ibzkpt, then rotate to k
        u = self.kd.get_rank_and_index(spin, ibzkpt)[1]
        Ptmp_ai = pt.dict()
        kpt = calc.wfs.kpt_u[u]
        pt.integrate(kpt.psit_nG[band], Ptmp_ai, ibzkpt)
        return self.get_P_ai(bzkpt, band, spin, Ptmp_ai)

    if self.pwmode:
        P1_ai = rotated_projections(ibzkpt1, k, n, spin1)
        P2_ai = rotated_projections(ibzkpt2, kq, m, spin2)
    elif calc.wfs.world.size > 1 or kd.nbzkpts == 1:
        P1_ai = pt.dict()
        pt.integrate(psit1_g, P1_ai, k)
        P2_ai = pt.dict()
        pt.integrate(psit2_g, P2_ai, kq)
    else:
        P1_ai = self.get_P_ai(k, n, spin1)
        P2_ai = self.get_P_ai(kq, m, spin2)

    if phi_aGp is None:
        # Load the integrals for this q from the reader; an instance
        # lacking one of the needed attributes falls back to the
        # integrals stored on the instance.
        try:
            if not self.mode == 'RPA':
                if optical_limit:
                    iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                else:
                    iq = kd.where_is_q(q_c, self.bzq_qc)
                    assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8
                phi_aGp = self.load_phi_aGp(self.reader, iq)  # phi_qaGp[iq]
        except AttributeError:
            phi_aGp = self.phi_aGp

    # PAW correction: rho_G += phi_Gp . vec(conj(P1) (x) P2) per atom.
    for a, _ in enumerate(self.calc.wfs.setups.id_a):
        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
        phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
        gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

    if optical_limit:
        if n == m:
            rho_G[0] = 1.
        elif np.abs(self.e_skn[spin2][ibzkpt2, m] -
                    self.e_skn[spin1][ibzkpt1, n]) < 1e-5:
            # (Near-)degenerate states: no contribution.
            rho_G[0] = 0.
        else:
            rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                         self.enoshift_skn[spin1][ibzkpt1, n])

    return rho_G
import numpy as np
from gpaw.utilities.blas import \
    gemm, axpy, r2k, rk, gemmdot, rotate, dotc, dotu
from gpaw.utilities.tools import tri2full

# Real-valued operands: a 5x7 matrix, a 3x7 matrix and a length-7 vector.
a = np.arange(5 * 7).reshape(5, 7) + 4.
a2 = np.arange(3 * 7).reshape(3, 7) + 3.
b = np.arange(7) - 2.

# Check gemmdot with floats
matvec_ref = np.dot(a, b)
assert (matvec_ref == gemmdot(a, b)).all()
outer_ref = np.dot(a, a2.T)
assert (outer_ref == gemmdot(a, a2, trans='t')).all()
# For real input, 'c' (conjugate transpose) must equal 't'.
assert (outer_ref == gemmdot(a, a2, trans='c')).all()
assert np.dot(b, b) == gemmdot(b, b)

# Check gemmdot with complex arrays
a = a * (2 + 1.j)
a2 = a2 * (-1 + 3.j)
b = b * (3 - 2.j)
matvec_ref = np.dot(a, b)
assert (matvec_ref == gemmdot(a, b)).all()
assert (np.dot(a, a2.T) == gemmdot(a, a2, trans='t')).all()
assert (np.dot(a, a2.T.conj()) == gemmdot(a, a2, trans='c')).all()
assert np.dot(b, b) == gemmdot(b, b, trans='n')
assert np.dot(b, b.conj()) == gemmdot(b, b, trans='c')
scaled_a = 5.j * a
assert np.vdot(a, scaled_a) == dotc(a, scaled_a)

# Check gemm for transa='n'
# c starts as the reference product; gemm then computes a.a2 - c in
# place, so every entry must come out zero.
a2 = np.arange(7 * 5 * 1 * 3).reshape(7, 5, 1, 3) * (-1. + 4.j) + 3.
c = np.tensordot(a, a2, [1, 0])
gemm(1., a2, a, -1., c, 'n')
assert not c.any()
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol,
                  bcell_cv, R_av, setups, D_asp):
    """Return the LDA kernel as an (npw, npw) matrix, divided by ``vol``.

    The result is the sum of a soft part, taken from the FFT of fxc on
    the grid (or integrated directly when G - G' falls outside the FFT
    index range), and a PAW part accumulated atom by atom and summed
    over ``world``.  Only a single spin channel is supported.
    """
    # The soft part
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    xc = XC('LDA')

    fxc_sg = np.zeros_like(nt_sG)
    xc.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    fxc_fft_g = np.fft.fftn(fxc_g) * vol / nG0

    r_vg = gd.get_grid_point_coordinates()

    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    # Cartesian G - G' for every plane-wave pair (used by the PAW part).
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_v = np.dot(dG_c, bcell_cv)
            dG_GGv[iG, jG] = dG_v
            if (nG / 2 - np.abs(dG_c) > 0).all():
                idx = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = fxc_fft_g[idx[0], idx[1], idx[2]]
            else:  # not in the fft index
                dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)

    # The PAW part
    KxcPAW_GG = np.zeros_like(Kxc_GG)

    for a, setup in enumerate(setups):
        # Atoms are dealt out round-robin over the ranks.
        if rank != a % size:
            continue

        xcc = setup.xc_correction
        rgd = xcc.rgd
        n_qg = xcc.n_qg
        nt_qg = xcc.nt_qg
        nc_g = xcc.nc_g
        nct_g = xcc.nct_g
        Y_nL = xcc.Y_nL
        dv_g = rgd.dv_g

        D_sp = D_asp[a]
        B_pqL = xcc.B_pqL
        D_sLq = np.inner(D_sp, B_pqL.T)
        nspins = len(D_sp)
        assert nspins == 1

        f_sg = rgd.empty(nspins)
        ft_sg = rgd.empty(nspins)

        n_sLg = np.dot(D_sLq, n_qg)
        nt_sLg = np.dot(D_sLq, nt_qg)

        # Add core density
        core_scale = sqrt(4 * pi) / nspins
        n_sLg[:, 0] += core_scale * nc_g
        nt_sLg[:, 0] += core_scale * nct_g

        # Phase factor exp(-i (G - G') . R_a) for this atom.
        coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))

        for n, Y_L in enumerate(Y_nL):
            w = weight_n[n]

            # All-electron and pseudo kernels along this direction.
            f_sg[:] = 0.0
            n_sg = np.dot(Y_L, n_sLg)
            xc.calculate_fxc(rgd, n_sg, f_sg)

            ft_sg[:] = 0.0
            nt_sg = np.dot(Y_L, nt_sLg)
            xc.calculate_fxc(rgd, nt_sg, ft_sg)

            phase_Gg = np.outer(np.inner(dG_GGv, R_nv[n]), rgd.r_g)
            coef_GGg = np.exp(-1j * phase_Gg).reshape(npw, npw, rgd.ng)
            KxcPAW_GG += w * np.dot(coef_GGg,
                                    (f_sg[0] - ft_sg[0]) * dv_g) * coefatoms_GG

    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG

    return Kxc_GG / vol
def calculate_rkernel(self):
    """Compute the renormalized kernel and write it to file for every q.

    The kernel is evaluated on the real-space grid for every pair
    (r, r' + R), with R running over the lattice points derived from
    ``self.unit_cells``, and Fourier transformed in r inside the main
    loop.  The grid points r' are split over ``mpi.world``.  A second
    pass transforms r' to plane waves, and rank 0 writes one
    ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw`` file per q-point in
    ``self.ibzq_qc``.

    All block sizes and the progress interval use floor division so that
    they stay integers regardless of the division semantics in effect.
    """
    gd = self.gd
    ng_c = gd.N_c
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    ns = self.calc.wfs.nspins
    n_g = self.n_g  # density on rough grid

    fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
    qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
    flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
    Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

    ng = np.prod(ng_c)  # number of grid points
    r_vg = gd.get_grid_point_coordinates()
    rx_g = r_vg[0].flatten()
    ry_g = r_vg[1].flatten()
    rz_g = r_vg[2].flatten()

    prnt(' %d grid points and %d plane waves at the Gamma point' %
         (ng, self.pd.ngmax), file=self.fd)

    # Unit cells
    # Lattice vectors R and their weights; the weight of (i, j, h) is
    # prod(nR_v - |index|) / nR.
    R_Rv = []
    weight_R = []
    nR_v = self.unit_cells
    nR = np.prod(nR_v)
    for i in range(-nR_v[0] + 1, nR_v[0]):
        for j in range(-nR_v[1] + 1, nR_v[1]):
            for h in range(-nR_v[2] + 1, nR_v[2]):
                R_Rv.append(i * cell_cv[0] +
                            j * cell_cv[1] +
                            h * cell_cv[2])
                weight_R.append((nR_v[0] - abs(i)) *
                                (nR_v[1] - abs(j)) *
                                (nR_v[2] - abs(h)) / float(nR))
    if nR > 1:
        # with more than one unit cell only the exchange kernel is
        # calculated on the grid. The bare Coulomb kernel is added
        # in PW basis and Vlocal_g only the exchange part
        dv = self.calc.density.gd.dv
        gc = (3 * dv / 4 / np.pi)**(1 / 3.)
        Vlocal_g -= 2 * np.pi * gc**2 / dv
        prnt(' Lattice point sampling: ' +
             '(%s x %s x %s)^2 ' % (nR_v[0], nR_v[1], nR_v[2]) +
             ' Reduced to %s lattice points' % len(R_Rv), file=self.fd)

    # Split the grid points r' over the ranks (ceiling division).
    l_g_size = -(-ng // mpi.world.size)
    l_g_range = range(mpi.world.rank * l_g_size,
                      min((mpi.world.rank + 1) * l_g_size, ng))

    fhxc_qsGr = {}
    for iq in range(len(self.ibzq_qc)):
        fhxc_qsGr[iq] = np.zeros(
            (ns, len(self.pd.G2_qG[iq]), len(l_g_range)), dtype=complex)

    # Division by zero / invalid values occur at r = r' below and are
    # overwritten there, so the floating point warnings are silenced
    # until the loop is done.
    inv_error = np.seterr()
    np.seterr(invalid='ignore')
    np.seterr(divide='ignore')

    t0 = time()
    # Number of cells between two progress messages.
    progress_step = len(R_Rv) // 5 + 1
    # Loop over Lattice points
    for i, R_v in enumerate(R_Rv):
        # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
        if i == 1:
            prnt(' Finished 1 cell in %s seconds' % int(time() - t0) +
                 ' - estimated %s seconds left' %
                 int((len(R_Rv) - 1) * (time() - t0)),
                 file=self.fd)
            self.fd.flush()
        if len(R_Rv) > 5:
            if (i + 1) % progress_step == 0:
                prnt(' Finished %s cells in %s seconds'
                     % (i, int(time() - t0)) +
                     ' - estimated %s seconds left'
                     % int((len(R_Rv) - i) * (time() - t0) / i),
                     file=self.fd)
                self.fd.flush()
        for g in l_g_range:
            rx = rx_g[g] + R_v[0]
            ry = ry_g[g] + R_v[1]
            rz = rz_g[g] + R_v[2]

            # |r-r'-R_i|
            rr = ((r_vg[0] - rx)**2 +
                  (r_vg[1] - ry)**2 +
                  (r_vg[2] - rz)**2)**0.5

            # Kernel evaluated at the average of the two densities.
            n_av = (n_g + n_g.flatten()[g]) / 2.
            fx_g = ns * self.get_fxc_g(n_av, index=g)
            qc_g = (-4 * np.pi * ns / fx_g)**0.5
            x = qc_g * rr
            osc_x = np.sin(x) - x * np.cos(x)
            f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
            if nR > 1:  # include only exchange part of the kernel here
                V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
            else:  # include the full kernel (also hartree part)
                V_rr = (sici(x)[0] * 2 / np.pi) / rr

            # Terms with r = r'
            if (np.abs(R_v) < 0.001).all():
                tmp_flat = f_rr.flatten()
                tmp_flat[g] = flocal_g.flatten()[g]
                f_rr = tmp_flat.reshape(ng_c)
                tmp_flat = V_rr.flatten()
                tmp_flat[g] = Vlocal_g.flatten()[g]
                V_rr = tmp_flat.reshape(ng_c)
                del tmp_flat

            # Drop contributions from low-density regions.
            f_rr[np.where(n_av < self.density_cut)] = 0.0
            V_rr[np.where(n_av < self.density_cut)] = 0.0

            f_rr *= weight_R[i]
            V_rr *= weight_R[i]

            # r-r'-R_i
            r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

            # Fourier transform of r
            for iq, q in enumerate(self.ibzq_qc):
                q_v = np.dot(q, icell_cv)
                e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                if ns == 2:
                    f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

    mpi.world.barrier()

    # Restore the floating point error handling saved above.
    np.seterr(**inv_error)

    for iq, q in enumerate(self.ibzq_qc):
        npw = len(self.pd.G2_qG[iq])
        fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
        l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))

        if mpi.world.size > 1:
            # redistribute grid and plane waves in fhxc_qsGr[iq]
            bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
            bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
            # Block sizes must be integers: ceiling division with //.
            bd1 = bg1.new_descriptor(npw, ng, npw,
                                     -(-ng // mpi.world.size))
            bd2 = bg2.new_descriptor(npw, ng,
                                     -(-npw // mpi.world.size), ng)

            fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
            if ns == 2:
                Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

            r = Redistributor(bg1.comm, bd1, bd2)
            r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
            if ns == 2:
                r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
        else:
            fhxc_Glr = fhxc_qsGr[iq][0]
            if ns == 2:
                Koff_Glr = fhxc_qsGr[iq][1]

        # Fourier transform of r'
        for iG in range(len(l_pw_range)):
            f_g = fhxc_Glr[iG].reshape(ng_c)
            f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
            fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
            if ns == 2:
                v_g = Koff_Glr[iG].reshape(ng_c)
                v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

        if ns == 2:  # f_00 = f_11 and f_01 = f_10
            fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
            fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

        # Each rank filled only its own plane-wave rows.
        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            if nR > 1:  # add Hartree kernel evaluated in PW basis
                # NOTE(review): Gq2_G is not a copy, so the assignment
                # below modifies self.pd.G2_qG[iq] in place - confirm
                # this is intended.
                Gq2_G = self.pd.G2_qG[iq]
                if (q == 0).all():
                    Gq2_G[0] = 1.
                vq_G = 4 * np.pi / Gq2_G
                fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()
    prnt(file=self.fd)
def get_self_energy(self, df, W_wGG):
    """Return the self-energy and its frequency derivative.

    For every spin, every k-point in ``self.gwkpt_k``, every band in
    ``self.bands`` and every intermediate band in
    ``range(self.m_start, self.m_end)`` the real part of the contraction
    ``rho_G^* W_GG' rho_G'`` is accumulated, with ``rho_G`` obtained from
    ``df.density_matrix``.  Which frequency treatment is used depends on
    flags stored on ``self``:

    * ``hilbert_trans`` false, ``ppa`` true: analytical plasmon-pole
      expression built from ``W_wGG[0]`` and ``self.wt_GG``.
    * ``hilbert_trans`` false, ``static`` true: static Coulomb-hole and
      screened-exchange terms.
    * ``hilbert_trans`` false otherwise: explicit sum over the frequency
      grid ``self.w_w`` (the local frequencies are gathered over
      ``df.wcomm``).
    * ``hilbert_trans`` true: ``W_wGG`` is transformed once up front and
      the result is linearly interpolated between two grid points.

    Parameters:

    df:
        Response-function object; this method reads ``wcomm``, ``kd``,
        ``q_c``, ``kq_k``, ``Nw``, ``npw``, ``optical_limit`` and calls
        ``density_matrix``.
    W_wGG: ndarray
        Interaction matrix with a leading frequency axis (presumably the
        screened interaction -- confirm against the caller).  When
        ``self.static`` is true a frequency axis of length one is added
        here.

    Returns the tuple ``(Sigma_skn, dSigma_skn)``, both of shape
    ``(nspins, gwnkpt, gwnband)`` and summed over ``self.ncomm``.
    """

    def rho_W_rho(W_GG, rho_G, alpha):
        # Real part of alpha * (W_GG . rho_G) contracted with rho_G using
        # gemmdot's trans='c' mode, exactly as the inlined code did.
        W_G = gemmdot(W_GG, rho_G, beta=0.0)
        return np.real(gemmdot(W_G, rho_G, alpha=alpha, beta=0.0,
                               trans='c'))

    shape = (self.nspins, self.gwnkpt, self.gwnband)
    Sigma_skn = np.zeros(shape, dtype=float)
    dSigma_skn = np.zeros(shape, dtype=float)

    wcomm = df.wcomm

    if self.static:
        W_wGG = np.array([W_wGG])

    if not self.hilbert_trans:
        # method 1: keep copies of the G=0 column and row of W; they are
        # written back before every (n, m) term below.
        Wbackup_wG0 = W_wGG[:, :, 0].copy()
        Wbackup_w0G = W_wGG[:, 0, :].copy()
    else:
        # method 2: perform the Hilbert transform once.
        nG = np.shape(W_wGG)[1]
        coords = np.zeros(wcomm.size, dtype=int)
        nG_local = nG**2 // wcomm.size
        if wcomm.rank == wcomm.size - 1:
            # the last rank takes the remainder
            nG_local = nG**2 - (wcomm.size - 1) * nG_local
        wcomm.all_gather(np.array([nG_local]), coords)
        W_Wg = SliceAlongFrequency(W_wGG, coords, wcomm)

        Nw = int(self.w_w[-1] / self.dw)

        ieta_w = 1j * self.eta_w
        w1_ww = np.zeros((Nw, df.Nw), dtype=complex)
        for iw in range(Nw):
            w1 = iw * self.dw
            w1_ww[iw] = (1. / (w1 + self.w_w + ieta_w) +
                         1. / (w1 - self.w_w + ieta_w))
            w1_ww[iw, 0] -= 1. / (w1 + ieta_w[0])  # correct w'=0
            w1_ww[iw] *= self.dw_w

        # gemmdot returns fresh arrays, so no preallocation is needed.
        Cplus_Wg = gemmdot(w1_ww, W_Wg, beta=0.0)
        Cminus_Wg = gemmdot(w1_ww.conj(), W_Wg, beta=0.0)

    for s in range(self.nspins):
        e_kn = self.e_skn[s]
        for i, k in enumerate(self.gwkpt_k):  # k is bzk index
            if df.optical_limit:
                kq_c = df.kd.bzk_kc[k]
            else:
                kq_c = df.kd.bzk_kc[k] - df.q_c  # k - q

            kq = df.kd.where_is_q(kq_c, df.kd.bzk_kc)
            assert df.kq_k[kq] == k
            ibzkpt1 = df.kd.bz2ibz_k[k]
            ibzkpt2 = df.kd.bz2ibz_k[kq]

            for j, n in enumerate(self.bands):
                for m in range(self.m_start, self.m_end):
                    # +1 for intermediate states above the Fermi level,
                    # -1 otherwise.
                    if e_kn[ibzkpt2, m] > self.eFermi:
                        sign = 1.
                    else:
                        sign = -1.

                    rho_G = df.density_matrix(m, n, kq, spin1=s, spin2=s)

                    # epsilon_n,k - epsilon_m,k-q
                    de = e_kn[ibzkpt1, n] - e_kn[ibzkpt2, m]

                    if not self.hilbert_trans:  # method 1
                        W_wGG[:, :, 0] = Wbackup_wG0
                        W_wGG[:, 0, :] = Wbackup_w0G

                        w1 = de

                        if self.ppa:
                            # analytical expression for the Plasmon Pole
                            # Approximation
                            W0_GG = W_wGG[0]
                            wt_GG = self.wt_GG
                            ieta = 1j * self.eta

                            W_GG = sign * W0_GG * (
                                1. / (w1 + wt_GG - ieta) -
                                1. / (w1 - wt_GG + ieta))
                            W_GG -= W0_GG * (
                                1. / (w1 + wt_GG + ieta * sign) +
                                1. / (w1 - wt_GG + ieta * sign))
                            Sigma_skn[s, i, j] += rho_W_rho(
                                W_GG, rho_G, self.alpha)

                            W_GG = sign * W0_GG * (
                                1. / (w1 - wt_GG + ieta)**2 -
                                1. / (w1 + wt_GG - ieta)**2)
                            W_GG += W0_GG * (
                                1. / (w1 - wt_GG + ieta * sign)**2 +
                                1. / (w1 + wt_GG + ieta * sign)**2)
                            dSigma_skn[s, i, j] += rho_W_rho(
                                W_GG, rho_G, self.alpha)

                        elif self.static:
                            W1_GG = W_wGG[0] - np.eye(df.npw) * self.Kc_GG
                            W2_GG = W_wGG[0]

                            # Coulomb Hole
                            Sigma_skn[s, i, j] += rho_W_rho(
                                W1_GG, rho_G, self.alpha * pi / 1j)
                            if sign == -1:
                                # Screened Exchange
                                Sigma_skn[s, i, j] -= rho_W_rho(
                                    W2_GG, rho_G, 2 * self.alpha * pi / 1j)
                            del W1_GG, W2_GG, rho_G

                        else:
                            # perform W_wGG * outer(rho_G.conj(), rho_G)
                            # summed over GG for the local frequencies
                            W_wG = gemmdot(W_wGG, rho_G, beta=0.0)
                            C_wlocal = gemmdot(W_wG, rho_G,
                                               alpha=self.alpha, beta=0.0,
                                               trans='c')
                            del W_wG, rho_G

                            C_w = np.zeros(df.Nw, dtype=complex)
                            wcomm.all_gather(C_wlocal, C_w)
                            del C_wlocal

                            ieta_w = 1j * self.eta_w * sign

                            # calculate self energy
                            w1_w = (1. / (w1 - self.w_w + ieta_w) +
                                    1. / (w1 + self.w_w + ieta_w))
                            w1_w[0] -= 1. / (w1 + ieta_w[0])  # correct w'=0
                            w1_w *= self.dw_w
                            Sigma_skn[s, i, j] += np.real(
                                gemmdot(C_w, w1_w, beta=0.0))

                            # calculate derivative of the self energy
                            # with respect to w
                            w1_w = (1. / (w1 - self.w_w + ieta_w)**2 +
                                    1. / (w1 + self.w_w + ieta_w)**2)
                            w1_w[0] -= 1. / (w1 + ieta_w[0])**2  # w'=0
                            w1_w *= self.dw_w
                            dSigma_skn[s, i, j] -= np.real(
                                gemmdot(C_w, w1_w, beta=0.0))

                    else:  # method 2
                        if not np.abs(-de) < 1e-10:
                            sign *= np.sign(de)

                        # find points on frequency grid
                        w0 = de
                        w0_id = np.abs(int(w0 / self.dw))
                        w1 = w0_id * self.dw
                        w2 = (w0_id + 1) * self.dw

                        # choose plus or minus; only two grid points are
                        # needed for each w0
                        if sign == 1:
                            C_Wg = Cplus_Wg[w0_id:w0_id + 2]
                        elif sign == -1:
                            C_Wg = Cminus_Wg[w0_id:w0_id + 2]

                        C_wGG = GatherOrbitals(C_Wg, coords, wcomm).copy()
                        del C_Wg

                        if w0_id == 0:
                            # special treatment of w0 = 0 (degenerate
                            # states): the derivative is built from the
                            # first grid point of both transforms.
                            Cplustmp_GG = GatherOrbitals(
                                Cplus_Wg[1], coords, wcomm).copy()
                            Cminustmp_GG = GatherOrbitals(
                                Cminus_Wg[1], coords, wcomm).copy()

                            Sw0 = rho_W_rho(C_wGG[0], rho_G, self.alpha)
                            Sw1 = rho_W_rho(Cplustmp_GG, rho_G, self.alpha)
                            Sw2 = rho_W_rho(Cminustmp_GG, rho_G, self.alpha)

                            Sigma_skn[s, i, j] += Sw0
                            dSigma_skn[s, i, j] += (Sw1 + Sw2) / (2 * self.dw)
                        else:
                            Sw1 = rho_W_rho(C_wGG[0], rho_G, self.alpha)
                            Sw2 = rho_W_rho(C_wGG[1], rho_G, self.alpha)

                            # linear interpolation between w1 and w2
                            Sw0 = ((w2 - np.abs(w0)) / self.dw * Sw1 +
                                   (np.abs(w0) - w1) / self.dw * Sw2)

                            Sigma_skn[s, i, j] += np.sign(de) * Sw0
                            dSigma_skn[s, i, j] += (Sw2 - Sw1) / self.dw

    self.ncomm.barrier()
    self.ncomm.sum(Sigma_skn)
    self.ncomm.sum(dSigma_skn)

    return Sigma_skn, dSigma_skn
def initialize(self):
    """Set up k-points, bands, cell, grid, plane waves and projectors.

    Reads everything from ``self.calc`` and stores the derived quantities
    as attributes on ``self``.  ``self.eta`` and ``self.ecut`` are divided
    by ``Hartree`` in place and ``self.ftol`` is divided by the number of
    k-points, so calling this method twice rescales them twice.

    If ``self.e_kn`` has not been set beforehand, eigenvalues are taken
    from the calculator (divided by ``Hartree``) and the k=0 values are
    written to ``self.txt``.  ``self.q_c`` must not be ``None``.
    """
    self.eta /= Hartree
    self.ecut /= Hartree

    calc = self.calc

    # kpoint init
    self.kd = kd = calc.wfs.kd
    self.bzk_kc = kd.bzk_kc
    self.ibzk_kc = kd.ibzk_kc
    self.nkpt = kd.nbzkpts
    self.ftol /= self.nkpt

    # band init
    if self.nbands is None:
        self.nbands = calc.wfs.nbands
    self.nvalence = calc.wfs.nvalence

    # cell init
    self.acell_cv = calc.atoms.cell / Bohr
    self.bcell_cv, self.vol, self.BZvol = get_primitive_cell(self.acell_cv)

    # grid init
    self.nG = calc.get_number_of_grid_points()
    self.nG0 = self.nG[0] * self.nG[1] * self.nG[2]
    gd = GridDescriptor(self.nG, calc.wfs.gd.cell_cv, pbc_c=True,
                        comm=serial_comm)
    self.gd = gd
    self.h_cv = gd.h_cv

    # obtain eigenvalues, occupations
    nibzkpt = kd.nibzkpts
    kweight_k = kd.weight_k

    try:
        self.e_kn
    except AttributeError:
        # Only a missing attribute means "not supplied by the user";
        # any other exception is a real error and must propagate.
        self.printtxt('Use eigenvalues from the calculator.')
        self.e_kn = np.array(
            [calc.get_eigenvalues(kpt=k) for k in range(nibzkpt)]) / Hartree
        self.printtxt('Eigenvalues(k=0) are:')
        # Same output as the Python 2 statement ``print >> self.txt, x``
        # but valid on Python 3 as well.
        self.txt.write('%s\n' % (self.e_kn[0] * Hartree,))

    self.f_kn = np.array(
        [calc.get_occupation_numbers(kpt=k) / kweight_k[k]
         for k in range(nibzkpt)]) / self.nkpt

    # k + q init
    assert self.q_c is not None
    self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

    if self.optical_limit:
        kq_k = np.arange(self.nkpt)
        self.expqr_g = 1.
    else:
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
        self.expqr_g = np.exp(-1j * qr_g)
        del r_vg, qr_g
        kq_k = kd.find_k_plus_q(self.q_c)
    self.kq_k = kq_k

    # Plane wave init
    self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
        self.acell_cv, self.bcell_cv, self.nG, self.ecut)

    # Projectors init
    setups = calc.wfs.setups
    pt = LFC(gd, [setup.pt_j for setup in setups],
             dtype=calc.wfs.dtype, forces=True)
    spos_ac = calc.atoms.get_scaled_positions()
    pt.set_k_points(self.bzk_kc)
    pt.set_positions(spos_ac)
    self.pt = pt

    # Printing calculation information
    self.print_stuff()
# Time numpy's dot product and check it against the reference BY0_pq.
t = time.time()
for n in range(numreps):
    BY1_pq = np.dot(B_pqL, Y_L)
t = time.time() - t
performance = numflop * numreps / t
# Single-argument parenthesised print gives identical output on
# Python 2 and Python 3.
print('dot : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY1_pq).max() < 5e-12
del BY1_pq

# Optionally time the gemmdot wrapper writing into a preallocated output.
if test_gemmdot:
    BY2_pq = np.empty((P, Q), dtype)
    t = time.time()
    for n in range(numreps):
        BY2_pq.fill(0.0)
        gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
    t = time.time() - t
    performance = numflop * numreps / t
    print('gemmdot: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
    assert np.abs(BY0_pq - BY2_pq).max() < 5e-12
    del BY2_pq

# Time gemv in its 't' mode.
# NOTE(review): the original nesting of this section was lost with the
# indentation; it is reconstructed here as unconditional -- confirm.
BY3_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY3_pq).max() < 5e-12
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol, bcell_cv, R_av,
                  setups, D_asp, functional='ALDA', density_cut=None):
    """ALDA kernel

    Build the exchange-correlation kernel matrix ``Kxc_sGG`` from a
    smooth ("soft") part evaluated on the real-space grid and a PAW
    correction evaluated on the radial grids of the atoms.

    Parameters:

    gd:
        Grid descriptor; provides ``get_grid_point_coordinates`` and
        ``integrate``.
    nt_sG: ndarray
        Density per spin on the grid.  Exactly one spin channel is
        required unless ``functional == 'ALDA_X'``.
    npw: int
        Number of plane waves.
    Gvec_Gc: ndarray
        Integer plane-wave indices, one row per plane wave.
    nG: ndarray
        Number of grid points along each axis.
    vol: float
        Cell volume; the result is divided by it.
    bcell_cv: ndarray
        Matrix used to turn plane-wave indices into Cartesian vectors.
    R_av:
        Position per atom, used in the atomic phase factor.
    setups:
        PAW setups; ``xc_correction`` of each is read.
    D_asp:
        Atomic density matrices indexed by atom.
    functional: str
        ``'ALDA_X'`` selects the analytic exchange-only kernel;
        otherwise ``XC(functional[1:])`` is used (``'ALDA'`` -> ``'LDA'``).
    density_cut: float or None
        If given, the soft kernel is zeroed where
        ``nt_sG * nspins < density_cut``.

    Returns the complex array ``Kxc_sGG / vol`` of shape
    ``(nspins, npw, npw)``.
    """
    # The soft part
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0
              for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    # G - G' is representable on the FFT grid: read it off.
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0],
                                                   index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(
                        np.exp(-1j * dGr_g) * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        # Atoms are distributed round-robin over the ranks.
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]

                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = (nspins * (4 / 9.) * A_x *
                            (nspins * n_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = (nspins * (4 / 9.) *
                             (A_x * (nspins * nt_sg)**(-2 / 3.)))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)

                # The direction-dependent phase does not depend on the
                # radial index, so evaluate it once per direction instead
                # of once per radial grid point.
                phase_GG = -1j * np.inner(dG_GGv, R_nv[n])
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(phase_GG * rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(
                            coef_GG,
                            (f_sg[s, i] - ft_sg[s, i]) * dv_g[i]
                        ) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol
def calculate_rkernel(self):
    """Compute the renormalized kernel and write it to file per q-point.

    The kernel is built in real space as a function of ``r`` for every
    ``r'`` owned by this rank (grid points are split over ``mpi.world``),
    summed over the lattice vectors generated from ``self.unit_cells``,
    Fourier transformed with respect to ``r`` and then ``r'``, and
    written by rank 0 to ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw`` for every
    q-point in ``self.ibzq_qc``.

    With more than one unit cell only the exchange part is evaluated on
    the grid; the bare Coulomb kernel is added in the plane-wave basis
    right before writing.

    Division-by-zero and invalid-value warnings from numpy are silenced
    while the real-space kernel is evaluated and restored afterwards.
    """
    gd = self.gd
    ng_c = gd.N_c
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    ns = self.calc.wfs.nspins
    n_g = self.n_g  # density on rough grid

    fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
    qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
    flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
    Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

    ng = np.prod(ng_c)  # number of grid points
    r_vg = gd.get_grid_point_coordinates()
    rx_g = r_vg[0].flatten()
    ry_g = r_vg[1].flatten()
    rz_g = r_vg[2].flatten()

    prnt(' %d grid points and %d plane waves at the Gamma point' %
         (ng, self.pd.ngmax), file=self.fd)

    # Unit cells
    R_Rv = []
    weight_R = []
    nR_v = self.unit_cells
    nR = np.prod(nR_v)
    for i in range(-nR_v[0] + 1, nR_v[0]):
        for j in range(-nR_v[1] + 1, nR_v[1]):
            for h in range(-nR_v[2] + 1, nR_v[2]):
                R_Rv.append(i * cell_cv[0] +
                            j * cell_cv[1] +
                            h * cell_cv[2])
                weight_R.append((nR_v[0] - abs(i)) *
                                (nR_v[1] - abs(j)) *
                                (nR_v[2] - abs(h)) / float(nR))
    if nR > 1:
        # with more than one unit cell only the exchange kernel is
        # calculated on the grid. The bare Coulomb kernel is added
        # in PW basis and Vlocal_g only the exchange part
        dv = self.calc.density.gd.dv
        gc = (3 * dv / 4 / np.pi)**(1 / 3.)
        Vlocal_g -= 2 * np.pi * gc**2 / dv
    prnt(' Lattice point sampling: ' +
         '(%s x %s x %s)^2 ' % (nR_v[0], nR_v[1], nR_v[2]) +
         ' Reduced to %s lattice points' % len(R_Rv), file=self.fd)

    # Ceiling division: number of grid points per rank.
    l_g_size = -(-ng // mpi.world.size)
    l_g_range = range(mpi.world.rank * l_g_size,
                      min((mpi.world.rank + 1) * l_g_size, ng))

    fhxc_qsGr = {}
    for iq in range(len(self.ibzq_qc)):
        fhxc_qsGr[iq] = np.zeros((ns, len(self.pd.G2_qG[iq]),
                                  len(l_g_range)), dtype=complex)

    inv_error = np.seterr()
    np.seterr(invalid='ignore')
    np.seterr(divide='ignore')

    t0 = time()
    # Loop over Lattice points
    for i, R_v in enumerate(R_Rv):
        # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
        if i == 1:
            prnt(' Finished 1 cell in %s seconds' % int(time() - t0) +
                 ' - estimated %s seconds left' %
                 int((len(R_Rv) - 1) * (time() - t0)),
                 file=self.fd)
            self.fd.flush()
        if len(R_Rv) > 5:
            # Floor division keeps the reporting interval an integer on
            # both Python 2 and Python 3.
            if (i + 1) % (len(R_Rv) // 5 + 1) == 0:
                prnt(' Finished %s cells in %s seconds'
                     % (i, int(time() - t0)) +
                     ' - estimated %s seconds left' %
                     int((len(R_Rv) - i) * (time() - t0) / i),
                     file=self.fd)
                self.fd.flush()
        for g in l_g_range:
            rx = rx_g[g] + R_v[0]
            ry = ry_g[g] + R_v[1]
            rz = rz_g[g] + R_v[2]

            # |r-r'-R_i|
            rr = ((r_vg[0] - rx)**2 +
                  (r_vg[1] - ry)**2 +
                  (r_vg[2] - rz)**2)**0.5

            n_av = (n_g + n_g.flatten()[g]) / 2.
            fx_g = ns * self.get_fxc_g(n_av, index=g)
            qc_g = (-4 * np.pi * ns / fx_g)**0.5
            x = qc_g * rr
            osc_x = np.sin(x) - x * np.cos(x)
            f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
            if nR > 1:
                # include only exchange part of the kernel here
                V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
            else:
                # include the full kernel (also hartree part)
                V_rr = (sici(x)[0] * 2 / np.pi) / rr

            # Terms with r = r'
            if (np.abs(R_v) < 0.001).all():
                tmp_flat = f_rr.flatten()
                tmp_flat[g] = flocal_g.flatten()[g]
                f_rr = tmp_flat.reshape(ng_c)
                tmp_flat = V_rr.flatten()
                tmp_flat[g] = Vlocal_g.flatten()[g]
                V_rr = tmp_flat.reshape(ng_c)
                del tmp_flat

            f_rr[np.where(n_av < self.density_cut)] = 0.0
            V_rr[np.where(n_av < self.density_cut)] = 0.0

            f_rr *= weight_R[i]
            V_rr *= weight_R[i]

            # r-r'-R_i
            r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

            # Fourier transform of r
            for iq, q in enumerate(self.ibzq_qc):
                q_v = np.dot(q, icell_cv)
                e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                if ns == 2:
                    f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

        mpi.world.barrier()

    np.seterr(**inv_error)

    for iq, q in enumerate(self.ibzq_qc):
        npw = len(self.pd.G2_qG[iq])
        fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
        l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))

        if mpi.world.size > 1:
            # redistribute grid and plane waves in fhxc_qsGr[iq]
            bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
            bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
            # Block sizes must be integers: reuse the ceiling-divided
            # per-rank sizes (the former ``/`` gave floats on Python 3).
            bd1 = bg1.new_descriptor(npw, ng, npw, l_g_size)
            bd2 = bg2.new_descriptor(npw, ng, l_pw_size, ng)

            fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
            if ns == 2:
                Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

            r = Redistributor(bg1.comm, bd1, bd2)
            r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
            if ns == 2:
                r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
        else:
            fhxc_Glr = fhxc_qsGr[iq][0]
            if ns == 2:
                Koff_Glr = fhxc_qsGr[iq][1]

        # Fourier transform of r'
        for iG in range(len(l_pw_range)):
            f_g = fhxc_Glr[iG].reshape(ng_c)
            f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
            fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
            if ns == 2:
                v_g = Koff_Glr[iG].reshape(ng_c)
                v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

        if ns == 2:  # f_00 = f_11 and f_01 = f_10
            fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
            fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            if nR > 1:  # add Hartree kernel evaluated in PW basis
                # NOTE(review): this is the array stored on self.pd, so
                # setting element 0 below modifies it in place -- confirm
                # that later users of G2_qG expect that.
                Gq2_G = self.pd.G2_qG[iq]
                if (q == 0).all():
                    Gq2_G[0] = 1.
                vq_G = 4 * np.pi / Gq2_G
                fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()
    prnt(file=self.fd)
def write_overlaps(calc, seed=None, spin=0, v_knm=None):
    """Write the overlap matrices between neighbouring k-points.

    Reads the neighbour list from ``<seed>.nnkp`` (the section that
    starts with ``begin nnkpts``) and writes ``<seed>.mmn``: a comment
    line, a line with the number of bands, k-points and neighbours, and
    for every (k-point, neighbour) pair one index line followed by the
    real and imaginary parts of every overlap matrix element.

    Parameters:

    calc:
        Calculator providing the k-points, wave functions, projections
        and setups.
    seed: str or None
        File name prefix; defaults to the chemical formula of
        ``calc.atoms``.
    spin: int
        Spin index used for wave functions and projections in the
        non-spinor case.
    v_knm: sequence or None
        If given, spinor wave functions and projections are built from
        ``v_knm[ik]`` for every k-point.

    Raises ``ValueError`` if the ``.nnkp`` file has no ``begin nnkpts``
    section.
    """
    if seed is None:
        seed = calc.atoms.get_chemical_formula()

    spinors = v_knm is not None

    bands = get_bands(seed)
    Nn = len(bands)
    kpts_kc = calc.get_bz_k_points()
    Nk = len(kpts_kc)

    # Read the whole neighbour file up front and close it right away.
    with open(seed + '.nnkp', 'r') as nnkp:
        lines = nnkp.readlines()

    for il, line in enumerate(lines):
        words = line.split()
        if len(words) > 1 and words[0] == 'begin' and words[1] == 'nnkpts':
            # The neighbour count is a plain integer; parse it as such
            # instead of evaluating file content as Python code.
            Nb = int(lines[il + 1].split()[0])
            i0 = il + 2
            break
    else:
        raise ValueError("no 'begin nnkpts' section found in %s"
                         % (seed + '.nnkp'))

    # The context manager closes the output file even if an overlap
    # evaluation raises.
    with open(seed + '.mmn', 'w') as f:
        print('Kohn-Sham input generated from GPAW calculation', file=f)
        print('%10d %6d %6d' % (Nn, Nk, Nb), file=f)

        icell_cv = (2 * np.pi) * np.linalg.inv(calc.wfs.gd.cell_cv).T
        r_g = calc.wfs.gd.get_grid_point_coordinates()
        Ng = np.prod(np.shape(r_g)[1:]) * (spinors + 1)

        dO_aii = []
        for ia in calc.wfs.kpt_u[0].P_ani.keys():
            dO_ii = calc.wfs.setups[ia].dO_ii
            if spinors:
                # Spinor projections require doubling of the (identical)
                # orbitals
                dO_jj = np.zeros((2 * len(dO_ii), 2 * len(dO_ii)), complex)
                dO_jj[::2, ::2] = dO_ii
                dO_jj[1::2, 1::2] = dO_ii
                dO_aii.append(dO_jj)
            else:
                dO_aii.append(dO_ii)

        wfs = calc.wfs

        u_knG = []
        for ik in range(Nk):
            if spinors:
                # For spinors, G denotes spin and grid: G = (s, gx, gy, gz)
                u_nG = get_spinorbit_wavefunctions(calc, ik, v_knm[ik])
                u_knG.append(u_nG[bands])
            else:
                # For non-spinors, G denotes grid: G = (gx, gy, gz)
                u_knG.append(np.array(
                    [wfs.get_wave_function_array(n, ik, spin)
                     for n in bands]))

        P_kani = []
        for ik in range(Nk):
            if spinors:
                P_kani.append(
                    get_spinorbit_projections(calc, ik, v_knm[ik]))
            else:
                P_kani.append(calc.wfs.kpt_u[spin * Nk + ik].P_ani)

        for ik1 in range(Nk):
            u1_nG = u_knG[ik1]
            for ib in range(Nb):
                # b denotes nearest neighbor k-points
                line = lines[i0 + ik1 * Nb + ib].split()
                ik2 = int(line[1]) - 1  # file indices are 1-based
                u2_nG = u_knG[ik2]

                G_c = np.array([int(line[i]) for i in range(2, 5)])
                bG_c = kpts_kc[ik2] - kpts_kc[ik1] + G_c
                bG_v = np.dot(bG_c, icell_cv)
                u2_nG = u2_nG * np.exp(
                    -1.0j * gemmdot(bG_v, r_g, beta=0.0))
                M_mm = get_overlap(calc,
                                   bands,
                                   np.reshape(u1_nG, (len(u1_nG), Ng)),
                                   np.reshape(u2_nG, (len(u2_nG), Ng)),
                                   P_kani[ik1],
                                   P_kani[ik2],
                                   dO_aii,
                                   bG_v)
                indices = (ik1 + 1, ik2 + 1, G_c[0], G_c[1], G_c[2])
                print('%3d %3d %4d %3d %3d' % indices, file=f)
                # The first index of M_mm varies fastest in the file.
                for m1 in range(len(M_mm)):
                    for m2 in range(len(M_mm)):
                        M = M_mm[m2, m1]
                        print('%20.12f %20.12f' % (M.real, M.imag), file=f)
def calculate_forces_by_kpoint(self, kpt, hamiltonian, F_av, tci, P_aqMi,
                               dThetadR_vMM, dTdR_vMM, dPdR_aqvMi):
    """Add the LCAO force contributions of one k-point to ``F_av``.

    Five contributions are evaluated and added in place to ``F_av``:
    kinetic energy, effective potential, basis overlap, PAW overlap
    correction and atomic Hamiltonian (``dH_asp``).

    Parameters:

    kpt:
        K-point object; ``q``, ``s``, ``rho_MM``, ``f_n``, ``eps_n`` and
        ``C_nM`` are read (plus ``ne_o``/``c_on`` when present).
    hamiltonian:
        Provides ``vt_sG`` and ``dH_asp``.
    F_av: ndarray
        Force array, updated in place.
    tci, P_aqMi:
        Not used by this method (``self.P_aqMi`` is used instead); kept
        for interface compatibility.
    dThetadR_vMM, dTdR_vMM: ndarray
        Derivatives of overlap and kinetic matrices for this k-point.
    dPdR_aqvMi: dict
        Derivatives of the projector overlaps, indexed by atom, then q.
    """
    q = kpt.q
    mynao = self.ksl.mynao
    nao = self.ksl.nao
    dtype = self.dtype
    Mstart = self.ksl.Mstart
    Mstop = self.ksl.Mstop

    basis_functions = self.basis_functions
    my_atom_indices = basis_functions.my_atom_indices
    atom_indices = basis_functions.atom_indices

    def _slices(indices):
        # Yield (atom, first, last) row ranges in *local* numbering.
        # When Mstart > 0 the start of an atom can be negative; a
        # negative slice start would wrap around to the end of the
        # array, so skip atoms lying entirely before the local block
        # and clamp partially overlapping ones to 0.
        for a in indices:
            M1 = basis_functions.M_a[a] - Mstart
            M2 = M1 + self.setups[a].niAO
            if M2 > 0:
                yield a, max(0, M1), M2

    def slices():
        return _slices(atom_indices)

    def my_slices():
        return _slices(my_atom_indices)

    # Energy-weighted density matrix:
    #
    #   E_{mu nu} = sum_{x z} S^-1_{mu x} H_{x z} rho_{z nu}
    #             = sum_n c*_{n mu} eps_n f_n c_{n nu}
    #
    # We use the transpose of that matrix.  The first form is used
    # if rho is given, otherwise the coefficients are used.
    self.timer.start('LCAO forces: initial')
    if kpt.rho_MM is None:
        rhoT_MM = self.ksl.get_transposed_density_matrix(kpt.f_n, kpt.C_nM)
        ET_MM = self.ksl.get_transposed_density_matrix(
            kpt.f_n * kpt.eps_n, kpt.C_nM)

        if hasattr(kpt, 'c_on'):
            assert self.bd.comm.size == 1
            d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                            dtype=kpt.C_nM.dtype)
            for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                d_nn += ne * np.outer(c_n.conj(), c_n)
            rhoT_MM += self.ksl.get_transposed_density_matrix_delta(
                d_nn, kpt.C_nM)
            ET_MM += self.ksl.get_transposed_density_matrix_delta(
                d_nn * kpt.eps_n, kpt.C_nM)
    else:
        H_MM = self.eigensolver.calculate_hamiltonian_matrix(
            hamiltonian, self, kpt)
        tri2full(H_MM)
        S_MM = self.S_qMM[q].copy()
        tri2full(S_MM)
        ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM, kpt.rho_MM)).T.copy()
        del S_MM, H_MM
        rhoT_MM = kpt.rho_MM.T.copy()
    self.timer.stop('LCAO forces: initial')

    # Kinetic energy contribution
    #
    #   F^a += 2 Re sum_{mu in a; nu} dT_{mu nu}/dR_{mu nu} rho_{nu mu}
    #
    Fkin_av = np.zeros_like(F_av)
    dEdTrhoT_vMM = (dTdR_vMM * rhoT_MM[np.newaxis]).real
    for a, M1, M2 in my_slices():
        Fkin_av[a, :] = 2 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
    del dEdTrhoT_vMM

    # Potential contribution
    #
    #   F^a += -2 Re sum_{mu in a; nu}
    #              int dPhi_mu(r)/dR_a  v~(r)  Phi_nu(r) dr  rho_{nu mu}
    #
    self.timer.start('LCAO forces: potential')
    Fpot_av = np.zeros_like(F_av)
    vt_G = hamiltonian.vt_sG[kpt.s]
    DVt_vMM = np.zeros((3, mynao, nao), dtype)
    # Note that DVt_vMM contains dPhi(r) / dr = - dPhi(r) / dR^a
    basis_functions.calculate_potential_matrix_derivative(vt_G, DVt_vMM, q)
    for a, M1, M2 in slices():
        for v in range(3):
            Fpot_av[a, v] = 2 * (DVt_vMM[v, M1:M2, :] *
                                 rhoT_MM[M1:M2, :]).real.sum()
    del DVt_vMM
    self.timer.stop('LCAO forces: potential')

    # Density matrix contribution due to basis overlap
    #
    #   F^a += -2 Re sum_{mu in a; nu}
    #              dTheta_{mu nu}/dR_{mu nu}  E_{nu mu}
    #
    Frho_av = np.zeros_like(F_av)
    dThetadRE_vMM = (dThetadR_vMM * ET_MM[np.newaxis]).real
    for a, M1, M2 in my_slices():
        Frho_av[a, :] = -2 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
    del dThetadRE_vMM

    # Density matrix contribution from PAW correction
    #
    #   F^a += 2 Re sum_{mu nu} Z^a_{mu nu} E_{nu mu}
    #        - 2 Re sum_{b; mu in a; nu} Z^b_{mu nu} E_{nu mu}
    #
    # with
    #
    #   Z^b_{mu nu} = sum_{ij} dP^b*_{i mu}/dR_{b mu} dS^b_{ij} P^b_{j nu}
    #
    self.timer.start('LCAO forces: paw correction')
    dPdR_avMi = {a: dPdR_aqvMi[a][q] for a in my_atom_indices}
    work_MM = np.zeros((mynao, nao), dtype)
    ZE_MM = None
    for b in my_atom_indices:
        setup = self.setups[b]
        dO_ii = np.asarray(setup.dO_ii, dtype)
        dOP_iM = np.zeros((setup.ni, nao), dtype)
        gemm(1.0, self.P_aqMi[b][q], dO_ii, 0.0, dOP_iM, 'c')
        for v in range(3):
            gemm(1.0, dOP_iM, dPdR_avMi[b][v][Mstart:Mstop], 0.0,
                 work_MM, 'n')
            ZE_MM = (work_MM * ET_MM).real
            for a, M1, M2 in slices():
                dE = 2 * ZE_MM[M1:M2].sum()
                Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                Frho_av[b, v] += dE  # the "mu nu" term
    del work_MM, ZE_MM
    self.timer.stop('LCAO forces: paw correction')

    # Atomic density contribution
    #
    #   F^a += -2 Re sum_{mu nu} A^a_{mu nu} rho_{nu mu}
    #        + 2 Re sum_{b; mu in a; nu} A^b_{mu nu} rho_{nu mu}
    #
    # with
    #
    #   A^b_{mu nu} = sum_{ij} dP^b*_{i mu}/dR_{b mu} dH^b_{ij} P^b_{j nu}
    #
    self.timer.start('LCAO forces: atomic density')
    Fatom_av = np.zeros_like(F_av)
    for b in my_atom_indices:
        H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]), dtype)
        HP_iM = gemmdot(H_ii, np.conj(self.P_aqMi[b][q].T))
        for v in range(3):
            dPdR_Mi = dPdR_avMi[b][v][Mstart:Mstop]
            ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_MM).real
            for a, M1, M2 in slices():
                dE = 2 * ArhoT_MM[M1:M2].sum()
                Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                Fatom_av[b, v] -= dE  # the "mu nu" term
    self.timer.stop('LCAO forces: atomic density')

    F_av += Fkin_av + Fpot_av + Frho_av + Fatom_av
def calculate_forces(self, hamiltonian, F_av): self.timer.start('LCAO forces') spos_ac = self.tci.atoms.get_scaled_positions() % 1.0 ksl = self.ksl nao = ksl.nao mynao = ksl.mynao nq = len(self.kd.ibzk_qc) dtype = self.dtype tci = self.tci gd = self.gd bfs = self.basis_functions Mstart = ksl.Mstart Mstop = ksl.Mstop from gpaw.kohnsham_layouts import BlacsOrbitalLayouts isblacs = isinstance(ksl, BlacsOrbitalLayouts) # XXX if not isblacs: self.timer.start('TCI derivative') dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype) dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype) dPdR_aqvMi = {} for a in self.basis_functions.my_atom_indices: ni = self.setups[a].ni dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype) tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM, dPdR_aqvMi) gd.comm.sum(dThetadR_qvMM) gd.comm.sum(dTdR_qvMM) self.timer.stop('TCI derivative') my_atom_indices = bfs.my_atom_indices atom_indices = bfs.atom_indices def _slices(indices): for a in indices: M1 = bfs.M_a[a] - Mstart M2 = M1 + self.setups[a].nao if M2 > 0: yield a, max(0, M1), M2 def slices(): return _slices(atom_indices) def my_slices(): return _slices(my_atom_indices) # # ----- ----- # \ -1 \ * # E = ) S H rho = ) c eps f c # mu nu / mu x x z z nu / n mu n n n nu # ----- ----- # x z n # # We use the transpose of that matrix. The first form is used # if rho is given, otherwise the coefficients are used. self.timer.start('Initial') rhoT_uMM = [] ET_uMM = [] if not isblacs: if self.kpt_u[0].rho_MM is None: self.timer.start('Get density matrix') for kpt in self.kpt_u: rhoT_MM = ksl.get_transposed_density_matrix( kpt.f_n, kpt.C_nM) rhoT_uMM.append(rhoT_MM) ET_MM = ksl.get_transposed_density_matrix( kpt.f_n * kpt.eps_n, kpt.C_nM) ET_uMM.append(ET_MM) if hasattr(kpt, 'c_on'): # XXX does this work with BLACS/non-BLACS/etc.? 
assert self.bd.comm.size == 1 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), dtype=kpt.C_nM.dtype) for ne, c_n in zip(kpt.ne_o, kpt.c_on): d_nn += ne * np.outer(c_n.conj(), c_n) rhoT_MM += ksl.get_transposed_density_matrix_delta(\ d_nn, kpt.C_nM) ET_MM += ksl.get_transposed_density_matrix_delta(\ d_nn * kpt.eps_n, kpt.C_nM) self.timer.stop('Get density matrix') else: rhoT_uMM = [] ET_uMM = [] for kpt in self.kpt_u: H_MM = self.eigensolver.calculate_hamiltonian_matrix(\ hamiltonian, self, kpt) tri2full(H_MM) S_MM = kpt.S_MM.copy() tri2full(S_MM) ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM, kpt.rho_MM)).T.copy() del S_MM, H_MM rhoT_MM = kpt.rho_MM.T.copy() rhoT_uMM.append(rhoT_MM) ET_uMM.append(ET_MM) self.timer.stop('Initial') if isblacs: # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX from gpaw.blacs import BlacsGrid, Redistributor def get_density_matrix(f_n, C_nM, redistributor): rho1_mm = ksl.calculate_blocked_density_matrix(f_n, C_nM).conj() rho_mm = redistributor.redistribute(rho1_mm) return rho_mm pcutoff_a = [ max([pt.get_cutoff() for pt in setup.pt_j]) for setup in self.setups ] phicutoff_a = [ max([phit.get_cutoff() for phit in setup.phit_j]) for setup in self.setups ] # XXX should probably use bdsize x gdsize instead # That would be consistent with some existing grids grid = BlacsGrid(ksl.block_comm, self.gd.comm.size, self.bd.comm.size) blocksize1 = -(-nao // grid.nprow) blocksize2 = -(-nao // grid.npcol) # XXX what are rows and columns actually? 
desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2) rhoT_umm = [] ET_umm = [] redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc) Fpot_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): self.timer.start('Get density matrix') rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor) rhoT_umm.append(rhoT_mm) self.timer.stop('Get density matrix') self.timer.start('Potential') rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc) vt_G = hamiltonian.vt_sG[kpt.s] Fpot_av += bfs.calculate_force_contribution( vt_G, rhoT_mM, kpt.q) del rhoT_mM self.timer.stop('Potential') self.timer.start('Get density matrix') for kpt in self.kpt_u: ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM, redistributor) ET_umm.append(ET_mm) self.timer.stop('Get density matrix') M1start = blocksize1 * grid.myrow M2start = blocksize2 * grid.mycol M1stop = min(M1start + blocksize1, nao) M2stop = min(M2start + blocksize2, nao) m1max = M1stop - M1start m2max = M2stop - M2start if not isblacs: # Kinetic energy contribution # # ----- d T # a \ mu nu # F += 2 Re ) -------- rho # / d R nu mu # ----- mu nu # mu in a; nu # Fkin_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] * rhoT_uMM[u][np.newaxis]).real for a, M1, M2 in my_slices(): Fkin_av[a, :] += \ 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1) del dEdTrhoT_vMM # Density matrix contribution due to basis overlap # # ----- d Theta # a \ mu nu # F += -2 Re ) ------------ E # / d R nu mu # ----- mu nu # mu in a; nu # Ftheta_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): dThetadRE_vMM = (dThetadR_qvMM[kpt.q] * ET_uMM[u][np.newaxis]).real for a, M1, M2 in my_slices(): Ftheta_av[a, :] += \ -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1) del dThetadRE_vMM if isblacs: from gpaw.lcao.overlap import TwoCenterIntegralCalculator self.timer.start('Prepare TCI loop') M_a = bfs.M_a Fkin2_av = np.zeros_like(F_av) Ftheta2_av = np.zeros_like(F_av) cell_cv = 
tci.atoms.cell spos_ac = tci.atoms.get_scaled_positions() % 1.0 overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc, derivative=False) # XXX this is not parallel *AT ALL*. self.timer.start('Get neighbors') nl = tci.atompairs.pairs.neighbors r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv) atompairs = r_and_offset_aao.keys() atompairs.sort() self.timer.stop('Get neighbors') T_expansions = tci.T_expansions Theta_expansions = tci.Theta_expansions P_expansions = tci.P_expansions nq = len(self.kd.ibzk_qc) dH_asp = hamiltonian.dH_asp self.timer.start('broadcast dH') alldH_asp = {} for a in range(len(self.setups)): gdrank = bfs.sphere_a[a].rank if gdrank == gd.rank: dH_sp = dH_asp[a] else: ni = self.setups[a].ni dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2)) gd.comm.broadcast(dH_sp, gdrank) # okay, now everyone gets copies of dH_sp alldH_asp[a] = dH_sp self.timer.stop('broadcast dH') # This will get sort of hairy. We need to account for some # three-center overlaps, such as: # # a1 # Phi ~a3 a3 ~a3 a2 a2,a1 # < ---- |p > dH <p |Phi > rho # dR # # To this end we will loop over all pairs of atoms (a1, a3), # and then a sub-loop over (a3, a2). from gpaw.lcao.overlap import DerivativeAtomicDisplacement class Displacement(DerivativeAtomicDisplacement): def __init__(self, a1, a2, R_c, offset): phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset) DerivativeAtomicDisplacement.__init__( self, None, a1, a2, R_c, offset, phases) # Cache of Displacement objects with spherical harmonics with # evaluated spherical harmonics. 
disp_aao = {} def get_displacements(a1, a2, maxdistance): # XXX the way maxdistance is handled it can lead to # bad caching when different maxdistances are passed # to subsequent calls with same pair of atoms disp_o = disp_aao.get((a1, a2)) if disp_o is None: disp_o = [] for R_c, offset in r_and_offset_aao[(a1, a2)]: if np.linalg.norm(R_c) > maxdistance: continue disp = Displacement(a1, a2, R_c, offset) disp_o.append(disp) disp_aao[(a1, a2)] = disp_o return [disp for disp in disp_o if disp.r < maxdistance] self.timer.stop('Prepare TCI loop') self.timer.start('Not so complicated loop') for (a1, a2) in atompairs: if a1 >= a2: # Actually this leads to bad load balance. # We should take a1 > a2 or a1 < a2 equally many times. # Maybe decide which of these choices # depending on whether a2 % 1 == 0 continue m1start = M_a[a1] - M1start m2start = M_a[a2] - M2start if m1start >= blocksize1 or m2start >= blocksize2: continue # (we have only one block per CPU) T_expansion = T_expansions.get(a1, a2) Theta_expansion = Theta_expansions.get(a1, a2) #P_expansion = P_expansions.get(a1, a2) nm1, nm2 = T_expansion.shape m1stop = min(m1start + nm1, m1max) m2stop = min(m2start + nm2, m2max) if m1stop <= 0 or m2stop <= 0: continue m1start = max(m1start, 0) m2start = max(m2start, 0) J1start = max(0, M1start - M_a[a1]) J2start = max(0, M2start - M_a[a2]) M1stop = J1start + m1stop - m1start J2stop = J2start + m2stop - m2start dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype) dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype) disp_o = get_displacements(a1, a2, phicutoff_a[a1] + phicutoff_a[a2]) for disp in disp_o: disp.evaluate_overlap(T_expansion, dTdR_qvmm) disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm) for u, kpt in enumerate(self.kpt_u): rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop] ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop] Fkin_v = 2.0 * ( dTdR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] * rhoT_mm[np.newaxis]).real.sum(-1).sum(-1) Ftheta_v = 2.0 * 
(dThetadR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] * ET_mm[np.newaxis]).real.sum(-1).sum(-1) Fkin2_av[a1] += Fkin_v Fkin2_av[a2] -= Fkin_v Ftheta2_av[a1] -= Ftheta_v Ftheta2_av[a2] += Ftheta_v Fkin_av = Fkin2_av Ftheta_av = Ftheta2_av self.timer.stop('Not so complicated loop') dHP_and_dSP_aauim = {} a2values = {} for (a2, a3) in atompairs: if not a3 in a2values: a2values[a3] = [] a2values[a3].append(a2) Fatom_av = np.zeros_like(F_av) Frho_av = np.zeros_like(F_av) self.timer.start('Complicated loop') for a1, a3 in atompairs: if a1 == a3: # Functions reside on same atom, so their overlap # does not change when atom is displaced continue m1start = M_a[a1] - M1start if m1start >= blocksize1: continue P_expansion = P_expansions.get(a1, a3) nm1 = P_expansion.shape[0] m1stop = min(m1start + nm1, m1max) if m1stop <= 0: continue m1start = max(m1start, 0) J1start = max(0, M1start - M_a[a1]) J1stop = J1start + m1stop - m1start disp_o = get_displacements(a1, a3, phicutoff_a[a1] + pcutoff_a[a3]) if len(disp_o) == 0: continue dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype) for disp in disp_o: disp.evaluate_overlap(P_expansion, dPdR_qvmi) dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy() for a2 in a2values[a3]: m2start = M_a[a2] - M2start if m2start >= blocksize2: continue P_expansion2 = P_expansions.get(a2, a3) nm2 = P_expansion2.shape[0] m2stop = min(m2start + nm2, m2max) if m2stop <= 0: continue disp_o = get_displacements(a2, a3, phicutoff_a[a2] + pcutoff_a[a3]) if len(disp_o) == 0: continue m2start = max(m2start, 0) J2start = max(0, M2start - M_a[a2]) J2stop = J2start + m2stop - m2start if (a2, a3) in dHP_and_dSP_aauim: dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)] else: P_qmi = P_expansion2.zeros((nq, ), dtype=dtype) for disp in disp_o: disp.evaluate_direct(P_expansion2, P_qmi) P_qmi = P_qmi[:, J2start:J2stop].copy() dH_sp = alldH_asp[a3] dS_ii = self.setups[a3].dO_ii dHP_uim = [] dSP_uim = [] for u, kpt in enumerate(self.kpt_u): dH_ii = unpack(dH_sp[kpt.s]) 
dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj() # XXX only need nq of these dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj() dHP_uim.append(dHP_im) dSP_uim.append(dSP_im) dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim for u, kpt in enumerate(self.kpt_u): rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop] ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop] dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u]) dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u]) Fatom_c = 2.0 * (dPdRdHP_vmm * rhoT_mm).real.sum(-1).sum(-1) Frho_c = 2.0 * (dPdRdSP_vmm * ET_mm).real.sum(-1).sum(-1) Fatom_av[a1] += Fatom_c Fatom_av[a3] -= Fatom_c Frho_av[a1] -= Frho_c Frho_av[a3] += Frho_c self.timer.stop('Complicated loop') if not isblacs: # Potential contribution # # ----- / d Phi (r) # a \ | mu ~ # F += -2 Re ) | ---------- v (r) Phi (r) dr rho # / | d R nu nu mu # ----- / a # mu in a; nu # self.timer.start('Potential') Fpot_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): vt_G = hamiltonian.vt_sG[kpt.s] Fpot_av += bfs.calculate_force_contribution( vt_G, rhoT_uMM[u], kpt.q) self.timer.stop('Potential') # Density matrix contribution from PAW correction # # ----- ----- # a \ a \ b # F += 2 Re ) Z E - 2 Re ) Z E # / mu nu nu mu / mu nu nu mu # ----- ----- # mu nu b; mu in a; nu # # with # b* # ----- dP # b \ i mu b b # Z = ) -------- dS P # mu nu / dR ij j nu # ----- b mu # ij # self.timer.start('Paw correction') Frho_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): work_MM = np.zeros((mynao, nao), dtype) ZE_MM = None for b in my_atom_indices: setup = self.setups[b] dO_ii = np.asarray(setup.dO_ii, dtype) dOP_iM = np.zeros((setup.ni, nao), dtype) gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c') for v in range(3): gemm(1.0, dOP_iM, dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop], 0.0, work_MM, 'n') ZE_MM = (work_MM * ET_uMM[u]).real for a, M1, M2 in slices(): dE = 2 * ZE_MM[M1:M2].sum() Frho_av[a, v] -= dE # the "b; mu in a; nu" term Frho_av[b, v] += dE # the "mu nu" term 
del work_MM, ZE_MM self.timer.stop('Paw correction') # Atomic density contribution # ----- ----- # a \ a \ b # F += -2 Re ) A rho + 2 Re ) A rho # / mu nu nu mu / mu nu nu mu # ----- ----- # mu nu b; mu in a; nu # # b* # ----- d P # b \ i mu b b # A = ) ------- dH P # mu nu / d R ij j nu # ----- b mu # ij # self.timer.start('Atomic Hamiltonian force') Fatom_av = np.zeros_like(F_av) for u, kpt in enumerate(self.kpt_u): for b in my_atom_indices: H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]), dtype) HP_iM = gemmdot( H_ii, np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj())) for v in range(3): dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop] ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real for a, M1, M2 in slices(): dE = 2 * ArhoT_MM[M1:M2].sum() Fatom_av[a, v] += dE # the "b; mu in a; nu" term Fatom_av[b, v] -= dE # the "mu nu" term self.timer.stop('Atomic Hamiltonian force') F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av self.timer.start('Wait for sum') ksl.orbital_comm.sum(F_av) if self.bd.comm.rank == 0: self.kd.comm.sum(F_av, 0) self.timer.stop('Wait for sum') self.timer.stop('LCAO forces')