def __init__(self, sl_lrtddft, nrows, lr_comms):
    """Create the BLACS grids, descriptors and redistributors.

    ``sl_lrtddft`` is unpacked into ``(mprocs, nprocs, block_size)``,
    ``nrows`` is used for both global dimensions of the descriptors and
    ``lr_comms`` must provide ``parent_comm``, ``dd_comm`` and ``eh_comm``.
    """
    self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)
    self.lr_comms = lr_comms

    parent_comm = self.lr_comms.parent_comm

    # Original grid, i.e. how the matrix is stored
    self.matrix_grid = BlacsGrid(parent_comm,
                                 self.lr_comms.dd_comm.size,
                                 self.lr_comms.eh_comm.size)

    # Diagonalization grid
    self.diag_grid = BlacsGrid(parent_comm, self.mprocs, self.nprocs)

    # -----------------------------------------------------------------
    # For SCALAPACK we need the TRANSPOSED MATRIX (and vector), hence
    # the (columns, rows) argument order below.
    n_matrix_rows = nrows
    n_matrix_cols = nrows
    row_block = 1
    col_block = 1
    self.matrix_descr = self.matrix_grid.new_descriptor(
        n_matrix_cols, n_matrix_rows, col_block, row_block)

    diag_block = self.block_size
    self.diag_descr = self.diag_grid.new_descriptor(
        n_matrix_cols, n_matrix_rows, diag_block, diag_block)

    # Storage layout -> diagonalization layout, and back again
    self.diag_in_redist = Redistributor(parent_comm,
                                        self.matrix_descr,
                                        self.diag_descr)
    self.diag_out_redist = Redistributor(parent_comm,
                                         self.diag_descr,
                                         self.matrix_descr)
def scalapack_diagonalize(self, H_sS):
    """Diagonalize the distributed matrix H_sS with ScaLAPACK.

    H_sS is redistributed from a (size x 1) grid with row blocks of
    ``self.nS_local`` to a (size // 2 x 2) grid with 32 x 32 blocks,
    diagonalized with ``diagonalize_dc`` and the eigenvectors are moved
    back to the input layout.

    Returns the eigenvalue array and the complex conjugate of the
    redistributed eigenvector array.
    """
    block = 32
    dim = self.nS

    row_grid = BlacsGrid(world, size, 1)
    square_grid = BlacsGrid(world, size // 2, 2)
    row_desc = row_grid.new_descriptor(dim, dim, self.nS_local, dim)
    square_desc = square_grid.new_descriptor(dim, dim, block, block)

    # Row layout -> 2D block layout
    A_ss = square_desc.empty(dtype=H_sS.dtype)
    Redistributor(world, row_desc, square_desc).redistribute(H_sS, A_ss)

    # Diagonalize
    v_ss = square_desc.zeros(dtype=A_ss.dtype)
    w_S = np.zeros(dim, dtype=float)
    square_desc.diagonalize_dc(A_ss, v_ss, w_S, 'L')

    # 2D block layout -> row layout for the eigenvectors
    v_sS = np.zeros_like(H_sS)
    Redistributor(world, square_desc, row_desc).redistribute(v_ss, v_sS)

    return w_S, v_sS.conj()
def __init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
             blocksize, nao, timer=nulltimer):
    """Set up the BLACS descriptors and redistributors for nao orbitals.

    The common grid set-up is delegated to ``BlacsLayouts.__init__``;
    this constructor adds the column layouts and the fully blocked layout
    together with the redistributors between them.
    """
    BlacsLayouts.__init__(self, gd, bd, block_comm, dtype,
                          mcpus, ncpus, blocksize, timer)
    n_total_bands = bd.nbands
    self.blocksize = blocksize
    self.mynbands = bd.mynbands
    self.orbital_comm = self.bd.comm
    # Ceiling division: basis functions per rank of the orbital communicator
    self.naoblocksize = -((-nao) // self.orbital_comm.size)
    self.nao = nao

    n_local_bands = self.mynbands
    ao_block = self.naoblocksize

    # Range of basis functions for BLACS distribution of matrices:
    self.Mmax = nao
    self.Mstart = bd.comm.rank * ao_block
    self.Mstop = min(self.Mstart + ao_block, self.Mmax)
    self.mynao = self.Mstop - self.Mstart

    # Column layout for one matrix per band rank:
    self.columngrid = BlacsGrid(bd.comm, bd.comm.size, 1)
    self.mMdescriptor = self.columngrid.new_descriptor(
        nao, nao, ao_block, nao)
    self.nMdescriptor = self.columngrid.new_descriptor(
        n_total_bands, nao, n_local_bands, nao)

    # Column layout for one matrix in total (only on grid masters):
    self.single_column_grid = BlacsGrid(self.column_comm, bd.comm.size, 1)
    self.mM_unique_descriptor = self.single_column_grid.new_descriptor(
        nao, nao, ao_block, nao)

    # nM_unique_descriptor is meant to hold the coefficients after
    # diagonalization.  BLACS requires it to be nao-by-nao, but
    # we only fill meaningful data into the first nbands columns.
    #
    # The array will then be trimmed and broadcast across
    # the grid descriptor's communicator.
    self.nM_unique_descriptor = self.single_column_grid.new_descriptor(
        n_total_bands, nao, n_local_bands, nao)

    # Fully blocked grid for diagonalization with many CPUs:
    self.mmdescriptor = self.blockgrid.new_descriptor(
        nao, nao, blocksize, blocksize)

    # Column layout -> blocked layout -> coefficient layout
    self.mM2mm = Redistributor(self.block_comm,
                               self.mM_unique_descriptor,
                               self.mmdescriptor)
    self.mm2nM = Redistributor(self.block_comm,
                               self.mmdescriptor,
                               self.nM_unique_descriptor)
def redistribute_H(self, H_sS):
    """Return H_sS redistributed from a row layout to a column layout.

    The input is described on a (size x 1) grid with row blocks of
    ``self.nS_local``; the result lives on a (1 x size) grid with column
    blocks of ``self.nS_local``.
    """
    dim = self.nS
    local = self.nS_local

    row_grid = BlacsGrid(world, size, 1)
    col_grid = BlacsGrid(world, 1, size)
    row_desc = row_grid.new_descriptor(dim, dim, local, dim)
    col_desc = col_grid.new_descriptor(dim, dim, dim, local)

    H_Ss = col_desc.empty(dtype=H_sS.dtype)
    Redistributor(world, row_desc, col_desc).redistribute(H_sS, H_Ss)
    return H_Ss
def diagonalize(self):
    """Diagonalize the Hamiltonian stored in ``self.H_sS``.

    The t and T represent local and global eigenstates indices
    respectively.

    Three code paths are used:

    * ``not self.td``: the matrix is collected with ``collect_A_SS``,
      the rows/columns listed in ``self.excludef_S`` are deleted and
      ``numpy.linalg.eig`` is run on rank 0; ``self.w_T`` is broadcast,
      ``self.v_ST`` is only set on rank 0.
    * ``self.td`` and a single process: LAPACK ``diagonalize``.
    * ``self.td`` and several processes: ScaLAPACK ``diagonalize_dc``.

    With ``self.td`` and ``self.write_v`` set, the eigenvectors are
    written through ``self.par_save``.
    """
    print('Diagonalizing Hamiltonian', file=self.fd)

    if not self.td:
        # Non-Hermitian matrix can only use linalg.eig
        print(' Using numpy.linalg.eig...', file=self.fd)
        print(' Eliminated %s pair orbitals' % len(self.excludef_S),
              file=self.fd)

        self.H_SS = self.collect_A_SS(self.H_sS)
        # Allocated on every rank so the broadcast below has a buffer
        self.w_T = np.zeros(self.nS - len(self.excludef_S), complex)
        if world.rank == 0:
            self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=0)
            self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=1)
            self.w_T, self.v_ST = np.linalg.eig(self.H_SS)
        world.broadcast(self.w_T, 0)
        self.df_S = np.delete(self.df_S, self.excludef_S)
        self.rhoG0_S = np.delete(self.rhoG0_S, self.excludef_S)
    elif world.size == 1:
        # Here the eigenvectors are returned as complex conjugated rows
        print(' Using lapack...', file=self.fd)
        from gpaw.utilities.lapack import diagonalize
        self.w_T = np.zeros(self.nS)
        diagonalize(self.H_sS, self.w_T)
        self.v_St = self.H_sS.conj().T
    else:
        # Here the eigenvectors are returned as complex conjugated rows
        print(' Using scalapack...', file=self.fd)
        nS = self.nS
        # Row block size of the input layout (ceiling division over ranks)
        ns = -(-self.kd.nbzkpts // world.size) * (
            self.nv * self.nc * self.spins * (self.spinors + 1)**2)
        grid = BlacsGrid(world, world.size, 1)
        desc = grid.new_descriptor(nS, nS, ns, nS)
        desc2 = grid.new_descriptor(nS, nS, 2, 2)
        H_tmp = desc2.zeros(dtype=complex)
        r = Redistributor(world, desc, desc2)
        r.redistribute(self.H_sS, H_tmp)

        self.w_T = np.empty(nS)
        v_tmp = desc2.empty(dtype=complex)
        desc2.diagonalize_dc(H_tmp, v_tmp, self.w_T)

        # Move the eigenvectors back to the input layout
        r = Redistributor(grid.comm, desc2, desc)
        self.v_St = desc.zeros(dtype=complex)
        r.redistribute(v_tmp, self.v_St)
        self.v_St = self.v_St.conj().T

    if self.write_v and self.td:
        # Cannot use par_save without td
        self.par_save('v_TS.ulm', 'v_TS', self.v_St.T)
def redistribute(self, in_wGG, out_x=None):
    """Redistribute array.

    Switch between two kinds of parallel distributions:

    1) parallel over G-vectors (second dimension of in_wGG)
    2) parallel over frequency (first dimension of in_wGG)

    The direction is chosen from the input: when ``len(in_wGG)`` equals
    the number of frequencies the array is taken to be in layout 1 and
    is converted to layout 2, otherwise the other way round.

    Returns new array using the memory in the 1-d array out_x.  When
    out_x is None a new complex array is allocated.  With a single
    process in ``self.blockcomm`` the input is returned unchanged.
    """
    comm = self.blockcomm

    if comm.size == 1:
        return in_wGG

    nw = len(self.omega_w)
    nG = in_wGG.shape[2]
    # Ceiling divisions: local block sizes per rank
    mynw = (nw + comm.size - 1) // comm.size
    mynG = (nG + comm.size - 1) // comm.size

    bg1 = BlacsGrid(comm, comm.size, 1)
    bg2 = BlacsGrid(comm, 1, comm.size)
    md1 = BlacsDescriptor(bg1, nw, nG**2, mynw, nG**2)
    md2 = BlacsDescriptor(bg2, nw, nG**2, nw, mynG * nG)

    if len(in_wGG) == nw:
        mdin, mdout = md2, md1
    else:
        mdin, mdout = md1, md2

    r = Redistributor(comm, mdin, mdout)

    outshape = (mdout.shape[0], mdout.shape[1] // nG, nG)
    if out_x is None:
        out_wGG = np.empty(outshape, complex)
    else:
        # np.prod instead of np.product: the latter was removed in NumPy 2.0
        out_wGG = out_x[:np.prod(outshape)].reshape(outshape)

    r.redistribute(in_wGG.reshape(mdin.shape),
                   out_wGG.reshape(mdout.shape))

    return out_wGG
def test(comm, M, N, mcpus, ncpus, mb, nb):
    """Print how an M x N matrix is distributed over an mcpus x ncpus grid.

    Every rank fills its local part of the distributed matrix with its own
    rank, prints the global index slices of its blocks, and finally the
    matrix is gathered on a 1 x 1 grid and printed by rank 0.
    """
    serial_desc = BlacsGrid(comm, 1, 1).new_descriptor(M, N, M, N, 0, 0)
    A_mn = serial_desc.zeros(dtype=float)
    A_mn[:] = comm.size + 1

    dist_desc = BlacsGrid(comm, mcpus, ncpus).new_descriptor(
        M, N, mb, nb, 0, 0)  # ???
    B_mn = dist_desc.zeros(dtype=float)
    B_mn[:] = comm.rank

    if comm.rank == 0:
        title = 'Slices of global matrix indices by rank'
        print(title)
        print('-' * len(title))

    # Ranks take turns printing, separated by barriers
    for rank in range(comm.size):
        comm.barrier()
        if rank != comm.rank:
            continue
        print('Rank %d:' % rank)
        previous_row = -1
        for row0, row1, col0, col1, block in dist_desc.my_blocks(B_mn):
            # Start a new output line whenever the row offset advances
            if row0 > previous_row >= 0:
                print()
            print('[%3d:%3d, %3d:%3d]' % (row0, row1, col0, col1), end=' ')
            previous_row = row0
            assert (block == comm.rank).all()
        print()
        print()
    comm.barrier()

    Redistributor(comm, dist_desc, serial_desc).redistribute(B_mn, A_mn)

    if comm.rank == 0:
        title = 'Rank where each element of the global matrix is stored'
        print(title)
        print('-' * len(title))
        print(A_mn)
def __init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
             blocksize, buffer_size=None, timer=nulltimer):
    """Set up the BLACS descriptors and redistributors for band matrices.

    The common grid set-up is delegated to ``BlacsLayouts.__init__``;
    this constructor adds 1D column, 2D block and 1D row layouts for
    nbands x nbands matrices and the redistributors between them.
    """
    BlacsLayouts.__init__(self, gd, bd, block_comm, dtype,
                          mcpus, ncpus, blocksize, timer)
    self.buffer_size = buffer_size
    self.mynbands = bd.mynbands
    self.blocksize = blocksize

    n_all = bd.nbands
    n_mine = self.mynbands
    n_band_ranks = bd.comm.size

    # 1D layout - columns
    self.columngrid = BlacsGrid(self.column_comm, 1, n_band_ranks)
    self.Nndescriptor = self.columngrid.new_descriptor(
        n_all, n_all, n_all, n_mine)

    # 2D layout
    self.nndescriptor = self.blockgrid.new_descriptor(
        n_all, n_all, blocksize, blocksize)

    # 1D layout - rows
    self.rowgrid = BlacsGrid(self.column_comm, n_band_ranks, 1)
    self.nNdescriptor = self.rowgrid.new_descriptor(
        n_all, n_all, n_mine, n_all)

    # XXX Redistributing only the filled-out ('L') half would be faster
    # for Hermitian matrices, but the full matrix is redistributed here.
    self.Nn2nn = Redistributor(self.block_comm,
                               self.Nndescriptor,
                               self.nndescriptor)

    # Resulting matrix will be used in dgemm which is symmetry oblivious
    self.nn2nN = Redistributor(self.block_comm,
                               self.nndescriptor,
                               self.nNdescriptor)
def distribute_frequencies(self, chi0_wGG):
    """Distribute frequencies to all cores.

    With a single process in ``self.world`` the input is returned as is.
    With a single process in ``self.blockcomm`` each rank returns a copy
    of its own frequency slice.  Otherwise the array is redistributed
    with BLACS from a column layout on ``self.blockcomm`` to a row
    (frequency) layout on ``self.world``.
    """
    world = self.world
    comm = self.blockcomm

    if world.size == 1:
        return chi0_wGG

    nw = len(self.omega_w)
    nG = chi0_wGG.shape[2]

    # Ceiling divisions: frequencies per world rank, G-vectors per block rank
    mynw = -(-nw // world.size)
    mynG = -(-nG // comm.size)

    # Frequency window [wa, wb) owned by this rank
    wa = min(world.rank * mynw, nw)
    wb = min(wa + mynw, nw)

    if self.blockcomm.size == 1:
        return chi0_wGG[wa:wb].copy()

    if self.kncomm.rank == 0:
        bg1 = BlacsGrid(comm, 1, comm.size)
        in_wGG = chi0_wGG.reshape((nw, -1))
    else:
        # Ranks that are not kncomm masters contribute no data
        bg1 = DryRunBlacsGrid(mpi.serial_comm, 1, 1)
        in_wGG = np.zeros((0, 0), complex)
    md1 = BlacsDescriptor(bg1, nw, nG**2, nw, mynG * nG)

    bg2 = BlacsGrid(world, world.size, 1)
    md2 = BlacsDescriptor(bg2, nw, nG**2, mynw, nG**2)

    r = Redistributor(world, md1, md2)
    my_count = wb - wa
    out_wGG = np.empty((my_count, nG, nG), complex)
    r.redistribute(in_wGG, out_wGG.reshape((my_count, nG**2)))

    return out_wGG
def __init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
             blocksize, timer=nulltimer):
    """Initialize the base layout and create the mcpus x ncpus BLACS grid.

    ``gd``, ``bd``, ``block_comm``, ``dtype`` and ``timer`` are forwarded
    to ``KohnShamLayouts.__init__``.
    """
    KohnShamLayouts.__init__(self, gd, bd, block_comm, dtype, timer)
    self.blocksize = blocksize

    # WARNING: Do not create the BlacsGrid on a communicator which does not
    # contain block_comm.rank = 0. This will break BlacsBandLayouts which
    # assume eps_M will be broadcast over block_comm.
    self.blockgrid = BlacsGrid(self.block_comm, mcpus, ncpus)
def parallel_eigh(matrixfile, blacsgrid=(4, 2), blocksize=64):
    """Diagonalize matrix in parallel.

    The square matrix is loaded from ``matrixfile`` with ``np.load`` on
    the MASTER rank, distributed in ``blocksize`` x ``blocksize`` blocks
    over a BLACS grid of shape ``blacsgrid`` and diagonalized with
    ``diagonalize_ex``.

    Returns ``(eigenvalues, eigenvectors)`` on MASTER and ``(None, None)``
    on all other ranks.
    """
    assert np.prod(blacsgrid) == world.size
    grid = BlacsGrid(world, *blacsgrid)
    on_master = world.rank == MASTER

    if on_master:
        H_MM = np.load(matrixfile)
        assert H_MM.ndim == 2
        assert H_MM.shape[0] == H_MM.shape[1]
        dim = len(H_MM)
    else:
        dim = 0
    # Only MASTER contributes a non-zero value, so the sum distributes
    # the matrix dimension to all nodes
    dim = world.sum(dim)

    # Descriptor for the individual blocks
    blocked = grid.new_descriptor(dim, dim, blocksize, blocksize)
    # Descriptor for the global array on MASTER
    gathered = grid.new_descriptor(dim, dim, dim, dim)

    # Make some dummy array on all the other ranks
    if not on_master:
        H_MM = gathered.zeros()
    assert gathered.check(H_MM)

    # Distribute the global array to smaller blocks
    H_mm = blocked.empty()
    Redistributor(world, gathered, blocked).redistribute(H_MM, H_mm)

    # Eigenvalues and eigenvectors
    eps_M = np.empty(dim)
    C_mm = blocked.empty()
    blocked.diagonalize_ex(H_mm, C_mm, eps_M)

    # Collect eigenvectors on MASTER
    C_MM = gathered.empty()
    Redistributor(world, blocked, gathered).redistribute(C_mm, C_MM)

    if on_master:
        return eps_M, C_MM
    return None, None
def main(nbands=1000, mprocs=2, mb=64):
    """Time ScaLAPACK ``diagonalize_dc`` against LAPACK ``diagonalize``.

    A matrix with ``beta`` on the diagonal and ``alpha`` in the lower
    triangle is diagonalized with both back ends; the timings and the
    largest eigenvalue difference are printed on rank 0, where the
    difference is also checked against ``tol``.
    """
    # Set-up BlacsGrid
    grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    nndesc = grid.new_descriptor(nbands, nbands, mb, mb)
    # Outside the BlacsGrid these are size zero
    H_nn = nndesc.empty(dtype=float)
    C_nn = nndesc.empty(dtype=float)
    # Replicated array on all MPI tasks
    eps_N = np.empty((nbands), dtype=float)

    # Fill ScaLAPACK array
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(nndesc, H_nn, alpha, beta, uplo)
    scalapack_zero(nndesc, H_nn, switch_uplo[uplo])

    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L') is an
    # equivalent interface; the descriptor method is the recommended one.
    start = time()
    nndesc.diagonalize_dc(H_nn.copy(), C_nn, eps_N)
    stop = time()
    world.broadcast(eps_N, 0)  # all MPI tasks now have eps_N
    world.barrier()  # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', stop - start)

    # Create replicated NumPy array
    H0 = (beta * np.eye(nbands, dtype=float) +
          alpha * np.tril(np.ones((nbands, nbands)), -1))
    E0 = np.empty((nbands), dtype=float)

    start = time()
    diagonalize(H0, E0)
    stop = time()

    if rank == 0:
        print('LAPACK diagonalize', stop - start)

    delta = abs(E0 - eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
def distribute_MM(wfs, a_MM):
    """Distribute the full matrix a_MM onto the BLACS layout of wfs.ksl.

    When ``wfs.ksl.using_blacs`` is false, a_MM is returned unchanged.
    Otherwise the matrix is redistributed from a 1 x 1 grid on
    ``ksl.block_comm`` to ``ksl.mmdescriptor``; on ranks other than 0 the
    input is replaced by an empty array of the serial descriptor.
    """
    ksl = wfs.ksl
    if not ksl.using_blacs:
        return a_MM

    dtype = a_MM.dtype
    comm = ksl.block_comm
    dim = ksl.nao

    serial_desc = BlacsGrid(comm, 1, 1).new_descriptor(dim, dim, dim, dim)
    to_blocked = Redistributor(comm, serial_desc, ksl.mmdescriptor)

    if comm.rank != 0:
        a_MM = serial_desc.empty(dtype=dtype)

    a_mm = ksl.mmdescriptor.empty(dtype=dtype)
    to_blocked.redistribute(a_MM, a_mm)
    return a_mm
def scal_diagonalize(A, nodes='master'):
    """Diagonalize matrix A (size N*N) with scalapack.

    Usage: eps, B = scal_diagonalize(A)

    eps and B are the eigenvalues and eigenvectors.

    nodes = 'master': eigenvectors only available on master node
    nodes = 'all': eigenvectors broadcast to all nodes

    Any other value of ``nodes`` falls through and returns None.

    Raises AssertionError when A is not square or not exactly Hermitian.
    """
    # Make sure A is N*N, and hermitian (single vectorized comparison
    # instead of an element-by-element Python loop)
    N = A.shape[0]
    assert A.shape[0] == A.shape[1]
    assert (A == A.conj().T).all()

    # Create blacs descriptors: one block holding the whole matrix and a
    # mb x mb blocked layout for the diagonalization
    mb = 64
    g = BlacsGrid(world, 2, size // 2)
    nndesc1 = g.new_descriptor(N, N, N, N)
    nndesc2 = g.new_descriptor(N, N, mb, mb)

    # Distribute A to blacs grid A_
    if rank != 0:
        A = nndesc1.zeros(dtype=A.dtype)
    A_ = nndesc2.empty(dtype=A.dtype)
    redistributor = Redistributor(world, nndesc1, nndesc2)
    redistributor.redistribute(A, A_)

    # Diagonalize.  Eigenvalues of a Hermitian matrix are real, so the
    # eigenvalue buffer is always float, also for complex A.
    B_ = nndesc2.zeros(dtype=A.dtype)
    eps = np.zeros(N, dtype=float)
    nndesc2.diagonalize_dc(A_, B_, eps, 'L')

    # Distribute the eigenvectors to master
    B = np.zeros_like(A)
    redistributor = Redistributor(world, nndesc2, nndesc1)
    redistributor.redistribute(B_, B)

    if nodes == 'master':
        return eps, B
    elif nodes == 'all':
        if rank != 0:
            # The receive buffer must match the dtype of the master's array
            B = np.zeros((N, N), dtype=A.dtype)
        world.broadcast(B, 0)
        return eps, B
def collect_wuMM(wfs, a_wuMM, w, s, k):
    """Collect the matrix a_wuMM[w][u] for spin s and k-point k.

    Based on gpaw/wavefunctions/base.py:
    WaveFunctions.collect_auxiliary().

    The rank owning the k-point gathers the matrix within its BLACS grid
    (if BLACS is used); its block_comm master either returns the matrix
    (when it is rank 0 of the k-point communicator) or sends it to rank 0
    there, which receives and returns it.  All other ranks return None.
    """
    dtype = a_wuMM[0][0].dtype

    ksl = wfs.ksl
    dim = ksl.nao
    kpt_rank, u = wfs.kd.get_rank_and_index(s, k)
    ksl_comm = ksl.block_comm
    tag = 2017

    if wfs.kd.comm.rank != kpt_rank:
        # Not the owner: only the receiving side of the transfer can apply
        if ksl_comm.rank == 0 and kpt_rank != 0:
            assert wfs.world.rank == 0
            received = np.empty((dim, dim), dtype=dtype)
            wfs.kd.comm.receive(received, kpt_rank, tag)
            return received
        return None

    a_MM = a_wuMM[w][u]

    # Collect within blacs grid
    if ksl.using_blacs:
        a_mm = a_MM
        serial_desc = BlacsGrid(ksl_comm, 1, 1).new_descriptor(
            dim, dim, dim, dim)
        gather = Redistributor(ksl_comm, ksl.mmdescriptor, serial_desc)
        a_MM = serial_desc.empty(dtype=dtype)
        gather.redistribute(a_mm, a_MM)

    if ksl_comm.rank != 0:
        return None

    # KSL master sends a_MM to the global master
    if kpt_rank == 0:
        # I have it already
        assert wfs.world.rank == 0
        return a_MM

    wfs.kd.comm.send(a_MM, 0, tag)
    return None
# in trunk/gpaw/blacs.py for some discussions of # these idiosyncracies. import numpy as np from gpaw.blacs import BlacsGrid, parallelprint from gpaw.mpi import world from gpaw.utilities.scalapack import pblas_simple_gemm gen = np.random.RandomState(42) # simulate state-parallelization=2 and # domain-decomposition.prod=32 B = 2 D = 32 mb = 32 grid = BlacsGrid(world, B, D) nbands = 500 nG = 80**3 nGdesc = grid.new_descriptor(nbands, nG, nbands // B, nG // D) nndesc = grid.new_descriptor(nbands, nbands, mb, mb) psit_nG = gen.rand(*nGdesc.shape) A_nn = gen.rand(*nndesc.shape) assert nGdesc.check(psit_nG) assert nndesc.check(A_nn) parallelprint(world, (A_nn.shape, nndesc.shape, nndesc.lld))
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare the pblas_simple_* wrappers with serial reference results.

    Random matrices are created with single-block ("glob") descriptors,
    redistributed to descriptors with various block sizes ("dist"),
    multiplied with the distributed routines and collected again.  The
    largest deviations from the reference results computed on rank 0 are
    printed and checked against ``tol`` with ``equal``.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Factor of the imaginary part added to the random matrices
    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)
    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 hermitean
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use upper diagonal: overwrite it with a sentinel value
        # after the reference product above has been computed
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)

    # Scatter the input matrices to the distributed layouts
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    # Distributed counterparts of the reference operations above
    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
        print('hemm_err', hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    # The other ranks contribute 0.0, so after the sums every rank holds
    # the errors computed on rank 0 and all ranks fail together.
    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
def calculate_rkernel(self):
    """Calculate the kernel on the real-space grid and write it to file.

    For every lattice vector of the sampled unit cells and every local
    grid point r', the exchange and Hartree-like kernels are evaluated
    as functions of r and Fourier transformed with respect to r.  The
    result is then redistributed from grid-point parallelization to
    plane-wave parallelization, Fourier transformed with respect to r'
    and written by rank 0 to one file
    ``'fhxc_%s_%s_%s_%s.gpw' % (tag, xc, ecut, iq)`` per q-point.
    """
    gd = self.gd
    ng_c = gd.N_c
    cell_cv = gd.cell_cv
    icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
    vol = np.linalg.det(cell_cv)

    ns = self.calc.wfs.nspins
    n_g = self.n_g  # density on rough grid

    fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
    qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
    flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
    Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

    ng = np.prod(ng_c)  # number of grid points
    r_vg = gd.get_grid_point_coordinates()
    rx_g = r_vg[0].flatten()
    ry_g = r_vg[1].flatten()
    rz_g = r_vg[2].flatten()

    prnt(' %d grid points and %d plane waves at the Gamma point' %
         (ng, self.pd.ngmax), file=self.fd)

    # Unit cells: lattice vectors and their weights
    R_Rv = []
    weight_R = []
    nR_v = self.unit_cells
    nR = np.prod(nR_v)
    for i in range(-nR_v[0] + 1, nR_v[0]):
        for j in range(-nR_v[1] + 1, nR_v[1]):
            for h in range(-nR_v[2] + 1, nR_v[2]):
                R_Rv.append(i * cell_cv[0] +
                            j * cell_cv[1] +
                            h * cell_cv[2])
                weight_R.append((nR_v[0] - abs(i)) *
                                (nR_v[1] - abs(j)) *
                                (nR_v[2] - abs(h)) / float(nR))
    if nR > 1:
        # with more than one unit cell only the exchange kernel is
        # calculated on the grid. The bare Coulomb kernel is added
        # in PW basis and Vlocal_g only the exchange part
        dv = self.calc.density.gd.dv
        gc = (3 * dv / 4 / np.pi)**(1 / 3.)
        Vlocal_g -= 2 * np.pi * gc**2 / dv
    prnt(' Lattice point sampling: ' +
         '(%s x %s x %s)^2 ' % (nR_v[0], nR_v[1], nR_v[2]) +
         ' Reduced to %s lattice points' % len(R_Rv), file=self.fd)

    # Grid points handled by this rank (ceiling division over all ranks)
    l_g_size = -(-ng // mpi.world.size)
    l_g_range = range(mpi.world.rank * l_g_size,
                      min((mpi.world.rank + 1) * l_g_size, ng))

    fhxc_qsGr = {}
    for iq in range(len(self.ibzq_qc)):
        fhxc_qsGr[iq] = np.zeros(
            (ns, len(self.pd.G2_qG[iq]), len(l_g_range)), dtype=complex)

    # Silence invalid/divide warnings in the loops below; the previous
    # settings are restored after the loops.
    inv_error = np.seterr()
    np.seterr(invalid='ignore')
    np.seterr(divide='ignore')

    t0 = time()
    # Loop over Lattice points
    for i, R_v in enumerate(R_Rv):
        # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
        if i == 1:
            prnt(' Finished 1 cell in %s seconds' % int(time() - t0) +
                 ' - estimated %s seconds left' %
                 int((len(R_Rv) - 1) * (time() - t0)),
                 file=self.fd)
            self.fd.flush()
        if len(R_Rv) > 5:
            # Integer division so that the modulus is an integer and the
            # progress message fires about five times during the loop
            if (i + 1) % (len(R_Rv) // 5 + 1) == 0:
                prnt(' Finished %s cells in %s seconds'
                     % (i, int(time() - t0)) +
                     ' - estimated %s seconds left' %
                     int((len(R_Rv) - i) * (time() - t0) / i),
                     file=self.fd)
                self.fd.flush()
        for g in l_g_range:
            rx = rx_g[g] + R_v[0]
            ry = ry_g[g] + R_v[1]
            rz = rz_g[g] + R_v[2]

            # |r-r'-R_i|
            rr = ((r_vg[0] - rx)**2 +
                  (r_vg[1] - ry)**2 +
                  (r_vg[2] - rz)**2)**0.5

            n_av = (n_g + n_g.flatten()[g]) / 2.
            fx_g = ns * self.get_fxc_g(n_av, index=g)
            qc_g = (-4 * np.pi * ns / fx_g)**0.5
            x = qc_g * rr
            osc_x = np.sin(x) - x * np.cos(x)
            f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
            if nR > 1:
                # include only exchange part of the kernel here
                V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
            else:
                # include the full kernel (also hartree part)
                V_rr = (sici(x)[0] * 2 / np.pi) / rr

            # Terms with r = r'
            if (np.abs(R_v) < 0.001).all():
                tmp_flat = f_rr.flatten()
                tmp_flat[g] = flocal_g.flatten()[g]
                f_rr = tmp_flat.reshape(ng_c)
                tmp_flat = V_rr.flatten()
                tmp_flat[g] = Vlocal_g.flatten()[g]
                V_rr = tmp_flat.reshape(ng_c)
                del tmp_flat

            f_rr[np.where(n_av < self.density_cut)] = 0.0
            V_rr[np.where(n_av < self.density_cut)] = 0.0

            f_rr *= weight_R[i]
            V_rr *= weight_R[i]

            # r-r'-R_i
            r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

            # Fourier transform of r
            for iq, q in enumerate(self.ibzq_qc):
                q_v = np.dot(q, icell_cv)
                e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                if ns == 2:
                    f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

    mpi.world.barrier()

    np.seterr(**inv_error)

    for iq, q in enumerate(self.ibzq_qc):
        npw = len(self.pd.G2_qG[iq])
        fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
        l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
        l_pw_range = range(mpi.world.rank * l_pw_size,
                           min((mpi.world.rank + 1) * l_pw_size, npw))

        if mpi.world.size > 1:
            # redistribute grid and plane waves in fhxc_qsGr[iq]
            bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
            bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
            # Block sizes must be integers equal to l_g_size / l_pw_size,
            # hence floor division (true division would give floats)
            bd1 = bg1.new_descriptor(npw, ng, npw,
                                     -(-ng // mpi.world.size))
            bd2 = bg2.new_descriptor(npw, ng,
                                     -(-npw // mpi.world.size), ng)

            fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
            if ns == 2:
                Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

            r = Redistributor(bg1.comm, bd1, bd2)
            r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
            if ns == 2:
                r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
        else:
            fhxc_Glr = fhxc_qsGr[iq][0]
            if ns == 2:
                Koff_Glr = fhxc_qsGr[iq][1]

        # Fourier transform of r'
        for iG in range(len(l_pw_range)):
            f_g = fhxc_Glr[iG].reshape(ng_c)
            f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
            fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
            if ns == 2:
                v_g = Koff_Glr[iG].reshape(ng_c)
                v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

        if ns == 2:
            # f_00 = f_11 and f_01 = f_10
            fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
            fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

        # Each rank filled only its own plane-wave rows; combine them
        mpi.world.sum(fhxc_sGsG)
        fhxc_sGsG /= vol

        if mpi.rank == 0:
            w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                       (self.tag, self.xc, self.ecut, iq))
            w.dimension('sG', ns * npw)
            w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
            if nR > 1:
                # add Hartree kernel evaluated in PW basis
                # NOTE(review): Gq2_G is not a copy, so the assignment
                # below also modifies self.pd.G2_qG[iq] - confirm that
                # this is intended.
                Gq2_G = self.pd.G2_qG[iq]
                if (q == 0).all():
                    Gq2_G[0] = 1.
                vq_G = 4 * np.pi / Gq2_G
                fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
            w.fill(fhxc_sGsG)
            w.close()
        mpi.world.barrier()
    prnt(file=self.fd)
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare ScaLAPACK drivers with serial LAPACK reference results.

    A random Hamiltonian-like and overlap-like matrix pair is built on
    rank 0, diagonalized / inverted serially, redistributed to a blocked
    layout and processed with the ``scalapack_*`` routines.  The largest
    deviations are printed on rank 0 and asserted to be below ``tol`` on
    all ranks.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Factor of the imaginary part added on rank 0 for the complex case
    if (dtype == complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    # Only the lower triangle is redistributed (uplo='L')
    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    # (the other ranks contribute 0.0, so the sums spread rank 0's errors)
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol
def __init__(self, sl_lrtddft, nrows, lr_comms):
    """Create the BLACS grids, descriptors and redistributors.

    ``sl_lrtddft`` is unpacked into ``(mprocs, nprocs, block_size)``;
    the matrix and vector descriptors use ``4 * nrows`` as dimension and
    ``lr_comms`` must provide ``parent_comm``, ``dd_comm`` and
    ``eh_comm``.
    """
    self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)
    self.lr_comms = lr_comms

    parent_comm = self.lr_comms.parent_comm
    dim = nrows * 4
    solve_block = self.block_size

    # For SCALAPACK we need the TRANSPOSED MATRIX (and vector), hence
    # the (columns, rows) argument order in the descriptors below.

    # -----------------------------------------------------------------
    # Matrix

    # Original grid, i.e. how the matrix is stored
    self.orig_matrix_grid = BlacsGrid(parent_comm,
                                      self.lr_comms.dd_comm.size,
                                      self.lr_comms.eh_comm.size)

    # Solve grid
    self.solve_matrix_grid = BlacsGrid(parent_comm,
                                       self.mprocs, self.nprocs)

    self.orig_matrix_descr = self.orig_matrix_grid.new_descriptor(
        dim, dim, 4, 4)
    self.solve_matrix_descr = self.solve_matrix_grid.new_descriptor(
        dim, dim, solve_block, solve_block)

    self.matrix_in_redist = Redistributor(parent_comm,
                                          self.orig_matrix_descr,
                                          self.solve_matrix_descr)

    # -----------------------------------------------------------------
    # Vector

    # Original grid, i.e. how the vector is stored
    self.orig_vector_grid = BlacsGrid(
        parent_comm, 1,
        (self.lr_comms.dd_comm.size * self.lr_comms.eh_comm.size))

    # The solve layout of the vector reuses the matrix solve grid
    n_rhs = 1
    self.orig_vector_descr = self.orig_vector_grid.new_descriptor(
        n_rhs, dim, 1, 4)
    self.solve_vector_descr = self.solve_matrix_grid.new_descriptor(
        n_rhs, dim, 1, solve_block)

    self.vector_in_redist = Redistributor(parent_comm,
                                          self.orig_vector_descr,
                                          self.solve_vector_descr)
    self.vector_out_redist = Redistributor(parent_comm,
                                           self.solve_vector_descr,
                                           self.orig_vector_descr)
import numpy as np
from gpaw.utilities.elpa import LibElpa
from gpaw.blacs import BlacsGrid
from gpaw.mpi import world

# Seeded generator so the random test matrix is reproducible.
rng = np.random.RandomState(87878787)

# Serial runs use a 1x1 process grid, parallel runs (size // 2) x 2.
shape = (1, 1) if world.size == 1 else (world.size // 2, 2)
bg = BlacsGrid(world, shape[0], shape[1])

M = 8
blocksize = 2

desc = bg.new_descriptor(M, M, blocksize, blocksize)
sdesc = desc.as_serial()

# Build a symmetric random matrix on the master and distribute it.
Aserial = sdesc.zeros()
if world.rank == 0:
    Aserial[:] = rng.rand(*Aserial.shape)
    Aserial += Aserial.T.copy()
A = desc.distribute_from_master(Aserial)

# Work arrays for two sets of results.
C1 = desc.zeros()
C2 = desc.zeros()
eps1 = np.zeros(M)
eps2 = np.zeros(M)

elpa = LibElpa(desc)
print(elpa)
def __init__(self, sl_lrtddft, nkq, dd_comm, eh_comm):
    """Create BLACS grids, descriptors and redistributors.

    Parameters
    ----------
    sl_lrtddft:
        Iterable that unpacks to ``(mcpus, ncpus, blocksize)``.
    nkq:
        Dimension of the square matrix that is diagonalized; the
        layouts for the linear solve use ``4 * nkq``.
    dd_comm:
        Domain-decomposition communicator.
    eh_comm:
        Electron-hole communicator; its ``parent`` is used as the
        world communicator (falling back to ``dd_comm`` when the parent
        is ``None``).
    """
    mcpus, ncpus, blocksize = tuple(sl_lrtddft)

    self.world = eh_comm.parent
    self.dd_comm = dd_comm
    if self.world is None:
        self.world = self.dd_comm

    # All the ranks within the domain communicator contain the omega
    # matrix, so construct a new communicator only on domain masters.
    eh_ranks = np.arange(eh_comm.size) * dd_comm.size
    self.eh_comm2 = self.world.new_communicator(eh_ranks)

    # -----------------------------------------------------------------
    # Layouts for the diagonalization
    self.eh_grid = BlacsGrid(self.eh_comm2, eh_comm.size, 1)
    self.eh_descr = self.eh_grid.new_descriptor(nkq, nkq, 1, nkq)
    self.diag_grid = BlacsGrid(self.world, mcpus, ncpus)
    self.diag_descr = self.diag_grid.new_descriptor(
        nkq, nkq, blocksize, blocksize)

    self.redistributor_in = Redistributor(self.world,
                                          self.eh_descr,
                                          self.diag_descr)
    self.redistributor_out = Redistributor(self.world,
                                           self.diag_descr,
                                           self.eh_descr)

    # -----------------------------------------------------------------
    # Layouts for the linear solve.
    # For SCALAPACK we need the TRANSPOSED MATRIX (and vector).
    # -----------------------------------------------------------------
    # M = rows, N = cols
    M = nkq * 4
    N = nkq * 4
    mb = 4
    nb = 4
    Nrhs = 1

    # Matrix: process grid of dd_comm.size x eh_comm.size on world.
    self.eh_grid2a = BlacsGrid(self.world, dd_comm.size, eh_comm.size)
    # Vector: process grid of 1 x (dd_comm.size * eh_comm.size) on world.
    self.eh_grid2b = BlacsGrid(self.world, 1,
                               dd_comm.size * eh_comm.size)
    self.eh_descr2a = self.eh_grid2a.new_descriptor(N, M, nb, mb)
    self.eh_descr2b = self.eh_grid2b.new_descriptor(Nrhs, N, Nrhs, nb)

    # Both solve descriptors live on the diagonalization grid.
    self.solve_descr2a = self.diag_grid.new_descriptor(
        N, M, blocksize, blocksize)
    self.solve_descr2b = self.diag_grid.new_descriptor(
        Nrhs, N, Nrhs, blocksize)

    self.redist_solve_in_2a = Redistributor(self.world,
                                            self.eh_descr2a,
                                            self.solve_descr2a)
    self.redist_solve_in_2b = Redistributor(self.world,
                                            self.eh_descr2b,
                                            self.solve_descr2b)

    self.redist_solve_out_2a = Redistributor(self.world,
                                             self.solve_descr2a,
                                             self.eh_descr2a)
    self.redist_solve_out_2b = Redistributor(self.world,
                                             self.solve_descr2b,
                                             self.eh_descr2b)
def diagonalize_full_hamiltonian(self, ham, atoms, occupations, txt,
                                 nbands=None, scalapack=None,
                                 expert=False):
    """Diagonalize the full Hamiltonian for every k-point in kpt_u.

    For each k-point the H and S matrices are built, the generalized
    eigenproblem is solved, and ``eps_n``, ``psit_nG`` and the
    projections are stored on the k-point.  Finally
    ``occupations.calculate(self)`` is called.

    Parameters
    ----------
    txt:
        File-like object that receives the progress output.
    nbands:
        Number of bands to keep.  Defaults to the largest multiple of
        the band-communicator size not exceeding ``self.pd.ngmin``.
    scalapack:
        Optional ``(nprow, npcol, blocksize)`` list/tuple; only
        consulted when the band communicator has more than one rank.
    expert:
        If true, ``iu=nbands`` is passed to the serial solver.
    """
    assert self.dtype == complex

    if nbands is not None:
        assert nbands <= self.pd.ngmin
    else:
        nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size

    iu = nbands if expert else None

    self.bd = bd = BandDescriptor(nbands, self.bd.comm)
    ncores = bd.comm.size

    write = functools.partial(print, file=txt)
    write('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
    write('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                    self.pd.ngmax))
    mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
    write('Approximate memory usage per core: {0:.3f} MB'.format(mem))

    if ncores > 1:
        if isinstance(scalapack, (list, tuple)):
            nprow, npcol, blocksize = scalapack
        else:
            # Pick the most square grid whose row count divides the
            # number of cores.
            nprow = int(round(ncores**0.5))
            while ncores % nprow != 0:
                nprow -= 1
            npcol = ncores // nprow
            blocksize = 64
        write('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
              'block-size:', blocksize)
        column_grid = BlacsGrid(bd.comm, ncores, 1)
        block_grid = BlacsGrid(bd.comm, nprow, npcol)
        scalapack = True
    else:
        nprow = npcol = 1
        scalapack = False

    self.pt.set_positions(atoms.get_scaled_positions())
    self.kpt_u[0].P_ani = None
    self.allocate_arrays_for_projections(self.pt.my_atom_indices)

    myslice = bd.get_slice()

    progress = ProgressBar(txt)
    nkpt = len(self.kpt_u)

    for u, kpt in enumerate(self.kpt_u):
        progress.update(u / nkpt)
        npw = len(self.pd.Q_qG[kpt.q])

        if scalapack:
            mynpw = -(-npw // bd.comm.size)  # ceiling division
            md = BlacsDescriptor(column_grid, npw, npw, mynpw, npw)
            md2 = BlacsDescriptor(block_grid, npw, npw,
                                  blocksize, blocksize)
        else:
            md = md2 = MatrixDescriptor(npw, npw)

        with self.timer('Build H and S'):
            H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

        if scalapack:
            # Move from the column layout to the 2D block layout.
            redist = Redistributor(bd.comm, md, md2)
            H_GG = redist.redistribute(H_GG)
            S_GG = redist.redistribute(S_GG)

        psit_nG = md2.empty(dtype=complex)
        eps_n = np.empty(npw)

        with self.timer('Diagonalize'):
            if scalapack:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
            else:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n,
                                           iu=iu)
        del H_GG, S_GG

        kpt.eps_n = eps_n[myslice].copy()

        if scalapack:
            # Back to a column layout with bd.mynbands rows per rank.
            md3 = BlacsDescriptor(column_grid, npw, npw, bd.mynbands, npw)
            redist = Redistributor(bd.comm, md2, md3)
            psit_nG = redist.redistribute(psit_nG)

        kpt.psit_nG = psit_nG[:bd.mynbands].copy()
        del psit_nG

        with self.timer('Projections'):
            self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

        kpt.f_n = None

    progress.finish()

    occupations.calculate(self)
def calculate_forces(self, hamiltonian, F_av):
    """Add the LCAO force contributions to ``F_av`` in place.

    Five contributions are computed (kinetic, potential, basis
    overlap, PAW correction and atomic Hamiltonian) and their sum is
    added to ``F_av``, which is then summed over the orbital
    communicator and, on band rank 0, over the k-point communicator.

    Two code paths exist, selected by whether ``self.ksl`` is a
    ``BlacsOrbitalLayouts`` instance:

    * non-BLACS: derivatives come from ``tci.calculate_derivative`` and
      full (mynao x nao) matrices are used;
    * BLACS: density matrices are redistributed to one block per rank
      and the derivatives are evaluated pair by pair.

    Parameters
    ----------
    hamiltonian:
        Object providing ``vt_sG`` and ``dH_asp``.
    F_av:
        Array indexed as ``[atom, cartesian component]``; incremented
        in place.
    """
    self.timer.start('LCAO forces')

    spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
    ksl = self.ksl
    nao = ksl.nao
    mynao = ksl.mynao
    nq = len(self.kd.ibzk_qc)
    dtype = self.dtype
    tci = self.tci
    gd = self.gd
    bfs = self.basis_functions

    Mstart = ksl.Mstart
    Mstop = ksl.Mstop

    from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
    isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

    if not isblacs:
        self.timer.start('TCI derivative')
        dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dPdR_aqvMi = {}
        for a in self.basis_functions.my_atom_indices:
            ni = self.setups[a].ni
            dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
        tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                 dPdR_aqvMi)
        gd.comm.sum(dThetadR_qvMM)
        gd.comm.sum(dTdR_qvMM)
        self.timer.stop('TCI derivative')

        my_atom_indices = bfs.my_atom_indices
        atom_indices = bfs.atom_indices

        def _slices(indices):
            # Yield (atom, M1, M2): the local basis-function range of
            # each atom, clipped at the start of this rank's range.
            for a in indices:
                M1 = bfs.M_a[a] - Mstart
                M2 = M1 + self.setups[a].nao
                if M2 > 0:
                    yield a, max(0, M1), M2

        def slices():
            return _slices(atom_indices)

        def my_slices():
            return _slices(my_atom_indices)

    # Energy-weighted density matrix:
    #
    #   E_{mu nu} = sum_{x z} S^-1_{mu x} H_{x z} rho_{z nu}
    #             = sum_n c*_{n mu} eps_n f_n c_{n nu}
    #
    # We use the transpose of that matrix.  The first form is used
    # if rho is given, otherwise the coefficients are used.
    self.timer.start('Initial')

    rhoT_uMM = []
    ET_uMM = []

    if not isblacs:
        if self.kpt_u[0].rho_MM is None:
            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                rhoT_MM = ksl.get_transposed_density_matrix(kpt.f_n,
                                                            kpt.C_nM)
                rhoT_uMM.append(rhoT_MM)
                ET_MM = ksl.get_transposed_density_matrix(
                    kpt.f_n * kpt.eps_n, kpt.C_nM)
                ET_uMM.append(ET_MM)

                if hasattr(kpt, 'c_on'):
                    # XXX does this work with BLACS/non-BLACS/etc.?
                    assert self.bd.comm.size == 1
                    d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                    dtype=kpt.C_nM.dtype)
                    for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                        d_nn += ne * np.outer(c_n.conj(), c_n)
                    # In-place updates: the matrices are already stored
                    # in rhoT_uMM / ET_uMM.
                    rhoT_MM += ksl.get_transposed_density_matrix_delta(
                        d_nn, kpt.C_nM)
                    ET_MM += ksl.get_transposed_density_matrix_delta(
                        d_nn * kpt.eps_n, kpt.C_nM)
            self.timer.stop('Get density matrix')
        else:
            rhoT_uMM = []
            ET_uMM = []
            for kpt in self.kpt_u:
                H_MM = self.eigensolver.calculate_hamiltonian_matrix(
                    hamiltonian, self, kpt)
                tri2full(H_MM)
                S_MM = kpt.S_MM.copy()
                tri2full(S_MM)
                ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM,
                                                      kpt.rho_MM)).T.copy()
                del S_MM, H_MM
                rhoT_MM = kpt.rho_MM.T.copy()
                rhoT_uMM.append(rhoT_MM)
                ET_uMM.append(ET_MM)
    self.timer.stop('Initial')

    if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
        from gpaw.blacs import BlacsGrid, Redistributor

        def get_density_matrix(f_n, C_nM, redistributor):
            rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                           C_nM).conj()
            rho_mm = redistributor.redistribute(rho1_mm)
            return rho_mm

        pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j])
                     for setup in self.setups]
        phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j])
                       for setup in self.setups]

        # XXX should probably use bdsize x gdsize instead
        # That would be consistent with some existing grids
        grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                         self.bd.comm.size)

        # Ceiling division: exactly one block per process row/column.
        blocksize1 = -(-nao // grid.nprow)
        blocksize2 = -(-nao // grid.npcol)
        # XXX what are rows and columns actually?
        desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

        rhoT_umm = []
        ET_umm = []
        redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            self.timer.start('Get density matrix')
            rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
            rhoT_umm.append(rhoT_mm)
            self.timer.stop('Get density matrix')

            self.timer.start('Potential')
            rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_mM,
                                                        kpt.q)
            del rhoT_mM
            self.timer.stop('Potential')

        self.timer.start('Get density matrix')
        for kpt in self.kpt_u:
            ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                       redistributor)
            ET_umm.append(ET_mm)
        self.timer.stop('Get density matrix')

        # Global index range [M1start, M1stop) x [M2start, M2stop) of
        # the single block owned by this rank.
        M1start = blocksize1 * grid.myrow
        M2start = blocksize2 * grid.mycol

        M1stop = min(M1start + blocksize1, nao)
        M2stop = min(M2start + blocksize2, nao)

        m1max = M1stop - M1start
        m2max = M2stop - M2start

    if not isblacs:
        # Kinetic energy contribution
        #
        #   F^a += 2 Re sum_{mu in a; nu} (dT_{mu nu} / dR_{mu nu})
        #                                  * rho_{nu mu}
        #
        Fkin_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] *
                            rhoT_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Fkin_av[a, :] += \
                    2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
        del dEdTrhoT_vMM

        # Density matrix contribution due to basis overlap
        #
        #   F^a += -2 Re sum_{mu in a; nu} (dTheta_{mu nu} / dR_{mu nu})
        #                                   * E_{nu mu}
        #
        Ftheta_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dThetadRE_vMM = (dThetadR_qvMM[kpt.q] *
                             ET_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Ftheta_av[a, :] += \
                    -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
        del dThetadRE_vMM

    if isblacs:
        from gpaw.lcao.overlap import TwoCenterIntegralCalculator
        self.timer.start('Prepare TCI loop')
        M_a = bfs.M_a

        Fkin2_av = np.zeros_like(F_av)
        Ftheta2_av = np.zeros_like(F_av)

        cell_cv = tci.atoms.cell
        spos_ac = tci.atoms.get_scaled_positions() % 1.0

        overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                  derivative=False)

        # XXX this is not parallel *AT ALL*.
        self.timer.start('Get neighbors')
        nl = tci.atompairs.pairs.neighbors
        r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
        # sorted() instead of keys() + .sort(): dict views have no
        # .sort() method on Python 3.
        atompairs = sorted(r_and_offset_aao.keys())
        self.timer.stop('Get neighbors')

        T_expansions = tci.T_expansions
        Theta_expansions = tci.Theta_expansions
        P_expansions = tci.P_expansions
        nq = len(self.kd.ibzk_qc)

        dH_asp = hamiltonian.dH_asp

        self.timer.start('broadcast dH')
        alldH_asp = {}
        for a in range(len(self.setups)):
            gdrank = bfs.sphere_a[a].rank
            if gdrank == gd.rank:
                dH_sp = dH_asp[a]
            else:
                ni = self.setups[a].ni
                dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
            gd.comm.broadcast(dH_sp, gdrank)
            # okay, now everyone gets copies of dH_sp
            alldH_asp[a] = dH_sp
        self.timer.stop('broadcast dH')

        # This will get sort of hairy.  We need to account for some
        # three-center overlaps, such as:
        #
        #   < dPhi^a1/dR | p~^a3 > dH^a3 < p~^a3 | Phi^a2 > rho^{a2,a1}
        #
        # To this end we will loop over all pairs of atoms (a1, a3),
        # and then a sub-loop over (a3, a2).
        from gpaw.lcao.overlap import DerivativeAtomicDisplacement

        class Displacement(DerivativeAtomicDisplacement):
            def __init__(self, a1, a2, R_c, offset):
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                offset)
                DerivativeAtomicDisplacement.__init__(
                    self, None, a1, a2, R_c, offset, phases)

        # Cache of Displacement objects with evaluated spherical
        # harmonics.
        disp_aao = {}

        def get_displacements(a1, a2, maxdistance):
            # XXX the way maxdistance is handled it can lead to
            # bad caching when different maxdistances are passed
            # to subsequent calls with same pair of atoms
            disp_o = disp_aao.get((a1, a2))
            if disp_o is None:
                disp_o = []
                for R_c, offset in r_and_offset_aao[(a1, a2)]:
                    if np.linalg.norm(R_c) > maxdistance:
                        continue
                    disp = Displacement(a1, a2, R_c, offset)
                    disp_o.append(disp)
                disp_aao[(a1, a2)] = disp_o
            return [disp for disp in disp_o if disp.r < maxdistance]

        self.timer.stop('Prepare TCI loop')
        self.timer.start('Not so complicated loop')

        for (a1, a2) in atompairs:
            if a1 >= a2:
                # Actually this leads to bad load balance.
                # We should take a1 > a2 or a1 < a2 equally many times.
                # Maybe decide which of these choices
                # depending on whether a2 % 1 == 0
                continue

            m1start = M_a[a1] - M1start
            m2start = M_a[a2] - M2start
            if m1start >= blocksize1 or m2start >= blocksize2:
                continue  # (we have only one block per CPU)

            T_expansion = T_expansions.get(a1, a2)
            Theta_expansion = Theta_expansions.get(a1, a2)
            #P_expansion = P_expansions.get(a1, a2)
            nm1, nm2 = T_expansion.shape

            m1stop = min(m1start + nm1, m1max)
            m2stop = min(m2start + nm2, m2max)

            if m1stop <= 0 or m2stop <= 0:
                continue

            m1start = max(m1start, 0)
            m2start = max(m2start, 0)
            # J* index into the per-pair expansion arrays, m* into the
            # local block of the density matrices.
            J1start = max(0, M1start - M_a[a1])
            J2start = max(0, M2start - M_a[a2])
            J1stop = J1start + m1stop - m1start
            J2stop = J2start + m2stop - m2start

            dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
            dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

            disp_o = get_displacements(a1, a2,
                                       phicutoff_a[a1] + phicutoff_a[a2])
            for disp in disp_o:
                disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

            for u, kpt in enumerate(self.kpt_u):
                rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                Fkin_v = 2.0 * (
                    dTdR_qvmm[kpt.q][:, J1start:J1stop, J2start:J2stop] *
                    rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                Ftheta_v = 2.0 * (
                    dThetadR_qvmm[kpt.q][:, J1start:J1stop,
                                         J2start:J2stop] *
                    ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                Fkin2_av[a1] += Fkin_v
                Fkin2_av[a2] -= Fkin_v
                Ftheta2_av[a1] -= Ftheta_v
                Ftheta2_av[a2] += Ftheta_v

        Fkin_av = Fkin2_av
        Ftheta_av = Ftheta2_av
        self.timer.stop('Not so complicated loop')

        # Cache of (dHP_uim, dSP_uim) per (a2, a3) pair.
        dHP_and_dSP_aauim = {}

        # For every second atom a3, the list of first atoms a2 paired
        # with it.
        a2values = {}
        for (a2, a3) in atompairs:
            a2values.setdefault(a3, []).append(a2)

        Fatom_av = np.zeros_like(F_av)
        Frho_av = np.zeros_like(F_av)
        self.timer.start('Complicated loop')
        for a1, a3 in atompairs:
            if a1 == a3:
                # Functions reside on same atom, so their overlap
                # does not change when atom is displaced
                continue
            m1start = M_a[a1] - M1start
            if m1start >= blocksize1:
                continue

            P_expansion = P_expansions.get(a1, a3)
            nm1 = P_expansion.shape[0]
            m1stop = min(m1start + nm1, m1max)
            if m1stop <= 0:
                continue

            m1start = max(m1start, 0)
            J1start = max(0, M1start - M_a[a1])
            J1stop = J1start + m1stop - m1start

            disp_o = get_displacements(a1, a3,
                                       phicutoff_a[a1] + pcutoff_a[a3])
            if len(disp_o) == 0:
                continue

            dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
            for disp in disp_o:
                disp.evaluate_overlap(P_expansion, dPdR_qvmi)

            dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
            for a2 in a2values[a3]:
                m2start = M_a[a2] - M2start
                if m2start >= blocksize2:
                    continue

                P_expansion2 = P_expansions.get(a2, a3)
                nm2 = P_expansion2.shape[0]
                m2stop = min(m2start + nm2, m2max)
                if m2stop <= 0:
                    continue

                disp_o = get_displacements(a2, a3,
                                           phicutoff_a[a2] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                m2start = max(m2start, 0)
                J2start = max(0, M2start - M_a[a2])
                J2stop = J2start + m2stop - m2start

                if (a2, a3) in dHP_and_dSP_aauim:
                    dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                else:
                    P_qmi = P_expansion2.zeros((nq,), dtype=dtype)
                    for disp in disp_o:
                        disp.evaluate_direct(P_expansion2, P_qmi)
                    P_qmi = P_qmi[:, J2start:J2stop].copy()
                    dH_sp = alldH_asp[a3]
                    dS_ii = self.setups[a3].dO_ii

                    dHP_uim = []
                    dSP_uim = []
                    for u, kpt in enumerate(self.kpt_u):
                        dH_ii = unpack(dH_sp[kpt.s])
                        dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                        # XXX only need nq of these
                        dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                        dHP_uim.append(dHP_im)
                        dSP_uim.append(dSP_im)
                    dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                    dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                    Fatom_c = 2.0 * (dPdRdHP_vmm *
                                     rhoT_mm).real.sum(-1).sum(-1)
                    Frho_c = 2.0 * (dPdRdSP_vmm *
                                    ET_mm).real.sum(-1).sum(-1)
                    Fatom_av[a1] += Fatom_c
                    Fatom_av[a3] -= Fatom_c

                    Frho_av[a1] -= Frho_c
                    Frho_av[a3] += Frho_c

        self.timer.stop('Complicated loop')

    if not isblacs:
        # Potential contribution
        #
        #   F^a += -2 Re sum_{mu in a; nu}
        #          int (dPhi_mu(r) / dR_a) v~(r) Phi_nu(r) dr rho_{nu mu}
        #
        self.timer.start('Potential')
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_uMM[u],
                                                        kpt.q)
        self.timer.stop('Potential')

        # Density matrix contribution from PAW correction
        #
        #   F^a += 2 Re sum_{mu nu} Z^a_{mu nu} E_{nu mu}
        #        - 2 Re sum_{b; mu in a; nu} Z^b_{mu nu} E_{nu mu}
        #
        # with
        #
        #   Z^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu})
        #                          dS^b_{ij} P^b_{j nu}
        #
        self.timer.start('Paw correction')
        Frho_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            work_MM = np.zeros((mynao, nao), dtype)
            ZE_MM = None
            for b in my_atom_indices:
                setup = self.setups[b]
                dO_ii = np.asarray(setup.dO_ii, dtype)
                dOP_iM = np.zeros((setup.ni, nao), dtype)
                gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                for v in range(3):
                    gemm(1.0, dOP_iM,
                         dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop],
                         0.0, work_MM, 'n')
                    ZE_MM = (work_MM * ET_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ZE_MM[M1:M2].sum()
                        Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                        Frho_av[b, v] += dE  # the "mu nu" term
        del work_MM, ZE_MM
        self.timer.stop('Paw correction')

        # Atomic density contribution
        #
        #   F^a += -2 Re sum_{mu nu} A^a_{mu nu} rho_{nu mu}
        #        + 2 Re sum_{b; mu in a; nu} A^b_{mu nu} rho_{nu mu}
        #
        # with
        #
        #   A^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu})
        #                          dH^b_{ij} P^b_{j nu}
        #
        self.timer.start('Atomic Hamiltonian force')
        Fatom_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            for b in my_atom_indices:
                H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                  dtype)
                HP_iM = gemmdot(
                    H_ii,
                    np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                for v in range(3):
                    dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                    ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) *
                                rhoT_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ArhoT_MM[M1:M2].sum()
                        Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                        Fatom_av[b, v] -= dE  # the "mu nu" term
        self.timer.stop('Atomic Hamiltonian force')

    F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
    self.timer.start('Wait for sum')
    ksl.orbital_comm.sum(F_av)
    if self.bd.comm.rank == 0:
        self.kd.comm.sum(F_av, 0)
    self.timer.stop('Wait for sum')
    self.timer.stop('LCAO forces')
def diagonalize_full_hamiltonian(self, ham, atoms, occupations, log,
                                 nbands=None, ecut=None, scalapack=None,
                                 expert=False):
    """Diagonalize the full Hamiltonian for every k-point in kpt_u.

    For each k-point the H and S matrices are built, the generalized
    eigenproblem is solved, and ``eps_n``, ``psit_nG`` and the
    projections are stored on the k-point.  Finally
    ``occupations.calculate(self)`` is called.

    Parameters
    ----------
    log:
        Callable used for text output; its ``fd`` attribute is handed
        to the progress bar.
    nbands:
        Number of bands to keep.  When both ``nbands`` and ``ecut`` are
        None, the largest multiple of the band-communicator size not
        exceeding ``self.pd.ngmin`` is used.
    ecut:
        Only used when ``nbands`` is None: it is divided by
        ``units.Hartree`` and converted to a band count from the cell
        volume.
    scalapack:
        Optional ``(nprow, npcol, blocksize)`` list/tuple; only
        consulted when the band communicator has more than one rank.
    expert:
        If true, ``iu=nbands`` is passed to the serial solver.

    Returns
    -------
    The number of bands that was used.

    Raises
    ------
    ValueError
        If ``self.dtype`` is not complex.
    """
    if self.dtype != complex:
        raise ValueError('Your wavefunctions are not complex as '
                         'required by the PW diagonalization routine.\n'
                         'Please supply GPAW(..., dtype=complex, ...) '
                         'as an argument to the calculator to enforce '
                         'complex wavefunctions.')

    if nbands is not None:
        assert nbands <= self.pd.ngmin
    elif ecut is None:
        nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
    else:
        ecut /= units.Hartree
        vol = abs(np.linalg.det(self.gd.cell_cv))
        nbands = int(vol * ecut**1.5 * 2**0.5 / 3 / pi**2)

    iu = nbands if expert else None

    self.bd = bd = BandDescriptor(nbands, self.bd.comm)
    ncores = bd.comm.size

    log('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
    log('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                  self.pd.ngmax))
    mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
    log('Approximate memory usage per core: {0:.3f} MB'.format(mem))

    if ncores > 1:
        if isinstance(scalapack, (list, tuple)):
            nprow, npcol, blocksize = scalapack
        else:
            # Pick the most square grid whose row count divides the
            # number of cores.
            nprow = int(round(ncores**0.5))
            while ncores % nprow != 0:
                nprow -= 1
            npcol = ncores // nprow
            blocksize = 64
        log('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
            'block-size:', blocksize)
        column_grid = BlacsGrid(bd.comm, ncores, 1)
        block_grid = BlacsGrid(bd.comm, nprow, npcol)
        scalapack = True
    else:
        nprow = npcol = 1
        scalapack = False

    self.set_positions(atoms.get_scaled_positions())
    self.kpt_u[0].P_ani = None
    self.allocate_arrays_for_projections(self.pt.my_atom_indices)

    myslice = bd.get_slice()

    progress = ProgressBar(log.fd)
    nkpt = len(self.kpt_u)

    for u, kpt in enumerate(self.kpt_u):
        progress.update(u / nkpt)
        npw = len(self.pd.Q_qG[kpt.q])

        if scalapack:
            mynpw = -(-npw // bd.comm.size)  # ceiling division
            md = BlacsDescriptor(column_grid, npw, npw, mynpw, npw)
            md2 = BlacsDescriptor(block_grid, npw, npw,
                                  blocksize, blocksize)
        else:
            md = md2 = MatrixDescriptor(npw, npw)

        with self.timer('Build H and S'):
            H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

        if scalapack:
            # Move from the column layout to the 2D block layout.
            redist = Redistributor(bd.comm, md, md2)
            H_GG = redist.redistribute(H_GG)
            S_GG = redist.redistribute(S_GG)

        psit_nG = md2.empty(dtype=complex)
        eps_n = np.empty(npw)

        with self.timer('Diagonalize'):
            if scalapack:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
            else:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n,
                                           iu=iu)
        del H_GG, S_GG

        kpt.eps_n = eps_n[myslice].copy()

        if scalapack:
            # Back to a column layout with bd.mynbands rows per rank.
            md3 = BlacsDescriptor(column_grid, npw, npw, bd.mynbands, npw)
            redist = Redistributor(bd.comm, md2, md3)
            psit_nG = redist.redistribute(psit_nG)

        kpt.psit_nG = psit_nG[:bd.mynbands].copy()
        del psit_nG

        with self.timer('Projections'):
            self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

        kpt.f_n = None

    progress.finish()

    occupations.calculate(self)

    return nbands
from gpaw.mpi import world
from gpaw.blacs import BlacsGrid, Redistributor

# Exercises Redistributor.redistribute with explicit sub-matrix size and
# offsets between two descriptors on the same grid.

if world.size < 2:
    raise ValueError('Runs on two or more processors')

grid = BlacsGrid(world, 2, world.size // 2)

# Source matrix: every rank fills its local part with its own rank.
desc = grid.new_descriptor(12, 8, 2, 3)

a = desc.zeros()
a[:] = world.rank

# Destination matrix, initially zero.
subdesc = grid.new_descriptor(7, 7, 2, 2)
b = subdesc.zeros()

r = Redistributor(grid.comm, desc, subdesc, uplo='G')

# Offsets into a (ia, ja) and b (ib, jb) and the M x N size of the
# sub-matrix that is moved.
ia = 3
ja = 2
ib = 1
jb = 1
M = 4
N = 5

r.redistribute(a, b, M, N, ia, ja, ib, jb)

a0 = desc.collect_on_master(a)
b0 = subdesc.collect_on_master(b)
if world.rank == 0:
    # print() call form works on both Python 2 and Python 3.
    print(a0)
def get_vchi(self, w_w=None, eta=0.1, q_c=[0.0, 0.0, 0.0],
             direction=0, ac=1.0, readfile=None, optical=True,
             write_eig=None):
    r"""Returns v * \chi where v is the bare Coulomb interaction

    The BSE matrix is obtained through ``self.get_bse_matrix`` and the
    response is then evaluated at every frequency in ``w_w``.  An f-sum
    rule check is written to ``self.fd``.

    Parameters
    ----------
    w_w:
        Array of frequencies.  Although the default is None, an array
        is required: its length is taken and it is divided by
        ``Hartree``.
    eta:
        Broadening; divided by ``Hartree`` before use.
    q_c, direction, ac, readfile, optical, write_eig:
        Forwarded to ``self.get_bse_matrix``.  ``q_c`` is also used for
        the ``1 / |q|^2`` scaling when ``self.q_c`` is non-zero, and
        ``ac`` scales the returned values.
    write_eig:
        If not None, rank 0 additionally writes the eigenvalues and
        weights to this file name.

    Returns
    -------
    Complex array with one value per frequency, multiplied by ``ac``.
    """

    self.get_bse_matrix(q_c=q_c, direction=direction, ac=ac,
                        readfile=readfile, optical=optical,
                        write_eig=write_eig)

    w_T = self.w_T
    rhoG0_S = self.rhoG0_S
    df_S = self.df_S

    print('Calculating response function at %s frequency points' %
          len(w_w), file=self.fd)
    vchi_w = np.zeros(len(w_w), dtype=complex)

    if not self.td:
        # Weights are computed on the master only and then broadcast.
        C_T = np.zeros(self.nS - len(self.excludef_S), complex)
        if world.rank == 0:
            A_T = np.dot(rhoG0_S, self.v_ST)
            B_T = np.dot(rhoG0_S * df_S, self.v_ST)
            tmp = np.dot(self.v_ST.conj().T, self.v_ST)
            overlap_tt = np.linalg.inv(tmp)
            C_T = np.dot(B_T.conj(), overlap_tt.T) * A_T
        world.broadcast(C_T, 0)
    else:
        A_t = np.dot(rhoG0_S, self.v_St)
        B_t = np.dot(rhoG0_S * df_S, self.v_St)
        if world.size == 1:
            C_T = B_t.conj() * A_t
        else:
            # Each rank holds a slice of the weights; gather them on
            # the master through a single-column BLACS layout and
            # broadcast the full vector.
            Nv = self.nv * (self.spinors + 1)
            Nc = self.nc * (self.spinors + 1)
            Ns = self.spins
            nS = self.nS
            ns = -(-self.kd.nbzkpts // world.size) * Nv * Nc * Ns
            grid = BlacsGrid(world, world.size, 1)
            desc = grid.new_descriptor(nS, 1, ns, 1)
            C_t = desc.empty(dtype=complex)
            C_t[:, 0] = B_t.conj() * A_t
            C_T = desc.collect_on_master(C_t)[:, 0]
            if world.rank != 0:
                C_T = np.empty(nS, dtype=complex)
            world.broadcast(C_T, 0)

    eta /= Hartree
    for iw, w in enumerate(w_w / Hartree):
        tmp_T = 1. / (w - w_T + 1j * eta)
        vchi_w[iw] += np.dot(tmp_T, C_T)
    vchi_w *= 4 * np.pi / self.vol

    # NOTE(review): the test uses self.q_c while the scaling uses the
    # q_c argument -- presumably get_bse_matrix stores q_c on self;
    # confirm.
    if not np.allclose(self.q_c, 0.0):
        cell_cv = self.calc.wfs.gd.cell_cv
        B_cv = 2 * np.pi * np.linalg.inv(cell_cv).T
        q_v = np.dot(q_c, B_cv)
        vchi_w /= np.dot(q_v, q_v)

    # Check f-sum rule (trapezoidal integration of w * Im(vchi)).
    nv = self.calc.wfs.setups.nvalence
    dw_w = (w_w[1:] - w_w[:-1]) / Hartree
    wchi_w = (w_w[1:] * vchi_w[1:] + w_w[:-1] * vchi_w[:-1]) / Hartree / 2
    N = -np.dot(dw_w, wchi_w.imag) * self.vol / (2 * np.pi**2)
    print(file=self.fd)
    print('Checking f-sum rule:', file=self.fd)
    print(' Valence = %s, N = %f' % (nv, N), file=self.fd)
    print(file=self.fd)

    if write_eig is not None:
        if world.rank == 0:
            # Context manager so the file is closed even if a write
            # fails.
            with open(write_eig, 'w') as f:
                print('# %s eigenvalues in eV' % self.mode, file=f)
                for iw, w in enumerate(self.w_T * Hartree):
                    print('%8d %12.6f %12.16f' % (iw, w.real,
                                                  C_T[iw].real),
                          file=f)

    return vchi_w * ac