def test_trivial_cholesky(self):
    """Rotate the states with an inverse Cholesky factor and check overlaps.

    The reference overlap ``self.S0_nn`` is shifted by ``alpha * I``,
    factorized with ``np.linalg.cholesky`` and the inverse of the conjugate
    transposed factor is applied to ``self.psit_nG``.  The recomputed
    overlap matrix is compared with ``I - alpha * C^dag C`` through
    ``check_and_plot`` (precision argument 6).
    """
    # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
    I_nn = np.eye(*self.S0_nn.shape)
    alpha = 1e-3  # shift eigenvalues away from zero
    SI_nn = self.S0_nn + alpha * I_nn

    # Try Cholesky decomposition SI_nn = L_nn * L_nn^dag
    L_nn = np.linalg.cholesky(SI_nn)

    # |psit_n> -> C_nn |psit_n> , C_nn^(-1) = L_nn^dag
    # <psit_m|SI|psit_n> -> <psit_m|C_nn^dag SI C_nn|psit_n> = diag(W_n)
    C_nn = np.linalg.inv(L_nn.T.conj())

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    self.psit_nG = overlap.matrix_multiply(C_nn.T.copy(), self.psit_nG,
                                           self.P_ani)
    # transpose to get <psit_m|A|psit_n>
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(D_nn, 'U')  # upper to lower...

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_nn, 0)
    self.bd.comm.broadcast(D_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    # D_nn = C_nn^dag * S_nn * C_nn = I_nn - alpha * C_nn^dag * C_nn
    D0_nn = I_nn - alpha * np.dot(C_nn.T.conj(), C_nn)
    self.check_and_plot(D_nn, D0_nn, 6, 'trivial,cholesky')  # XXX precision
def test_overlaps_hermitian(self):
    """Check the distributed Hermitian overlap matrix against ``self.S0_nn``.

    The matrix elements are computed with the identity operator, collected
    on the master through ``self.ksl.nndescriptor``, transposed and
    completed with ``tri2full`` there, broadcast to all ranks and passed to
    ``check_and_plot`` (precision argument 9).
    """
    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    S_NN = self.ksl.nndescriptor.collect_on_master(S_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert S_NN.shape == (self.bd.nbands,) * 2
        S_NN = S_NN.T.copy()  # Fortran -> C indexing
        tri2full(S_NN, 'U')  # upper to lower...
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert S_NN.nbytes == 0
        S_NN = np.empty((self.bd.nbands,) * 2, dtype=S_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_NN, 0)
    self.bd.comm.broadcast(S_NN, 0)

    self.check_and_plot(S_NN, self.S0_nn, 9, 'overlaps,hermitian')
def test_multiply_randomized(self):
    """Apply a random coefficient matrix and verify the rotated overlaps.

    A random ``nbands x nbands`` matrix (complex or real, following
    ``self.dtype``) is broadcast from rank 0 of ``world``, applied to
    ``self.psit_nG`` and the recomputed overlap matrix is compared with
    ``C^dag S0 C`` through ``check_and_plot`` (precision argument 9).
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_nn = self.S0_nn

    nvalues = self.nbands**2
    if self.dtype == complex:
        # Random modulus times a random phase.
        C_nn = np.random.uniform(size=nvalues) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=nvalues))
    else:
        C_nn = np.random.normal(size=nvalues)
    # The 2-norm is evaluated on the flat vector, i.e. before reshaping.
    C_nn = C_nn.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_nn, 2)
    world.broadcast(C_nn, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    self.psit_nG = overlap.matrix_multiply(C_nn.T.copy(), self.psit_nG,
                                           self.P_ani)
    # transpose to get <psit_m|A|psit_n>
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(D_nn, 'U')  # upper to lower...

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_nn, 0)
    self.bd.comm.broadcast(D_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_nn = np.dot(C_nn.T.conj(), np.dot(S_nn, C_nn))
    self.check_and_plot(D_nn, D0_nn, 9, 'multiply,randomized')
def test_overlaps_nonhermitian(self):
    """Check a scaled, non-Hermitian overlap matrix on the distributed path.

    A random scalar ``alpha`` (complex when ``self.dtype`` is complex) is
    broadcast from rank 0 of ``world`` and used to scale both operator
    callbacks.  The collected matrix is compared with
    ``alpha * self.S0_nn`` through ``check_and_plot`` (precision
    argument 9).
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    S_NN = self.ksl.nndescriptor.collect_on_master(S_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert S_NN.shape == (self.bd.nbands,) * 2
        S_NN = S_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert S_NN.nbytes == 0
        S_NN = np.empty((self.bd.nbands,) * 2, dtype=S_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_NN, 0)
    self.bd.comm.broadcast(S_NN, 0)

    self.check_and_plot(S_NN, alpha * self.S0_nn, 9, 'overlaps,nonhermitian')
def test_trivial_diagonalize(self):
    """Diagonalize the reference overlap with analytically known vectors.

    The eigenvector matrix ``V_nn`` and eigenvalues ``W_n`` are written
    down explicitly, checked against ``self.S0_nn``, orthonormalized with
    ``gram_schmidt`` and applied to ``self.psit_nG``.  The recomputed
    overlap matrix is compared with ``C^dag S0 C`` through
    ``check_and_plot`` (precision argument 9).
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_nn = self.S0_nn

    # Eigenvector decomposition S_nn = V_nn * W_nn * V_nn^dag
    # Utilize the fact that they are analytically known (cf. Maple)
    band_indices = np.arange(self.nbands)
    V_nn = np.eye(self.nbands).astype(self.dtype)
    if self.dtype == complex:
        V_nn[1:, 1] = (np.conj(self.gamma)**band_indices[1:]
                       * band_indices[1:]**0.5)
        V_nn[1, 2:] = (-self.gamma**band_indices[1:-1]
                       * band_indices[2:]**0.5)
    else:
        V_nn[2:, 1] = band_indices[2:]**0.5
        V_nn[1, 2:] = -band_indices[2:]**0.5

    W_n = np.zeros(self.nbands).astype(self.dtype)
    W_n[1] = (1. + self.Qtotal) * self.nbands * (self.nbands - 1) / 2.

    # Find the inverse basis
    Vinv_nn = np.linalg.inv(V_nn)

    # Test analytical eigenvectors for consistency against analytical S_nn
    D_nn = np.dot(Vinv_nn, np.dot(S_nn, V_nn))
    self.assertAlmostEqual(np.abs(D_nn.diagonal() - W_n).max(), 0, 8)
    self.assertAlmostEqual(np.abs(np.tril(D_nn, -1)).max(), 0, 4)
    self.assertAlmostEqual(np.abs(np.triu(D_nn, 1)).max(), 0, 4)
    del Vinv_nn, D_nn

    # Perform Gram Schmidt orthonormalization for diagonalization
    # |psit_n> -> C_nn |psit_n>, using orthonormalized basis Q_nn
    # <psit_m|S|psit_n> -> <psit_m|C_nn^dag S C_nn|psit_n> = diag(W_n)
    # using S_nn = V_nn * W_nn * V_nn^(-1) = Q_nn * W_nn * Q_nn^dag
    C_nn = V_nn.copy()
    gram_schmidt(C_nn)
    self.assertAlmostEqual(
        np.abs(np.dot(C_nn.T.conj(), C_nn) - np.eye(self.nbands)).max(),
        0, 6)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    self.psit_nG = overlap.matrix_multiply(C_nn.T.copy(), self.psit_nG,
                                           self.P_ani)
    # transpose to get <psit_m|A|psit_n>
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(D_nn, 'U')  # upper to lower...

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_nn, 0)
    self.bd.comm.broadcast(D_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    # D_nn = C_nn^dag * S_nn * C_nn = W_n since Q_nn^dag = Q_nn^(-1)
    D0_nn = np.dot(C_nn.T.conj(), np.dot(S_nn, C_nn))
    self.assertAlmostEqual(np.abs(D0_nn - np.diag(W_n)).max(), 0, 9)
    self.check_and_plot(D_nn, D0_nn, 9, 'trivial,diagonalize')
def test_multiply_nonhermitian(self):
    """Apply a random matrix and verify scaled, non-Hermitian overlaps.

    A random scalar ``alpha`` and a random ``nbands x nbands`` matrix are
    broadcast from rank 0 of ``world``.  The matrix is distributed from the
    master through ``self.ksl.nndescriptor``, applied to ``self.psit_nG``
    and the collected matrix elements are compared with
    ``C^dag (alpha * S0) C`` through ``check_and_plot`` (precision
    argument 9).
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = alpha * self.S0_nn

    nvalues = self.nbands**2
    if self.dtype == complex:
        # Random modulus times a random phase.
        C_NN = np.random.uniform(size=nvalues) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=nvalues))
    else:
        C_NN = np.random.normal(size=nvalues)
    # The 2-norm is evaluated on the flat vector, i.e. before reshaping.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up non-Hermitian overlap operator (scaled by alpha):
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,nonhermitian')
def test_multiply_randomized(self):
    """Apply a random matrix on the distributed path and verify overlaps.

    A random ``nbands x nbands`` matrix is broadcast from rank 0 of
    ``world``, distributed from the master through
    ``self.ksl.nndescriptor`` and applied to ``self.psit_nG``.  The
    collected overlap matrix is compared with ``C^dag S0 C`` through
    ``check_and_plot`` (precision argument 9).
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = self.S0_nn

    nvalues = self.nbands**2
    if self.dtype == complex:
        # Random modulus times a random phase.
        C_NN = np.random.uniform(size=nvalues) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=nvalues))
    else:
        C_NN = np.random.normal(size=nvalues)
    # The 2-norm is evaluated on the flat vector, i.e. before reshaping.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower...
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,randomized')
def run(psit_mG):
    """Diagonalize the subspace matrix of H and verify the rotated states.

    The matrix elements of ``H`` (plus the ``dH`` correction) are computed
    with a ``MatrixOperator``, diagonalized on world rank 0, broadcast and
    used to rotate ``psit_mG``.  Afterwards the matrix is recomputed and, on
    rank 0, checked to have the eigenvalues on the diagonal (within 2e-8)
    and zeros below it.  Returns the rotated ``psit_mG``.
    """
    overlap = MatrixOperator(ksl, J)

    # Constant correction matrices, looked up by the first argument of dH.
    dH_aii = {0: np.ones((2, 2)) * 0.123,
              1: np.ones((3, 3)) * 0.321}

    def H(psit_xG):
        # kin() fills the output buffer; vt_G * psi is then added per band.
        result_xG = np.empty_like(psit_xG)
        kin(psit_xG, result_xG)
        for index, psit_G in enumerate(psit_xG):
            result_xG[index] += vt_G * psit_G
        return result_xG

    def dH(a, P_ni):
        return np.dot(P_ni, dH_aii[a])

    H_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    start = time()
    if world.rank == 0:
        eps_n, vectors_nn = np.linalg.eigh(H_nn)
        H_nn = np.ascontiguousarray(vectors_nn.T)
    stop = time()

    if world.rank == 0:
        print('Diagonalization Time %f' % (stop - start))
        print(eps_n)

    # Distribute matrix:
    world.broadcast(H_nn, 0)

    psit_mG = overlap.matrix_multiply(H_nn, psit_mG, P_ani)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    first_diff = P_ani[0] - psit_mG[:, :2, 0, 0]
    assert not first_diff.round(10).any()
    second_diff = P_ani[1] - psit_mG[:, -1, -1, -3:]
    assert not second_diff.round(10).any()

    H_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    if world.rank == 0:
        for band in range(N):
            # Diagonal holds the eigenvalue, everything below it vanishes.
            assert abs(H_nn[band, band] - eps_n[band]) < 2e-8
            assert not H_nn[band + 1:, band].round(8).any()

    return psit_mG
def test_trivial_diagonalize(self):  # XXX XXX XXX
    """Diagonalize the distributed overlap matrix and rotate the states.

    The overlap matrix is diagonalized with
    ``self.ksl.nndescriptor.diagonalize_dc``, the eigenvalues are compared
    with the analytically known ``W_n``, the states are rotated with the
    eigenvectors and the recomputed overlap matrix is compared with
    ``diag(eps_N)`` through ``check_and_plot`` (precision argument 9).
    """
    # Eigenvector decomposition S_nn = V_nn * W_nn * V_nn^dag
    # Utilize the fact that the eigenvalues are analytically known
    # (cf. Maple).  The unused preliminary ``S_nn = self.S0_nn`` assignment
    # of the original was dropped: it was overwritten before any use.
    W_n = np.zeros(self.nbands).astype(self.dtype)
    W_n[1] = (1. + self.Qtotal) * self.nbands * (self.nbands - 1) / 2.

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    eps_N = self.bd.empty(global_array=True)  # XXX dtype?
    C_nn = self.ksl.nndescriptor.empty(dtype=S_nn.dtype)
    self.ksl.nndescriptor.diagonalize_dc(S_nn, C_nn, eps_N, 'L')
    self.assertAlmostEqual(np.abs(np.sort(eps_N) - np.sort(W_n)).max(), 0, 9)
    #eps_n = self.bd.empty()
    #self.bd.distribute(eps_N, eps_n) # XXX only blocked groups, right?

    # Rotate wavefunctions to diagonalize the overlap
    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)

    # Recalculate the overlap matrix, which should now be diagonal
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower...
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    D0_NN = np.diag(eps_N)
    self.check_and_plot(D_NN, D0_NN, 9, 'trivial,diagonalize')
def test_overlaps_hermitian(self):
    """Check the Hermitian overlap matrix against ``self.S0_nn``.

    The matrix elements are computed with the identity operator,
    transposed, completed with ``tri2full``, broadcast and passed to
    ``check_and_plot`` (precision argument 9).
    """
    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    # transpose to get <psit_m|A|psit_n>
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(S_nn, 'U')  # upper to lower...

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_nn, 0)
    self.bd.comm.broadcast(S_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    self.check_and_plot(S_nn, self.S0_nn, 9, 'overlaps,hermitian')
def run(psit_mG):
    """Rotate ``psit_mG`` into the eigenbasis of the subspace matrix of H.

    Matrix elements of ``H`` with the ``dH`` correction are evaluated via a
    ``MatrixOperator``; world rank 0 diagonalizes them, the transposed
    eigenvectors are broadcast and applied to ``psit_mG``.  The recomputed
    matrix is then asserted (on rank 0) to carry the eigenvalues on its
    diagonal within 1.5e-8 and zeros below it.  Returns the rotated array.
    """
    overlap = MatrixOperator(ksl, J)

    def H(psit_xG):
        # kin() writes into the fresh buffer, then vt_G * psi is added to
        # each band in place.
        out_xG = np.empty_like(psit_xG)
        kin(psit_xG, out_xG)
        for band in range(len(psit_xG)):
            out_xG[band] += vt_G * psit_xG[band]
        return out_xG

    # Constant correction matrices selected by the first argument of dH.
    dH_aii = {}
    dH_aii[0] = np.ones((2, 2)) * 0.123
    dH_aii[1] = np.ones((3, 3)) * 0.321

    def dH(a, P_ni):
        correction_ii = dH_aii[a]
        return np.dot(P_ni, correction_ii)

    H_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    t_begin = time()
    if world.rank == 0:
        eps_n, H_nn = np.linalg.eigh(H_nn)
        # Eigenvectors are transposed into a contiguous array before use.
        H_nn = np.ascontiguousarray(H_nn.T)
    t_end = time()

    if world.rank == 0:
        print('Diagonalization Time %f' % (t_end - t_begin))
        print(eps_n)

    # Distribute matrix:
    world.broadcast(H_nn, 0)

    psit_mG = overlap.matrix_multiply(H_nn, psit_mG, P_ani)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    H_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    if world.rank == 0:
        for row in range(N):
            deviation = abs(H_nn[row, row] - eps_n[row])
            assert deviation < 1.5e-8
            below_diagonal = H_nn[row + 1:, row]
            assert not below_diagonal.round(8).any()

    return psit_mG
def __init__(self, stencil, diagksl, orthoksl, initksl,
             gd, nvalence, setups, bd, dtype, world, kd,
             kptband_comm, timer):
    """Initialize the base class and attach the grid operators.

    All arguments except ``stencil`` are forwarded unchanged to
    ``FDPWWaveFunctions.__init__``; ``stencil`` is passed on to the
    ``Laplace`` operator stored as ``self.kin``.
    """
    base_args = (diagksl, orthoksl, initksl, gd, nvalence, setups,
                 bd, dtype, world, kd, kptband_comm, timer)
    FDPWWaveFunctions.__init__(self, *base_args)

    # Kinetic energy operator:
    kinetic_prefactor = -0.5
    self.kin = Laplace(self.gd, kinetic_prefactor, stencil, self.dtype)

    self.matrixoperator = MatrixOperator(self.orthoksl)

    # initialized by MGGA functional
    self.taugrad_v = None
def __init__(self, ecut, diagksl, orthoksl, initksl,
             gd, nvalence, setups, bd, world, kd, timer):
    """Initialize the base class with a complex dtype and attach operators.

    ``ecut`` is divided by ``units.Hartree`` before being stored.  Note
    that the passed-in ``kd`` and ``orthoksl`` objects are modified in
    place (``kd.gamma`` and ``orthoksl.gd`` are assigned).
    """
    self.ecut = ecut / units.Hartree

    # Set dtype=complex and gamma=False:
    kd.gamma = False
    base_args = (diagksl, orthoksl, initksl, gd, nvalence, setups,
                 bd, complex, world, kd, timer)
    FDPWWaveFunctions.__init__(self, *base_args)

    # The layout object is pointed at self.pd before the matrix operator
    # is built from it.
    orthoksl.gd = self.pd
    matrixoperator = MatrixOperator(orthoksl)
    self.matrixoperator = matrixoperator

    self.wd = self.pd
def test_overlaps_nonhermitian(self):
    """Check a scaled, non-Hermitian overlap matrix.

    A random scalar ``alpha`` (complex when ``self.dtype`` is complex) is
    broadcast from rank 0 of ``world`` and used to scale both operator
    callbacks.  The transposed matrix is broadcast and compared with
    ``alpha * self.S0_nn`` through ``check_and_plot`` (precision
    argument 9).
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, False)
    # transpose to get <psit_m|A|psit_n>
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_nn, 0)
    self.bd.comm.broadcast(S_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    self.check_and_plot(S_nn, alpha * self.S0_nn, 9, 'overlaps,nonhermitian')
def test_trivial_cholesky(self):
    """Orthonormalize with the distributed inverse Cholesky and check.

    The distributed overlap matrix is shifted by ``alpha`` times a matrix
    prepared with ``scalapack_set``, passed to
    ``self.ksl.nndescriptor.inverse_cholesky`` and applied to
    ``self.psit_nG``.  The collected overlap matrix is compared with a
    NumPy reference ``I - alpha * C0^dag C0`` through ``check_and_plot``
    (precision argument 6).
    """
    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr reads the very same attribute.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
    I_nn = self.ksl.nndescriptor.empty(dtype=S_nn.dtype)
    scalapack_set(self.ksl.nndescriptor, I_nn, 0.0, 1.0, 'L')
    alpha = 1e-3  # shift eigenvalues away from zero

    C_nn = S_nn + alpha * I_nn
    self.ksl.nndescriptor.inverse_cholesky(C_nn, 'L')
    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower..
    else:
        # Non-master ranks got an empty array; allocate a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: first over gd.comm on the ranks with
    # bd.comm.rank == 0, then over bd.comm on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_NN = C_NN^dag * S_NN * C_NN = I_NN - alpha * C_NN^dag * C_NN
    I_NN = np.eye(self.bd.nbands)
    L0_NN = np.linalg.cholesky(self.S0_nn + alpha * I_NN)
    C0_NN = np.linalg.inv(L0_NN.T.conj())
    D0_NN = I_NN - alpha * np.dot(C0_NN.T.conj(), C0_NN)
    self.check_and_plot(D_NN, D0_NN, 6, 'trivial,cholesky')  # XXX precision
def __init__(self, ecut, fftwflags,
             diagksl, orthoksl, initksl,
             gd, nvalence, setups, bd, dtype, world, kd,
             kptband_comm, timer):
    """Store the cutoff settings, initialize the base class, add operators.

    ``ecut`` and ``fftwflags`` are stored as given; the remaining
    arguments are forwarded unchanged to ``FDPWWaveFunctions.__init__``.
    ``self.orthoksl.gd`` is reassigned to ``self.pd`` before the matrix
    operator is built.
    """
    self.ecut = ecut
    self.fftwflags = fftwflags

    # number of G-vectors for all IBZ k-points
    self.ng_k = None

    base_args = (diagksl, orthoksl, initksl, gd, nvalence, setups,
                 bd, dtype, world, kd, kptband_comm, timer)
    FDPWWaveFunctions.__init__(self, *base_args)

    layout = self.orthoksl
    layout.gd = self.pd
    self.matrixoperator = MatrixOperator(self.orthoksl)
def run(psit_mG):
    """Orthonormalize ``psit_mG`` with an inverse Cholesky factor and verify.

    The overlap matrix (identity operator plus the ``dS`` correction) is
    computed with a ``MatrixOperator``; world rank 0 passes it to
    ``inverse_cholesky``, the result is broadcast and applied to
    ``psit_mG``.  The recomputed matrix is asserted (on rank 0) to have ones
    on the diagonal and zeros below it.  Returns the rotated array.
    """
    overlap = MatrixOperator(ksl, K)
    if False:
        # Disabled: optionally hand pre-allocated work arrays to the operator.
        overlap.work1_xG = work1_xG
        overlap.work2_xG = work2_xG
    #S_nn = np.empty((N, N))

    # Constant correction matrices, looked up by the first argument of dS.
    dS_aii = {0: np.ones((2, 2)) * 0.123,
              1: np.ones((3, 3)) * 0.321}

    def S(x):
        return x

    def dS(a, P_ni):
        correction_ii = dS_aii[a]
        return np.dot(P_ni, correction_ii)

    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    start = time()
    if world.rank == 0:
        print(S_nn.round(5))
        inverse_cholesky(S_nn)
    # The coefficient matrix is the same array object as S_nn.
    C_nn = S_nn
    stop = time()

    if world.rank == 0:
        print('Cholesky Time %f' % (stop - start))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG = overlap.matrix_multiply(C_nn, psit_mG, P_ani)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    first_diff = P_ani[0] - psit_mG[:, :2, 0, 0]
    assert not first_diff.round(10).any()
    second_diff = P_ani[1] - psit_mG[:, -1, -1, -3:]
    assert not second_diff.round(10).any()

    if world.rank == 0:
        for band in range(N):
            # Unit diagonal, vanishing entries below it.
            assert abs(S_nn[band, band] - 1.0) < 1e-10
            assert not S_nn[band + 1:, band].round(10).any()

    return psit_mG
def __init__(self, stencil, diagksl, orthoksl, initksl,
             gd, nvalence, setups, bd, dtype, world, kd, timer=None):
    """Initialize the base class and attach the grid operators.

    All arguments except ``stencil`` are forwarded unchanged to
    ``FDPWWaveFunctions.__init__``; ``stencil`` is passed on to the
    ``Laplace`` operator stored as ``self.kin`` (created with
    ``allocate=False``).
    """
    base_args = (diagksl, orthoksl, initksl, gd, nvalence, setups,
                 bd, dtype, world, kd, timer)
    FDPWWaveFunctions.__init__(self, *base_args)

    # wave function descriptor
    self.wd = self.gd

    # Kinetic energy operator:
    kinetic_prefactor = -0.5
    self.kin = Laplace(self.gd, kinetic_prefactor, stencil, self.dtype,
                       allocate=False)

    # Built from the constructor argument, not from an attribute.
    self.matrixoperator = MatrixOperator(orthoksl)
def run(psit_mG):
    """Orthonormalize ``psit_mG`` with an inverse Cholesky factor and verify.

    The overlap matrix (identity operator plus the ``dS`` correction) is
    computed with a ``MatrixOperator``; world rank 0 passes it to
    ``inverse_cholesky``, the result is broadcast and applied to
    ``psit_mG``.  The recomputed matrix is asserted (on rank 0) to have ones
    on the diagonal and zeros below it.  Returns the rotated array.

    The Python 2 ``print`` statements were replaced by single-argument
    ``print(...)`` calls, which produce the same output on Python 2 and are
    valid syntax on Python 3.
    """
    overlap = MatrixOperator(ksl, K)
    if 0:
        overlap.work1_xG = work1_xG
        overlap.work2_xG = work2_xG
    #S_nn = np.empty((N, N))

    def S(x):
        return x

    # Constant correction matrices, looked up by the first argument of dS.
    dS_aii = {0: np.ones((2, 2)) * 0.123, 1: np.ones((3, 3)) * 0.321}

    def dS(a, P_ni):
        return np.dot(P_ni, dS_aii[a])

    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    t1 = time()
    if world.rank == 0:
        print(S_nn.round(5))
        inverse_cholesky(S_nn)
    # The coefficient matrix is the same array object as S_nn.
    C_nn = S_nn
    t2 = time()

    if world.rank == 0:
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG = overlap.matrix_multiply(C_nn, psit_mG, P_ani)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    if world.rank == 0:
        for n in range(N):
            # Unit diagonal, vanishing entries below it.
            assert abs(S_nn[n, n] - 1.0) < 1e-10
            assert not S_nn[n + 1:, n].round(10).any()

    return psit_mG
def dscf_kpoint_overlaps(paw, phasemod=True, broadcast=True):
    """Compute overlaps between each local k-point and a reference k-point.

    For every spin a ``GlobalKPoint`` is created at the k-point with the
    smallest norm in ``kd.ibzk_kc``.  For each k-point in
    ``paw.wfs.kpt_u`` the matrix elements between its states and the
    reference states are evaluated with a non-Hermitian ``MatrixOperator``.

    Parameters:
        paw: calculator object providing ``wfs`` and ``get_atoms()``.
        phasemod: if true, the reference wave functions and projections are
            multiplied by phase factors built from the k-point difference
            before the overlaps are taken (requires a complex dtype).
        broadcast: if true, the result is broadcast over ``gd.comm`` (on
            ranks with ``bd.comm.rank == 0``) and then over ``bd.comm``.

    Returns:
        ``(kpt0_s, X_unn)``: the list of reference k-points (one per spin)
        and an array of shape ``(kd.mynks, bd.nbands, bd.nbands)``.
    """
    bd = paw.wfs.bd
    gd = paw.wfs.gd
    kd = paw.wfs.kd
    operator = MatrixOperator(paw.wfs.orthoksl, hermitian=False)
    atoms = paw.get_atoms()

    # Find the kpoint with lowest kpt.k_c (closest to gamma point)
    k0 = np.argmin(np.sum(paw.wfs.kd.ibzk_kc**2, axis=1)**0.5)

    # Maintain list of a single global reference kpoint for each spin
    kpt0_s = []
    for s0 in range(kd.nspins):
        # NOTE(review): ``%`` binds tighter than ``-``, so this evaluates
        # k0 - (offset % nibzkpts); confirm that is the intended index.
        q0 = k0 - kd.get_offset() % kd.nibzkpts
        kpt0 = GlobalKPoint(None, s0, k0, q0, None)
        kpt0.update(paw.wfs)
        kpt0_s.append(kpt0)

    if phasemod:
        # Scaled grid point positions used for exponential with ibzk_kc
        # cf. wavefunctions.py lines 90-91 rev 4500(ca)
        # phase_cd = np.exp(2j * np.pi * sdisp_cd * ibzk_kc[k, :, np.newaxis])
        r_cG = gd.empty(3)
        for c, r_G in enumerate(r_cG):
            slice_c2G = [np.newaxis, np.newaxis, np.newaxis]
            slice_c2G[c] = slice(None)  # this means ':'
            # Index with a tuple: NumPy no longer accepts a list containing
            # None/slice objects as a multidimensional index.
            r_G[:] = np.arange(gd.beg_c[c], gd.end_c[c],
                               dtype=float)[tuple(slice_c2G)] / gd.N_c[c]

    X_unn = np.empty((kd.mynks, bd.nbands, bd.nbands), dtype=paw.wfs.dtype)
    for myu, kpt in enumerate(paw.wfs.kpt_u):
        u = kd.global_index(myu)
        s, k = kd.what_is(u)
        kpt0 = kpt0_s[s]
        X_nn = X_unn[myu]  # view into the result array

        if phasemod:
            assert paw.wfs.dtype == complex, 'Phase modification is complex!'

            k0_c = kd.ibzk_kc[k0]
            k_c = kd.ibzk_kc[k]
            dk_c = k_c - k0_c
            eirk_G = np.exp(2j * np.pi * np.sum(
                r_cG * dk_c[:, np.newaxis, np.newaxis, np.newaxis], axis=0))
            psit0_nG = eirk_G[np.newaxis, ...] * kpt0.psit_nG

            P0_ani = paw.wfs.pt.dict(bd.mynbands)
            spos_ac = atoms.get_scaled_positions() % 1.0
            for a, P0_ni in P0_ani.items():
                # Expanding the exponential exp(ikr)=exp(ikR)*exp(ik(r-R))
                # and neglecting the changed P_ani integral exp(ik(r-R))~1
                P0_ni[:] = np.exp(2j * np.pi * np.sum(
                    spos_ac[a] * (k_c - k0_c), axis=0)) * kpt0.P_ani[a]

            ## NB: No exp(ikr) approximate here, but has a parallelization bug
            #kpt0_rank, myu0 = kd.get_rank_and_index(kpt0.s, kpt0.k)
            #if kd.comm.rank == kpt0_rank:
            #    paw.wfs.pt.integrate(psit0_nG, P0_ani, kpt0.q)
            #for a, P0_ni in P0_ani.items():
            #    kd.comm.broadcast(P0_ni, kpt0_rank)
        else:
            psit0_nG = kpt0.psit_nG
            P0_ani = kpt0.P_ani

        # Reference serial implementation (kept disabled, as in the
        # original, where it was a bare string literal):
        #
        # if paw.wfs.world.size == 1:
        #     for n, psit_G in enumerate(kpt.psit_nG):
        #         for n0, psit0_G in enumerate(psit0_nG):
        #             X_nn[n,n0] = np.vdot(psit_G, psit0_G)*gd.dv
        #     for a in range(len(paw.get_atoms())):
        #         P_ni, P0_ni, dO_ii = kpt.P_ani[a], P0_ani[a], paw.wfs.setups[a].dO_ii
        #         for n, P_i in enumerate(P_ni):
        #             for n0, P0_i in enumerate(P0_ni):
        #                 X_nn[n,n0] += np.vdot(P_i, np.dot(dO_ii, P0_i))

        # The operator callback ignores its argument and returns the next
        # item produced by SliceGen over the reference states; the default
        # argument binds a fresh generator for this k-point.
        X = lambda psit_nG, g=SliceGen(psit0_nG, operator): next(g)
        dX = lambda a, P_ni: np.dot(P0_ani[a], paw.wfs.setups[a].dO_ii)
        X_nn[:] = operator.calculate_matrix_elements(kpt.psit_nG,
                                                     kpt.P_ani, X, dX).T

    if broadcast:
        if bd.comm.rank == 0:
            gd.comm.broadcast(X_unn, 0)
        bd.comm.broadcast(X_unn, 0)

    return kpt0_s, X_unn
def dscf_kpoint_overlaps(paw, phasemod=True, broadcast=True):
    """Compute overlaps between each local k-point and a reference k-point.

    For every spin a ``GlobalKPoint`` is created at the k-point with the
    smallest norm in ``kd.ibzk_kc``.  For each k-point in
    ``paw.wfs.kpt_u`` the matrix elements between its states and the
    reference states are evaluated with a non-Hermitian ``MatrixOperator``.

    Parameters:
        paw: calculator object providing ``wfs`` and ``get_atoms()``.
        phasemod: if true, the reference wave functions and projections are
            multiplied by phase factors built from the k-point difference
            before the overlaps are taken (requires a complex dtype).
        broadcast: if true, the result is broadcast over ``gd.comm`` (on
            ranks with ``bd.comm.rank == 0``) and then over ``bd.comm``.

    Returns:
        ``(kpt0_s, X_unn)``: the list of reference k-points (one per spin)
        and an array of shape ``(kd.mynks, bd.nbands, bd.nbands)``.
    """
    bd = paw.wfs.bd
    gd = paw.wfs.gd
    kd = paw.wfs.kd
    operator = MatrixOperator(paw.wfs.orthoksl, hermitian=False)
    atoms = paw.get_atoms()

    # Find the kpoint with lowest kpt.k_c (closest to gamma point).
    # The k-point table is read from ``kd``, the same object the loop below
    # already uses for ``kd.ibzk_kc[k0]``.
    k0 = np.argmin(np.sum(kd.ibzk_kc**2, axis=1)**0.5)

    # Maintain list of a single global reference kpoint for each spin
    kpt0_s = []
    for s0 in range(kd.nspins):
        # NOTE(review): ``%`` binds tighter than ``-``, so this evaluates
        # k0 - (offset % nibzkpts); confirm that is the intended index.
        q0 = k0 - kd.get_offset() % kd.nibzkpts
        kpt0 = GlobalKPoint(None, s0, k0, q0, None)
        kpt0.update(paw.wfs)
        kpt0_s.append(kpt0)

    if phasemod:
        # Scaled grid point positions used for exponential with ibzk_kc
        # cf. wavefunctions.py lines 90-91 rev 4500(ca)
        # phase_cd = np.exp(2j * np.pi * sdisp_cd * ibzk_kc[k, :, np.newaxis])
        r_cG = gd.empty(3)
        for c, r_G in enumerate(r_cG):
            slice_c2G = [np.newaxis, np.newaxis, np.newaxis]
            slice_c2G[c] = slice(None)  # this means ':'
            # Index with a tuple: NumPy no longer accepts a list containing
            # None/slice objects as a multidimensional index.
            r_G[:] = np.arange(gd.beg_c[c], gd.end_c[c],
                               dtype=float)[tuple(slice_c2G)] / gd.N_c[c]

    X_unn = np.empty((kd.mynks, bd.nbands, bd.nbands), dtype=paw.wfs.dtype)
    for myu, kpt in enumerate(paw.wfs.kpt_u):
        u = kd.global_index(myu)
        s, k = kd.what_is(u)
        kpt0 = kpt0_s[s]
        X_nn = X_unn[myu]  # view into the result array

        if phasemod:
            assert paw.wfs.dtype == complex, 'Phase modification is complex!'

            k0_c = kd.ibzk_kc[k0]
            k_c = kd.ibzk_kc[k]
            dk_c = k_c - k0_c
            eirk_G = np.exp(2j * np.pi * np.sum(
                r_cG * dk_c[:, np.newaxis, np.newaxis, np.newaxis], axis=0))
            psit0_nG = eirk_G[np.newaxis, ...] * kpt0.psit_nG

            P0_ani = paw.wfs.pt.dict(bd.mynbands)
            spos_ac = atoms.get_scaled_positions() % 1.0
            for a, P0_ni in P0_ani.items():
                # Expanding the exponential exp(ikr)=exp(ikR)*exp(ik(r-R))
                # and neglecting the changed P_ani integral exp(ik(r-R))~1
                P0_ni[:] = np.exp(2j * np.pi * np.sum(
                    spos_ac[a] * (k_c - k0_c), axis=0)) * kpt0.P_ani[a]

            ## NB: No exp(ikr) approximate here, but has a parallelization bug
            #kpt0_rank, myu0 = kd.get_rank_and_index(kpt0.s, kpt0.k)
            #if kd.comm.rank == kpt0_rank:
            #    paw.wfs.pt.integrate(psit0_nG, P0_ani, kpt0.q)
            #for a, P0_ni in P0_ani.items():
            #    kd.comm.broadcast(P0_ni, kpt0_rank)
        else:
            psit0_nG = kpt0.psit_nG
            P0_ani = kpt0.P_ani

        # Reference serial implementation (kept disabled, as in the
        # original, where it was a bare string literal):
        #
        # if paw.wfs.world.size == 1:
        #     for n, psit_G in enumerate(kpt.psit_nG):
        #         for n0, psit0_G in enumerate(psit0_nG):
        #             X_nn[n,n0] = np.vdot(psit_G, psit0_G)*gd.dv
        #     for a in range(len(paw.get_atoms())):
        #         P_ni, P0_ni, dO_ii = kpt.P_ani[a], P0_ani[a], paw.wfs.setups[a].dO_ii
        #         for n, P_i in enumerate(P_ni):
        #             for n0, P0_i in enumerate(P0_ni):
        #                 X_nn[n,n0] += np.vdot(P_i, np.dot(dO_ii, P0_i))

        # The operator callback ignores its argument and returns the next
        # item produced by SliceGen over the reference states.  The builtin
        # next() replaces the Python 2 only ``g.next()`` method call.
        X = lambda psit_nG, g=SliceGen(psit0_nG, operator): next(g)
        dX = lambda a, P_ni: np.dot(P0_ani[a], paw.wfs.setups[a].dO_ii)
        X_nn[:] = operator.calculate_matrix_elements(kpt.psit_nG,
                                                     kpt.P_ani, X, dX).T

    if broadcast:
        if bd.comm.rank == 0:
            gd.comm.broadcast(X_unn, 0)
        bd.comm.broadcast(X_unn, 0)

    return kpt0_s, X_unn