def setUp(self):
    """Set up band/grid descriptors and Kohn-Sham layouts for this test."""
    # Subclasses must override these "virtual" class attributes.
    for name in ('dtype', 'parstride_bands'):
        assert getattr(self, name) is not None, 'Virtual "%s"!' % name

    # Partition the available CPUs into domain/band/k-point communicators.
    parsize, parsize_bands = create_parsize_maxbands(self.nbands, world.size)
    assert self.nbands % np.prod(parsize_bands) == 0
    domain_comm, kpt_comm, band_comm = distribute_cpus(
        parsize, parsize_bands, self.nspins, self.nibzkpts)

    # Band descriptor (strided or blocked grouping per parstride_bands):
    self.bd = BandDescriptor(self.nbands, band_comm, self.parstride_bands)

    # Grid descriptor on a shape-optimized grid of ~G**3 points:
    res, ngpts = shapeopt(100, self.G**3, 3, 0.2)
    self.gd = GridDescriptor(ngpts, self.h * np.array(ngpts),
                             (True, False, True), domain_comm, parsize)

    # Kohn-Sham layouts tying the band and grid descriptors together:
    self.ksl = self.create_kohn_sham_layouts()

    # What to do about kpoints?
    self.kpt_comm = kpt_comm
def setUp(self):
    """Set up band and grid descriptors for the requested boundary type."""
    # Subclasses must override this "virtual" attribute.
    for virtvar in ['boundaries']:
        assert getattr(self,virtvar) is not None, 'Virtual "%s"!' % virtvar

    # Partition CPUs into domain/band/k-point/block communicators.
    parsize_domain, parsize_bands = create_parsize_minbands(self.nbands,
                                                            world.size)
    assert self.nbands % np.prod(parsize_bands) == 0
    comms = distribute_cpus(parsize_domain,
                            parsize_bands, self.nspins, self.nibzkpts)
    # 'd'=domain, 'k'=k-point, 'b'=band, 'K'=block communicator.
    domain_comm, kpt_comm, band_comm, block_comm = \
        [comms[name] for name in 'dkbK']
    self.block_comm = block_comm

    # Set up band descriptor:
    self.bd = BandDescriptor(self.nbands, band_comm)

    # Set up grid descriptor:
    res, ngpts = shapeopt(300, self.G**3, 3, 0.2)
    cell_c = self.h * np.array(ngpts)
    # NOTE(review): 'zero'/'periodic' map to a scalar bool here while
    # 'mixed' is a 3-tuple — presumably GridDescriptor broadcasts the
    # scalar over all three axes; confirm against its signature.
    pbc_c = {'zero'    : False,
             'periodic': True,
             'mixed'   : (True, False, True)}[self.boundaries]
    self.gd = GridDescriptor(ngpts, cell_c, pbc_c, domain_comm,
                             parsize_domain)

    # What to do about kpoints?
    self.kpt_comm = kpt_comm
def setUp(self):
    """Set up descriptors and Kohn-Sham layouts (maxbands partitioning)."""
    # Subclasses must override these "virtual" attributes.
    for virtvar in ['dtype','parstride_bands']:
        assert getattr(self,virtvar) is not None, 'Virtual "%s"!' % virtvar

    parsize_domain, parsize_bands = create_parsize_maxbands(self.nbands,
                                                            world.size)
    # NOTE(review): sibling variants use np.prod(parsize_bands) here —
    # presumably parsize_bands is a plain int in this code path; confirm.
    assert self.nbands % parsize_bands == 0
    comms = distribute_cpus(parsize_domain,
                            parsize_bands, self.nspins, self.nibzkpts)
    # 'd'=domain, 'k'=k-point, 'b'=band, 'K'=block communicator.
    domain_comm, kpt_comm, band_comm, block_comm = \
        [comms[name] for name in 'dkbK']
    self.block_comm = block_comm

    # Set up band descriptor:
    self.bd = BandDescriptor(self.nbands, band_comm, self.parstride_bands)

    # Set up grid descriptor:
    res, ngpts = shapeopt(100, self.G**3, 3, 0.2)
    cell_c = self.h * np.array(ngpts)
    pbc_c = (True, False, True)
    self.gd = GridDescriptor(ngpts, cell_c, pbc_c, domain_comm,
                             parsize_domain)

    # Create Kohn-Sham layouts for these band and grid descriptors:
    self.ksl = self.create_kohn_sham_layouts()

    # What to do about kpoints?
    self.kpt_comm = kpt_comm
def setUp(self):
    """Build atoms, setups, k-point/band/grid descriptors for the test."""
    # Subclasses must override this "virtual" attribute.
    for virtvar in ['boundaries']:
        assert getattr(self, virtvar) is not None, 'Virtual "%s"!' % virtvar

    # Basic unit cell information:
    # (res is the shape-optimization residual; only N_c is used here.)
    res, N_c = shapeopt(100, self.G**3, 3, 0.2)
    #N_c = 4*np.round(np.array(N_c)/4) # makes domain decomposition easier
    cell_cv = self.h * np.diag(N_c)
    pbc_c = {'zero'    : (False,False,False),
             'periodic': (True,True,True),
             'mixed'   : (True, False, True)}[self.boundaries]

    # Create randomized gas-like atomic configuration on interim grid
    tmpgd = GridDescriptor(N_c, cell_cv, pbc_c)
    self.atoms = create_random_atoms(tmpgd)

    # Create setups
    Z_a = self.atoms.get_atomic_numbers()
    assert 1 == self.nspins
    self.setups = Setups(Z_a, p.setups, p.basis, p.lmax, xc)
    self.natoms = len(self.setups)

    # Decide how many kpoints to sample from the 1st Brillouin Zone
    kpts_c = np.ceil(
        (10 / Bohr) / np.sum(cell_cv**2, axis=1)**0.5).astype(int)
    # Only sample multiple k-points along periodic directions.
    kpts_c = tuple(kpts_c * pbc_c + 1 - pbc_c)
    self.bzk_kc = kpts2ndarray(kpts_c)

    # Set up k-point descriptor
    self.kd = KPointDescriptor(self.bzk_kc, self.nspins)
    self.kd.set_symmetry(self.atoms, self.setups, p.usesymm)

    # Set the dtype: real wave functions only at the Gamma point.
    if self.kd.gamma:
        self.dtype = float
    else:
        self.dtype = complex

    # Create communicators
    parsize, parsize_bands = self.get_parsizes()
    assert self.nbands % np.prod(parsize_bands) == 0
    domain_comm, kpt_comm, band_comm = distribute_cpus(
        parsize, parsize_bands, self.nspins, self.kd.nibzkpts)

    self.kd.set_communicator(kpt_comm)

    # Set up band descriptor:
    self.bd = BandDescriptor(self.nbands, band_comm)

    # Set up grid descriptor:
    self.gd = GridDescriptor(N_c, cell_cv, pbc_c, domain_comm, parsize)

    # Set up kpoint/spin descriptor (to be removed):
    self.kd_old = KPointDescriptorOld(self.nspins, self.kd.nibzkpts,
                                      kpt_comm, self.kd.gamma, self.dtype)
def setUp(self):
    """Build cell, k-points, communicators and descriptors for the test.

    The unit cell is chosen by self.celltype, the periodicity by
    self.boundaries; non-periodic directions are stretched (factor 4)
    and sampled with a single k-point.
    """
    # Subclasses must override these "virtual" attributes.
    for virtvar in ['boundaries', 'celltype']:
        assert getattr(self, virtvar) is not None, 'Virtual "%s"!' % virtvar

    # Basic unit cell information:
    pbc_c = {'zero'    : (False, False, False),
             'periodic': (True, True, True),
             'mixed'   : (True, False, True)}[self.boundaries]
    a, b = self.a, 2**0.5 * self.a
    cell_cv = {'general'   : np.array([[0, a, a],
                                       [a / 2, 0, a / 2],
                                       [a / 2, a / 2, 0]]),
               'rotated'   : np.array([[0, 0, b],
                                       [b / 2, 0, 0],
                                       [0, b / 2, 0]]),
               'inverted'  : np.array([[0, 0, b],
                                       [0, b / 2, 0],
                                       [b / 2, 0, 0]]),
               'orthogonal': np.diag([b, b / 2, b / 2])}[self.celltype]
    # Stretch non-periodic cell vectors by a factor 4 (1 if periodic).
    cell_cv = np.array([(4 - 3 * pbc) * c_v
                        for pbc, c_v in zip(pbc_c, cell_cv)])

    # Decide how many kpoints to sample from the 1st Brillouin Zone
    kpts_c = np.ceil((10 / Bohr) /
                     np.sum(cell_cv**2, axis=1)**0.5).astype(int)
    # Only sample multiple k-points along periodic directions.
    kpts_c = tuple(kpts_c * pbc_c + 1 - pbc_c)
    bzk_kc = kpts2ndarray(kpts_c)
    self.gamma = len(bzk_kc) == 1 and not bzk_kc[0].any()

    #p = InputParameters()
    #Z_a = self.atoms.get_atomic_numbers()
    #xcfunc = XC(p.xc)
    #setups = Setups(Z_a, p.setups, p.basis, p.lmax, xcfunc)
    #symmetry, weight_k, self.ibzk_kc = reduce_kpoints(self.atoms, bzk_kc,
    #                                                  setups, p.usesymm)

    self.ibzk_kc = bzk_kc.copy() # don't use symmetry reduction of kpoints
    self.nibzkpts = len(self.ibzk_kc)
    self.ibzk_kv = kpoint_convert(cell_cv, skpts_kc=self.ibzk_kc)

    # Parse parallelization parameters and create suitable communicators.
    #parsize, parsize_bands = create_parsize_minbands(self.nbands, world.size)
    parsize, parsize_bands = world.size // gcd(world.size, self.nibzkpts), 1
    assert self.nbands % np.prod(parsize_bands) == 0
    domain_comm, kpt_comm, band_comm = distribute_cpus(
        parsize, parsize_bands, self.nspins, self.nibzkpts)

    # Set up band descriptor:
    self.bd = BandDescriptor(self.nbands, band_comm)

    # Set up grid descriptor:
    N_c = np.round(np.sum(cell_cv**2, axis=1)**0.5 / self.h)
    N_c += 4 - N_c % 4  # makes domain decomposition easier
    self.gd = GridDescriptor(N_c, cell_cv, pbc_c, domain_comm, parsize)
    self.assertEqual(self.gamma, np.all(~self.gd.pbc_c))

    # What to do about kpoints?
    self.kpt_comm = kpt_comm

    if debug and world.rank == 0:
        comm_sizes = tuple([comm.size for comm in [world, self.bd.comm,
                                                   self.gd.comm,
                                                   self.kpt_comm]])
        # Use the print() function (was a Python 2 print statement) so
        # this module stays consistent with the rest of the file and
        # runs under Python 3.
        print('%d world, %d band, %d domain, %d kpt' % comm_sizes)
def setUp(self):
    """Build k-points, descriptors, atoms and setups for the test.

    self.equipartition selects the k-point sampling: 'even' gives a
    (12,1,2) grid, 'prime' a (23,1,1) grid.
    """
    # Subclasses must override this "virtual" attribute.
    for virtvar in ['equipartition']:
        assert getattr(self,virtvar) is not None, 'Virtual "%s"!' % virtvar

    kpts = {'even' : (12,1,2),
            'prime': (23,1,1)}[self.equipartition]

    #primes = [i for i in xrange(50,1,-1) if ~np.any(i%np.arange(2,i)==0)]
    bzk_kc = kpts2ndarray(kpts)
    # Symmetry reduction must be disabled for this test.
    # (Fixed idiom: compare to None with 'is', not '==' — PEP 8.)
    assert p.usesymm is None
    self.nibzkpts = len(bzk_kc)

    #parsize, parsize_bands = create_parsize_minbands(self.nbands, world.size)
    parsize, parsize_bands = 1, 1 #XXX
    assert self.nbands % np.prod(parsize_bands) == 0
    domain_comm, kpt_comm, band_comm = distribute_cpus(
        parsize, parsize_bands, self.nspins, self.nibzkpts)

    # Set up band descriptor:
    self.bd = BandDescriptor(self.nbands, band_comm,
                             p.parallel['stridebands'])

    # Set up grid descriptor:
    res, ngpts = shapeopt(300, self.G**3, 3, 0.2)
    cell_c = self.h * np.array(ngpts)
    pbc_c = (True, False, True)
    self.gd = GridDescriptor(ngpts, cell_c, pbc_c, domain_comm, parsize)

    # Create randomized gas-like atomic configuration
    self.atoms = create_random_atoms(self.gd)

    # Create setups
    Z_a = self.atoms.get_atomic_numbers()
    self.setups = Setups(Z_a, p.setups, p.basis, p.lmax, xc)
    self.natoms = len(self.setups)

    # Set up kpoint descriptor:
    self.kd = KPointDescriptor(bzk_kc, self.nspins)
    self.kd.set_symmetry(self.atoms, self.setups, p.usesymm)
    self.kd.set_communicator(kpt_comm)
class UTBandParallelSetup(TestCase):
    """Set up a simple band-parallel calculation and verify its
    band descriptor, grid descriptor and Kohn-Sham layouts."""

    # Number of bands
    nbands = 36

    # Spin-paired, single kpoint
    nspins = 1
    nibzkpts = 1

    # Strided or blocked groups (virtual — set by subclasses)
    parstride_bands = None

    # Mean spacing and number of grid points per axis (G x G x G)
    h = 1.0 / Bohr
    G = 20

    # Wavefunction data type (virtual — set by subclasses)
    dtype = None

    def setUp(self):
        """Create band/grid descriptors and Kohn-Sham layouts."""
        # Subclasses must override these "virtual" attributes.
        for virtvar in ['dtype', 'parstride_bands']:
            assert getattr(self, virtvar) is not None, \
                'Virtual "%s"!' % virtvar

        parsize, parsize_bands = create_parsize_maxbands(
            self.nbands, world.size)
        assert self.nbands % np.prod(parsize_bands) == 0
        domain_comm, kpt_comm, band_comm = distribute_cpus(
            parsize, parsize_bands, self.nspins, self.nibzkpts)

        # Set up band descriptor:
        self.bd = BandDescriptor(self.nbands, band_comm,
                                 self.parstride_bands)

        # Set up grid descriptor:
        res, ngpts = shapeopt(100, self.G**3, 3, 0.2)
        cell_c = self.h * np.array(ngpts)
        pbc_c = (True, False, True)
        self.gd = GridDescriptor(ngpts, cell_c, pbc_c, domain_comm, parsize)

        # Create Kohn-Sham layouts for these band and grid descriptors:
        self.ksl = self.create_kohn_sham_layouts()

        # What to do about kpoints?
        self.kpt_comm = kpt_comm

    def tearDown(self):
        # Drop references so descriptors/communicators can be reclaimed.
        del self.bd, self.gd, self.ksl, self.kpt_comm

    def create_kohn_sham_layouts(self):
        """Return (non-BLACS) band layouts for self.gd/self.bd."""
        return BandLayouts(self.gd, self.bd, self.dtype)

    # =================================

    def verify_comm_sizes(self):
        """Record communicator sizes and check k-point divisibility."""
        if world.size == 1:
            return
        comm_sizes = tuple([comm.size for comm in [world, self.bd.comm,
                                                   self.gd.comm,
                                                   self.kpt_comm]])
        self._parinfo = '%d world, %d band, %d domain, %d kpt' % comm_sizes
        self.assertEqual((self.nspins * self.nibzkpts) %
                         self.kpt_comm.size, 0)

    def verify_kohn_sham_layouts(self):
        # TODO do more here :)
        self.assertFalse(self.ksl.using_blacs)
        self.assertTrue(self.ksl.bd is self.bd)
        self.assertTrue(self.ksl.gd is self.gd)

    def verify_band_stride_related(self):
        # Verify that (q1+q2)%B-q1 falls in ]-B;Q[ where Q=B//2+1
        for B in range(1, 256):
            Q = B // 2 + 1
            # Vectorized equivalent of the commented-out double loop:
            #dqs = []
            #for q1 in range(B):
            #    for q2 in range(Q):
            #        dq = (q1+q2)%B-q1
            #        dqs.append(dq)
            #dqs = np.array(dqs)
            q1s = np.arange(B)[:, np.newaxis]
            q2s = np.arange(Q)[np.newaxis, :]
            dqs = (q1s + q2s) % B - q1s
            self.assertEqual(dqs.min(), -B + 1)
            self.assertEqual(dqs.max(), Q - 1)

    def verify_band_indexing_consistency(self):
        """who_has and global_index must be mutual inverses."""
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            self.assertEqual(self.bd.global_index(myn, band_rank), n)

        for band_rank in range(self.bd.comm.size):
            for myn in range(self.bd.mynbands):
                n = self.bd.global_index(myn, band_rank)
                self.assertTrue(self.bd.who_has(n) == (band_rank, myn))

    def verify_band_ranking_consistency(self):
        """get_band_ranks/get_band_indices must agree with global_index."""
        rank_n = self.bd.get_band_ranks()

        for band_rank in range(self.bd.comm.size):
            my_band_indices = self.bd.get_band_indices(band_rank)
            matches = np.argwhere(rank_n == band_rank).ravel()
            self.assertTrue((matches == my_band_indices).all())

            for myn in range(self.bd.mynbands):
                n = self.bd.global_index(myn, band_rank)
                self.assertEqual(my_band_indices[myn], n)
# Set up communicators: comms = distribute_cpus(parsize_domain=D, parsize_bands=B, nspins=1, nibzkpts=2) domain_comm, kpt_comm, band_comm, block_comm = \ [comms[name] for name in ['d', 'k', 'b', 'K']] assert world.size == D * B * kpt_comm.size if world.rank == 0: print('MPI: %d domains, %d band groups, %d kpts' % (domain_comm.size, band_comm.size, kpt_comm.size)) # Set up band and grid descriptors: bd = BandDescriptor(N, band_comm, False) gd = GridDescriptor((G, G, G), (a, a, a), True, domain_comm, parsize_c=D) mcpus, ncpus, blocksize = 2, 2, 6 def blacs_diagonalize(ksl, H_Nn, U_nN, eps_n): # H_Nn must be lower triangular or symmetric, # but not upper triangular. # U_nN will be symmetric # U_Nn needs to be simultaneously compatible with: # 1. outdescriptor # 2. broadcast with gd.comm # We will do this with a dummy buffer U2_nN bmd = ksl.new_descriptor()
def dscf_collapse_orbitals(paw, nbands_max='occupied', f_tol=1e-4,
                           verify_density=True, nt_tol=1e-5, D_tol=1e-3):
    """Inject Delta-SCF orbitals into the Kohn-Sham band structure.

    Reconstructs the Delta-SCF orbitals for every k-point, splices their
    occupations, eigenvalues, projections and pseudo wave functions in
    at band index n0 (the first band from which all occupations are
    below f_tol), and replaces the band descriptor by one with
    nbands_max bands.

    Parameters:

    paw: calculator whose wave functions/occupations are modified.
    nbands_max: total number of bands to keep. 'occupied' keeps
        n0 + norbitals bands; a negative integer keeps that many extra
        bands on top of n0 + norbitals.
    f_tol: occupations below this value count as negligible.
    verify_density: if True, re-calculate the pseudo density and atomic
        density matrices afterwards and assert they are unchanged
        within nt_tol and D_tol respectively.
    """
    bd, gd, kd = paw.wfs.bd, paw.wfs.gd, paw.wfs.kd
    if bd.comm.size != 1:
        raise NotImplementedError('Undefined action for band '
                                  'parallelization.')

    # Gather occupation numbers for all spins/k-points on every rank.
    f_skn = np.empty((kd.nspins, kd.nibzkpts, bd.nbands), dtype=float)
    for s, f_kn in enumerate(f_skn):
        for k, f_n in enumerate(f_kn):
            kpt_rank, myu = kd.get_rank_and_index(s, k)
            if kd.comm.rank == kpt_rank:
                f_n[:] = paw.wfs.kpt_u[myu].f_n
            kd.comm.broadcast(f_n, kpt_rank)

    # Find smallest band index, from which all bands have negligible
    # occupations.
    n0 = np.argmax(f_skn < f_tol, axis=-1).max()
    assert np.all(f_skn[..., n0:] < f_tol)  # XXX use f_skn[...,n0:].sum()<f_tol

    # Read the number of Delta-SCF orbitals
    norbitals = paw.occupations.norbitals

    if debug:
        mpi_debug('n0=%d, norbitals=%d, bd:%d, gd:%d, kd:%d'
                  % (n0, norbitals, bd.comm.size, gd.comm.size,
                     kd.comm.size))

    # BUGFIX: test the string sentinel before the numeric comparison —
    # 'occupied' < 0 raises TypeError on Python 3 (and 'occupied' is
    # the default). Integer arguments behave exactly as before.
    if nbands_max == 'occupied':
        nbands_max = n0 + norbitals
    elif nbands_max < 0:
        # Negative values mean "keep this many extra bands" on top.
        nbands_max = n0 + norbitals - nbands_max

    assert nbands_max >= n0 + norbitals, \
        'Too few bands to include occupations.'
    ncut = nbands_max - norbitals

    if debug:
        mpi_debug('nbands_max=%d' % nbands_max)

    paw.wfs.initialize_wave_functions_from_restart_file()  # hurts memory
    for kpt in paw.wfs.kpt_u:
        mol = kpt.P_ani.keys()  # XXX stupid
        (f_o, eps_o, wf_oG, P_aoi,) = \
            dscf_reconstruct_orbitals_k_point(paw, norbitals, mol, kpt)
        assert np.abs(f_o - 1).max() < 1e-9, \
            'Orbitals must be properly normalized.'
        f_o = kpt.ne_o  # actual occupation numbers

        # Crop band-data and inject data for Delta-SCF orbitals at n0.
        kpt.f_n = np.hstack((kpt.f_n[:n0], f_o, kpt.f_n[n0:ncut]))
        kpt.eps_n = np.hstack((kpt.eps_n[:n0], eps_o, kpt.eps_n[n0:ncut]))
        for a, P_ni in kpt.P_ani.items():
            kpt.P_ani[a] = np.vstack((P_ni[:n0], P_aoi[a], P_ni[n0:ncut]))

        old_psit_nG = kpt.psit_nG
        kpt.psit_nG = gd.empty(nbands_max, dtype=old_psit_nG.dtype)

        if isinstance(old_psit_nG, FileReference):
            assert old_psit_nG.shape[-3:] == wf_oG.shape[-3:], \
                'Shape mismatch!'

            # Read band-by-band to save memory as full psit_nG may be large
            for n, psit_G in enumerate(kpt.psit_nG):
                if n < n0:
                    full_psit_G = old_psit_nG[n]
                elif n in range(n0, n0 + norbitals):
                    full_psit_G = wf_oG[n - n0]
                else:
                    full_psit_G = old_psit_nG[n - norbitals]
                gd.distribute(full_psit_G, psit_G)
        else:
            kpt.psit_nG[:n0] = old_psit_nG[:n0]
            kpt.psit_nG[n0:n0 + norbitals] = wf_oG
            kpt.psit_nG[n0 + norbitals:] = old_psit_nG[n0:ncut]

        del kpt.ne_o, kpt.c_on, old_psit_nG
    del paw.occupations.norbitals

    # Change various parameters related to new number of bands
    paw.wfs.bd = BandDescriptor(nbands_max, bd.comm, bd.strided)
    if paw.wfs.eigensolver:
        paw.wfs.eigensolver.initialized = False
    del bd

    # Crop convergence criteria nbands_converge to new number of bands
    par = paw.input_parameters
    if 'convergence' in par:
        cc = par['convergence']
        if 'bands' in cc:
            cc['bands'] = min(nbands_max, cc['bands'])

    # Replace occupations class with a fixed variant (gets the magmom
    # right) XXX?!?
    fermilevel, magmom = (paw.occupations.fermilevel,
                          paw.occupations.magmom)
    paw.occupations = FermiDirac(paw.occupations.width * Hartree,
                                 paw.occupations.fixmagmom)
    paw.occupations.fermilevel = fermilevel
    paw.occupations.magmom = magmom
    del fermilevel, magmom

    # For good measure, self-consistency information should be destroyed
    paw.scf.reset()

    if verify_density:
        paw.initialize_positions()

        # Re-calculate pseudo density and watch for changes
        old_nt_sG = paw.density.nt_sG.copy()
        paw.density.calculate_pseudo_density(paw.wfs)
        if debug:
            mpi_debug('delta-density: %g'
                      % np.abs(old_nt_sG - paw.density.nt_sG).max())
        assert np.abs(paw.density.nt_sG - old_nt_sG).max() < nt_tol, \
            'Density changed!'

        # Re-calculate atomic density matrices and watch for changes
        old_D_asp = {}
        for a, D_sp in paw.density.D_asp.items():
            old_D_asp[a] = D_sp.copy()
        paw.wfs.calculate_atomic_density_matrices(paw.density.D_asp)
        if debug:
            mpi_debug('delta-D_asp: %g'
                      % max([0] + [np.abs(D_sp - old_D_asp[a]).max()
                                   for a, D_sp in
                                   paw.density.D_asp.items()]))
        for a, D_sp in paw.density.D_asp.items():
            assert np.abs(D_sp - old_D_asp[a]).max() < D_tol, \
                'Atom %d changed!' % a
class UTBandParallelSetup(TestCase):
    """Set up a simple band-parallel calculation (block-communicator
    variant) and verify its descriptors and Kohn-Sham layouts."""

    # Number of bands
    nbands = 36

    # Spin-paired, single kpoint
    nspins = 1
    nibzkpts = 1

    # Strided or blocked groups (virtual — set by subclasses)
    parstride_bands = None

    # Mean spacing and number of grid points per axis (G x G x G)
    h = 1.0 / Bohr
    G = 20

    # Wavefunction data type (virtual — set by subclasses)
    dtype = None

    def setUp(self):
        """Create band/grid descriptors and Kohn-Sham layouts."""
        # Subclasses must override these "virtual" attributes.
        for virtvar in ['dtype','parstride_bands']:
            assert getattr(self,virtvar) is not None, \
                'Virtual "%s"!' % virtvar

        parsize_domain, parsize_bands = \
            create_parsize_maxbands(self.nbands, world.size)
        assert self.nbands % parsize_bands == 0
        comms = distribute_cpus(parsize_domain,
                                parsize_bands, self.nspins, self.nibzkpts)
        # 'd'=domain, 'k'=k-point, 'b'=band, 'K'=block communicator.
        domain_comm, kpt_comm, band_comm, block_comm = \
            [comms[name] for name in 'dkbK']
        self.block_comm = block_comm

        # Set up band descriptor:
        self.bd = BandDescriptor(self.nbands, band_comm,
                                 self.parstride_bands)

        # Set up grid descriptor:
        res, ngpts = shapeopt(100, self.G**3, 3, 0.2)
        cell_c = self.h * np.array(ngpts)
        pbc_c = (True, False, True)
        self.gd = GridDescriptor(ngpts, cell_c, pbc_c, domain_comm,
                                 parsize_domain)

        # Create Kohn-Sham layouts for these band and grid descriptors:
        self.ksl = self.create_kohn_sham_layouts()

        # What to do about kpoints?
        self.kpt_comm = kpt_comm

    def tearDown(self):
        # Drop references so descriptors/communicators can be reclaimed.
        del self.bd, self.gd, self.ksl, self.kpt_comm, self.block_comm

    def create_kohn_sham_layouts(self):
        """Return (non-BLACS) band layouts for self.gd/self.bd."""
        return BandLayouts(self.gd, self.bd, self.block_comm, self.dtype)

    # =================================

    def verify_comm_sizes(self):
        """Record communicator sizes and check k-point divisibility."""
        if world.size == 1:
            return
        comm_sizes = tuple([comm.size for comm in [world, self.bd.comm,
                                                   self.gd.comm,
                                                   self.kpt_comm]])
        self._parinfo = '%d world, %d band, %d domain, %d kpt' % comm_sizes
        self.assertEqual((self.nspins*self.nibzkpts) %
                         self.kpt_comm.size, 0)

    def verify_kohn_sham_layouts(self):
        # TODO do more here :)
        self.assertFalse(self.ksl.using_blacs)
        self.assertTrue(self.ksl.bd is self.bd)
        self.assertTrue(self.ksl.gd is self.gd)

    def verify_band_stride_related(self):
        # Verify that (q1+q2)%B-q1 falls in ]-B;Q[ where Q=B//2+1
        for B in range(1,256):
            Q = B//2+1
            # Vectorized equivalent of the commented-out double loop:
            #dqs = []
            #for q1 in range(B):
            #    for q2 in range(Q):
            #        dq = (q1+q2)%B-q1
            #        dqs.append(dq)
            #dqs = np.array(dqs)
            q1s = np.arange(B)[:,np.newaxis]
            q2s = np.arange(Q)[np.newaxis,:]
            dqs = (q1s+q2s)%B-q1s
            self.assertEqual(dqs.min(), -B+1)
            self.assertEqual(dqs.max(), Q-1)

    def verify_band_indexing_consistency(self):
        """who_has and global_index must be mutual inverses."""
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            self.assertEqual(self.bd.global_index(myn, band_rank), n)

        for band_rank in range(self.bd.comm.size):
            for myn in range(self.bd.mynbands):
                n = self.bd.global_index(myn, band_rank)
                self.assertTrue(self.bd.who_has(n) == (band_rank, myn))

    def verify_band_ranking_consistency(self):
        """get_band_ranks/get_band_indices must agree with global_index."""
        rank_n = self.bd.get_band_ranks()

        for band_rank in range(self.bd.comm.size):
            my_band_indices = self.bd.get_band_indices(band_rank)
            matches = np.argwhere(rank_n == band_rank).ravel()
            self.assertTrue((matches == my_band_indices).all())

            for myn in range(self.bd.mynbands):
                n = self.bd.global_index(myn, band_rank)
                self.assertEqual(my_band_indices[myn], n)
def create_wave_functions(self, mode, realspace,
                          nspins, nbands, nao, nvalence, setups,
                          magmom_a, cell_cv, pbc_c):
    """Create self.wfs for the given calculation mode.

    Builds the k-point descriptor, the parallel communicators, band
    and grid descriptors and the Kohn-Sham layouts appropriate for
    *mode* ('lcao', 'fd' or 'pw'), then instantiates the mode's wave
    functions object.
    """
    par = self.parameters

    bzkpts_kc = kpts2ndarray(par.kpts, self.atoms)
    kd = KPointDescriptor(bzkpts_kc, nspins)

    self.timer.start('Set symmetry')
    kd.set_symmetry(self.atoms, self.symmetry, comm=self.world)
    self.timer.stop('Set symmetry')

    self.log(kd)

    parallelization = mpi.Parallelization(self.world,
                                          nspins * kd.nibzkpts)

    parsize_kpt = self.parallel['kpt']
    parsize_domain = self.parallel['domain']
    parsize_bands = self.parallel['band']

    ndomains = None
    if parsize_domain is not None:
        ndomains = np.prod(parsize_domain)
    if mode.name == 'pw':
        # Plane-wave mode is incompatible with domain decomposition.
        if ndomains is not None and ndomains > 1:
            raise ValueError('Planewave mode does not support '
                             'domain decomposition.')
        ndomains = 1
    parallelization.set(kpt=parsize_kpt,
                        domain=ndomains,
                        band=parsize_bands)
    comms = parallelization.build_communicators()
    # 'd'=domain, 'k'=k-point, 'b'=band; 'D'/'K' are the combined
    # kpt+band and domain+band communicators respectively.
    domain_comm = comms['d']
    kpt_comm = comms['k']
    band_comm = comms['b']
    kptband_comm = comms['D']
    domainband_comm = comms['K']

    self.comms = comms

    if par.gpts is not None:
        if par.h is not None:
            raise ValueError("""You can't use both "gpts" and "h"!""")
        N_c = np.array(par.gpts)
    else:
        h = par.h
        if h is not None:
            h /= Bohr
        N_c = get_number_of_grid_points(cell_cv, h, mode, realspace,
                                        kd.symmetry)

    self.symmetry.check_grid(N_c)

    kd.set_communicator(kpt_comm)

    parstride_bands = self.parallel['stridebands']

    # Unfortunately we need to remember that we adjusted the
    # number of bands so we can print a warning if it differs
    # from the number specified by the user.  (The number can
    # be inferred from the input parameters, but it's tricky
    # because we allow negative numbers)
    self.nbands_parallelization_adjustment = -nbands % band_comm.size
    nbands += self.nbands_parallelization_adjustment

    bd = BandDescriptor(nbands, band_comm, parstride_bands)

    # Construct grid descriptor for coarse grids for wave functions:
    gd = self.create_grid_descriptor(N_c, cell_cv, pbc_c,
                                     domain_comm, parsize_domain)

    # Real wave functions only for Gamma-point calculations without
    # time-dependence.
    if hasattr(self, 'time') or mode.force_complex_dtype:
        dtype = complex
    else:
        if kd.gamma:
            dtype = float
        else:
            dtype = complex

    wfs_kwargs = dict(gd=gd, nvalence=nvalence, setups=setups,
                      bd=bd, dtype=dtype, world=self.world, kd=kd,
                      kptband_comm=kptband_comm, timer=self.timer)

    if self.parallel['sl_auto']:
        # Choose scalapack parallelization automatically

        for key, val in self.parallel.items():
            if (key.startswith('sl_') and key != 'sl_auto' and
                    val is not None):
                raise ValueError("Cannot use 'sl_auto' together "
                                 "with '%s'" % key)
        max_scalapack_cpus = bd.comm.size * gd.comm.size
        nprow = max_scalapack_cpus
        npcol = 1

        # Get a sort of reasonable number of columns/rows
        while npcol < nprow and nprow % 2 == 0:
            npcol *= 2
            nprow //= 2
        assert npcol * nprow == max_scalapack_cpus

        # ScaLAPACK creates trouble if there aren't at least a few
        # whole blocks; choose block size so there will always be
        # several blocks.  This will crash for small test systems,
        # but so will ScaLAPACK in any case
        blocksize = min(-(-nbands // 4), 64)
        sl_default = (nprow, npcol, blocksize)
    else:
        sl_default = self.parallel['sl_default']

    if mode.name == 'lcao':
        # Layouts used for general diagonalizer
        sl_lcao = self.parallel['sl_lcao']
        if sl_lcao is None:
            sl_lcao = sl_default
        lcaoksl = get_KohnSham_layouts(sl_lcao, 'lcao',
                                       gd, bd, domainband_comm, dtype,
                                       nao=nao, timer=self.timer)

        self.wfs = mode(lcaoksl, **wfs_kwargs)
    elif mode.name == 'fd' or mode.name == 'pw':
        # buffer_size keyword only relevant for fdpw
        buffer_size = self.parallel['buffer_size']
        # Layouts used for diagonalizer
        sl_diagonalize = self.parallel['sl_diagonalize']
        if sl_diagonalize is None:
            sl_diagonalize = sl_default
        diagksl = get_KohnSham_layouts(sl_diagonalize, 'fd',  # XXX
                                       # choice of key 'fd' not so nice
                                       gd, bd, domainband_comm, dtype,
                                       buffer_size=buffer_size,
                                       timer=self.timer)

        # Layouts used for orthonormalizer
        sl_inverse_cholesky = self.parallel['sl_inverse_cholesky']
        if sl_inverse_cholesky is None:
            sl_inverse_cholesky = sl_default
        if sl_inverse_cholesky != sl_diagonalize:
            message = 'sl_inverse_cholesky != sl_diagonalize ' \
                'is not implemented.'
            raise NotImplementedError(message)
        orthoksl = get_KohnSham_layouts(sl_inverse_cholesky, 'fd',
                                        gd, bd, domainband_comm, dtype,
                                        buffer_size=buffer_size,
                                        timer=self.timer)

        # Use (at most) all available LCAO for initialization
        lcaonbands = min(nbands, nao // band_comm.size * band_comm.size)

        try:
            lcaobd = BandDescriptor(lcaonbands, band_comm,
                                    parstride_bands)
        except RuntimeError:
            initksl = None
        else:
            # Layouts used for general diagonalizer
            # (LCAO initialization)
            sl_lcao = self.parallel['sl_lcao']
            if sl_lcao is None:
                sl_lcao = sl_default
            initksl = get_KohnSham_layouts(sl_lcao, 'lcao',
                                           gd, lcaobd, domainband_comm,
                                           dtype, nao=nao,
                                           timer=self.timer)

        self.wfs = mode(diagksl, orthoksl, initksl, **wfs_kwargs)
    else:
        self.wfs = mode(self, **wfs_kwargs)

    self.log(self.wfs, '\n')
def create_wave_functions(self, mode, realspace,
                          nspins, collinear, nbands, nao, nvalence,
                          setups, cell_cv, pbc_c):
    """Create self.wfs for the given calculation mode.

    Builds the k-point descriptor, the parallel communicators, band
    and grid descriptors and the Kohn-Sham layouts appropriate for
    *mode* ('lcao', 'fd' or 'pw'), then instantiates the mode's wave
    functions object.  Non-collinear calculations force complex wave
    functions.
    """
    par = self.parameters

    kd = self.create_kpoint_descriptor(nspins)

    parallelization = mpi.Parallelization(self.world,
                                          nspins * kd.nibzkpts)

    parsize_kpt = self.parallel['kpt']
    parsize_domain = self.parallel['domain']
    parsize_bands = self.parallel['band']

    ndomains = None
    if parsize_domain is not None:
        ndomains = np.prod(parsize_domain)
    parallelization.set(kpt=parsize_kpt,
                        domain=ndomains,
                        band=parsize_bands)
    comms = parallelization.build_communicators()
    # 'd'=domain, 'k'=k-point, 'b'=band; 'D'/'K' are the combined
    # kpt+band and domain+band communicators respectively.
    domain_comm = comms['d']
    kpt_comm = comms['k']
    band_comm = comms['b']
    kptband_comm = comms['D']
    domainband_comm = comms['K']
    self.comms = comms

    if par.gpts is not None:
        if par.h is not None:
            raise ValueError("""You can't use both "gpts" and "h"!""")
        N_c = np.array(par.gpts)
    else:
        h = par.h
        if h is not None:
            h /= Bohr
        N_c = get_number_of_grid_points(cell_cv, h, mode, realspace,
                                        kd.symmetry)
    self.symmetry.check_grid(N_c)

    kd.set_communicator(kpt_comm)

    parstride_bands = self.parallel['stridebands']

    bd = BandDescriptor(nbands, band_comm, parstride_bands)

    # Construct grid descriptor for coarse grids for wave functions:
    gd = self.create_grid_descriptor(N_c, cell_cv, pbc_c,
                                     domain_comm, parsize_domain)

    # Real wave functions only for collinear Gamma-point calculations
    # without time-dependence.
    if hasattr(self, 'time') or mode.force_complex_dtype or not collinear:
        dtype = complex
    else:
        if kd.gamma:
            dtype = float
        else:
            dtype = complex

    wfs_kwargs = dict(gd=gd, nvalence=nvalence, setups=setups,
                      bd=bd, dtype=dtype, world=self.world, kd=kd,
                      kptband_comm=kptband_comm, timer=self.timer)

    if self.parallel['sl_auto']:
        # Choose scalapack parallelization automatically

        for key, val in self.parallel.items():
            if (key.startswith('sl_') and key != 'sl_auto' and
                    val is not None):
                raise ValueError("Cannot use 'sl_auto' together "
                                 "with '%s'" % key)

        max_scalapack_cpus = bd.comm.size * gd.comm.size
        sl_default = suggest_blocking(nbands, max_scalapack_cpus)
    else:
        sl_default = self.parallel['sl_default']

    if mode.name == 'lcao':
        assert collinear
        # Layouts used for general diagonalizer
        sl_lcao = self.parallel['sl_lcao']
        if sl_lcao is None:
            sl_lcao = sl_default

        elpasolver = None
        if self.parallel['use_elpa']:
            elpasolver = self.parallel['elpasolver']
        lcaoksl = get_KohnSham_layouts(sl_lcao, 'lcao', gd, bd,
                                       domainband_comm, dtype,
                                       nao=nao, timer=self.timer,
                                       elpasolver=elpasolver)

        self.wfs = mode(lcaoksl, **wfs_kwargs)
    elif mode.name == 'fd' or mode.name == 'pw':
        # Use (at most) all available LCAO for initialization
        lcaonbands = min(nbands, nao)

        try:
            lcaobd = BandDescriptor(lcaonbands, band_comm,
                                    parstride_bands)
        except RuntimeError:
            initksl = None
        else:
            # Layouts used for general diagonalizer
            # (LCAO initialization)
            sl_lcao = self.parallel['sl_lcao']
            if sl_lcao is None:
                sl_lcao = sl_default
            initksl = get_KohnSham_layouts(sl_lcao, 'lcao',
                                           gd, lcaobd, domainband_comm,
                                           dtype, nao=nao,
                                           timer=self.timer)

        reuse_wfs_method = par.experimental.get('reuse_wfs_method', 'paw')
        # Fall back from sl_diagonalize to sl_default to serial (1,1,None).
        sl = (domainband_comm, ) + (self.parallel['sl_diagonalize'] or
                                    sl_default or
                                    (1, 1, None))
        self.wfs = mode(sl, initksl,
                        reuse_wfs_method=reuse_wfs_method,
                        collinear=collinear,
                        **wfs_kwargs)
    else:
        self.wfs = mode(self, collinear=collinear, **wfs_kwargs)

    self.log(self.wfs, '\n')
def initialize(self, atoms=None):
    """Inexpensive initialization.

    Builds (or reuses) all major calculator components from the input
    parameters: communicator, XC functional, mode, setups, k-point
    descriptor, occupations, SCF loop, wave functions, density and
    hamiltonian.  Side effects: assigns many attributes on ``self`` and
    sets ``self.initialized = True`` at the end.
    """
    if atoms is None:
        atoms = self.atoms
    else:
        # Save the state of the atoms:
        self.atoms = atoms.copy()

    par = self.input_parameters

    # Resolve the communicator: None -> world; a communicator-like object
    # is used as-is; otherwise it is treated as a list of ranks.
    world = par.communicator
    if world is None:
        world = mpi.world
    elif hasattr(world, 'new_communicator'):
        # Check for whether object has correct type already
        #
        # Using isinstance() is complicated because of all the
        # combinations, serial/parallel/debug...
        pass
    else:
        # world should be a list of ranks:
        world = mpi.world.new_communicator(np.asarray(world))
    self.wfs.world = world

    if 'txt' in self._changed_keywords:
        self.set_txt(par.txt)
    self.verbose = par.verbose

    natoms = len(atoms)

    cell_cv = atoms.get_cell() / Bohr  # cell in atomic units
    pbc_c = atoms.get_pbc()
    Z_a = atoms.get_atomic_numbers()
    magmom_av = atoms.get_initial_magnetic_moments()

    self.check_atoms()

    # Generate new xc functional only when it is reset by set
    # XXX sounds like this should use the _changed_keywords dictionary.
    if self.hamiltonian is None or self.hamiltonian.xc is None:
        if isinstance(par.xc, str):
            xc = XC(par.xc)
        else:
            xc = par.xc
    else:
        xc = self.hamiltonian.xc

    # Resolve the mode: accept a string shorthand or a mode object with
    # a 'name' attribute.
    mode = par.mode
    if mode == 'fd':
        mode = FD()
    elif mode == 'pw':
        mode = pw.PW()
    elif mode == 'lcao':
        mode = LCAO()
    else:
        assert hasattr(mode, 'name'), str(mode)

    if xc.orbital_dependent and mode.name == 'lcao':
        raise NotImplementedError('LCAO mode does not support '
                                  'orbital-dependent XC functionals.')

    # Plane-wave mode never works in real space.
    if par.realspace is None:
        realspace = (mode.name != 'pw')
    else:
        realspace = par.realspace
        if mode.name == 'pw':
            assert not realspace

    # Default Fourier filter for the setups (not used in pw mode).
    if par.filter is None and mode.name != 'pw':
        gamma = 1.6
        if par.gpts is not None:
            h = ((np.linalg.inv(cell_cv)**2).sum(0)**-0.5 / par.gpts).max()
        else:
            h = (par.h or 0.2) / Bohr

        def filter(rgd, rcut, f_r, l=0):
            gcut = np.pi / h - 2 / rcut / gamma
            f_r[:] = rgd.filter(f_r, rcut * gamma, gcut, l)
    else:
        filter = par.filter

    setups = Setups(Z_a, par.setups, par.basis, par.lmax, xc,
                    filter, world)

    # 1-D magnetic moments mean collinear spin: store them as the
    # z-component of (natoms, 3) vectors.
    if magmom_av.ndim == 1:
        collinear = True
        magmom_av, magmom_a = np.zeros((natoms, 3)), magmom_av
        magmom_av[:, 2] = magmom_a
    else:
        collinear = False

    magnetic = magmom_av.any()

    spinpol = par.spinpol
    if par.hund:
        if natoms != 1:
            raise ValueError('hund=True arg only valid for single atoms!')
        spinpol = True
        magmom_av[0] = (0, 0, setups[0].get_hunds_rule_moment(par.charge))

    if spinpol is None:
        spinpol = magnetic
    elif magnetic and not spinpol:
        raise ValueError('Non-zero initial magnetic moment for a ' +
                         'spin-paired calculation!')

    # ncomp: number of spinor components per band (2 for non-collinear).
    if collinear:
        nspins = 1 + int(spinpol)
        ncomp = 1
    else:
        nspins = 1
        ncomp = 2

    # Backwards compatibility for the deprecated 'usesymm' keyword.
    if par.usesymm != 'default':
        warnings.warn('Use "symmetry" keyword instead of ' +
                      '"usesymm" keyword')
        par.symmetry = usesymm2symmetry(par.usesymm)

    symm = par.symmetry
    if symm == 'off':
        symm = {'point_group': False, 'time_reversal': False}

    bzkpts_kc = kpts2ndarray(par.kpts, self.atoms)
    kd = KPointDescriptor(bzkpts_kc, nspins, collinear)
    m_av = magmom_av.round(decimals=3)  # round off
    # Atoms are symmetry-equivalent only if setup id and moments match.
    id_a = zip(setups.id_a, *m_av.T)
    symmetry = Symmetry(id_a, cell_cv, atoms.pbc, **symm)
    kd.set_symmetry(atoms, symmetry, comm=world)
    setups.set_symmetry(symmetry)

    if par.gpts is not None:
        N_c = np.array(par.gpts)
    else:
        h = par.h
        if h is not None:
            h /= Bohr
        N_c = get_number_of_grid_points(cell_cv, h, mode, realspace,
                                        kd.symmetry)
    symmetry.check_grid(N_c)

    # Default smearing width: finite for periodic systems only.
    width = par.width
    if width is None:
        if pbc_c.any():
            width = 0.1  # eV
        else:
            width = 0.0
    else:
        assert par.occupations is None

    if hasattr(self, 'time') or par.dtype == complex:
        dtype = complex
    else:
        if kd.gamma:
            dtype = float
        else:
            dtype = complex

    nao = setups.nao
    nvalence = setups.nvalence - par.charge
    M_v = magmom_av.sum(0)
    M = np.dot(M_v, M_v)**0.5  # magnitude of total initial moment

    nbands = par.nbands

    orbital_free = any(setup.orbital_free for setup in setups)
    if orbital_free:
        nbands = 1

    # 'nbands' given as a percentage string of the occupied bands.
    if isinstance(nbands, basestring):
        if nbands[-1] == '%':
            basebands = int(nvalence + M + 0.5) // 2
            nbands = int((float(nbands[:-1]) / 100) * basebands)
        else:
            raise ValueError('Integer Expected: Only use a string '
                             'if giving a percentage of occupied bands')

    if nbands is None:
        # Sum the per-setup defaults, capped by the number of orbitals.
        nbands = 0
        for setup in setups:
            nbands_from_atom = setup.get_default_nbands()

            # Any obscure setup errors?
            if nbands_from_atom < -(-setup.Nv // 2):
                raise ValueError('Bad setup: This setup requests %d'
                                 ' bands but has %d electrons.'
                                 % (nbands_from_atom, setup.Nv))
            nbands += nbands_from_atom
        nbands = min(nao, nbands)
    elif nbands > nao and mode.name == 'lcao':
        raise ValueError('Too many bands for LCAO calculation: '
                         '%d bands and only %d atomic orbitals!' %
                         (nbands, nao))

    if nvalence < 0:
        raise ValueError(
            'Charge %f is not possible - not enough valence electrons' %
            par.charge)

    # Non-positive nbands means "this many extra bands above occupied".
    if nbands <= 0:
        nbands = int(nvalence + M + 0.5) // 2 + (-nbands)

    if nvalence > 2 * nbands and not orbital_free:
        raise ValueError('Too few bands! Electrons: %f, bands: %d'
                         % (nvalence, nbands))

    nbands *= ncomp

    if par.width is not None:
        self.text('**NOTE**: please start using '
                  'occupations=FermiDirac(width).')
    if par.fixmom:
        self.text('**NOTE**: please start using '
                  'occupations=FermiDirac(width, fixmagmom=True).')

    if self.occupations is None:
        if par.occupations is None:
            # Create object for occupation numbers:
            if orbital_free:
                width = 0.0  # even for PBC
                self.occupations = occupations.TFOccupations(width,
                                                             par.fixmom)
            else:
                self.occupations = occupations.FermiDirac(width,
                                                          par.fixmom)
        else:
            self.occupations = par.occupations

        # If occupation numbers are changed, and we have wave functions,
        # recalculate the occupation numbers
        if self.wfs is not None and not isinstance(self.wfs,
                                                   EmptyWaveFunctions):
            self.occupations.calculate(self.wfs)

    self.occupations.magmom = M_v[2]

    cc = par.convergence

    # LCAO mode starts directly from the density; no fixed-density steps.
    if mode.name == 'lcao':
        niter_fixdensity = 0
    else:
        niter_fixdensity = None

    if self.scf is None:
        force_crit = cc['forces']
        if force_crit is not None:
            force_crit /= Hartree / Bohr  # eV/Ang -> atomic units
        self.scf = SCFLoop(
            cc['eigenstates'] / Hartree**2 * nvalence,
            cc['energy'] / Hartree * max(nvalence, 1),
            cc['density'] * nvalence,
            par.maxiter, par.fixdensity,
            niter_fixdensity,
            force_crit)

    parsize_kpt = par.parallel['kpt']
    parsize_domain = par.parallel['domain']
    parsize_bands = par.parallel['band']

    if not realspace:
        pbc_c = np.ones(3, bool)

    if not self.wfs:
        if parsize_domain == 'domain only':  # XXX this was silly!
            parsize_domain = world.size

        parallelization = mpi.Parallelization(world,
                                              nspins * kd.nibzkpts)
        ndomains = None
        if parsize_domain is not None:
            ndomains = np.prod(parsize_domain)
        if mode.name == 'pw':
            if ndomains > 1:
                raise ValueError('Planewave mode does not support '
                                 'domain decomposition.')
            ndomains = 1
        parallelization.set(kpt=parsize_kpt,
                            domain=ndomains,
                            band=parsize_bands)
        comms = parallelization.build_communicators()
        domain_comm = comms['d']
        kpt_comm = comms['k']
        band_comm = comms['b']
        kptband_comm = comms['D']
        domainband_comm = comms['K']

        self.comms = comms
        kd.set_communicator(kpt_comm)

        parstride_bands = par.parallel['stridebands']

        # Unfortunately we need to remember that we adjusted the
        # number of bands so we can print a warning if it differs
        # from the number specified by the user.  (The number can
        # be inferred from the input parameters, but it's tricky
        # because we allow negative numbers)
        self.nbands_parallelization_adjustment = -nbands % band_comm.size
        nbands += self.nbands_parallelization_adjustment

        # I would like to give the following error message, but apparently
        # there are cases, e.g. gpaw/test/gw_ppa.py, which involve
        # nbands > nao and are supposed to work that way.
        #if nbands > nao:
        #    raise ValueError('Number of bands %d adjusted for band '
        #                     'parallelization %d exceeds number of atomic '
        #                     'orbitals %d. This problem can be fixed '
        #                     'by reducing the number of bands a bit.'
        #                     % (nbands, band_comm.size, nao))
        bd = BandDescriptor(nbands, band_comm, parstride_bands)

        if (self.density is not None and
            self.density.gd.comm.size != domain_comm.size):
            # Domain decomposition has changed, so we need to
            # reinitialize density and hamiltonian:
            if par.fixdensity:
                raise RuntimeError(
                    'Density reinitialization conflict ' +
                    'with "fixdensity" - specify domain decomposition.')
            self.density = None
            self.hamiltonian = None

        # Construct grid descriptor for coarse grids for wave functions:
        gd = self.grid_descriptor_class(N_c, cell_cv, pbc_c,
                                        domain_comm, parsize_domain)

        # do k-point analysis here? XXX
        args = (gd, nvalence, setups, bd, dtype, world, kd,
                kptband_comm, self.timer)

        if par.parallel['sl_auto']:
            # Choose scalapack parallelization automatically

            for key, val in par.parallel.items():
                if (key.startswith('sl_') and key != 'sl_auto' and
                        val is not None):
                    raise ValueError("Cannot use 'sl_auto' together "
                                     "with '%s'" % key)
            max_scalapack_cpus = bd.comm.size * gd.comm.size
            nprow = max_scalapack_cpus
            npcol = 1

            # Get a sort of reasonable number of columns/rows
            while npcol < nprow and nprow % 2 == 0:
                npcol *= 2
                nprow //= 2
            assert npcol * nprow == max_scalapack_cpus

            # ScaLAPACK creates trouble if there aren't at least a few
            # whole blocks; choose block size so there will always be
            # several blocks.  This will crash for small test systems,
            # but so will ScaLAPACK in any case
            blocksize = min(-(-nbands // 4), 64)
            sl_default = (nprow, npcol, blocksize)
        else:
            sl_default = par.parallel['sl_default']

        if mode.name == 'lcao':
            # Layouts used for general diagonalizer
            sl_lcao = par.parallel['sl_lcao']
            if sl_lcao is None:
                sl_lcao = sl_default
            lcaoksl = get_KohnSham_layouts(sl_lcao, 'lcao',
                                           gd, bd, domainband_comm,
                                           dtype, nao=nao,
                                           timer=self.timer)

            self.wfs = mode(collinear, lcaoksl, *args)
        elif mode.name == 'fd' or mode.name == 'pw':
            # buffer_size keyword only relevant for fdpw
            buffer_size = par.parallel['buffer_size']
            # Layouts used for diagonalizer
            sl_diagonalize = par.parallel['sl_diagonalize']
            if sl_diagonalize is None:
                sl_diagonalize = sl_default
            diagksl = get_KohnSham_layouts(sl_diagonalize, 'fd',  # XXX
                                           # choice of key 'fd' not so nice
                                           gd, bd, domainband_comm, dtype,
                                           buffer_size=buffer_size,
                                           timer=self.timer)

            # Layouts used for orthonormalizer
            sl_inverse_cholesky = par.parallel['sl_inverse_cholesky']
            if sl_inverse_cholesky is None:
                sl_inverse_cholesky = sl_default
            if sl_inverse_cholesky != sl_diagonalize:
                message = 'sl_inverse_cholesky != sl_diagonalize ' \
                    'is not implemented.'
                raise NotImplementedError(message)
            orthoksl = get_KohnSham_layouts(sl_inverse_cholesky, 'fd',
                                            gd, bd, domainband_comm, dtype,
                                            buffer_size=buffer_size,
                                            timer=self.timer)

            # Use (at most) all available LCAO for initialization
            lcaonbands = min(nbands, nao)

            try:
                lcaobd = BandDescriptor(lcaonbands, band_comm,
                                        parstride_bands)
            except RuntimeError:
                initksl = None
            else:
                # Layouts used for general diagonalizer
                # (LCAO initialization)
                sl_lcao = par.parallel['sl_lcao']
                if sl_lcao is None:
                    sl_lcao = sl_default
                initksl = get_KohnSham_layouts(sl_lcao, 'lcao',
                                               gd, lcaobd,
                                               domainband_comm,
                                               dtype, nao=nao,
                                               timer=self.timer)

            if hasattr(self, 'time'):
                # Time-propagation TDDFT only supports fd mode.
                assert mode.name == 'fd'
                from gpaw.tddft import TimeDependentWaveFunctions
                self.wfs = TimeDependentWaveFunctions(
                    par.stencils[0], diagksl, orthoksl, initksl, gd,
                    nvalence, setups, bd, world, kd, kptband_comm,
                    self.timer)
            elif mode.name == 'fd':
                self.wfs = mode(par.stencils[0], diagksl, orthoksl,
                                initksl, *args)
            else:
                assert mode.name == 'pw'
                self.wfs = mode(diagksl, orthoksl, initksl, *args)
        else:
            self.wfs = mode(self, *args)
    else:
        self.wfs.set_setups(setups)

    if not self.wfs.eigensolver:
        # Number of bands to converge:
        nbands_converge = cc['bands']
        if nbands_converge == 'all':
            nbands_converge = nbands
        elif nbands_converge != 'occupied':
            assert isinstance(nbands_converge, int)
            if nbands_converge < 0:
                # Negative value counts back from the last band.
                nbands_converge += nbands
        eigensolver = get_eigensolver(par.eigensolver, mode,
                                      par.convergence)
        eigensolver.nbands_converge = nbands_converge
        # XXX Eigensolver class doesn't define an nbands_converge property

        if isinstance(xc, SIC):
            eigensolver.blocksize = 1
        self.wfs.set_eigensolver(eigensolver)

    if self.density is None:
        gd = self.wfs.gd
        if par.stencils[1] != 9:
            # Construct grid descriptor for fine grids for densities
            # and potentials:
            finegd = gd.refine()
        else:
            # Special case (use only coarse grid):
            finegd = gd

        if realspace:
            self.density = RealSpaceDensity(
                gd, finegd, nspins, par.charge + setups.core_charge,
                collinear, par.stencils[1])
        else:
            self.density = pw.ReciprocalSpaceDensity(
                gd, finegd, nspins, par.charge + setups.core_charge,
                collinear)

    self.density.initialize(setups, self.timer, magmom_av, par.hund)
    self.density.set_mixer(par.mixer)

    if self.hamiltonian is None:
        gd, finegd = self.density.gd, self.density.finegd
        if realspace:
            self.hamiltonian = RealSpaceHamiltonian(
                gd, finegd, nspins, setups, self.timer, xc, world,
                self.wfs.kptband_comm, par.external, collinear,
                par.poissonsolver, par.stencils[1])
        else:
            self.hamiltonian = pw.ReciprocalSpaceHamiltonian(
                gd, finegd, self.density.pd2, self.density.pd3,
                nspins, setups, self.timer, xc, world,
                self.wfs.kptband_comm, par.external, collinear)

    xc.initialize(self.density, self.hamiltonian, self.wfs,
                  self.occupations)

    self.text()
    self.print_memory_estimate(self.txt, maxdepth=memory_estimate_depth)
    self.txt.flush()

    self.timer.print_info(self)

    if dry_run:
        self.dry_run()

    # FDTD+TDDFT Poisson solver needs direct access to the density.
    if realspace and \
            self.hamiltonian.poisson.get_description() == 'FDTD+TDDFT':
        self.hamiltonian.poisson.set_density(self.density)
        self.hamiltonian.poisson.print_messages(self.text)
        self.txt.flush()

    self.initialized = True
    self._changed_keywords.clear()
def diagonalize_full_hamiltonian(self, ham, atoms, occupations, txt,
                                 nbands=None, scalapack=None, expert=False):
    """Diagonalize the full plane-wave Hamiltonian for every k-point.

    Replaces ``self.bd`` with a new BandDescriptor for ``nbands`` bands,
    fills ``kpt.eps_n``/``kpt.psit_nG`` for each k-point and finally
    recomputes occupation numbers.  Uses ScaLAPACK (BLACS grids) when the
    band communicator has more than one rank.

    ``scalapack`` may be a (nprow, npcol, blocksize) tuple; ``expert``
    limits the diagonalization to the lowest ``nbands`` eigenpairs.
    """
    assert self.dtype == complex

    if nbands is None:
        # Round down to a multiple of the band-communicator size.
        nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
    else:
        assert nbands <= self.pd.ngmin

    if expert:
        iu = nbands  # upper eigenvalue index for partial diagonalization
    else:
        iu = None

    self.bd = bd = BandDescriptor(nbands, self.bd.comm)

    p = functools.partial(print, file=txt)
    p('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
    p('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                self.pd.ngmax))
    # 3 complex (16-byte) npw x npw matrices, distributed over the comm.
    mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
    p('Approximate memory usage per core: {0:.3f} MB'.format(mem))
    if bd.comm.size > 1:
        if isinstance(scalapack, (list, tuple)):
            nprow, npcol, b = scalapack
        else:
            # Pick the most square process grid that divides comm.size.
            nprow = int(round(bd.comm.size**0.5))
            while bd.comm.size % nprow != 0:
                nprow -= 1
            npcol = bd.comm.size // nprow
            b = 64
        p('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
          'block-size:', b)
        # bg: 1-D column layout for building H/S; bg2: 2-D grid for solving.
        bg = BlacsGrid(bd.comm, bd.comm.size, 1)
        bg2 = BlacsGrid(bd.comm, nprow, npcol)
        scalapack = True
    else:
        nprow = npcol = 1
        scalapack = False

    self.pt.set_positions(atoms.get_scaled_positions())
    self.kpt_u[0].P_ani = None
    self.allocate_arrays_for_projections(self.pt.my_atom_indices)

    myslice = bd.get_slice()  # this rank's slice of the band index

    pb = ProgressBar(txt)
    nkpt = len(self.kpt_u)

    for u, kpt in enumerate(self.kpt_u):
        pb.update(u / nkpt)
        npw = len(self.pd.Q_qG[kpt.q])
        if scalapack:
            mynpw = -(-npw // bd.comm.size)  # ceil division
            md = BlacsDescriptor(bg, npw, npw, mynpw, npw)
            md2 = BlacsDescriptor(bg2, npw, npw, b, b)
        else:
            md = md2 = MatrixDescriptor(npw, npw)

        with self.timer('Build H and S'):
            H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

        if scalapack:
            # Move H and S from the build layout to the solver layout.
            r = Redistributor(bd.comm, md, md2)
            H_GG = r.redistribute(H_GG)
            S_GG = r.redistribute(S_GG)

        psit_nG = md2.empty(dtype=complex)
        eps_n = np.empty(npw)

        with self.timer('Diagonalize'):
            if not scalapack:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n,
                                           iu=iu)
            else:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
        del H_GG, S_GG  # free the large matrices as soon as possible

        kpt.eps_n = eps_n[myslice].copy()

        if scalapack:
            # Redistribute eigenvectors to row-wise band distribution.
            md3 = BlacsDescriptor(bg, npw, npw, bd.mynbands, npw)
            r = Redistributor(bd.comm, md2, md3)
            psit_nG = r.redistribute(psit_nG)

        kpt.psit_nG = psit_nG[:bd.mynbands].copy()
        del psit_nG

        with self.timer('Projections'):
            self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

        kpt.f_n = None  # occupations are recomputed below

    pb.finish()

    occupations.calculate(self)
def diagonalize_full_hamiltonian(self, ham, atoms, occupations, log,
                                 nbands=None, ecut=None, scalapack=None,
                                 expert=False):
    """Diagonalize the full plane-wave Hamiltonian for every k-point.

    Like the txt-based variant, but logs through a ``log`` object,
    optionally derives ``nbands`` from an energy cutoff ``ecut`` (eV)
    and returns the number of bands actually used.  Replaces ``self.bd``
    and fills ``kpt.eps_n``/``kpt.psit_nG`` for each k-point.
    """
    if self.dtype != complex:
        raise ValueError('Your wavefunctions are not complex as '
                         'required by the PW diagonalization routine.\n'
                         'Please supply GPAW(..., dtype=complex, ...) '
                         'as an argument to the calculator to enforce '
                         'complex wavefunctions.')

    if nbands is None and ecut is None:
        # Round down to a multiple of the band-communicator size.
        nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
    elif nbands is None:
        # Number of plane waves below ecut: V * (2*ecut)^(3/2) / (6*pi^2).
        ecut /= units.Hartree
        vol = abs(np.linalg.det(self.gd.cell_cv))
        nbands = int(vol * ecut**1.5 * 2**0.5 / 3 / pi**2)
    else:
        assert nbands <= self.pd.ngmin

    if expert:
        iu = nbands  # upper eigenvalue index for partial diagonalization
    else:
        iu = None

    self.bd = bd = BandDescriptor(nbands, self.bd.comm)

    log('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
    log('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                  self.pd.ngmax))
    # 3 complex (16-byte) npw x npw matrices, distributed over the comm.
    mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
    log('Approximate memory usage per core: {0:.3f} MB'.format(mem))
    if bd.comm.size > 1:
        if isinstance(scalapack, (list, tuple)):
            nprow, npcol, b = scalapack
        else:
            # Pick the most square process grid that divides comm.size.
            nprow = int(round(bd.comm.size**0.5))
            while bd.comm.size % nprow != 0:
                nprow -= 1
            npcol = bd.comm.size // nprow
            b = 64
        log('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
            'block-size:', b)
        # bg: 1-D column layout for building H/S; bg2: 2-D grid for solving.
        bg = BlacsGrid(bd.comm, bd.comm.size, 1)
        bg2 = BlacsGrid(bd.comm, nprow, npcol)
        scalapack = True
    else:
        nprow = npcol = 1
        scalapack = False

    self.set_positions(atoms.get_scaled_positions())
    self.kpt_u[0].P_ani = None
    self.allocate_arrays_for_projections(self.pt.my_atom_indices)

    myslice = bd.get_slice()  # this rank's slice of the band index

    pb = ProgressBar(log.fd)
    nkpt = len(self.kpt_u)

    for u, kpt in enumerate(self.kpt_u):
        pb.update(u / nkpt)
        npw = len(self.pd.Q_qG[kpt.q])
        if scalapack:
            mynpw = -(-npw // bd.comm.size)  # ceil division
            md = BlacsDescriptor(bg, npw, npw, mynpw, npw)
            md2 = BlacsDescriptor(bg2, npw, npw, b, b)
        else:
            md = md2 = MatrixDescriptor(npw, npw)

        with self.timer('Build H and S'):
            H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

        if scalapack:
            # Move H and S from the build layout to the solver layout.
            r = Redistributor(bd.comm, md, md2)
            H_GG = r.redistribute(H_GG)
            S_GG = r.redistribute(S_GG)

        psit_nG = md2.empty(dtype=complex)
        eps_n = np.empty(npw)

        with self.timer('Diagonalize'):
            if not scalapack:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n,
                                           iu=iu)
            else:
                md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
        del H_GG, S_GG  # free the large matrices as soon as possible

        kpt.eps_n = eps_n[myslice].copy()

        if scalapack:
            # Redistribute eigenvectors to row-wise band distribution.
            md3 = BlacsDescriptor(bg, npw, npw, bd.mynbands, npw)
            r = Redistributor(bd.comm, md2, md3)
            psit_nG = r.redistribute(psit_nG)

        kpt.psit_nG = psit_nG[:bd.mynbands].copy()
        del psit_nG

        with self.timer('Projections'):
            self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

        kpt.f_n = None  # occupations are recomputed below

    pb.finish()

    occupations.calculate(self)

    return nbands