def __init__(self, gd, bd, block_comm, dtype, mcpus, ncpus, blocksize,
             buffer_size=None, timer=nulltimer):
    BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                          blocksize, timer)
    self.buffer_size = buffer_size
    nbands = bd.nbands
    self.mynbands = mynbands = bd.mynbands
    self.blocksize = blocksize

    # 1D layout - columns:
    self.columngrid = BlacsGrid(self.column_comm, 1, bd.comm.size)
    self.Nndescriptor = self.columngrid.new_descriptor(nbands, nbands,
                                                       nbands, mynbands)

    # 2D layout:
    self.nndescriptor = self.blockgrid.new_descriptor(nbands, nbands,
                                                      blocksize, blocksize)

    # 1D layout - rows:
    self.rowgrid = BlacsGrid(self.column_comm, bd.comm.size, 1)
    self.nNdescriptor = self.rowgrid.new_descriptor(nbands, nbands,
                                                    mynbands, nbands)

    # Only redistribute the filled-out half for Hermitian matrices:
    self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
                               self.nndescriptor)
    # self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
    #                            self.nndescriptor, 'L')  # XXX faster but...

    # The resulting matrix will be used in dgemm, which is symmetry
    # oblivious:
    self.nn2nN = Redistributor(self.block_comm, self.nndescriptor,
                               self.nNdescriptor)
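
# A minimal serial sketch (hypothetical helper, not part of the class) of
# the Nn -> nn -> nN round trip set up above, assuming a single rank so
# that each redistribution degenerates to a copy.  It only illustrates why
# the 2D block-cyclic nn layout exists: that is the layout the blocked
# diagonalization works in.  Row/column conventions here are illustrative.
def _band_roundtrip_sketch(H_Nn):
    import numpy as np
    H_nn = H_Nn.copy()                  # stands in for Nn2nn.redistribute()
    eps_n, C_nn = np.linalg.eigh(H_nn)  # blocked-layout diagonalization
    C_nN = C_nn.T.copy()                # stands in for nn2nN.redistribute()
    return eps_n, C_nN
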
def __init__(self, gd, bd, block_comm, dtype, mcpus, ncpus, blocksize,
             nao, timer=nulltimer):
    BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                          blocksize, timer)
    nbands = bd.nbands
    self.blocksize = blocksize
    self.mynbands = mynbands = bd.mynbands
    self.orbital_comm = self.bd.comm
    # Ceiling division: every rank gets the same block size; the last
    # rank(s) may hold fewer (or zero) basis functions.
    self.naoblocksize = naoblocksize = -((-nao) // self.orbital_comm.size)
    self.nao = nao

    # Range of basis functions for BLACS distribution of matrices:
    self.Mmax = nao
    self.Mstart = bd.comm.rank * naoblocksize
    self.Mstop = min(self.Mstart + naoblocksize, self.Mmax)
    self.mynao = self.Mstop - self.Mstart

    # Column layout for one matrix per band rank:
    self.columngrid = BlacsGrid(bd.comm, bd.comm.size, 1)
    self.mMdescriptor = self.columngrid.new_descriptor(nao, nao,
                                                       naoblocksize, nao)
    self.nMdescriptor = self.columngrid.new_descriptor(nbands, nao,
                                                       mynbands, nao)
    # parallelprint(world, (mynao, self.mMdescriptor.shape))

    # Column layout for one matrix in total (only on grid masters):
    self.single_column_grid = BlacsGrid(self.column_comm, bd.comm.size, 1)
    self.mM_unique_descriptor = self.single_column_grid.new_descriptor(
        nao, nao, naoblocksize, nao)

    # nM_unique_descriptor is meant to hold the coefficients after
    # diagonalization.  BLACS requires it to be nao-by-nao, but
    # we only fill meaningful data into the first nbands columns.
    #
    # The array will then be trimmed and broadcast across
    # the grid descriptor's communicator.
    self.nM_unique_descriptor = self.single_column_grid.new_descriptor(
        nbands, nao, mynbands, nao)

    # Fully blocked grid for diagonalization with many CPUs:
    self.mmdescriptor = self.blockgrid.new_descriptor(nao, nao, blocksize,
                                                      blocksize)

    # self.nMdescriptor = nMdescriptor
    self.mM2mm = Redistributor(self.block_comm, self.mM_unique_descriptor,
                               self.mmdescriptor)
    self.mm2nM = Redistributor(self.block_comm, self.mmdescriptor,
                               self.nM_unique_descriptor)
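
# Tiny standalone sketch (hypothetical helper, not used by the class) of
# the ceiling-division idiom -((-nao) // size) used for naoblocksize above:
def _ceil_div_sketch():
    def ceil_div(a, b):
        return -((-a) // b)

    assert ceil_div(8, 4) == 2  # divides evenly
    assert ceil_div(9, 4) == 3  # 9 orbitals on 4 ranks: blocks of 3, 3, 3, 0
    assert ceil_div(1, 4) == 1  # never rounds down to an empty block size
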
def calculate_blocked_density_matrix(self, f_n, C_nM):
    nbands = self.bd.nbands
    mynbands = self.bd.mynbands
    nao = self.nao
    dtype = C_nM.dtype

    self.nMdescriptor.checkassert(C_nM)
    if self.gd.rank == 0:
        Cf_nM = (C_nM * f_n[:, None]).conj()
    else:
        C_nM = self.nM_unique_descriptor.zeros(dtype=dtype)
        Cf_nM = self.nM_unique_descriptor.zeros(dtype=dtype)

    r = Redistributor(self.block_comm, self.nM_unique_descriptor,
                      self.mmdescriptor)

    Cf_mm = self.mmdescriptor.zeros(dtype=dtype)
    r.redistribute(Cf_nM, Cf_mm, nbands, nao)
    del Cf_nM

    C_mm = self.mmdescriptor.zeros(dtype=dtype)
    r.redistribute(C_nM, C_mm, nbands, nao)
    # no use to delete C_nM as it's in the input...

    rho_mm = self.mmdescriptor.zeros(dtype=dtype)
    pblas_simple_gemm(self.mmdescriptor, self.mmdescriptor,
                      self.mmdescriptor, Cf_mm, C_mm, rho_mm, transa='T')
    return rho_mm
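
# Serial NumPy sketch (hypothetical helper; single rank assumed, descriptor
# bookkeeping omitted) of what the redistributions plus pblas_simple_gemm
# above compute:
#
#   rho[mu, nu] = sum_n f_n * conj(C[n, mu]) * C[n, nu]
def _density_matrix_sketch(f_n, C_nM):
    Cf_nM = (C_nM * f_n[:, None]).conj()
    return Cf_nM.T @ C_nM  # transa='T' in the pblas call plays this role
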
def distribute_to_columns(self, rho_mm, srcdescriptor):
    redistributor = Redistributor(self.block_comm,  # XXX
                                  srcdescriptor,
                                  self.mM_unique_descriptor)
    rho_mM = redistributor.redistribute(rho_mm)
    if self.gd.rank != 0:
        rho_mM = self.mMdescriptor.zeros(dtype=rho_mm.dtype)
    self.gd.comm.broadcast(rho_mM, 0)
    return rho_mM
def calculate_density_matrix(self, f_n, C_nM, rho_mM=None):
    """Calculate density matrix from occupations and coefficients.

    Presently this function performs the usual ScaLAPACK three-step
    trick: redistribute, numbercrunch, redistribute back.

    Notes on future performance improvement.

    As per the current framework, C_nM exists as copies on each
    domain, i.e. this is not parallel over domains.  We'd like to
    correct this and have an efficient distribution using e.g. the
    block communicator.

    The diagonalization routine and other parts of the code should
    however be changed to accommodate the following scheme:

    Keep coefficients in C_mm form after the diagonalization.
    rho_mm can then be directly calculated from C_mm without
    redistribution, after which we only need to redistribute
    rho_mm across domains.
    """
    dtype = C_nM.dtype
    rho_mm = self.calculate_blocked_density_matrix(f_n, C_nM)
    rback = Redistributor(self.block_comm, self.mmdescriptor,
                          self.mM_unique_descriptor)
    rho1_mM = self.mM_unique_descriptor.zeros(dtype=dtype)
    rback.redistribute(rho_mm, rho1_mM)
    del rho_mm

    if rho_mM is None:
        if self.gd.rank == 0:
            rho_mM = rho1_mM
        else:
            rho_mM = self.mMdescriptor.zeros(dtype=dtype)

    self.gd.comm.broadcast(rho_mM, 0)
    return rho_mM
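
# Usage sketch (hypothetical variable names; in practice the layouts object,
# occupations f_n and coefficients C_nM come from the surrounding LCAO
# calculation):
#
#     rho_mM = layouts.calculate_density_matrix(f_n, C_nM)
#     # every domain rank now holds the column-distributed density matrix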