Python Transformer.estimate_memory Examples

Programming Language: Python

Namespace/Package Name: gpaw.transformers

Class/Type: Transformer

Method/Function: estimate_memory

Examples at hotexamples.com: 2

Python Transformer.estimate_memory - 2 examples found. These are the top-rated real-world Python examples of gpaw.transformers.Transformer.estimate_memory, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Transformer(30)

apply(15)

allocate(6)

estimate_memory(2)

Example #1

Show file

class Density:
    """Density object.
    
    Attributes:
     =============== =====================================================
     ``gd``          Grid descriptor for coarse grids.
     ``finegd``      Grid descriptor for fine grids.
     ``interpolate`` Function for interpolating the electron density.
     ``mixer``       ``DensityMixer`` object.
     =============== =====================================================

    Soft and smooth pseudo functions on uniform 3D grids:
     ========== =========================================
     ``nt_sG``  Electron density on the coarse grid.
     ``nt_sg``  Electron density on the fine grid.
     ``nt_g``   Electron density on the fine grid.
     ``rhot_g`` Charge density on the fine grid.
     ``nct_G``  Core electron-density on the coarse grid.
     ========== =========================================
    """
    
    def __init__(self, gd, finegd, nspins, charge):
        """Set up an empty, unallocated density container.

        ``gd`` and ``finegd`` are stored as the coarse and fine grid
        descriptors, ``nspins`` as the number of spin channels and
        ``charge`` as a float.  Every array and per-atom quantity starts
        out as None and is filled in later by the ``initialize*`` and
        ``set_positions`` methods.
        """
        # Grids and basic system parameters.
        self.gd, self.finegd = gd, finegd
        self.nspins = nspins
        self.charge = float(charge)

        # Tolerance of the charge-conservation check in
        # calculate_pseudo_charge().
        self.charge_eps = 1e-7

        # Per-atom quantities and the atom -> rank ownership map.
        self.D_asp = self.Q_aL = None
        self.rank_a = None

        # Coarse-grid arrays.
        self.nct_G = self.nt_sG = None
        # Fine-grid arrays.
        self.rhot_g = self.nt_sg = self.nt_g = None

        # Placeholders until set_mixer()/initialize() supply real objects.
        self.mixer = BaseMixer()
        self.timer = nulltimer
        self.allocated = False
        
    def initialize(self, setups, stencil, timer, magmom_a, hund):
        """Attach setups and build the interpolator and localized functions.

        Stores ``setups``, ``timer``, ``magmom_a`` and ``hund`` on the
        object, creates an unallocated coarse -> fine ``Transformer`` and
        the ``nct`` / ``ghat`` localized-function collections.  If the
        object had already been allocated, the freshly created
        interpolator is allocated again right away.
        """
        self.timer = timer
        self.setups = setups
        self.hund = hund
        self.magmom_a = magmom_a

        # Interpolation function for the density (allocated lazily):
        self.interpolator = Transformer(self.gd, self.finegd, stencil,
                                        allocate=False)

        # One list of splines per atom; empty when the setup has no
        # pseudo core density.
        spline_aj = [[] if setup.nct is None else [setup.nct]
                     for setup in setups]
        self.nct = LFC(self.gd, spline_aj,
                       integral=[setup.Nct for setup in setups],
                       forces=True, cut=True)
        self.ghat = LFC(self.finegd, [setup.ghat_l for setup in setups],
                        integral=sqrt(4 * pi), forces=True)

        if self.allocated:
            # The new interpolator has not been allocated yet; clear the
            # flag so that allocate() accepts the call.
            self.allocated = False
            self.allocate()

    def allocate(self):
        """Allocate the interpolator; may only be called once per interpolator.

        Raises AssertionError if the object is already marked allocated.
        """
        assert not self.allocated
        self.interpolator.allocate()
        self.allocated = True

    def reset(self):
        """Discard the coarse-grid pseudo density ``nt_sG``."""
        # TODO: reset other parameters?
        self.nt_sG = None

    def set_positions(self, spos_ac, rank_a=None):
        """Move the atoms and redistribute the atomic density matrices.

        ``spos_ac`` is forwarded to the ``set_positions`` methods of the
        ``nct`` and ``ghat`` localized functions (presumably scaled atomic
        positions -- confirm against the LFC implementation).

        ``rank_a`` is indexed by atom and compared element-wise against
        ``self.gd.comm.rank``; it gives the rank that owns each atom after
        the move.  When an earlier ownership map and ``D_asp`` exist, the
        entries of ``D_asp`` whose owner changed are exchanged between
        ranks with non-blocking sends/receives.

        Side effects: allocates the object if needed, resets the mixer,
        rebuilds ``nct_G`` and sets ``nt_sg``, ``nt_g``, ``rhot_g`` and
        ``Q_aL`` to None.  ``nt_sG`` is kept.
        """
        if not self.allocated:
            self.allocate()
        self.nct.set_positions(spos_ac)
        self.ghat.set_positions(spos_ac)
        self.mixer.reset()

        # Core density on the coarse grid, added with weight 1/nspins.
        self.nct_G = self.gd.zeros()
        self.nct.add(self.nct_G, 1.0 / self.nspins)
        #self.nt_sG = None
        # Everything derived from the old positions is invalidated.
        self.nt_sg = None
        self.nt_g = None
        self.rhot_g = None
        self.Q_aL = None

        # If both old and new atomic ranks are present, start a blank dict if
        # it previously didn't exist but it will needed for the new atoms.
        if (self.rank_a is not None and rank_a is not None and
            self.D_asp is None and (rank_a == self.gd.comm.rank).any()):
            self.D_asp = {}

        if self.rank_a is not None and self.D_asp is not None:
            self.timer.start('Redistribute')
            requests = []
            # Atoms whose owning rank differs between old and new maps.
            flags = (self.rank_a != rank_a)
            my_incoming_atom_indices = np.argwhere(np.bitwise_and(flags, \
                rank_a == self.gd.comm.rank)).ravel()
            my_outgoing_atom_indices = np.argwhere(np.bitwise_and(flags, \
                self.rank_a == self.gd.comm.rank)).ravel()

            for a in my_incoming_atom_indices:
                # Get matrix from old domain:
                ni = self.setups[a].ni
                # Receive buffer with ni*(ni+1)/2 entries per spin; it is
                # filled once the request completes in waitall() below.
                D_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                requests.append(self.gd.comm.receive(D_sp, self.rank_a[a],
                                                     tag=a, block=False))
                assert a not in self.D_asp
                self.D_asp[a] = D_sp

            for a in my_outgoing_atom_indices:
                # Send matrix to new domain:
                D_sp = self.D_asp.pop(a)
                requests.append(self.gd.comm.send(D_sp, rank_a[a],
                                                  tag=a, block=False))
            # Block until every posted send and receive has finished.
            self.gd.comm.waitall(requests)
            self.timer.stop('Redistribute')

        self.rank_a = rank_a

    def calculate_pseudo_density(self, wfs):
        """Recompute the coarse-grid pseudo density ``nt_sG``.

        ``wfs.calculate_density_contribution()`` is handed ``nt_sG``
        (presumably it overwrites the array with the wave-function
        contribution -- confirm in the wave-function class), after which
        the pseudo core density ``nct_G`` is added in place.
        """
        wfs.calculate_density_contribution(self.nt_sG)
        self.nt_sG += self.nct_G

    def update(self, wfs):
        """Rebuild the density from the wave functions ``wfs`` and mix it.

        Steps, each wrapped in its own timer section: pseudo density,
        atomic density matrices, multipole moments, normalization (only
        for ``LCAOWaveFunctions``) and mixing.
        """
        timer = self.timer
        timer.start('Density')

        timer.start('Pseudo density')
        self.calculate_pseudo_density(wfs)
        timer.stop('Pseudo density')

        timer.start('Atomic density matrices')
        wfs.calculate_atomic_density_matrices(self.D_asp)
        timer.stop('Atomic density matrices')

        timer.start('Multipole moments')
        comp_charge = self.calculate_multipole_moments()
        timer.stop('Multipole moments')

        # Only the LCAO wave functions get an explicit renormalization.
        if isinstance(wfs, LCAOWaveFunctions):
            timer.start('Normalize')
            self.normalize(comp_charge)
            timer.stop('Normalize')

        timer.start('Mix')
        self.mix(comp_charge)
        timer.stop('Mix')

        timer.stop('Density')

    def normalize(self, comp_charge=None):
        """Rescale ``nt_sG`` so that the total charge sums to zero.

        ``comp_charge`` is the total compensation charge; it is computed
        with calculate_multipole_moments() when not supplied.  Nothing is
        changed if pseudo charge + ``self.charge`` + ``comp_charge`` is
        already exactly zero.
        """
        if comp_charge is None:
            comp_charge = self.calculate_multipole_moments()

        pseudo_charge = self.gd.integrate(self.nt_sG).sum()

        if pseudo_charge + self.charge + comp_charge == 0:
            # Already neutral.
            return

        if pseudo_charge != 0:
            # Scale so that the integral becomes -(charge + comp_charge).
            self.nt_sG *= -(self.charge + comp_charge) / pseudo_charge
        else:
            # Use homogeneous background:
            # NOTE(review): sign and units differ from the scaling branch
            # above (no minus sign, multiplied by dv) -- confirm intent.
            self.nt_sG[:] = (self.charge + comp_charge) * self.gd.dv

    def calculate_pseudo_charge(self, comp_charge):
        """Build ``nt_g`` and ``rhot_g`` on the fine grid.

        ``nt_g`` is ``nt_sg`` summed over the spin axis; ``rhot_g`` is a
        copy of it to which ``ghat`` adds the compensation charges given
        by ``Q_aL``.  ``comp_charge`` is accepted but not used here.

        In debug mode a RuntimeError is raised when the integral of
        ``rhot_g`` plus ``self.charge`` exceeds ``charge_eps`` in
        magnitude.
        """
        self.nt_g = self.nt_sg.sum(axis=0)
        self.rhot_g = self.nt_g.copy()
        self.ghat.add(self.rhot_g, self.Q_aL)

        if not debug:
            return

        excess = self.finegd.integrate(self.rhot_g) + self.charge
        if abs(excess) > self.charge_eps:
            raise RuntimeError('Charge not conserved: excess=%.9f' % excess)

    def mix(self, comp_charge):
        """Mix the density and refresh the fine-grid quantities.

        Depending on ``self.mixer.mix_rho`` the mixer is invoked either
        before interpolation (flag false) or after the fine-grid charge
        density has been built (flag true).
        """
        if not self.mixer.mix_rho:
            self.mixer.mix(self)
            # Passing None makes interpolate() recompute the
            # compensation charge after the mixing step.
            comp_charge = None

        self.interpolate(comp_charge)
        self.calculate_pseudo_charge(comp_charge)

        if self.mixer.mix_rho:
            self.mixer.mix(self)

    def interpolate(self, comp_charge=None):
        """Transfer ``nt_sG`` to the fine grid array ``nt_sg``.

        ``nt_sg`` is allocated on first use.  ``comp_charge`` is computed
        with calculate_multipole_moments() when not supplied; it is only
        needed for the renormalization done when at least one direction
        is non-periodic.
        """
        if comp_charge is None:
            comp_charge = self.calculate_multipole_moments()

        if self.nt_sg is None:
            self.nt_sg = self.finegd.empty(self.nspins)

        apply_interpolation = self.interpolator.apply
        for s in range(self.nspins):
            apply_interpolation(self.nt_sG[s], self.nt_sg[s])

        if self.gd.pbc_c.all():
            # With periodic boundary conditions, the interpolation will
            # conserve the number of electrons.
            return

        # With zero-boundary conditions in one or more directions this is
        # not the case, so rescale to the charge the density must carry.
        target_charge = -(self.charge + comp_charge)
        if abs(target_charge) > 1.0e-14:
            fine_charge = self.finegd.integrate(self.nt_sg).sum()
            self.nt_sg *= target_charge / fine_charge

    def calculate_multipole_moments(self):
        """Fill ``Q_aL`` and return the total compensation charge.

        For every atom in ``D_asp`` the spin-summed density matrix is
        contracted with the setup's ``Delta_pL`` and ``Delta0`` is added
        to the first component.  The first components are summed over
        the local atoms, summed over the domain communicator and
        multiplied by sqrt(4 pi).

        The result is in units of electron charge, so the number will be
        negative because of the dominating contribution from the nuclear
        charge.
        """
        self.Q_aL = {}
        local_charge = 0.0
        for a, D_sp in self.D_asp.items():
            setup = self.setups[a]
            Q_L = np.dot(D_sp.sum(0), setup.Delta_pL)
            self.Q_aL[a] = Q_L
            Q_L[0] += setup.Delta0
            local_charge += Q_L[0]

        total_charge = self.gd.comm.sum(local_charge)
        return total_charge * sqrt(4 * pi)

    def initialize_from_atomic_densities(self, basis_functions):
        """Initialize D_asp, nt_sG and Q_aL from atomic densities.

        nt_sG is initialized from atomic orbitals, and will
        be constructed with the specified magnetic moments and
        obeying Hund's rules if ``hund`` is true.

        Occupation numbers are computed for every atom listed in
        ``basis_functions.atom_indices``; density matrices are only
        created for the atoms in ``basis_functions.my_atom_indices``.
        The density is then normalized and mixed.
        """

        # XXX does this work with blacs?  What should be distributed?
        # Apparently this doesn't use blacs at all, so it's serial
        # with respect to the blacs distribution.  That means it works
        # but is not particularly efficient (not that this is a time
        # consuming step)

        self.D_asp = {}
        f_asi = {}
        for a in basis_functions.atom_indices:
            setup = self.setups[a]
            c = self.charge / len(self.setups)  # distribute on all atoms
            f_si = setup.calculate_initial_occupation_numbers(
                self.magmom_a[a], self.hund, charge=c, nspins=self.nspins)
            if a in basis_functions.my_atom_indices:
                self.D_asp[a] = setup.initialize_density_matrix(f_si)
            f_asi[a] = f_si

        # Atomic-orbital density plus the pseudo core density.
        self.nt_sG = self.gd.zeros(self.nspins)
        basis_functions.add_to_density(self.nt_sG, f_asi)
        self.nt_sG += self.nct_G
        self.calculate_normalized_charges_and_mix()

    def initialize_from_wavefunctions(self, wfs):
        """Build D_asp, nt_sG and Q_aL from the wave functions ``wfs``.

        The pseudo density is computed first; then one uninitialized
        density-matrix buffer is created per atom owned by this rank
        (according to ``wfs.rank_a``) and filled by
        ``wfs.calculate_atomic_density_matrices``.  Finally the density
        is normalized and mixed.
        """
        self.nt_sG = self.gd.empty(self.nspins)
        self.calculate_pseudo_density(wfs)

        owned_by_me = (wfs.rank_a == self.gd.comm.rank)
        self.D_asp = {}
        for a in np.argwhere(owned_by_me).ravel():
            ni = self.setups[a].ni
            npacked = ni * (ni + 1) // 2
            self.D_asp[a] = np.empty((self.nspins, npacked))

        wfs.calculate_atomic_density_matrices(self.D_asp)
        self.calculate_normalized_charges_and_mix()

    def initialize_directly_from_arrays(self, nt_sG, D_asp):
        """Adopt the given ``nt_sG`` and ``D_asp`` without further work.

        The objects are stored as they are (no copy).  No normalization,
        multipole-moment calculation or mixing is done here.
        """
        self.D_asp = D_asp
        self.nt_sG = nt_sG
        #self.calculate_normalized_charges_and_mix()
        # No calculate multipole moments?  Tests will fail because of
        # improperly initialized mixer

    def calculate_normalized_charges_and_mix(self):
        """Compute the compensation charge, then normalize and mix.

        The compensation charge is calculated once and shared by the
        normalization and mixing steps.
        """
        total_comp_charge = self.calculate_multipole_moments()
        self.normalize(total_comp_charge)
        self.mix(total_comp_charge)

    def set_mixer(self, mixer):
        """Install ``mixer``, or a default one when ``mixer`` is None.

        Default: ``MixerSum`` for two spins, ``Mixer`` otherwise, with
        beta=0.1 / weight=50.0 if any direction is periodic and
        beta=0.25 / weight=1.0 for a fully non-periodic cell.  The chosen
        mixer is initialized with this density object.

        Raises RuntimeError when a ``MixerSum`` is passed for a
        calculation with a single spin channel.
        """
        if mixer is None:
            if self.gd.pbc_c.any():
                beta, weight = 0.1, 50.0
            else:
                beta, weight = 0.25, 1.0

            default_class = MixerSum if self.nspins == 2 else Mixer
            self.mixer = default_class(beta=beta, weight=weight)
        else:
            if self.nspins == 1 and isinstance(mixer, MixerSum):
                raise RuntimeError('Cannot use MixerSum with nspins==1')
            self.mixer = mixer

        self.mixer.initialize(self)
        
    def estimate_magnetic_moments(self):
        """Return per-atom magnetic moment estimates.

        The result has the shape and dtype of ``self.magmom_a``.  It is
        all zeros unless ``nspins == 2``; in that case each locally
        stored atom contributes the dot product of its spin-difference
        density matrix with the setup's ``N0_p``, and the array is summed
        over the domain communicator.
        """
        magmom_a = np.zeros_like(self.magmom_a)
        if self.nspins != 2:
            return magmom_a

        for a, D_sp in self.D_asp.items():
            spin_difference_p = D_sp[0] - D_sp[1]
            magmom_a[a] = np.dot(spin_difference_p, self.setups[a].N0_p)
        self.gd.comm.sum(magmom_a)
        return magmom_a

    def get_correction(self, a, spin):
        """Integrated atomic density correction.

        Get the integrated correction to the pseudo density relative to
        the all-electron density for atom ``a`` and spin index ``spin``.
        """
        setup = self.setups[a]
        matrix_part = np.dot(self.D_asp[a][spin], setup.Delta_pL[:, 0])
        constant_part = setup.Delta0 / self.nspins
        return sqrt(4 * pi) * (matrix_part + constant_part)

    def get_density_array(self):
        # NOTE(review): the bare name below is evaluated first.  No global
        # called XXX is visible in this file, so calling this method
        # presumably raises NameError -- it looks like a deliberate
        # tripwire marking the method as disabled.  Confirm before
        # removing it.
        XXX
        # XXX why not replace with get_spin_density and get_total_density?
        # The string below follows another statement, so it is a plain
        # expression and not the method's docstring.
        """Return pseudo-density array."""
        # Two spins: the whole array; otherwise only the first spin channel.
        if self.nspins == 2:
            return self.nt_sG
        else:
            return self.nt_sG[0]
    
    def get_all_electron_density(self, atoms, gridrefinement=2):
        """Return real all-electron density array.

        ``gridrefinement`` selects the grid: 1 (coarse), 2 (fine) or 4
        (the fine grid refined once more); any other value raises
        NotImplementedError.  ``atoms`` supplies the scaled positions and
        the number of atoms.

        Returns the tuple ``(n_sg, gd)``: the density array, one entry per
        spin, and the grid descriptor it is defined on.
        """

        # Refinement of coarse grid, for representation of the AE-density
        if gridrefinement == 1:
            gd = self.gd
            n_sg = self.nt_sG.copy()
        elif gridrefinement == 2:
            gd = self.finegd
            if self.nt_sg is None:
                self.interpolate()
            n_sg = self.nt_sg.copy()
        elif gridrefinement == 4:
            # Extra fine grid
            gd = self.finegd.refine()
            
            # Interpolation function for the density:
            interpolator = Transformer(self.finegd, gd, 3)

            # Transfer the pseudo-density to the fine grid:
            n_sg = gd.empty(self.nspins)
            if self.nt_sg is None:
                self.interpolate()
            for s in range(self.nspins):
                interpolator.apply(self.nt_sg[s], n_sg[s])
        else:
            raise NotImplementedError

        # Add corrections to pseudo-density to get the AE-density
        # Splines are cached per setup id so that atoms sharing a setup
        # reuse the same objects.
        splines = {}
        phi_aj = []
        phit_aj = []
        nc_a = []
        nct_a = []
        for a, id in enumerate(self.setups.id_a):
            if id in splines:
                phi_j, phit_j, nc, nct = splines[id]
            else:
                # Load splines:
                phi_j, phit_j, nc, nct = self.setups[a].get_partial_waves()[:4]
                splines[id] = (phi_j, phit_j, nc, nct)
            phi_aj.append(phi_j)
            phit_aj.append(phit_j)
            nc_a.append([nc])
            nct_a.append([nct])

        # Create localized functions from splines
        # (the names nc and nct are reused: from here on they refer to the
        # LFC objects, not to individual splines)
        phi = LFC(gd, phi_aj)
        phit = LFC(gd, phit_aj)
        nc = LFC(gd, nc_a)
        nct = LFC(gd, nct_a)
        spos_ac = atoms.get_scaled_positions() % 1.0
        phi.set_positions(spos_ac)
        phit.set_positions(spos_ac)
        nc.set_positions(spos_ac)
        nct.set_positions(spos_ac)

        # Collect a density matrix for every atom on every rank.  Atoms
        # not stored locally get an uninitialized buffer which, in a
        # parallel run, the broadcast from the owning rank fills.
        all_D_asp = []
        for a, setup in enumerate(self.setups):
            D_sp = self.D_asp.get(a)
            if D_sp is None:
                ni = setup.ni
                D_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
            if gd.comm.size > 1:
                gd.comm.broadcast(D_sp, self.rank_a[a])
            all_D_asp.append(D_sp)

        for s in range(self.nspins):
            # I_a is passed to every add1/add2 call below (presumably they
            # accumulate per-atom integrals of what was added -- confirm
            # in the LFC implementation).
            I_a = np.zeros(len(atoms))
            nc.add1(n_sg[s], 1.0 / self.nspins, I_a)
            nct.add1(n_sg[s], -1.0 / self.nspins, I_a)
            phi.add2(n_sg[s], all_D_asp, s, 1.0, I_a)
            phit.add2(n_sg[s], all_D_asp, s, -1.0, I_a)
            # Subtract a per-atom reference value for the locally stored
            # atoms, then sum over the communicator.
            for a, D_sp in self.D_asp.items():
                setup = self.setups[a]
                I_a[a] -= ((setup.Nc - setup.Nct) / self.nspins +
                           sqrt(4 * pi) *
                           np.dot(D_sp[s], setup.Delta_pL[:, 0]))
            gd.comm.sum(I_a)
            # Grid point nearest to each atom, relative to the start of
            # the local domain.
            N_c = gd.N_c
            g_ac = np.around(N_c * spos_ac).astype(int) % N_c - gd.beg_c
            for I, g_c in zip(I_a, g_ac):
                # Only the rank whose domain contains the point applies
                # the correction I / dv at that single grid point.
                if (g_c >= 0).all() and (g_c < gd.n_c).all():
                    n_sg[s][tuple(g_c)] -= I / gd.dv

        return n_sg, gd

    def new_get_all_electron_density(self, atoms, gridrefinement=2):
        """Return real all-electron density array.

        Variant of get_all_electron_density() that represents the partial
        waves with ``BasisFunctions`` and applies the corrections through
        the ``ae_valence_density_correction`` and
        ``ae_core_density_correction`` routines of the ``lfc`` objects.

        ``gridrefinement`` selects the grid: 1 (coarse), 2 (fine) or 4
        (the fine grid refined once more); any other value raises
        NotImplementedError.

        Returns the tuple ``(n_sg, gd)``: the density array, one entry per
        spin, and the grid descriptor it is defined on.
        """

        # Refinement of coarse grid, for representation of the AE-density
        if gridrefinement == 1:
            gd = self.gd
            n_sg = self.nt_sG.copy()
        elif gridrefinement == 2:
            gd = self.finegd
            if self.nt_sg is None:
                self.interpolate()
            n_sg = self.nt_sg.copy()
        elif gridrefinement == 4:
            # Extra fine grid
            gd = self.finegd.refine()
            
            # Interpolation function for the density:
            interpolator = Transformer(self.finegd, gd, 3)

            # Transfer the pseudo-density to the fine grid:
            n_sg = gd.empty(self.nspins)
            if self.nt_sg is None:
                self.interpolate()
            for s in range(self.nspins):
                interpolator.apply(self.nt_sg[s], n_sg[s])
        else:
            raise NotImplementedError

        # Add corrections to pseudo-density to get the AE-density
        # Splines are cached per setup id so that atoms sharing a setup
        # reuse the same objects.
        splines = {}
        phi_aj = []
        phit_aj = []
        nc_a = []
        nct_a = []
        for a, id in enumerate(self.setups.id_a):
            if id in splines:
                phi_j, phit_j, nc, nct = splines[id]
            else:
                # Load splines:
                phi_j, phit_j, nc, nct = self.setups[a].get_partial_waves()[:4]
                splines[id] = (phi_j, phit_j, nc, nct)
            phi_aj.append(phi_j)
            phit_aj.append(phit_j)
            nc_a.append([nc])
            nct_a.append([nct])

        # Create localized functions from splines
        # (the names nc and nct are reused: from here on they refer to the
        # LFC objects, not to individual splines)
        phi = BasisFunctions(gd, phi_aj)
        phit = BasisFunctions(gd, phit_aj)
        nc = LFC(gd, nc_a)
        nct = LFC(gd, nct_a)
        spos_ac = atoms.get_scaled_positions() % 1.0
        phi.set_positions(spos_ac)
        phit.set_positions(spos_ac)
        nc.set_positions(spos_ac)
        nct.set_positions(spos_ac)

        # One row of per-atom values for each spin.
        I_sa = np.zeros((self.nspins, len(atoms)))
        # a_W holds the atom index for every entry of phi.M_W: each atom
        # contributes as many consecutive entries as its sphere has in M_w.
        a_W =  np.empty(len(phi.M_W), np.int32)
        W = 0
        for a in phi.atom_indices:
            nw = len(phi.sphere_a[a].M_w)
            a_W[W:W + nw] = a
            W += nw
        # Block-diagonal matrix with one ni x ni block per atom.  It is
        # allocated once; the same blocks are overwritten for each spin.
        rho_MM = np.zeros((phi.Mmax, phi.Mmax))
        for s, I_a in enumerate(I_sa):
            M1 = 0
            for a, setup in enumerate(self.setups):
                ni = setup.ni
                D_sp = self.D_asp.get(a)
                if D_sp is None:
                    # Not stored locally: uninitialized buffer which, in
                    # a parallel run, the broadcast below fills.
                    D_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                else:
                    # Per-atom reference value, set only on the rank that
                    # stores the atom; I_a is summed over ranks later.
                    I_a[a] = ((setup.Nct - setup.Nc) / self.nspins -
                              sqrt(4 * pi) *
                              np.dot(D_sp[s], setup.Delta_pL[:, 0]))
                if gd.comm.size > 1:
                    gd.comm.broadcast(D_sp, self.rank_a[a])
                M2 = M1 + ni
                rho_MM[M1:M2, M1:M2] = unpack2(D_sp[s])
                M1 = M2

            # All-electron partial waves enter with +rho_MM, the pseudo
            # partial waves with -rho_MM.
            phi.lfc.ae_valence_density_correction(rho_MM, n_sg[s], a_W, I_a)
            phit.lfc.ae_valence_density_correction(-rho_MM, n_sg[s], a_W, I_a)

        # Same atom-index table, now for the core-density functions.
        a_W =  np.empty(len(nc.M_W), np.int32)
        W = 0
        for a in nc.atom_indices:
            nw = len(nc.sphere_a[a].M_w)
            a_W[W:W + nw] = a
            W += nw
        scale = 1.0 / self.nspins
        for s, I_a in enumerate(I_sa):
            nc.lfc.ae_core_density_correction(scale, n_sg[s], a_W, I_a)
            nct.lfc.ae_core_density_correction(-scale, n_sg[s], a_W, I_a)
            gd.comm.sum(I_a)
            # Grid point nearest to each atom, relative to the start of
            # the local domain.
            N_c = gd.N_c
            g_ac = np.around(N_c * spos_ac).astype(int) % N_c - gd.beg_c
            for I, g_c in zip(I_a, g_ac):
                # Only the rank whose domain contains the point applies
                # the correction I / dv at that single grid point.
                if (g_c >= 0).all() and (g_c < gd.n_c).all():
                    n_sg[s][tuple(g_c)] -= I / gd.dv
        return n_sg, gd

    # Evaluated once, when the class body runs: unless the 'usenewlfc'
    # entry of extra_parameters is set to a false value (the default is
    # True), the name get_all_electron_density is rebound to the new
    # implementation.
    if extra_parameters.get('usenewlfc', True):
        get_all_electron_density = new_get_all_electron_density
        
    def estimate_memory(self, mem):
        """Register the memory estimate of this object under ``mem``.

        ``mem`` must provide ``subnode(name)`` / ``subnode(name, size)``.
        Sub-nodes are created for the grid arrays, the localized
        functions, the mixer and the interpolator.
        """
        nspins = self.nspins
        nbytes = self.gd.bytecount()
        nfinebytes = self.finegd.bytecount()

        # Grid arrays: the *_sG / *_sg arrays carry one grid per spin.
        arrays = mem.subnode('Arrays')
        arrays.subnode('nt_sG', nbytes * nspins)
        arrays.subnode('nt_sg', nfinebytes * nspins)
        arrays.subnode('nt_g', nfinebytes)
        arrays.subnode('rhot_g', nfinebytes)
        arrays.subnode('nct_G', nbytes)

        lfs = mem.subnode('Localized functions')
        self.nct.estimate_memory(lfs.subnode('nct'))
        self.ghat.estimate_memory(lfs.subnode('ghat'))

        self.mixer.estimate_memory(mem.subnode('Mixer'), self.gd)

        # TODO
        # The implementation of interpolator memory use is not very
        # accurate; 20 MiB vs 13 MiB estimated in one example, probably
        # worse for parallel calculations.
        self.interpolator.estimate_memory(mem.subnode('Interpolator'))

    def get_spin_contamination(self, atoms, majority_spin=0):
        """Calculate the spin contamination.

        Spin contamination is defined as the integral over the spin
        density difference, where it is negative (i.e. where the minority
        spin density is larger than the majority spin density).

        ``majority_spin`` equal to 0 selects spin index 0 as majority;
        any other value selects spin index 1.
        """
        smaj, smin = (0, 1) if majority_spin == 0 else (1, 0)

        nt_sg, gd = self.get_all_electron_density(atoms)

        # Keep only the points where the minority density exceeds the
        # majority density; everything else contributes zero.
        excess_g = nt_sg[smin] - nt_sg[smaj]
        return gd.integrate(np.where(excess_g > 0, excess_g, 0.0))

Example #2

Show file

File: hamiltonian.py Project: yihsuanliu/gpaw

class Hamiltonian:
    """Hamiltonian object.

    Attributes:
     =============== =====================================================
     ``xc``          ``XC3DGrid`` object.
     ``poisson``     ``PoissonSolver``.
     ``gd``          Grid descriptor for coarse grids.
     ``finegd``      Grid descriptor for fine grids.
     ``restrict``    Function for restricting the effective potential.
     =============== =====================================================

    Soft and smooth pseudo functions on uniform 3D grids:
     ========== =========================================
     ``vHt_g``  Hartree potential on the fine grid.
     ``vt_sG``  Effective potential on the coarse grid.
     ``vt_sg``  Effective potential on the fine grid.
     ========== =========================================

    Energy contributions and forces:

    =========== ==========================================
                Description
    =========== ==========================================
    ``Ekin``    Kinetic energy.
    ``Epot``    Potential energy.
    ``Etot``    Total energy.
    ``Exc``     Exchange-Correlation energy.
    ``Eext``    Energy of external potential
    ``Eref``    Reference energy for all-electron atoms.
    ``S``       Entropy.
    ``Ebar``    Should be close to zero!
    =========== ==========================================

    """

    def __init__(self, gd, finegd, nspins, setups, stencil, timer, xc,
                 psolver, vext_g):
        """Create the Hamiltonian.

        Stores the grids, setups, timer, XC object and external potential
        ``vext_g``; configures the Poisson solver on the fine grid (a
        ``PoissonSolver(nn=3, relax='J')`` is created when ``psolver`` is
        None); builds an unallocated fine -> coarse restrictor and the
        ``vbar`` localized functions.  Potentials and energies start out
        as None.
        """
        self.gd, self.finegd = gd, finegd
        self.nspins = nspins
        self.setups = setups
        self.timer = timer
        self.xc = xc

        # Solver for the Poisson equation:
        self.poisson = (PoissonSolver(nn=3, relax='J')
                        if psolver is None else psolver)
        self.poisson.set_grid_descriptor(finegd)

        # Atomic Hamiltonian matrices and the atom -> rank ownership map.
        self.dH_asp = None
        self.rank_a = None

        # The external potential
        self.vext_g = vext_g

        # Grid potentials are created on first use.
        self.vt_sG = self.vHt_g = self.vt_sg = self.vbar_g = None

        # Restrictor function for the potential:
        self.restrictor = Transformer(self.finegd, self.gd, stencil,
                                      allocate=False)
        self.restrict = self.restrictor.apply

        self.vbar = LFC(self.finegd, [[setup.vbar] for setup in setups],
                        forces=True)

        # Energy contributions.
        self.Ekin0 = self.Ekin = self.Epot = self.Ebar = None
        self.Eext = self.Exc = self.Etot = self.S = None

        self.allocated = False

    def allocate(self):
        # TODO We should move most of the gd.empty() calls here
        assert not self.allocated
        self.restrictor.allocate()
        self.allocated = True

    def set_positions(self, spos_ac, rank_a=None):
        """Move the atoms and redistribute the atomic Hamiltonians.

        ``spos_ac`` is stored on the object and forwarded to the
        ``set_positions`` methods of ``vbar`` and ``xc`` (presumably
        scaled atomic positions -- confirm against the LFC
        implementation).

        ``rank_a`` is indexed by atom and compared element-wise against
        ``self.gd.comm.rank``; it gives the rank that owns each atom after
        the move.  When an earlier ownership map and ``dH_asp`` exist, the
        entries of ``dH_asp`` whose owner changed are exchanged between
        ranks with non-blocking sends/receives.

        Side effects: allocates the object if needed and rebuilds
        ``vbar_g`` on the fine grid.
        """
        self.spos_ac = spos_ac
        if not self.allocated:
            self.allocate()
        self.vbar.set_positions(spos_ac)
        # The fine-grid array is created once and then reused: it is
        # zeroed and refilled on every call.
        if self.vbar_g is None:
            self.vbar_g = self.finegd.empty()
        self.vbar_g[:] = 0.0
        self.vbar.add(self.vbar_g)

        self.xc.set_positions(spos_ac)
        
        # If both old and new atomic ranks are present, start a blank dict if
        # it previously didn't exist but it will needed for the new atoms.
        if (self.rank_a is not None and rank_a is not None and
            self.dH_asp is None and (rank_a == self.gd.comm.rank).any()):
            self.dH_asp = {}

        if self.rank_a is not None and self.dH_asp is not None:
            self.timer.start('Redistribute')
            requests = []
            # Atoms whose owning rank differs between old and new maps.
            flags = (self.rank_a != rank_a)
            my_incoming_atom_indices = np.argwhere(np.bitwise_and(flags, \
                rank_a == self.gd.comm.rank)).ravel()
            my_outgoing_atom_indices = np.argwhere(np.bitwise_and(flags, \
                self.rank_a == self.gd.comm.rank)).ravel()

            for a in my_incoming_atom_indices:
                # Get matrix from old domain:
                ni = self.setups[a].ni
                # Receive buffer with ni*(ni+1)/2 entries per spin; it is
                # filled once the request completes in waitall() below.
                dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                requests.append(self.gd.comm.receive(dH_sp, self.rank_a[a],
                                                     tag=a, block=False))
                assert a not in self.dH_asp
                self.dH_asp[a] = dH_sp

            for a in my_outgoing_atom_indices:
                # Send matrix to new domain:
                dH_sp = self.dH_asp.pop(a)
                requests.append(self.gd.comm.send(dH_sp, rank_a[a],
                                                  tag=a, block=False))
            # Block until every posted send and receive has finished.
            self.gd.comm.waitall(requests)
            self.timer.stop('Redistribute')

        self.rank_a = rank_a

    def aoom(self, DM, a, l, scale=1):
        """Atomic Orbital Occupation Matrix.
        
        Determine the Atomic Orbital Occupation Matrix (aoom) for a
        given l-quantum number.
        
        This operation, takes the density matrix (DM), which for
        example is given by unpack2(D_asq[i][spin]), and corrects for
        the overlap between the selected orbitals (l) upon which the
        the density is expanded (ex <p|p*>,<p|p>,<p*|p*> ).

        Returned is only the "corrected" part of the density matrix,
        which represents the orbital occupation matrix for l=2 this is
        a 5x5 matrix.
        """
        S=self.setups[a]
        l_j = S.l_j
        n_j = S.n_j
        lq  = S.lq
        nl  = np.where(np.equal(l_j, l))[0]
        V = np.zeros(np.shape(DM))
        if len(nl) == 2:
            aa = (nl[0])*len(l_j)-((nl[0]-1)*(nl[0])/2)
            bb = (nl[1])*len(l_j)-((nl[1]-1)*(nl[1])/2)
            ab = aa+nl[1]-nl[0]
            
            if(scale==0 or scale=='False' or scale =='false'):
                lq_a  = lq[aa]
                lq_ab = lq[ab]
                lq_b  = lq[bb]
            else:
                lq_a  = 1
                lq_ab = lq[ab]/lq[aa]
                lq_b  = lq[bb]/lq[aa]
 
            # and the correct entrances in the DM
            nn = (2*np.array(l_j)+1)[0:nl[0]].sum()
            mm = (2*np.array(l_j)+1)[0:nl[1]].sum()
            
            # finally correct and add the four submatrices of NC_DM
            A = DM[nn:nn+2*l+1,nn:nn+2*l+1]*(lq_a)
            B = DM[nn:nn+2*l+1,mm:mm+2*l+1]*(lq_ab)
            C = DM[mm:mm+2*l+1,nn:nn+2*l+1]*(lq_ab)
            D = DM[mm:mm+2*l+1,mm:mm+2*l+1]*(lq_b)
            
            V[nn:nn+2*l+1,nn:nn+2*l+1]=+(lq_a)
            V[nn:nn+2*l+1,mm:mm+2*l+1]=+(lq_ab)
            V[mm:mm+2*l+1,nn:nn+2*l+1]=+(lq_ab)
            V[mm:mm+2*l+1,mm:mm+2*l+1]=+(lq_b)
 
            return  A+B+C+D, V
        else:
            nn =(2*np.array(l_j)+1)[0:nl[0]].sum()
            A=DM[nn:nn+2*l+1,nn:nn+2*l+1]*lq[-1]
            V[nn:nn+2*l+1,nn:nn+2*l+1]=+lq[-1]
            return A,V

    def update(self, density):
        """Calculate effective potential.

        The XC-potential and the Hartree potential are evaluated on
        the fine grid, and the sum is then restricted to the coarse
        grid."""

        self.timer.start('Hamiltonian')

        if self.vt_sg is None:
            self.timer.start('Initialize Hamiltonian')
            self.vt_sg = self.finegd.empty(self.nspins)
            self.vHt_g = self.finegd.zeros()
            self.vt_sG = self.gd.empty(self.nspins)
            self.poisson.initialize()
            self.timer.stop('Initialize Hamiltonian')

        self.timer.start('vbar')
        Ebar = self.finegd.integrate(self.vbar_g, density.nt_g,
                                     global_integral=False)

        vt_g = self.vt_sg[0]
        vt_g[:] = self.vbar_g
        self.timer.stop('vbar')

        Eext = 0.0
        if self.vext_g is not None:
            vt_g += self.vext_g.get_potential(self.finegd)
            Eext = self.finegd.integrate(vt_g, density.nt_g,
                                         global_integral=False) - Ebar

        if self.nspins == 2:
            self.vt_sg[1] = vt_g

        self.timer.start('XC 3D grid')
        Exc = self.xc.calculate(self.finegd, density.nt_sg, self.vt_sg)
        Exc /= self.gd.comm.size
        self.timer.stop('XC 3D grid')

        self.timer.start('Poisson')
        # npoisson is the number of iterations:
        self.npoisson = self.poisson.solve(self.vHt_g, density.rhot_g,
                                           charge=-density.charge)
        self.timer.stop('Poisson')

        self.timer.start('Hartree integrate/restrict')
        Epot = 0.5 * self.finegd.integrate(self.vHt_g, density.rhot_g,
                                           global_integral=False)
        Ekin = 0.0
        for vt_g, vt_G, nt_G in zip(self.vt_sg, self.vt_sG, density.nt_sG):
            vt_g += self.vHt_g
            self.restrict(vt_g, vt_G)
            Ekin -= self.gd.integrate(vt_G, nt_G - density.nct_G,
                                      global_integral=False)
        self.timer.stop('Hartree integrate/restrict')
            
        # Calculate atomic hamiltonians:
        self.timer.start('Atomic')
        W_aL = {}
        for a in density.D_asp:
            W_aL[a] = np.empty((self.setups[a].lmax + 1)**2)
        density.ghat.integrate(self.vHt_g, W_aL)
        self.dH_asp = {}
        for a, D_sp in density.D_asp.items():
            W_L = W_aL[a]
            setup = self.setups[a]

            D_p = D_sp.sum(0)
            dH_p = (setup.K_p + setup.M_p +
                    setup.MB_p + 2.0 * np.dot(setup.M_pp, D_p) +
                    np.dot(setup.Delta_pL, W_L))
            Ekin += np.dot(setup.K_p, D_p) + setup.Kc
            Ebar += setup.MB + np.dot(setup.MB_p, D_p)
            Epot += setup.M + np.dot(D_p, (setup.M_p +
                                           np.dot(setup.M_pp, D_p)))

            if self.vext_g is not None:
                vext = self.vext_g.get_taylor(spos_c=self.spos_ac[a, :])
                # Taylor expansion to the zeroth order
                Eext += vext[0][0] * (sqrt(4 * pi) * density.Q_aL[a][0]
                                      + setup.Z)
                dH_p += vext[0][0] * sqrt(4 * pi) * setup.Delta_pL[:, 0]
                if len(vext) > 1:
                    # Taylor expansion to the first order
                    Eext += sqrt(4 * pi / 3) * np.dot(vext[1],
                                                      density.Q_aL[a][1:4])
                    # there must be a better way XXXX
                    Delta_p1 = np.array([setup.Delta_pL[:, 1],
                                          setup.Delta_pL[:, 2],
                                          setup.Delta_pL[:, 3]])
                    dH_p += sqrt(4 * pi / 3) * np.dot(vext[1], Delta_p1)

            self.dH_asp[a] = dH_sp = np.zeros_like(D_sp)
            self.timer.start('XC Correction')
            Exc += setup.xc_correction.calculate(self.xc, D_sp, dH_sp, a)
            self.timer.stop('XC Correction')

            if setup.HubU is not None:
                nspins = len(D_sp)
                
                l_j = setup.l_j
                l   = setup.Hubl
                nl  = np.where(np.equal(l_j,l))[0]
                nn  = (2*np.array(l_j)+1)[0:nl[0]].sum()
                
                for D_p, H_p in zip(D_sp, self.dH_asp[a]):
                    [N_mm,V] =self.aoom(unpack2(D_p),a,l)
                    N_mm = N_mm / 2 * nspins
                     
                    Eorb = setup.HubU / 2. * (N_mm - np.dot(N_mm,N_mm)).trace()
                    Vorb = setup.HubU * (0.5 * np.eye(2*l+1) - N_mm)
                    Exc += Eorb
                    if nspins == 1:
                        # add contribution of the other spin manifold
                        Exc += Eorb
                    
                    if len(nl)==2:
                        mm  = (2*np.array(l_j)+1)[0:nl[1]].sum()
                        
                        V[nn:nn+2*l+1,nn:nn+2*l+1] *= Vorb
                        V[mm:mm+2*l+1,nn:nn+2*l+1] *= Vorb
                        V[nn:nn+2*l+1,mm:mm+2*l+1] *= Vorb
                        V[mm:mm+2*l+1,mm:mm+2*l+1] *= Vorb
                    else:
                        V[nn:nn+2*l+1,nn:nn+2*l+1] *= Vorb
                    
                    Htemp = unpack(H_p)
                    Htemp += V
                    H_p[:] = pack2(Htemp)

            dH_sp += dH_p

            Ekin -= (D_sp * dH_sp).sum()

        self.timer.stop('Atomic')

        # Make corrections due to non-local xc:
        #xcfunc = self.xc.xcfunc
        self.Enlxc = 0.0#XXXxcfunc.get_non_local_energy()
        Ekin += self.xc.get_kinetic_energy_correction() / self.gd.comm.size

        energies = np.array([Ekin, Epot, Ebar, Eext, Exc])
        self.timer.start('Communicate energies')
        self.gd.comm.sum(energies)
        self.timer.stop('Communicate energies')
        (self.Ekin0, self.Epot, self.Ebar, self.Eext, self.Exc) = energies

        #self.Exc += self.Enlxc
        #self.Ekin0 += self.Enlkin

        self.timer.stop('Hamiltonian')

    def get_energy(self, occupations):
        """Combine the stored energy terms into the total free energy.

        Parameters:

        occupations: object
            Must provide ``e_band`` and ``e_entropy``.

        Stores ``self.Ekin`` (``Ekin0 + e_band``), ``self.S`` (the entropy
        term) and ``self.Etot``, and returns ``self.Etot``.
        """
        self.Ekin = kinetic = self.Ekin0 + occupations.e_band
        self.S = entropy = occupations.e_entropy

        # Same left-to-right summation order as the individual terms are
        # listed, with the entropy term subtracted last.
        total = (kinetic + self.Epot + self.Eext +
                 self.Ebar + self.Exc - entropy)
        self.Etot = total
        return total

    def apply_local_potential(self, psit_nG, Htpsit_nG, s):
        """Add the local potential times a set of vectors to the output.

        Parameters:

        psit_nG: ndarray
            Input vector(s).  Either a single 3-dimensional array or a
            sequence of such arrays along the leading axis.
        Htpsit_nG: ndarray, output
            Array of the same layout as ``psit_nG``; ``vt_G * psit`` is
            added to it in place.
        s: int
            Index into ``self.vt_sG`` selecting the potential to use
            (presumably the spin index -- confirm against callers).

        """
        potential_G = self.vt_sG[s]

        if psit_nG.ndim != 3:
            # More than one vector: walk the leading axis and update each
            # output slice in place.
            for source_G, target_G in zip(psit_nG, Htpsit_nG):
                target_G += source_G * potential_G
            return

        # A single 3D vector can be updated directly.
        Htpsit_nG += psit_nG * potential_G

    def apply(self, a_xG, b_xG, wfs, kpt, calculate_P_ani=True):
        """Apply the Hamiltonian operator to a set of vectors.

        The result is built up in ``b_xG`` in three steps:
        ``wfs.kin.apply`` is called first, then the local potential is
        added via ``apply_local_potential``, and finally the projector
        term based on ``self.dH_asp`` is added through ``wfs.pt.add``.

        Parameters:

        a_xG: ndarray
            Set of vectors to which the Hamiltonian is applied.  The last
            three axes are the grid axes; any leading axes define the
            shape of the projection arrays.
        b_xG: ndarray, output
            Resulting H times a_xG vectors.
        wfs: WaveFunctions
            Object providing ``kin`` and ``pt``.
        kpt: KPoint object
            Provides ``phase_cd``, ``s``, ``q`` and (when
            ``calculate_P_ani`` is False) ``P_ani``.
        calculate_P_ani: bool
            When True, the projections are computed from ``a_xG`` with
            ``wfs.pt.integrate``.
            When False, the existing ``kpt.P_ani`` are copied instead
            (marked as experimental in the original code).

        """
        wfs.kin.apply(a_xG, b_xG, kpt.phase_cd)
        self.apply_local_potential(a_xG, b_xG, kpt.s)

        # Leading (non-grid) axes determine the projection array shape.
        leading_shape = a_xG.shape[:-3]
        P_axi = wfs.pt.dict(leading_shape)

        if not calculate_P_ani:  # TODO calculate_P_ani=False is experimental
            for atom, P_ni in kpt.P_ani.items():
                P_axi[atom][:] = P_ni
        else:
            wfs.pt.integrate(a_xG, P_axi, kpt.q)

        # Replace each projection by its product with the unpacked atomic
        # Hamiltonian; only existing keys are reassigned while iterating.
        for atom, P_xi in P_axi.items():
            P_axi[atom] = np.dot(P_xi, unpack(self.dH_asp[atom][kpt.s]))
        wfs.pt.add(b_xG, P_axi, kpt.q)

    def get_xc_difference(self, xc, density):
        """Calculate non-selfconsistent XC-energy difference.

        Evaluates the functional ``xc`` on the fine-grid density (which is
        interpolated first if ``density.nt_sg`` is None), adds the
        per-atom ``xc_correction`` terms, sums the result over
        ``self.gd.comm`` and returns it minus the stored ``self.Exc``.
        """
        if density.nt_sg is None:
            density.interpolate()
        fine_density = density.nt_sg

        if hasattr(xc, 'hybrid'):
            xc.calculate_exx()

        # The grid contribution is divided by the communicator size here
        # and summed over the communicator below.
        local_exc = xc.calculate(density.finegd, fine_density)
        local_exc = local_exc / self.gd.comm.size

        for atom, D_sp in density.D_asp.items():
            local_exc += self.setups[atom].xc_correction.calculate(xc, D_sp)

        return self.gd.comm.sum(local_exc) - self.Exc

    def get_vxc(self, density, wfs):
        """Calculate matrix elements of the xc-potential.

        Parameters:

        density: Density object
            Provides ``nt_sg`` (interpolated first if it is None) and
            the atomic density matrices ``D_asp``.
        wfs: WaveFunctions
            Provides ``dtype``, ``nbands`` and the list of k-points
            ``kpt_u`` (each with ``s``, ``q``, ``psit_nG`` and ``P_ani``).

        Returns an array of shape ``(len(wfs.kpt_u), nbands, nbands)``
        holding one matrix per k-point.
        """
        dtype = wfs.dtype
        nbands = wfs.nbands
        nu = len(wfs.kpt_u)
        if density.nt_sg is None:
            density.interpolate()

        # Allocate space for result matrix
        Vxc_unn = np.empty((nu, nbands, nbands), dtype=dtype)

        # Get pseudo xc potential on the coarse grid
        # NOTE(review): other methods of this class drive the functional
        # through self.xc.calculate(...) and xc_correction.calculate(...);
        # confirm that get_energy_and_potential() and
        # calculate_energy_and_derivatives() are still available.
        Vxct_sG = self.gd.empty(self.nspins)
        Vxct_sg = self.finegd.zeros(self.nspins)
        if self.nspins == 1:
            self.xc.get_energy_and_potential(density.nt_sg[0], Vxct_sg[0])
        else:
            self.xc.get_energy_and_potential(density.nt_sg[0], Vxct_sg[0],
                                             density.nt_sg[1], Vxct_sg[1])
        for Vxct_G, Vxct_g in zip(Vxct_sG, Vxct_sg):
            self.restrict(Vxct_g, Vxct_G)
        # The fine-grid potential is no longer needed once restricted.
        del Vxct_sg

        # Get atomic corrections to the xc potential
        Vxc_asp = {}
        for a, D_sp in density.D_asp.items():
            Vxc_asp[a] = np.zeros_like(D_sp)
            self.setups[a].xc_correction.calculate_energy_and_derivatives(
                D_sp, Vxc_asp[a])

        # Project potential onto the eigenstates
        for kpt, Vxc_nn in zip(wfs.kpt_u, Vxc_unn):
            s = kpt.s
            psit_nG = kpt.psit_nG

            # Project pseudo part
            r2k(.5 * self.gd.dv, psit_nG, Vxct_sG[s] * psit_nG, 0.0, Vxc_nn)
            tri2full(Vxc_nn, 'L')
            self.gd.comm.sum(Vxc_nn)

            # Add atomic corrections
            # Vxc_ij = \int dr phi_i(r) vxc(r) phi_j^*(r)
            # P_ni = \int dr psi_n(r) pt_i^*(r)
            # Vxc_nm = \int dr psi_n(r) vxc(r) psi_m^*(r)
            #      + sum_ij P_ni Vxc_ij P_mj^*
            for a, P_ni in kpt.P_ani.items():
                Vxc_ii = unpack(Vxc_asp[a][s])
                Vxc_nn += np.dot(P_ni, np.inner(Vxc_ii, P_ni).conj())
        return Vxc_unn

    def estimate_memory(self, mem):
        """Report the memory needs of this object to the ``mem`` tree.

        An 'Arrays' subnode receives one entry per grid array, sized from
        ``bytecount()`` of the coarse and fine grid descriptors.  The
        restrictor, xc, poisson and vbar members then each fill a subnode
        of their own through their ``estimate_memory`` methods.
        """
        nbytes = self.gd.bytecount()
        nfinebytes = self.finegd.bytecount()

        arrays = mem.subnode('Arrays', 0)
        for label, size in (('vHt_g', nfinebytes),
                            ('vt_sG', self.nspins * nbytes),
                            ('vt_sg', self.nspins * nfinebytes)):
            arrays.subnode(label, size)

        # (subnode label, attribute holding the component), in report order.
        for label, member in (('Restrictor', 'restrictor'),
                              ('XC', 'xc'),
                              ('Poisson', 'poisson'),
                              ('vbar', 'vbar')):
            getattr(self, member).estimate_memory(mem.subnode(label))