Python maxrss Examples, gpaw.utilities.memory.maxrss Python Examples

Example #1

0

Show file

    def update(self, x):
        """Update progress-bar (0 <= x <= 1).

        All output goes to ``self.fd``.  The layout depends on two flags:

        * ``self.tty``: one line that is redrawn in place (it starts with a
          carriage return) showing elapsed time, estimated total time,
          percentage, a 35 character wide bar and ``maxrss() / 1024**2``.
        * ``self.nobar``: a start line on the first call, then one plain
          line each time ``int(10 * x)`` increases, then a finish line.
        * otherwise: a bar of up to 40 dashes that grows as ``x`` grows.

        Calls with ``x == 0`` do nothing, and neither do calls made after
        ``x == 1`` has been reported (``self.done`` is set at that point).
        """
        if x == 0 or self.done:
            return

        # Number of steps (and bar width) for the selected layout.
        if self.tty:
            N = 35
        elif self.nobar:
            N = 10
        else:
            N = 40

        n = int(N * x)
        t = time() - self.t
        t_dt = self.format_time(t)
        # Elapsed time divided by the completed fraction.
        est = self.format_time(t / x)
        p = functools.partial(print, file=self.fd)

        if self.tty:
            bar = '-' * (n - 1) + self.symbols[int(t % len(self.symbols))]
            p(('\r{0} / {1} ({2:.0f}%) |{3:' + str(N) + '}| ').format(
                t_dt, est, x * 100, bar),
              end='')
            # Write the memory figure to the same stream as the bar; a bare
            # print() would send it to stdout even when self.fd is another
            # file.
            p(' {0:.0f} MB/core'.format(maxrss() / 1024**2), end='')
            if x == 1:
                p()
                self.done = True
            self.fd.flush()
        elif self.nobar:
            if self.n is None:
                p(('Started: {0:.0f} MB/core').format(maxrss() / 1024**2))
                self.n = 0
            if n > self.n:
                p(('{0} of {1} ({2:.0f}%) {3:.0f} MB/core').format(
                    t_dt, est, x * 100,
                    maxrss() / 1024**2))
                self.fd.flush()
                self.n = n
            if x == 1:
                p('Finished in {0}'.format(t_dt))
                self.fd.flush()
                self.done = True
        else:
            if self.n is None:
                p('{0}s |'.format(t / x), end='')
                self.n = 0
            if n > self.n:
                # Only the dashes that were not printed yet.
                p('-' * (n - self.n), end='')
                self.fd.flush()
                self.n = n
            if x == 1:
                p('| Time: {0:.3f}s'.format(t))
                self.fd.flush()
                self.done = True

Example #2

0

Show file

File: progressbar.py Project: ryancoleman/lotsofcoresbook2code

    def update(self, x):
        """Update progress-bar (0 <= x <= 1).

        Nothing happens for ``x == 0`` or once ``self.done`` has been set
        (which happens when ``x == 1`` is reported).  Otherwise progress is
        written to ``self.fd`` in one of three forms:

        * ``self.tty`` set: a line redrawn in place with elapsed time,
          estimated total time, percentage, a bar 35 characters wide and
          ``maxrss() / 1024**2``.
        * ``self.nobar`` set: plain text lines, at most one per increase of
          ``int(10 * x)``, framed by a start and a finish line.
        * neither: a bar of up to 40 dashes printed incrementally.
        """
        if x == 0 or self.done:
            return

        # Number of steps (and bar width) for the selected layout.
        if self.tty:
            N = 35
        elif self.nobar:
            N = 10
        else:
            N = 40

        n = int(N * x)
        t = time() - self.t
        t_dt = self.format_time(t)
        # Elapsed time divided by the completed fraction.
        est = self.format_time(t / x)
        p = functools.partial(print, file=self.fd)

        if self.tty:
            bar = '-' * (n - 1) + self.symbols[int(t % len(self.symbols))]
            p(('\r{0} / {1} ({2:.0f}%) |{3:' + str(N) + '}| ')
              .format(t_dt, est, x * 100, bar), end='')
            # Use p() here as well: a bare print() writes to stdout, which
            # separates the memory figure from the bar whenever self.fd is
            # a different stream.
            p(' {0:.0f} MB/core'.format(maxrss() / 1024**2), end='')
            if x == 1:
                p()
                self.done = True
            self.fd.flush()
        elif self.nobar:
            if self.n is None:
                p(('Started: {0:.0f} MB/core')
                  .format(maxrss() / 1024**2))
                self.n = 0
            if n > self.n:
                p(('{0} of {1} ({2:.0f}%) {3:.0f} MB/core')
                  .format(t_dt, est, x * 100, maxrss() / 1024**2))
                self.fd.flush()
                self.n = n
            if x == 1:
                p('Finished in {0}'.format(t_dt))
                self.fd.flush()
                self.done = True
        else:
            if self.n is None:
                p('{0}s |'.format(t / x), end='')
                self.n = 0
            if n > self.n:
                # Only the dashes that were not printed yet.
                p('-' * (n - self.n), end='')
                self.fd.flush()
                self.n = n
            if x == 1:
                p('| Time: {0:.3f}s'.format(t))
                self.fd.flush()
                self.done = True

Example #3

0

Show file

File: paw.py Project: Huaguiyuan/gpawDFT

    def print_memory_estimate(self, log=None, maxdepth=-1):
        """Print estimated memory usage for PAW object and components.

        ``log`` is the callable used for output (``self.log`` when None);
        the breakdown itself is written to ``log.fd``.  ``maxdepth`` is the
        maximum nesting level of displayed components.

        The PAW object must be initialize()'d, but needs not have large
        arrays allocated.
        """
        # NOTE.  This should work with --dry-run=N
        #
        # However, the initial overhead estimate is wrong if this method
        # is called within a real mpirun/gpaw-python context.
        log = self.log if log is None else log
        log('Memory estimate:')

        # initial overhead includes part of Hamiltonian!
        rss_now = maxrss()
        log('  Process memory now: %.2f MiB' % (rss_now / 1024.0**2))

        root = MemNode('Calculator', 0)
        root.indent = '  '
        try:
            self.estimate_memory(root)
        except AttributeError as err:
            # Best effort: report the problem and print what was collected.
            for line in ('Attribute error: %r' % err,
                         'Some object probably lacks estimate_memory() method',
                         'Memory breakdown may be incomplete'):
                log(line)
        root.calculate_size()
        root.write(log.fd, maxdepth=maxdepth, depth=1)
        log()

Example #4

0

Show file

File: paw.py Project: robwarm/gpaw-symm

    def print_memory_estimate(self, txt=None, maxdepth=-1):
        """Print estimated memory usage for PAW object and components.

        ``txt`` is the file-like object written to (``self.txt`` when None).
        maxdepth is the maximum nesting level of displayed components.

        The PAW object must be initialize()'d, but needs not have large
        arrays allocated."""
        # NOTE.  This should work with --dry-run=N
        #
        # However, the initial overhead estimate is wrong if this method
        # is called within a real mpirun/gpaw-python context.
        if txt is None:
            txt = self.txt
        txt.write('Memory estimate\n')
        txt.write('---------------\n')

        mem_init = maxrss()  # initial overhead includes part of Hamiltonian!
        txt.write('Process memory now: %.2f MiB\n' % (mem_init / 1024.0**2))

        mem = MemNode('Calculator', 0)
        try:
            self.estimate_memory(mem)
        except AttributeError as m:
            # "except X as m" is valid on Python 2.6+ and Python 3; the old
            # comma form is a SyntaxError on Python 3.  Each message ends
            # with a newline so the diagnostics do not run together.
            txt.write('Attribute error: %r\n' % m)
            txt.write('Some object probably lacks estimate_memory() method\n')
            txt.write('Memory breakdown may be incomplete\n')

Example #5

0

Show file

File: paw.py Project: qsnake/gpaw

 def estimate_memory(self, mem):
     """Record the memory use of this object and its parts in *mem*.

     Adds an 'Initial overhead' subnode holding the current ``maxrss()``
     value, then lets the density, Hamiltonian and wave-function objects
     fill in one subnode each.
     """
     # XXX initial overhead includes part of Hamiltonian
     mem.subnode('Initial overhead', maxrss())
     components = (
         ('Density', self.density),
         ('Hamiltonian', self.hamiltonian),
         ('Wavefunctions', self.wfs),
     )
     for label, component in components:
         child = mem.subnode(label)
         component.estimate_memory(child)

Example #6

0

Show file

    def print_chi(self, pd):
        """Write the chi0 calculation parameters and a memory estimate.

        Everything is printed to ``self.fd``.  When the ``df_dry_run``
        extra parameter is set, a ``DryRunCommunicator`` of that size
        stands in for ``self.world`` in the reported sizes.
        """
        calc = self.calc
        grid = calc.wfs.gd

        if extra_parameters.get('df_dry_run'):
            from gpaw.mpi import DryRunCommunicator
            world = DryRunCommunicator(extra_parameters['df_dry_run'])
        else:
            world = self.world

        q_c = pd.kd.bzk_kc[0]
        n_freq = len(self.omega_w)
        cutoff = self.ecut * Hartree
        n_spins = calc.wfs.nspins
        n_bands = self.nbands
        n_bz = calc.wfs.kd.nbzkpts
        n_ibz = calc.wfs.kd.nibzkpts
        n_pw = pd.ngmax
        broadening = self.eta * Hartree
        world_size = world.size
        kn_size = self.kncomm.size
        n_full = self.nocc1
        n_partial = self.nocc2
        n_points = grid.N_c[0] * grid.N_c[1] * grid.N_c[2]
        # Ceiling division: states handled per rank.
        per_rank = (n_spins * n_partial + world.size - 1) // world.size
        occ_mb = per_rank * n_points * 16. / 1024**2
        block_size = self.blockcomm.size
        chi_mb = n_freq * pd.ngmax**2 * 16. / 1024**2 / block_size

        emit = partial(print, file=self.fd)

        emit('%s' % ctime())
        emit('Called response.chi0.calculate with')
        emit('    q_c: [%f, %f, %f]' % (q_c[0], q_c[1], q_c[2]))

        # One line per scalar setting, formatted right before it is printed.
        settings = (
            ('    Number of frequency points: %d', n_freq),
            ('    Planewave cutoff: %f', cutoff),
            ('    Number of spins: %d', n_spins),
            ('    Number of bands: %d', n_bands),
            ('    Number of kpoints: %d', n_bz),
            ('    Number of irredicible kpoints: %d', n_ibz),
            ('    Number of planewaves: %d', n_pw),
            ('    Broadening (eta): %f', broadening),
            ('    world.size: %d', world_size),
            ('    kncomm.size: %d', kn_size),
            ('    blockcomm.size: %d', block_size),
            ('    Number of completely occupied states: %d', n_full),
            ('    Number of partially occupied states: %d', n_partial),
        )
        for template, value in settings:
            emit(template % value)

        emit()
        emit('    Memory estimate of potentially large arrays:')
        emit('        chi0_wGG: %f M / cpu' % chi_mb)
        emit('        Occupied states: %f M / cpu' % occ_mb)
        emit('        Memory usage before allocation: %f M / cpu' %
             (maxrss() / 1024**2))
        emit()

Example #7

0

Show file

File: output.py Project: Xu-Kai/lotsofcoresbook2code

    def __del__(self):
        """Destructor: report memory usage and write the timings.

        Does nothing when the module-level ``dry_run`` flag is set.
        """
        if dry_run:
            return

        peak = maxrss()
        if peak > 0:
            # Below 1024**3 the value is shown in MiB, otherwise in GiB.
            report = ('Memory usage: %.2f MiB' % (peak / 1024.0**2)
                      if peak < 1024.0**3 else
                      'Memory usage: %.2f GiB' % (peak / 1024.0**3))
            self.text(report)

        self.timer.write(self.txt)

Example #8

0

Show file

File: output.py Project: ryancoleman/lotsofcoresbook2code

    def __del__(self):
        """Destructor: print peak memory, then the timer summary.

        Both steps are skipped when the module-level ``dry_run`` is true.
        """
        if not dry_run:
            peak_bytes = maxrss()
            if peak_bytes > 0:
                mib, gib = 1024.0**2, 1024.0**3
                if peak_bytes < gib:
                    usage = 'Memory usage: %.2f MiB' % (peak_bytes / mib)
                else:
                    usage = 'Memory usage: %.2f GiB' % (peak_bytes / gib)
                self.text(usage)

            self.timer.write(self.txt)

Example #9

0

Show file

File: output.py Project: yihsuanliu/gpaw

    def __del__(self):
        """Destructor: write memory usage and timing output.

        Returns immediately when the object has no ``txt`` attribute or it
        is None.  On dry runs nothing is written.
        """
        if getattr(self, 'txt', None) is None:
            return

        if dry_run:
            return

        peak = maxrss()
        if peak > 0:
            # Below 1024**3 the value is shown in MB, otherwise in GB.
            if peak < 1024.0**3:
                summary = 'Memory usage: %.2f MB' % (peak / 1024.0**2)
            else:
                summary = 'Memory usage: %.2f GB' % (peak / 1024.0**3)
            self.text(summary)

        self.timer.write(self.txt)

Example #10

0

Show file

File: chi0.py Project: robwarm/gpaw-symm

    def print_chi(self, pd):
        """Print the chi0 calculation parameters to ``self.fd``.

        The report has three parts: the physical settings, figures related
        to parallelization, and a memory estimate that ends with the
        current ``maxrss()`` value.  When the ``df_dry_run`` extra
        parameter is set, a ``DryRunCommunicator`` of that size stands in
        for ``self.world``.
        """

        def emit(text):
            print(text, file=self.fd)

        calc = self.calc
        grid = calc.wfs.gd

        nspins = calc.wfs.nspins
        nkpts = calc.wfs.kd.nbzkpts
        npartial = self.nocc2

        if extra_parameters.get("df_dry_run"):
            from gpaw.mpi import DryRunCommunicator

            world = DryRunCommunicator(extra_parameters["df_dry_run"])
        else:
            world = self.world

        nfreq = len(self.omega_w)
        q_c = pd.kd.bzk_kc[0]
        # Ceiling division: terms of the chi-sum handled by each rank.
        per_rank = (nspins * nkpts * npartial + world.size - 1) // world.size

        emit("%s" % ctime())
        emit("Called response.chi0.calculate with")
        emit("    q_c: [%f, %f, %f]" % (q_c[0], q_c[1], q_c[2]))
        emit("    Number of frequency points   : %d" % nfreq)
        emit("    Planewave cutoff: %f" % (self.ecut * Hartree))
        emit("    Number of spins: %d" % nspins)
        emit("    Number of bands: %d" % self.nbands)
        emit("    Number of kpoints: %d" % nkpts)
        emit("    Number of planewaves: %d" % pd.ngmax)
        emit("    Broadening (eta): %f" % (self.eta * Hartree))
        emit("    Keep occupied states: %s" % self.keep_occupied_states)

        emit("")
        emit("    Related to parallelization")
        emit("        world.size: %d" % world.size)
        emit("        Number of completely occupied states: %d" % self.nocc1)
        emit("        Number of partially occupied states: %d" % self.nocc2)
        emit("        Number of terms handled in chi-sum by each rank: %d" % per_rank)

        emit("")
        emit("    Memory estimate:")
        chi0_mb = nfreq * pd.ngmax ** 2 * 16.0 / 1024 ** 2
        emit("        chi0_wGG: %f M / cpu" % chi0_mb)
        occ_mb = per_rank * grid.N_c[0] * grid.N_c[1] * grid.N_c[2] * 16.0 / 1024 ** 2
        emit("        Occupied states: %f M / cpu" % occ_mb)
        emit("        Max mem sofar   : %f M / cpu" % (maxrss() / 1024 ** 2))

        emit("")

Example #11

0

Show file

    def __del__(self):
        """Destructor: log memory usage and the current date.

        Nothing is written on dry runs.  The memory line is left out when
        ``maxrss()`` cannot be evaluated or does not return a positive
        value.
        """
        if dry_run:
            return

        try:
            peak = maxrss()
        except (LookupError, TypeError, NameError):
            # Thing can get weird during interpreter shutdown ...
            peak = 0

        if peak > 0:
            # Below 1024**3 the value is shown in MiB, otherwise in GiB.
            message = ('Memory usage: %.2f MiB' % (peak / 1024**2)
                       if peak < 1024**3 else
                       'Memory usage: %.2f GiB' % (peak / 1024**3))
            self(message)

        self('Date: ' + time.asctime())

Example #12

0

Show file

File: maxrss.py Project: eojons/gpaw-scme

    for i in range(mno):
        a.append(np.ones((msize, msize), dtype=dtype))

    # must return and store the matrix to allocate memory
    return a, mmemory, mno

# max memory to be used for allocation matrices in MiB (2**20 B);
# one entry per allocation round of the loop below (5 rounds of 256 MiB)
maxmems = [256] * 5 # exceed 1GiB to get resource.getrusage problem

msize = 256 # matrix dimensions (each matrix is msize x msize)

# collects the matrix sets returned by allocate_matrix in the loop below
a = []

# initial memory
# Peak resident set size ("high water mark") (in bytes)
mem0 = maxrss()
# value subtracted from each new maxrss() reading in the loop below
mem = mem0

for n, maxmem in enumerate(maxmems):
    try:
        a1, mmemory, mno = allocate_matrix(maxmem, msize, dtype)
        # store the matrix
        a.append(a1)

        # memory used
        newmem = maxrss()
        memused = newmem - mem

        mset = 'Matrix set No ' + str(n) + ':'
        if verbose:
            print mset,

Example #13

0

Show file

    def initialize(self):
        """Prepare the response-function calculation.

        Prints a start banner, runs ``BASECHI.initialize``, sets up the
        frequency-grid attributes (``dw``, ``Nw``, ``NwS`` and, with the
        Hilbert transform, ``wmax`` and ``wcut``), determines ``nvalbands``,
        initializes the parallelization, prints the calculation summary,
        computes ``phi_aGp`` and finally sets the kernel ``Kxc_GG``.

        The Hilbert transform is switched off when only one frequency is
        given.

        Raises SystemExit after printing the summary when the
        ``df_dry_run`` extra parameter is set.
        """

        self.printtxt('')
        self.printtxt('-----------------------------------------')
        self.printtxt('Response function calculation started at:')
        self.starttime = time()
        self.printtxt(ctime())

        BASECHI.initialize(self)

        # Frequency init
        self.dw = None
        if len(self.w_w) == 1:
            # A single frequency cannot define a grid spacing.  This must
            # clear the flag tested below; assigning a differently spelled
            # attribute left it set and made self.w_w[1] fail.
            self.hilbert_trans = False

        if self.hilbert_trans:
            self.dw = self.w_w[1] - self.w_w[0]
            # NOTE(review): one-sided check (no abs), so spacings smaller
            # than dw are accepted as well.
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) <
                    1e-10).all()  # make sure its linear w grid
            assert self.w_w.max() == self.w_w[-1]

            self.dw /= Hartree
            self.w_w /= Hartree
            self.wmax = self.w_w[-1]
            self.wcut = self.wmax + 5. / Hartree
            self.Nw = int(self.wmax / self.dw) + 1
            self.NwS = int(self.wcut / self.dw) + 1
        else:
            self.Nw = len(self.w_w)
            self.NwS = 0
            if len(self.w_w) > 1:
                self.dw = self.w_w[1] - self.w_w[0]
                assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
                self.dw /= Hartree

        if self.hilbert_trans:
            # for band parallelization.
            # nvalbands becomes the first band whose occupations are below
            # ftol at every k-point.
            # NOTE(review): if no band qualifies, nvalbands is not assigned
            # here.
            for n in range(self.nbands):
                if (self.f_kn[:, n] - self.ftol < 0).all():
                    self.nvalbands = n
                    break
        else:
            # if not hilbert transform, all the bands should be used.
            self.nvalbands = self.nbands

        # Parallelization initialize
        self.parallel_init()

        # Printing calculation information
        self.print_chi()

        if extra_parameters.get('df_dry_run'):
            raise SystemExit

        calc = self.calc

        # For LCAO wfs
        if calc.input_parameters['mode'] == 'lcao':
            calc.initialize_positions()
        self.printtxt('     GS calculator   : %f M / cpu' %
                      (maxrss() / 1024**2))
        # PAW part init
        # calculate <phi_i | e**(-i(q+G).r) | phi_j>
        # G != 0 part
        self.get_phi_aGp()

        # Calculate ALDA kernel for EELS spectrum
        # Use RPA kernel for Optical spectrum and rpa correlation energy
        if not self.optical_limit and np.dtype(self.w_w[0]) == float:
            R_av = calc.atoms.positions / Bohr
            self.Kxc_GG = calculate_Kxc(
                self.gd,  # global grid
                calc.density.nt_sG,
                self.npw,
                self.Gvec_Gc,
                self.nG,
                self.vol,
                self.bcell_cv,
                R_av,
                calc.wfs.setups,
                calc.density.D_asp)

            self.printtxt('Finished ALDA kernel ! ')
        else:
            self.Kxc_GG = np.zeros((self.npw, self.npw))
            self.printtxt('Use RPA for optical spectrum ! ')
            self.printtxt('')

        return

Example #14

0

Show file

File: maxrss.py Project: Xu-Kai/lotsofcoresbook2code

    for i in range(mno):
        a.append(np.ones((msize, msize), dtype=dtype))

    # must return and store the matrix to allocate memory
    return a, mmemory, mno

# max memory to be used for allocation matrices in MiB (2**20 B);
# the loop below passes one entry to allocate_matrix per iteration
maxmems = [256] * 5 # exceed 1GiB to get resource.getrusage problem

msize = 256 # matrix dimensions (matrices have shape (msize, msize))

# the matrix sets returned by allocate_matrix are appended here
a = []

# initial memory
# Peak resident set size ("high water mark") (in bytes)
mem0 = maxrss()
# reference value that later maxrss() readings are compared against
mem = mem0

for n, maxmem in enumerate(maxmems):
    try:
        a1, mmemory, mno = allocate_matrix(maxmem, msize, dtype)
        # store the matrix
        a.append(a1)

        # memory used
        newmem = maxrss()
        memused = newmem - mem

        mset = 'Matrix set No ' + str(n) + ':'
        if verbose:
            print(mset, end=' ')

Example #15

0

Show file

File: chi.py Project: qsnake/gpaw

    def initialize(self):
        """Set up everything needed before the response function is computed.

        Steps, in order: print a start banner, call ``BASECHI.initialize``,
        derive the frequency-grid attributes (``dw``, ``Nw``, ``NwS``, and
        ``wmax``/``wcut`` with the Hilbert transform), choose ``nvalbands``,
        initialize the parallelization, print the summary, compute
        ``phi_aGp`` and set the kernel ``Kxc_GG``.

        With a single frequency the Hilbert transform is disabled.

        Raises SystemExit after printing the summary when the
        ``df_dry_run`` extra parameter is set.
        """

        self.printtxt('')
        self.printtxt('-----------------------------------------')
        self.printtxt('Response function calculation started at:')
        self.starttime = time()
        self.printtxt(ctime())

        BASECHI.initialize(self)

        # Frequency init
        self.dw = None
        if len(self.w_w) == 1:
            # Clear the flag that is tested below.  The previous spelling
            # created an unrelated attribute, so the Hilbert branch still
            # ran and indexing self.w_w[1] raised IndexError.
            self.hilbert_trans = False

        if self.hilbert_trans:
            self.dw = self.w_w[1] - self.w_w[0]
            # NOTE(review): one-sided check (no abs), so spacings smaller
            # than dw are accepted as well.
            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all() # make sure its linear w grid
            assert self.w_w.max() == self.w_w[-1]

            self.dw /= Hartree
            self.w_w  /= Hartree
            self.wmax = self.w_w[-1]
            self.wcut = self.wmax + 5. / Hartree
            self.Nw  = int(self.wmax / self.dw) + 1
            self.NwS = int(self.wcut / self.dw) + 1
        else:
            self.Nw = len(self.w_w)
            self.NwS = 0
            if len(self.w_w) > 1:
                self.dw = self.w_w[1] - self.w_w[0]
                assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
                self.dw /= Hartree

        if self.hilbert_trans:
            # for band parallelization.
            # nvalbands becomes the first band whose occupations are below
            # ftol at every k-point.
            # NOTE(review): if no band qualifies, nvalbands is not assigned
            # here.
            for n in range(self.nbands):
                if (self.f_kn[:, n] - self.ftol < 0).all():
                    self.nvalbands = n
                    break
        else:
            # if not hilbert transform, all the bands should be used.
            self.nvalbands = self.nbands

        # Parallelization initialize
        self.parallel_init()

        # Printing calculation information
        self.print_chi()

        if extra_parameters.get('df_dry_run'):
            raise SystemExit

        calc = self.calc

        # For LCAO wfs
        if calc.input_parameters['mode'] == 'lcao':
            calc.initialize_positions()
        self.printtxt('     GS calculator   : %f M / cpu' %(maxrss() / 1024**2))
        # PAW part init
        # calculate <phi_i | e**(-i(q+G).r) | phi_j>
        # G != 0 part
        self.get_phi_aGp()

        # Calculate ALDA kernel for EELS spectrum
        # Use RPA kernel for Optical spectrum and rpa correlation energy
        if not self.optical_limit and np.dtype(self.w_w[0]) == float:
            R_av = calc.atoms.positions / Bohr
            self.Kxc_GG = calculate_Kxc(self.gd, # global grid
                                    calc.density.nt_sG,
                                    self.npw, self.Gvec_Gc,
                                    self.nG, self.vol,
                                    self.bcell_cv, R_av,
                                    calc.wfs.setups,
                                    calc.density.D_asp)

            self.printtxt('Finished ALDA kernel ! ')
        else:
            self.Kxc_GG = np.zeros((self.npw, self.npw))
            self.printtxt('Use RPA for optical spectrum ! ')
            self.printtxt('')

        return

Example #16

0

Show file

File: chi.py Project: ryancoleman/lotsofcoresbook2code

    def initialize(self, simple_version=False):
        """Prepare the response-function calculation.

        Sets the frequency-grid attributes (``dw``, ``Nw``, ``NwS`` and,
        with the Hilbert transform, ``wmax``/``wcut``), determines
        ``nvalbands``, initializes the parallelization and prints the
        calculation summary.  Unless ``simple_version`` is ``True`` it then
        computes ``phi_aGp``/``phiG0_avp`` and, depending on ``self.xc``,
        the Coulomb and xc kernels.

        Raises SystemExit after printing the summary when the
        ``df_dry_run`` extra parameter is set.
        """
        for banner_line in (
            '',
            '-----------------------------------------',
            'Response function calculation started at:',
        ):
            self.printtxt(banner_line)
        self.starttime = time()
        self.printtxt(ctime())

        BASECHI.initialize(self)

        # Frequency init
        self.dw = None
        nfreq = len(self.w_w)
        if nfreq == 1:
            self.hilbert_trans = False

        if self.hilbert_trans:
            self.dw = self.w_w[1] - self.w_w[0]
            # The grid has to end at its largest frequency.
            assert self.w_w.max() == self.w_w[-1]

            self.dw /= Hartree
            self.w_w /= Hartree
            self.wmax = self.w_w[-1]
            self.wcut = self.wmax + 5.0 / Hartree
            self.Nw = nfreq
            self.NwS = int(self.wcut / self.dw) + 1
        else:
            self.Nw = nfreq
            self.NwS = 0
            if nfreq > 2:
                spacing_w = self.w_w[1:] - self.w_w[:-1]
                self.dw = self.w_w[1] - self.w_w[0]
                assert (spacing_w - self.dw < 1e-10).all()
                self.dw /= Hartree

        # Default: all bands.  Otherwise the largest, over spins, of the
        # first band index whose occupations are below ftol at every
        # k-point.
        self.nvalbands = self.nbands
        first_empty_s = np.zeros(self.nspins, dtype=int)
        for s in range(self.nspins):
            occ_kn = self.f_skn[s]
            first_empty_s[s] = next(
                (band for band in range(self.nbands)
                 if (occ_kn[:, band] - self.ftol < 0).all()),
                0,
            )
        if first_empty_s.max() > 0:
            self.nvalbands = first_empty_s.max()

        # Parallelization initialize
        self.parallel_init()

        # Printing calculation information
        self.print_chi()

        if extra_parameters.get('df_dry_run'):
            raise SystemExit

        calc = self.calc

        # For LCAO wfs
        if calc.input_parameters['mode'] == 'lcao':
            calc.initialize_positions()
        self.printtxt('     Max mem sofar   : %f M / cpu' % (maxrss() / 1024 ** 2))

        if simple_version is True:
            return

        # PAW part init
        # calculate <phi_i | e**(-i(q+G).r) | phi_j>
        # G != 0 part
        self.phi_aGp, self.phiG0_avp = self.get_phi_aGp(alldir=True)
        self.printtxt('Finished phi_aGp !')
        n_entries = len(self.phi_aGp)
        phi_mb_a = np.array(
            [self.phi_aGp[a].size * 16 / 1024.0 ** 2 for a in range(n_entries)]
        )
        self.printtxt('     Phi_aGp         : %f M / cpu' % (phi_mb_a.sum()))

        # Calculate ALDA kernel (not used in chi0)
        R_av = calc.atoms.positions / Bohr
        if self.xc == 'RPA':
            self.Kc_GG = None
            self.printtxt('RPA calculation.')
        elif self.xc in ('ALDA', 'ALDA_X'):
            # Initialize a CoulombKernel instance
            coulomb = CoulombKernel(
                vcut=self.vcut,
                pbc=self.calc.atoms.pbc,
                cell=self.acell_cv,
            )
            self.Kc_GG = coulomb.calculate_Kc(
                self.q_c, self.Gvec_Gc, self.bcell_cv
            )

            self.Kxc_sGG = calculate_Kxc(
                self.gd,  # global grid
                self.gd.zero_pad(calc.density.nt_sG),
                self.npw,
                self.Gvec_Gc,
                self.gd.N_c,
                self.vol,
                self.bcell_cv,
                R_av,
                calc.wfs.setups,
                calc.density.D_asp,
                functional=self.xc,
                density_cut=self.density_cut,
            )

            self.printtxt('Finished %s kernel ! ' % self.xc)

Example #17

0

Show file

File: chi0.py Project: robwarm/gpaw-symm

    def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii, m1, m2, spins):
        """Accumulate the local k-point/band sum into the chi0 arrays.

        Loops over the ``(s, K, n1, n2)`` entries of ``self.mysKn1n2``,
        skipping those whose spin is not in ``spins``.  For every band of
        the first k-point the pair densities with the bands ``m1``..``m2``
        of the k+q point are computed and passed to the selected update
        routine, which accumulates into ``chi0_wGG``.  When ``q_c`` is
        (close to) zero, ``chi0_wxvG`` and ``chi0_wvv`` are updated too.
        Afterwards the arrays are summed over ranks, the missing triangle
        is filled in where applicable, and the Hilbert transform and the
        intraband term are applied if enabled.  Progress and ``maxrss()``
        readings are printed to ``self.fd`` along the way.

        ``Q_aGii`` is iterated together with ``kpt1.P_ani`` to build the
        PAW corrections.

        Returns the tuple ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``; the
        arrays are modified in place.
        """
        wfs = self.calc.wfs

        # Optionally build all occupied-state k-point objects up front.
        if self.keep_occupied_states:
            self.mykpts = [self.get_k_point(s, K, n1, n2) for s, K, n1, n2 in self.mysKn1n2]

        numberofkpts = len(self.mysKn1n2)

        # Select the routine that accumulates into chi0_wGG.
        if self.eta == 0.0:
            update = self.update_hermitian
        elif self.hilbert:
            update = self.update_hilbert
        else:
            update = self.update

        q_c = pd.kd.bzk_kc[0]
        # True when q_c is numerically zero.
        optical_limit = np.allclose(q_c, 0.0)
        print("\n    Starting summation", file=self.fd)

        self.timer.start("Loop")
        # kpt1 occupied and kpt2 empty:
        for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
            if self.keep_occupied_states:
                kpt1 = self.mykpts[kn]
            else:
                kpt1 = self.get_k_point(s, K, n1, n2)

            if not kpt1.s in spins:
                continue

            with self.timer("k+q"):
                K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
            with self.timer("get k2"):
                kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
            with self.timer("fft-indices"):
                Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd, kpt1.shift_c - kpt2.shift_c)

            for n in range(kpt1.n2 - kpt1.n1):
                eps1 = kpt1.eps_n[n]

                # Only update if there exists deps <= omegamax
                if not self.omegamax is None:
                    m = [m for m, d in enumerate(eps1 - kpt2.eps_n) if abs(d) <= self.omegamax]
                else:
                    m = range(len(kpt2.eps_n))

                if not len(m):
                    continue

                # Eigenvalue differences for the selected kpt2 bands.
                deps_m = (eps1 - kpt2.eps_n)[m]
                f1 = kpt1.f_n[n]
                with self.timer("conj"):
                    ut1cc_R = kpt1.ut_nR[n].conj()
                with self.timer("paw"):
                    C1_aGi = [np.dot(Q_Gii, P1_ni[n].conj()) for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)]
                n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2, pd, Q_G)[m]
                # Occupation differences for the selected kpt2 bands.
                df_m = (f1 - kpt2.f_n)[m]

                # This is not quite right for degenerate partially occupied
                # bands, but good enough for now:
                df_m[df_m < 0] = 0.0

                if optical_limit:
                    self.update_optical_limit(n, m, kpt1, kpt2, deps_m, df_m, n_mG, chi0_wxvG, chi0_wvv)

                update(n_mG, deps_m, df_m, chi0_wGG)

            if optical_limit and self.intraband:
                # Avoid that more ranks are summing up
                # the intraband contributions
                if kpt1.n1 == 0:
                    self.update_intraband(kpt2, chi0_wvv)

            # Progress report at every (numberofkpts // 10)-th local entry,
            # only when there are more than 10 of them.
            if numberofkpts > 10 and kn % (numberofkpts // 10) == 0:
                print(
                    "    %s," % ctime()
                    + " local Kpoint no: %d / %d," % (kn, numberofkpts)
                    + "\n        mem. used.: "
                    + "%f M / cpu" % (maxrss() / 1024 ** 2),
                    file=self.fd,
                )

        self.timer.stop("Loop")

        print(
            "    %s, Finished kpoint sum" % ctime() + "\n        mem. used.: " + "%f M / cpu" % (maxrss() / 1024 ** 2),
            file=self.fd,
        )

        # chi0_wGG is summed over kncomm one frequency slice at a time; the
        # optical-limit arrays are summed over world.
        with self.timer("Sum CHI_0"):
            for chi0_GG in chi0_wGG:
                self.kncomm.sum(chi0_GG)

            if optical_limit:
                self.world.sum(chi0_wxvG)
                self.world.sum(chi0_wvv)
                if self.intraband:
                    self.world.sum(self.chi0_vv)

        print(
            "    %s, Finished summation over ranks" % ctime()
            + "\n        mem. used.: "
            + "%f M / cpu" % (maxrss() / 1024 ** 2),
            file=self.fd,
        )

        if self.eta == 0.0 or (self.hilbert and self.blockcomm.size == 1):
            # Fill in upper/lower triangle also:
            nG = pd.ngmax
            il = np.tril_indices(nG, -1)
            iu = il[::-1]
            if self.hilbert:
                # Lower triangle from the conjugated upper triangle.
                for chi0_GG in chi0_wGG:
                    chi0_GG[il] = chi0_GG[iu].conj()
            else:
                # Upper triangle from the conjugated lower triangle.
                for chi0_GG in chi0_wGG:
                    chi0_GG[iu] = chi0_GG[il].conj()

        if self.hilbert:
            with self.timer("Hilbert transform"):
                ht = HilbertTransform(self.omega_w, self.eta, self.timeordered)
                ht(chi0_wGG)
                if optical_limit:
                    ht(chi0_wvv)
                    ht(chi0_wxvG)
            print("Hilbert transform done", file=self.fd)

        if optical_limit and self.intraband:  # Add intraband contribution
            omega_w = self.omega_w.copy()
            # Presumably guards the division below against omega == 0.
            if omega_w[0] == 0.0:
                omega_w[0] = 1e-14

            chi0_wvv += self.chi0_vv[np.newaxis] / (
                omega_w[:, np.newaxis, np.newaxis] * (omega_w[:, np.newaxis, np.newaxis] + 1j * self.eta)
            )

        return pd, chi0_wGG, chi0_wxvG, chi0_wvv

Example #18

0

Show file

File: chi.py Project: Xu-Kai/lotsofcoresbook2code

    def initialize(self, simple_version=False):

        self.printtxt('')
        self.printtxt('-----------------------------------------')
        self.printtxt('Response function calculation started at:')
        self.starttime = time()
        self.printtxt(ctime())

        BASECHI.initialize(self)

        # Frequency init
        self.dw = None
        if len(self.w_w) == 1:
            self.hilbert_trans = False

        if self.hilbert_trans:
            self.dw = self.w_w[1] - self.w_w[0]
#            assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all() # make sure its linear w grid
            assert self.w_w.max() == self.w_w[-1]
            
            self.dw /= Hartree
            self.w_w /= Hartree
            self.wmax = self.w_w[-1] 
            self.wcut = self.wmax + 5. / Hartree
#            self.Nw  = int(self.wmax / self.dw) + 1
            self.Nw = len(self.w_w)
            self.NwS = int(self.wcut / self.dw) + 1
        else:
            self.Nw = len(self.w_w)
            self.NwS = 0
            if len(self.w_w) > 2:
                self.dw = self.w_w[1] - self.w_w[0]
                assert ((self.w_w[1:] - self.w_w[:-1] - self.dw) < 1e-10).all()
                self.dw /= Hartree

        self.nvalbands = self.nbands
        tmpn = np.zeros(self.nspins, dtype=int)
        for spin in range(self.nspins):
            for n in range(self.nbands):
                if (self.f_skn[spin][:, n] - self.ftol < 0).all():
                    tmpn[spin] = n
                    break
        if tmpn.max() > 0:
            self.nvalbands = tmpn.max()

        # Parallelization initialize
        self.parallel_init()

        # Printing calculation information
        self.print_chi()

        if extra_parameters.get('df_dry_run'):
            raise SystemExit

        calc = self.calc

        # For LCAO wfs
        if calc.input_parameters['mode'] == 'lcao':
            calc.initialize_positions()        
        self.printtxt('     Max mem sofar   : %f M / cpu' %(maxrss() / 1024**2))

        if simple_version is True:
            return
        # PAW part init
        # calculate <phi_i | e**(-i(q+G).r) | phi_j>
        # G != 0 part
        self.phi_aGp, self.phiG0_avp = self.get_phi_aGp(alldir=True)
        self.printtxt('Finished phi_aGp !')
        mem = np.array([self.phi_aGp[i].size * 16 /1024.**2 for i in range(len(self.phi_aGp))])
        self.printtxt('     Phi_aGp         : %f M / cpu' %(mem.sum()))

        # Calculate ALDA kernel (not used in chi0)
        R_av = calc.atoms.positions / Bohr
        if self.xc == 'RPA': #type(self.w_w[0]) is float:
            self.Kc_GG = None
            self.printtxt('RPA calculation.')
        elif self.xc == 'ALDA' or self.xc == 'ALDA_X':
            #self.Kc_GG = calculate_Kc(self.q_c,
            #                          self.Gvec_Gc,
            #                          self.acell_cv,
            #                          self.bcell_cv,
            #                          self.calc.atoms.pbc,
            #                          self.vcut)
            # Initialize a CoulombKernel instance
            kernel = CoulombKernel(vcut=self.vcut,
                                   pbc=self.calc.atoms.pbc,
                                   cell=self.acell_cv)
            self.Kc_GG = kernel.calculate_Kc(self.q_c,
                                             self.Gvec_Gc,
                                             self.bcell_cv)
            
            self.Kxc_sGG = calculate_Kxc(self.gd, # global grid
                                         self.gd.zero_pad(calc.density.nt_sG),
                                         self.npw, self.Gvec_Gc,
                                         self.gd.N_c, self.vol,
                                         self.bcell_cv, R_av,
                                         calc.wfs.setups,
                                         calc.density.D_asp,
                                         functional=self.xc,
                                         density_cut=self.density_cut)
            
            self.printtxt('Finished %s kernel ! ' % self.xc)
                
        return

Example #19

0

Show file

File: chi0.py Project: Xu-Kai/lotsofcoresbook2code

    def print_chi(self, pd):
        """Write a summary of the chi0 calculation settings to ``self.fd``.

        The report contains a time stamp, the q-point (``pd.kd.bzk_kc[0]``),
        the frequency / spin / band / k-point / plane-wave counts, the sizes
        of the communicators, the occupied-state counters and a rough memory
        estimate for ``chi0_wGG`` and the occupied states.

        When ``extra_parameters['df_dry_run']`` is set, a
        ``DryRunCommunicator`` of that size stands in for ``self.world`` so
        the per-core estimate reflects the planned number of cores.
        """
        def out(text=''):
            # Every line of the report goes to the same text stream.
            print(text, file=self.fd)

        wfs = self.calc.wfs
        grid = wfs.gd

        dry_run_size = extra_parameters.get('df_dry_run')
        if dry_run_size:
            from gpaw.mpi import DryRunCommunicator
            comm = DryRunCommunicator(dry_run_size)
        else:
            comm = self.world

        out('%s' % ctime())
        out('Called response.chi0.calculate with')

        q_c = pd.kd.bzk_kc[0]
        out('    q_c: [%f, %f, %f]' % (q_c[0], q_c[1], q_c[2]))

        n_freq = len(self.omega_w)
        out('    Number of frequency points: %d' % n_freq)
        # ecut and eta are multiplied by Hartree before being printed.
        out('    Planewave cutoff: %f' % (self.ecut * Hartree))

        n_spins = wfs.nspins
        out('    Number of spins: %d' % n_spins)
        out('    Number of bands: %d' % self.nbands)

        n_bzk = wfs.kd.nbzkpts
        out('    Number of kpoints: %d' % n_bzk)
        out('    Number of irredicible kpoints: %d' % wfs.kd.nibzkpts)
        out('    Number of planewaves: %d' % pd.ngmax)
        out('    Broadening (eta): %f' % (self.eta * Hartree))

        out('    world.size: %d' % comm.size)
        out('    kncomm.size: %d' % self.kncomm.size)
        out('    blockcomm.size: %d' % self.blockcomm.size)

        out('    Number of completely occupied states: %d' % self.nocc1)
        n_partial = self.nocc2
        out('    Number of partially occupied states: %d' % n_partial)
        out('    Keep occupied states: %s' % self.keep_occupied_states)

        out()
        out('    Memory estimate of potentially large arrays:')

        # 16 bytes per element, reported in units of 1024**2 bytes.
        mib = 1024**2
        out('        chi0_wGG: %f M / cpu'
            % (n_freq * pd.ngmax**2 * 16. / mib))

        n_grid = grid.N_c[0] * grid.N_c[1] * grid.N_c[2]

        if self.keep_occupied_states:
            n_units = n_spins * n_bzk * n_partial
        else:
            n_units = n_spins * n_partial
        # Ceiling division: states are spread over the cores.
        n_stat = (n_units + comm.size - 1) // comm.size

        out('        Occupied states: %f M / cpu'
            % (n_stat * n_grid * 16. / mib))
        out('        Memory usage before allocation: %f M / cpu'
            % (maxrss() / mib))
        out()

Example #20

0

Show file

File: chi0.py Project: Huaguiyuan/gpawDFT

    def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, m1, m2, spins):
        """Accumulate pair-density contributions into the chi0 arrays.

        The arrays ``chi0_wGG``, ``chi0_wxvG`` and ``chi0_wvv`` are modified
        in place: contributions yielded by ``self.generate_pair_densities``
        are added, the result is summed over ``self.kncomm``, optionally
        Hilbert transformed, and (when ``self.unsymmetrized`` is set)
        symmetrized with a ``PWSymmetryAnalyzer``.

        ``m1``, ``m2`` and ``spins`` are forwarded unchanged to the
        pair-density generator.

        Returns the tuple ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``.
        """
        # Select the accumulation routine once, up front.
        if self.eta == 0.0:
            accumulate = self.update_hermitian
        elif self.hilbert:
            accumulate = self.update_hilbert
        else:
            accumulate = self.update

        q_c = pd.kd.bzk_kc[0]
        at_gamma = not self.no_optical_limit and np.allclose(q_c, 0.0)

        # Symmetry helper for this plane-wave descriptor.
        analyzer = PWSymmetryAnalyzer(
            self.calc.wfs.kd, pd,
            disable_point_group=self.disable_point_group,
            disable_time_reversal=self.disable_time_reversal,
            timer=self.timer, txt=self.fd)

        # Incoming arrays are taken to be symmetric already.  Because the
        # unsymmetrized contribution is added on top and everything is
        # symmetrized at the end, pre-divide by the number of symmetries.
        if self.unsymmetrized:
            nsym = analyzer.how_many_symmetries()
            if nsym > 1:
                chi0_wGG /= nsym
                if chi0_wxvG is not None:
                    chi0_wxvG /= nsym
                if chi0_wvv is not None:
                    chi0_wvv /= nsym

        # Main accumulation loop (unsymmetrized chi or spectral function).
        self.timer.start('Loop')
        pair_densities = self.generate_pair_densities(
            pd, m1, m2, spins, PWSA=analyzer,
            disable_optical_limit=not at_gamma,
            intraband=self.intraband,
            use_more_memory=self.use_more_memory,
            unsymmetrized=self.unsymmetrized)
        for f2_m, df_m, deps_m, n_mG, n_mv, vel_mv in pair_densities:
            # A None pair density means "nothing to add" for the body.
            if n_mG is not None:
                accumulate(np.ascontiguousarray(n_mG), deps_m, df_m,
                           chi0_wGG)
            if not at_gamma:
                continue
            if n_mv is not None:
                self.update_optical_limit(n_mv, deps_m, df_m,
                                          n_mG, chi0_wxvG, chi0_wvv)
            if self.intraband and vel_mv is not None:
                self.update_intraband(f2_m, vel_mv, self.chi0_vv)
        self.timer.stop('Loop')

        # Reduce the partial results over the kncomm communicator.
        with self.timer('Sum CHI_0'):
            for chi0_GG in chi0_wGG:
                self.kncomm.sum(chi0_GG)

            if at_gamma:
                self.kncomm.sum(chi0_wxvG)
                self.kncomm.sum(chi0_wvv)
                if self.intraband:
                    self.kncomm.sum(self.chi0_vv)

        print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
              file=self.fd)

        if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
            # Only one triangle was filled; mirror it (conjugated) into
            # the other.  The Hilbert path copies upper -> lower, the
            # hermitian path lower -> upper.
            lower = np.tril_indices(pd.ngmax, -1)
            upper = lower[::-1]
            if self.hilbert:
                dst, src = lower, upper
            else:
                dst, src = upper, lower
            for chi0_GG in chi0_wGG:
                chi0_GG[dst] = chi0_GG[src].conj()

        if self.hilbert:
            with self.timer('Hilbert transform'):
                transform = HilbertTransform(self.omega_w, self.eta,
                                             self.timeordered)
                transform(chi0_wGG)
                if at_gamma:
                    transform(chi0_wvv)
                    transform(chi0_wxvG)
            print('Hilbert transform done', file=self.fd)

        if at_gamma and self.intraband:
            # Add the intraband contribution.  A zero first frequency is
            # nudged to 1e-14 on a copy of the grid.
            omega_w = self.omega_w.copy()
            if omega_w[0] == 0.0:
                omega_w[0] = 1e-14

            chi0_vv = self.chi0_vv
            self.world.broadcast(chi0_vv, 0)

            shifted_w = omega_w[:, np.newaxis, np.newaxis] + 1j * self.eta
            chi0_wvv += chi0_vv[np.newaxis] / shifted_w**2

        if self.unsymmetrized:
            # Symmetrize on a redistributed copy (needed for block
            # parallelization), then move the result back.
            gathered_wGG = self.redistribute(chi0_wGG)
            analyzer.symmetrize_wGG(gathered_wGG)
            self.redistribute(gathered_wGG, chi0_wGG)

            if at_gamma:
                analyzer.symmetrize_wxvG(chi0_wxvG)
                analyzer.symmetrize_wvv(chi0_wvv)
                # chi_wGG is nonanalytic in the head and wings, so these
                # are overwritten explicitly.  The order matters: wings
                # first, head last.
                chi0_wGG[:, :, 0] = chi0_wxvG[:, 1, 0, self.Ga:self.Gb]
                if self.blockcomm.rank == 0:
                    chi0_wGG[:, 0] = chi0_wxvG[:, 0, 0]
                    chi0_wGG[:, 0, 0] = chi0_wvv[:, 0, 0]

        return pd, chi0_wGG, chi0_wxvG, chi0_wvv

Example #21

0

Show file

    def calculate(self):
        """Build the local block of the two-particle Hamiltonian.

        Steps, as implemented below:

        1. Obtain ``phi_aGp`` directly ('RPA' mode) or attach a ``Reader``
           to the 'phi_qaGp' file, calculating that file first when it does
           not exist yet.
        2. Take the diagonal of ``self.V_qGG`` at the relevant q-point as
           ``kc_G``; its first element is zeroed in the optical limit.
        3. In 'BSE' mode try to read ``W_qGG`` from
           ``self.kernel_file + '.pckl'`` and fall back to
           ``self.full_static_screened_interaction()`` on any failure.
           In every other mode ``self.V_qGG`` is used instead.
        4. Fill the locally owned rows of ``K_SS`` from products of
           ``self.density_matrix`` results, and divide by ``self.vol``.
        5. Assemble ``H_sS`` from ``e_S`` on the diagonal plus
           ``focc_S * K_SS``, make it Hermitian unless ``self.coupling`` is
           set, store it with ``self.par_save`` and return it.

        Side effects: sets ``self.phi_aGp`` or ``self.reader``, and
        ``self.rhoG0_S`` (summed over ``world``).

        Returns the array ``H_sS``, created with the shape
        ``(self.nS_local, self.nS)``.
        """
        calc = self.calc
        focc_S = self.focc_S
        e_S = self.e_S
        op_scc = calc.wfs.kd.symmetry.op_scc

        # Get phi_qaGp
        if self.mode == 'RPA':
            self.phi_aGp = self.get_phi_aGp()
        else:
            # opencew returning None is treated as "file already exists".
            fd = opencew('phi_qaGp')
            if fd is None:
                self.reader = Reader('phi_qaGp')
                tmp = self.load_phi_aGp(self.reader, 0)[0]
                assert len(tmp) == self.npw
                self.printtxt('Finished reading phi_aGp')
            else:
                # NOTE(review): ``fd`` is neither written to nor closed
                # here; presumably get_phi_qaGp() reopens the file itself.
                # Confirm before adding an explicit close.
                self.printtxt('Calculating phi_qaGp')
                self.get_phi_qaGp()
                world.barrier()
                self.reader = Reader('phi_qaGp')
            self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
            self.printtxt('')

        if self.optical_limit:
            iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
        else:
            iq = np.where(
                np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]
        kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
        if self.optical_limit:
            kc_G[0] = 0.

        # Get screened Coulomb kernel
        if self.mode == 'BSE':
            try:
                # Pickle data must be read in binary mode; a text-mode
                # handle makes pickle.load fail on Python 3, so the cache
                # would never be used.  The with-block closes the file.
                # NOTE: unpickling can execute arbitrary code, so the
                # kernel file must come from a trusted source.
                with open(self.kernel_file + '.pckl', 'rb') as kernel_fd:
                    data = pickle.load(kernel_fd)
                W_qGG = data['W_qGG']
                assert np.shape(W_qGG) == np.shape(self.V_qGG)
                self.printtxt('Finished reading screening interaction kernel')
            except Exception:
                # Best-effort cache: a missing, unreadable or mismatching
                # file triggers a recalculation.  KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                self.printtxt('Calculating screening interaction kernel.')
                W_qGG = self.full_static_screened_interaction()
            self.printtxt('')
        else:
            W_qGG = self.V_qGG

        t0 = time()
        self.printtxt('Calculating %s matrix elements' % self.mode)

        # Calculate full kernel
        K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
        self.rhoG0_S = np.zeros(self.nS, dtype=complex)

        for iS in range(self.nS_start, self.nS_end):
            # Row index inside the locally stored block.
            i_loc = iS - self.nS_start
            k1, n1, m1 = self.Sindex_S3[iS]
            rho1_G = self.density_matrix(n1, m1, k1)
            self.rhoG0_S[iS] = rho1_G[0]
            for jS in range(self.nS):
                k2, n2, m2 = self.Sindex_S3[jS]
                rho2_G = self.density_matrix(n2, m2, k2)
                K_SS[i_loc, jS] = np.sum(rho1_G.conj() * rho2_G * kc_G)

                if self.mode != 'RPA':
                    rho3_G = self.density_matrix(n1, n2, k1, k2)
                    rho4_G = self.density_matrix(m1, m2, self.kq_k[k1],
                                                 self.kq_k[k2])

                    # Difference of the two k-points, shifted by +-1
                    # where a component leaves the (-0.499, 0.501] window.
                    q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                    q_c[np.where(q_c > 0.501)] -= 1.
                    q_c[np.where(q_c < -0.499)] += 1.
                    iq = self.kd.where_is_q(q_c, self.bzq_qc)

                    if not self.qsymm:
                        W_GG = W_qGG[iq]
                    else:
                        # Build W at this q from the one stored for its
                        # irreducible partner by remapping G indices.
                        ibzq = self.ibzq_q[iq]
                        W_GG_tmp = W_qGG[ibzq]

                        iop = self.iop_q[iq]
                        timerev = self.timerev_q[iq]
                        diff_c = self.diff_qc[iq]
                        invop = np.linalg.inv(op_scc[iop])
                        Gindex = np.zeros(self.npw, dtype=int)
                        for iG in range(self.npw):
                            G_c = self.Gvec_Gc[iG]
                            if timerev:
                                RotG_c = -np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            else:
                                RotG_c = np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                            try:
                                Gindex[iG] = np.where(tmp_G < 1e-5)[0][0]
                            except IndexError:
                                # No G vector matches the rotated one;
                                # mark it as outside Gvec_Gc.
                                Gindex[iG] = -1

                        W_GG = np.zeros_like(W_GG_tmp)
                        for iG in range(self.npw):
                            for jG in range(self.npw):
                                if Gindex[iG] == -1 or Gindex[jG] == -1:
                                    W_GG[iG, jG] = 0
                                else:
                                    W_GG[iG, jG] = W_GG_tmp[Gindex[iG],
                                                            Gindex[jG]]

                    if self.mode == 'BSE':
                        tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                        K_SS[i_loc, jS] -= 0.5 * np.sum(tmp_GG)
                    else:
                        # Other non-RPA modes use only the diagonal of W.
                        tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                        K_SS[i_loc, jS] -= 0.5 * np.sum(tmp_G)
            self.timing(iS, t0, self.nS_local, 'pair orbital')

        K_SS /= self.vol

        world.sum(self.rhoG0_S)

        # Get and solve Hamiltonian
        H_sS = np.zeros_like(K_SS)
        for iS in range(self.nS_start, self.nS_end):
            i_loc = iS - self.nS_start
            H_sS[i_loc, iS] = e_S[iS]
            # Whole-row update; equivalent to looping jS over range(nS).
            H_sS[i_loc] += focc_S[iS] * K_SS[i_loc]

        # Force matrix to be Hermitian
        if not self.coupling:
            if world.size > 1:
                H_Ss = self.redistribute_H(H_sS)
            else:
                H_Ss = H_sS
            H_sS = (np.real(H_sS) + np.real(H_Ss.T)) / 2. + 1j * (
                np.imag(H_sS) - np.imag(H_Ss.T)) / 2.

        # Save H_sS matrix
        self.par_save('H_SS', 'H_SS', H_sS)

        return H_sS

Example #22

0

Show file

File: chi0.py Project: ryancoleman/lotsofcoresbook2code

    def print_chi(self, pd):
        """Print the parameters of the chi0 calculation to ``self.fd``.

        Output order: time stamp, q-point, counts (frequencies, spins,
        bands, k-points, irreducible k-points, plane waves), broadening,
        communicator sizes, occupied-state counters, and finally a memory
        estimate for ``chi0_wGG`` and the occupied states together with the
        current ``maxrss()`` value.

        In a dry run (``extra_parameters['df_dry_run']`` set) the world
        communicator is replaced by a ``DryRunCommunicator`` of the
        requested size.
        """
        def emit(message=''):
            # Single sink for the whole report.
            print(message, file=self.fd)

        wfs = self.calc.wfs
        gd = wfs.gd

        requested_cores = extra_parameters.get('df_dry_run')
        if requested_cores:
            from gpaw.mpi import DryRunCommunicator
            world = DryRunCommunicator(requested_cores)
        else:
            world = self.world

        emit('%s' % ctime())
        emit('Called response.chi0.calculate with')

        qpoint_c = pd.kd.bzk_kc[0]
        emit('    q_c: [%f, %f, %f]'
             % (qpoint_c[0], qpoint_c[1], qpoint_c[2]))

        nfreq = len(self.omega_w)
        emit('    Number of frequency points: %d' % nfreq)
        # Cutoff and broadening are scaled by Hartree for printing.
        emit('    Planewave cutoff: %f' % (self.ecut * Hartree))

        nspins = wfs.nspins
        emit('    Number of spins: %d' % nspins)
        emit('    Number of bands: %d' % self.nbands)

        nbzk = wfs.kd.nbzkpts
        emit('    Number of kpoints: %d' % nbzk)
        emit('    Number of irredicible kpoints: %d' % wfs.kd.nibzkpts)
        emit('    Number of planewaves: %d' % pd.ngmax)
        emit('    Broadening (eta): %f' % (self.eta * Hartree))

        emit('    world.size: %d' % world.size)
        emit('    kncomm.size: %d' % self.kncomm.size)
        emit('    blockcomm.size: %d' % self.blockcomm.size)

        emit('    Number of completely occupied states: %d' % self.nocc1)
        npartial = self.nocc2
        emit('    Number of partially occupied states: %d' % npartial)
        emit('    Keep occupied states: %s' % self.keep_occupied_states)

        emit()
        emit('    Memory estimate of potentially large arrays:')

        # 16 bytes per element; sizes are given in units of 1024**2 bytes.
        unit = 1024**2
        emit('        chi0_wGG: %f M / cpu'
             % (nfreq * pd.ngmax**2 * 16. / unit))

        npoints = gd.N_c[0] * gd.N_c[1] * gd.N_c[2]

        if self.keep_occupied_states:
            ntotal = nspins * nbzk * npartial
        else:
            ntotal = nspins * npartial
        # Round up when dividing the states among the cores.
        nstat = (ntotal + world.size - 1) // world.size

        emit('        Occupied states: %f M / cpu'
             % (nstat * npoints * 16. / unit))
        emit('        Memory usage before allocation: %f M / cpu'
             % (maxrss() / unit))
        emit()

Example #23

0

Show file

File: bse.py Project: robwarm/gpaw-symm

    def calculate(self):
        """Build the local block of the two-particle Hamiltonian.

        Steps, as implemented below:

        1. Obtain ``phi_aGp`` directly ('RPA' mode), or try to read the
           'phi_qaGp' file and recalculate it when reading fails.
        2. Take the diagonal of ``self.V_qGG`` at the relevant q-point as
           ``kc_G``; its first element is zeroed in the optical limit.
        3. In 'BSE' mode try to read ``W_qGG`` from
           ``self.kernel_file + '.pckl'`` and fall back to
           ``self.full_static_screened_interaction()`` on any failure.
           In every other mode ``self.V_qGG`` is used instead.
        4. Fill the locally owned rows of ``K_SS`` from products of
           ``self.density_matrix`` results, and divide by ``self.vol``.
        5. Assemble ``H_sS`` from ``e_S`` on the diagonal plus
           ``focc_S * K_SS``, make it Hermitian unless ``self.coupling`` is
           set, store it with ``self.par_save`` and return it.

        Side effects: sets ``self.phi_aGp`` or ``self.reader``, and
        ``self.rhoG0_S`` (summed over ``world``).

        Returns the array ``H_sS``, created with the shape
        ``(self.nS_local, self.nS)``.
        """
        calc = self.calc
        focc_S = self.focc_S
        e_S = self.e_S
        op_scc = calc.wfs.symmetry.op_scc

        # Get phi_qaGp
        if self.mode == 'RPA':
            self.phi_aGp = self.get_phi_aGp()
        else:
            try:
                self.reader = Reader('phi_qaGp')
                tmp = self.load_phi_aGp(self.reader, 0)[0]
                assert len(tmp) == self.npw
                self.printtxt('Finished reading phi_aGp')
            except Exception:
                # Best-effort cache: any read problem (missing file, wrong
                # size, ...) triggers a recalculation.  KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                self.printtxt('Calculating phi_qaGp')
                self.get_phi_qaGp()
                world.barrier()
                self.reader = Reader('phi_qaGp')
            self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
            self.printtxt('')

        if self.optical_limit:
            iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
        else:
            iq = np.where(
                np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]
        kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
        if self.optical_limit:
            kc_G[0] = 0.

        # Get screened Coulomb kernel
        if self.mode == 'BSE':
            try:
                # Pickle data must be read in binary mode; a text-mode
                # handle makes pickle.load fail on Python 3, so the cache
                # would never be used.  The with-block closes the file.
                # NOTE: unpickling can execute arbitrary code, so the
                # kernel file must come from a trusted source.
                with open(self.kernel_file + '.pckl', 'rb') as kernel_fd:
                    data = pickle.load(kernel_fd)
                W_qGG = data['W_qGG']
                assert np.shape(W_qGG) == np.shape(self.V_qGG)
                self.printtxt('Finished reading screening interaction kernel')
            except Exception:
                # Calculate from scratch
                self.printtxt('Calculating screening interaction kernel.')
                W_qGG = self.full_static_screened_interaction()
            self.printtxt('')
        else:
            W_qGG = self.V_qGG

        t0 = time()
        self.printtxt('Calculating %s matrix elements' % self.mode)

        # Calculate full kernel
        K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
        self.rhoG0_S = np.zeros(self.nS, dtype=complex)

        for iS in range(self.nS_start, self.nS_end):
            # Row index inside the locally stored block.
            i_loc = iS - self.nS_start
            k1, n1, m1 = self.Sindex_S3[iS]
            rho1_G = self.density_matrix(n1, m1, k1)
            self.rhoG0_S[iS] = rho1_G[0]
            for jS in range(self.nS):
                k2, n2, m2 = self.Sindex_S3[jS]
                rho2_G = self.density_matrix(n2, m2, k2)
                K_SS[i_loc, jS] = np.sum(rho1_G.conj() * rho2_G * kc_G)

                if self.mode != 'RPA':
                    rho3_G = self.density_matrix(n1, n2, k1, k2)
                    rho4_G = self.density_matrix(m1, m2, self.kq_k[k1],
                                                 self.kq_k[k2])

                    # Difference of the two k-points, shifted by +-1
                    # where a component leaves the (-0.499, 0.501] window.
                    q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                    q_c[np.where(q_c > 0.501)] -= 1.
                    q_c[np.where(q_c < -0.499)] += 1.
                    iq = self.kd.where_is_q(q_c, self.bzq_qc)

                    if not self.qsymm:
                        W_GG = W_qGG[iq]
                    else:
                        # Build W at this q from the one stored for its
                        # irreducible partner by remapping G indices.
                        ibzq = self.ibzq_q[iq]
                        W_GG_tmp = W_qGG[ibzq]

                        iop = self.iop_q[iq]
                        timerev = self.timerev_q[iq]
                        diff_c = self.diff_qc[iq]
                        invop = np.linalg.inv(op_scc[iop])
                        Gindex = np.zeros(self.npw, dtype=int)
                        for iG in range(self.npw):
                            G_c = self.Gvec_Gc[iG]
                            if timerev:
                                RotG_c = -np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            else:
                                RotG_c = np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                            try:
                                Gindex[iG] = np.where(tmp_G < 1e-5)[0][0]
                            except IndexError:
                                # No G vector matches the rotated one;
                                # mark it as outside Gvec_Gc.
                                Gindex[iG] = -1

                        W_GG = np.zeros_like(W_GG_tmp)
                        for iG in range(self.npw):
                            for jG in range(self.npw):
                                if Gindex[iG] == -1 or Gindex[jG] == -1:
                                    W_GG[iG, jG] = 0
                                else:
                                    W_GG[iG, jG] = W_GG_tmp[Gindex[iG],
                                                            Gindex[jG]]

                    if self.mode == 'BSE':
                        tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                        K_SS[i_loc, jS] -= 0.5 * np.sum(tmp_GG)
                    else:
                        # Other non-RPA modes use only the diagonal of W.
                        tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                        K_SS[i_loc, jS] -= 0.5 * np.sum(tmp_G)
            self.timing(iS, t0, self.nS_local, 'pair orbital')

        K_SS /= self.vol

        world.sum(self.rhoG0_S)

        # Get and solve Hamiltonian
        H_sS = np.zeros_like(K_SS)
        for iS in range(self.nS_start, self.nS_end):
            i_loc = iS - self.nS_start
            H_sS[i_loc, iS] = e_S[iS]
            # Whole-row update; equivalent to looping jS over range(nS).
            H_sS[i_loc] += focc_S[iS] * K_SS[i_loc]

        # Force matrix to be Hermitian
        if not self.coupling:
            if world.size > 1:
                H_Ss = self.redistribute_H(H_sS)
            else:
                H_Ss = H_sS
            H_sS = (np.real(H_sS) + np.real(H_Ss.T)) / 2. + 1j * (
                np.imag(H_sS) - np.imag(H_Ss.T)) / 2.

        # Save H_sS matrix
        self.par_save('H_SS', 'H_SS', H_sS)

        return H_sS

Example #24

0

Show file

File: chi0.py Project: ryancoleman/lotsofcoresbook2code

    def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii,
                   m1, m2, spins):
        """Accumulate pair-density contributions into the chi0 arrays.

        Loops over the ``(s, K, n1, n2)`` entries of ``self.mysKn1n2``.
        For each entry it finds the k+q partner and adds the contribution
        of every band pair to ``chi0_wGG``; in the optical limit it also
        updates ``chi0_wxvG`` and ``chi0_wvv``.  The arrays are modified
        in place.  Afterwards the results are summed over ``self.kncomm``,
        the missing triangle is filled in where applicable, an optional
        Hilbert transform is applied and the intraband term is added.

        Parameters
        ----------
        pd
            Supplies the q-point (``pd.kd.bzk_kc[0]``) and ``pd.ngmax``.
        chi0_wGG, chi0_wxvG, chi0_wvv
            Output arrays, updated in place.
        Q_aGii
            Sequence paired with ``kpt1.P_ani`` to form ``C1_aGi``.
        m1, m2
            Passed to ``self.get_k_point`` when fetching the k+q point.
        spins
            Container of spin indices; k-points whose spin is not in it
            are skipped.

        Returns the tuple ``(pd, chi0_wGG, chi0_wxvG, chi0_wvv)``.

        Raises ``AssertionError`` if ``self.nocc2`` exceeds ``kpt2.nb``
        when the intraband update is attempted.
        """
        wfs = self.calc.wfs

        if self.keep_occupied_states:
            # Fetch all k-points up front so the loop can index them.
            self.mykpts = [self.get_k_point(s, K, n1, n2)
                           for s, K, n1, n2 in self.mysKn1n2]

        # Select the accumulation routine once, up front.
        if self.eta == 0.0:
            update = self.update_hermitian
        elif self.hilbert:
            update = self.update_hilbert
        else:
            update = self.update

        q_c = pd.kd.bzk_kc[0]
        optical_limit = not self.no_optical_limit and np.allclose(q_c, 0.0)

        pb = ProgressBar(self.fd)

        self.timer.start('Loop')
        # kpt1 occupied and kpt2 empty:
        for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
            pb.update(kn / len(self.mysKn1n2))
            if self.keep_occupied_states:
                kpt1 = self.mykpts[kn]
            else:
                kpt1 = self.get_k_point(s, K, n1, n2)

            if kpt1.s not in spins:
                continue

            with self.timer('k+q'):
                K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
            with self.timer('get k2'):
                kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
            with self.timer('fft-indices'):
                Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd,
                                           kpt1.shift_c - kpt2.shift_c)

            for n in range(kpt1.n2 - kpt1.n1):
                eps1 = kpt1.eps_n[n]

                # Only update if there exists deps <= omegamax
                if self.omegamax is not None:
                    m = [m for m, d in enumerate(eps1 - kpt2.eps_n)
                         if abs(d) <= self.omegamax]
                else:
                    m = range(0, kpt2.n2 - kpt2.n1)

                if not len(m):
                    continue

                deps_m = (eps1 - kpt2.eps_n)[m]
                f1 = kpt1.f_n[n]
                with self.timer('conj'):
                    ut1cc_R = kpt1.ut_nR[n].conj()
                with self.timer('paw'):
                    C1_aGi = [np.dot(Q_Gii, P1_ni[n].conj())
                              for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)]
                n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2,
                                                     pd, Q_G)[m]
                df_m = (f1 - kpt2.f_n)[m]

                # This is not quite right for degenerate partially occupied
                # bands, but good enough for now:
                df_m[df_m <= 1e-20] = 0.0

                if optical_limit:
                    self.update_optical_limit(
                        n, m, kpt1, kpt2, deps_m, df_m, n_mG,
                        chi0_wxvG, chi0_wvv)

                update(n_mG, deps_m, df_m, chi0_wGG)

            if optical_limit and self.intraband:
                # Avoid that more ranks are summing up
                # the intraband contributions
                if kpt1.n1 == 0 and self.blockcomm.rank == 0:
                    # The message is a plain string: the former
                    # print(...) call evaluated to None, so the
                    # AssertionError carried no text.
                    assert self.nocc2 <= kpt2.nb, \
                        'Error: Too few unoccupied bands'
                    self.update_intraband(kpt2)

        self.timer.stop('Loop')

        pb.finish()

        # Reduce the partial results over the kncomm communicator.
        with self.timer('Sum CHI_0'):
            for chi0_GG in chi0_wGG:
                self.kncomm.sum(chi0_GG)

            if optical_limit:
                self.kncomm.sum(chi0_wxvG)
                self.kncomm.sum(chi0_wvv)
                if self.intraband:
                    self.kncomm.sum(self.chi0_vv)

        print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
              file=self.fd)

        if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
            # Fill in upper/lower triangle also:
            nG = pd.ngmax
            il = np.tril_indices(nG, -1)
            iu = il[::-1]
            if self.hilbert:
                # Hilbert path: copy upper triangle into lower.
                for chi0_GG in chi0_wGG:
                    chi0_GG[il] = chi0_GG[iu].conj()
            else:
                # Hermitian path: copy lower triangle into upper.
                for chi0_GG in chi0_wGG:
                    chi0_GG[iu] = chi0_GG[il].conj()

        if self.hilbert:
            with self.timer('Hilbert transform'):
                ht = HilbertTransform(self.omega_w, self.eta,
                                      self.timeordered)
                ht(chi0_wGG)
                if optical_limit:
                    ht(chi0_wvv)
                    ht(chi0_wxvG)
            print('Hilbert transform done', file=self.fd)

        if optical_limit and self.intraband:  # Add intraband contribution
            # Work on a copy of the frequency grid; a zero first
            # frequency is nudged to 1e-14 to avoid dividing by zero.
            omega_w = self.omega_w.copy()
            if omega_w[0] == 0.0:
                omega_w[0] = 1e-14

            chi0_vv = self.chi0_vv
            self.world.broadcast(chi0_vv, 0)

            chi0_wvv += (chi0_vv[np.newaxis] /
                         (omega_w[:, np.newaxis, np.newaxis] *
                          (omega_w[:, np.newaxis, np.newaxis] +
                           1j * self.eta)))

        return pd, chi0_wGG, chi0_wxvG, chi0_wvv

Example #25

0

Show file

File: chi0.py Project: Xu-Kai/lotsofcoresbook2code

    def _calculate(self, pd, chi0_wGG, chi0_wxvG, chi0_wvv, Q_aGii, m1, m2,
                   spins):
        wfs = self.calc.wfs

        if self.keep_occupied_states:
            self.mykpts = [
                self.get_k_point(s, K, n1, n2)
                for s, K, n1, n2 in self.mysKn1n2
            ]

        if self.eta == 0.0:
            update = self.update_hermitian
        elif self.hilbert:
            update = self.update_hilbert
        else:
            update = self.update

        q_c = pd.kd.bzk_kc[0]
        optical_limit = not self.no_optical_limit and np.allclose(q_c, 0.0)

        pb = ProgressBar(self.fd)

        self.timer.start('Loop')
        # kpt1 occupied and kpt2 empty:
        for kn, (s, K, n1, n2) in enumerate(self.mysKn1n2):
            pb.update(kn / len(self.mysKn1n2))
            if self.keep_occupied_states:
                kpt1 = self.mykpts[kn]
            else:
                kpt1 = self.get_k_point(s, K, n1, n2)

            if kpt1.s not in spins:
                continue

            with self.timer('k+q'):
                K2 = wfs.kd.find_k_plus_q(q_c, [kpt1.K])[0]
            with self.timer('get k2'):
                kpt2 = self.get_k_point(kpt1.s, K2, m1, m2, block=True)
            with self.timer('fft-indices'):
                Q_G = self.get_fft_indices(kpt1.K, kpt2.K, q_c, pd,
                                           kpt1.shift_c - kpt2.shift_c)

            for n in range(kpt1.n2 - kpt1.n1):
                eps1 = kpt1.eps_n[n]

                # Only update if there exists deps <= omegamax
                if self.omegamax is not None:
                    m = [
                        m for m, d in enumerate(eps1 - kpt2.eps_n)
                        if abs(d) <= self.omegamax
                    ]
                else:
                    m = range(0, kpt2.n2 - kpt2.n1)

                if not len(m):
                    continue

                deps_m = (eps1 - kpt2.eps_n)[m]
                f1 = kpt1.f_n[n]
                with self.timer('conj'):
                    ut1cc_R = kpt1.ut_nR[n].conj()
                with self.timer('paw'):
                    C1_aGi = [
                        np.dot(Q_Gii, P1_ni[n].conj())
                        for Q_Gii, P1_ni in zip(Q_aGii, kpt1.P_ani)
                    ]
                n_mG = self.calculate_pair_densities(ut1cc_R, C1_aGi, kpt2, pd,
                                                     Q_G)[m]
                df_m = (f1 - kpt2.f_n)[m]

                # This is not quite right for degenerate partially occupied
                # bands, but good enough for now:
                df_m[df_m <= 1e-20] = 0.0

                if optical_limit:
                    self.update_optical_limit(n, m, kpt1, kpt2, deps_m, df_m,
                                              n_mG, chi0_wxvG, chi0_wvv)

                update(n_mG, deps_m, df_m, chi0_wGG)

            if optical_limit and self.intraband:
                # Avoid that more ranks are summing up
                # the intraband contributions
                if kpt1.n1 == 0 and self.blockcomm.rank == 0:
                    assert self.nocc2 <= kpt2.nb, \
                        print('Error: Too few unoccupied bands')
                    self.update_intraband(kpt2)

        self.timer.stop('Loop')

        pb.finish()

        with self.timer('Sum CHI_0'):
            for chi0_GG in chi0_wGG:
                self.kncomm.sum(chi0_GG)

            if optical_limit:
                self.kncomm.sum(chi0_wxvG)
                self.kncomm.sum(chi0_wvv)
                if self.intraband:
                    self.kncomm.sum(self.chi0_vv)

        print('Memory used: {0:.3f} MB / CPU'.format(maxrss() / 1024**2),
              file=self.fd)

        if (self.eta == 0.0 or self.hilbert) and self.blockcomm.size == 1:
            # Fill in upper/lower triangle also:
            nG = pd.ngmax
            il = np.tril_indices(nG, -1)
            iu = il[::-1]
            if self.hilbert:
                for chi0_GG in chi0_wGG:
                    chi0_GG[il] = chi0_GG[iu].conj()
            else:
                for chi0_GG in chi0_wGG:
                    chi0_GG[iu] = chi0_GG[il].conj()

        if self.hilbert:
            with self.timer('Hilbert transform'):
                ht = HilbertTransform(self.omega_w, self.eta, self.timeordered)
                ht(chi0_wGG)
                if optical_limit:
                    ht(chi0_wvv)
                    ht(chi0_wxvG)
            print('Hilbert transform done', file=self.fd)

        if optical_limit and self.intraband:  # Add intraband contribution
            omega_w = self.omega_w.copy()
            if omega_w[0] == 0.0:
                omega_w[0] = 1e-14

            chi0_vv = self.chi0_vv
            self.world.broadcast(chi0_vv, 0)

            chi0_wvv += (
                chi0_vv[np.newaxis] /
                (omega_w[:, np.newaxis, np.newaxis] *
                 (omega_w[:, np.newaxis, np.newaxis] + 1j * self.eta)))

        return pd, chi0_wGG, chi0_wxvG, chi0_wvv