Python regex Examples, wpylib.regexps.regex Python Examples

Example #1

0

Show file

def get_gafqmc_run_snapshot(rundirs):
    """Fetch the GAFQMC run snapshot (the INFO files) in bulk.

    The INFO file of every run directory is copied with ``scp`` into its own
    ``partNNNN`` subdirectory of a freshly created temporary directory
    (located under the current working directory) for subsequent examination.
    This tool is used for runs started by the run-gafqmc.sh script.

    Example usage for Ca+4H2 system:

        >>> get_gafqmc_run_snapshot(glob.glob("part*/rundir"))

    where each rundir is a softlink to local scratch where the QMC run
    output is stored temporarily.

    Arguments:
      rundirs -- a single path (string) or a sequence of paths; each one is
                 resolved with os.path.realpath before being parsed.

    Raises:
      ValueError -- if a resolved path does not contain
                    ``/state/partition1/$USER/<jobid>.<host>.run``.
    """
    import re
    from wpylib.regexps import regex
    destbasedir = tempfile.mkdtemp(dir=os.getcwd())
    # Two spaces after the colon reproduce the historical output of
    # `print "...: ", destbasedir`.
    print("Snapshot data is located in subdir:  %s" % destbasedir)
    # This is the standard location for rundir on local scratch as
    # defined by run-gafqmc.sh.  The user name is escaped so that characters
    # such as '.' in it are matched literally.
    rundir_rx = regex(r'/state/partition1/' + re.escape(os.environ['USER']) +
                      r'/([0-9]+)\.([-_a-zA-Z0-9]+)\.run')
    if isinstance(rundirs, basestring):
        rundirs = [rundirs]
    for (i, r) in enumerate(rundirs):
        rlink = os.path.realpath(r)
        # Validate the path first so that a bad rundir does not leave an
        # empty partNNNN directory behind.
        if not (rundir_rx % rlink):
            raise ValueError("Unparseable dirname: %s" % rlink)
        # Group 1 is the job id (not needed here); group 2 is the host that
        # holds the scratch directory.
        host = rundir_rx[2]
        destdir = os.path.join(destbasedir, "part%04d" % i)
        os.mkdir(destdir)
        print("Fetching: %s -> %s" % (r, rlink))
        sh.run('scp',
               ('-p', '%s:%s/INFO' % (host, rlink), '%s/INFO' % destdir))

Example #2

0

Show file

File: output.py Project: wirawan0/pyqmc

    def process_section_scf_(self):
        """**Internal routine**

        Process the output of 'scf' module.

        Reads the optional job title and then the block of ``key = value``
        lines that follows, up to the first blank line.  Recognized values
        are stored in ``self`` under the keys ``ao_basis``, ``nbasis``,
        ``natoms``, ``nclosed_shells``, ``nopen_shells``, ``nalpha_elec``,
        ``nbeta_elec``, ``charge`` and ``scf_type``.  When no explicit
        alpha/beta electron counts were found, they are derived from the
        closed/open shell counts.
        """
        Rx = self.rx_
        txtfile = self.txt_

        def charge_value(text):
            # The charge may be printed as a real number (e.g. "-1.00").
            # Integral values are still returned as `int`, which is what
            # this field has always been declared as.
            value = float(text)
            if value == int(value):
                return int(value)
            return value

        def scf_type_value(text):
            # The wavefunction type is normally a word (RHF, ROHF, UHF);
            # a purely numeric value is still returned as `int` as before.
            try:
                return int(text)
            except ValueError:
                return text

        # Each entry: (pattern, key to store under, converter for group 1)
        search_patterns = [
            (regex(r'^\s*ao basis *= *"([^"]+)"'), 'ao_basis', str),
            (regex(r'^\s*functions\s*=\s*([0-9]+)\s*$'), 'nbasis', int),
            (regex(r'^\s*atoms\s*=\s*([0-9]+)\s*$'), 'natoms', int),
            (regex(r'^\s*closed shells\s*=\s*([0-9]+)\s*$'), 'nclosed_shells',
             int),
            (regex(r'^\s*open shells\s*=\s*([0-9]+)\s*$'), 'nopen_shells',
             int),
            (regex(r'^\s*alpha\s+electrons\s*=\s*([0-9]+)\s*$'), 'nalpha_elec',
             int),
            (regex(r'^\s*beta\s+electrons\s*=\s*([0-9]+)\s*$'), 'nbeta_elec',
             int),
            # Accept an optional sign and fractional part: the old pattern
            # ([0-9]+) could not match anions or a value such as "0.00".
            (regex(r'^\s*charge\s*=\s*([-+]?[0-9]+(?:\.[0-9]*)?)\s*$'),
             'charge', charge_value),
            # Accept alphanumeric names: the old digits-only pattern could
            # not match "RHF"/"ROHF"/"UHF".
            (regex(r'^\s*wavefunction\s*=\s*([A-Za-z0-9]+)\s*$'), 'scf_type',
             scf_type_value),
        ]

        # Look for job title, if any
        self.skip_blank_lines_()
        L = txtfile.next()
        if not (Rx.scf_notitle % L):
            self['title_scf'] = L.strip()
            self.skip_blank_lines_()
        else:
            # No title: this line is already the first data line, so hand it
            # back to the reader (presumably a push-back; see text_input).
            txtfile.file.push(L)

        for L in txtfile:
            if len(L.split()) == 0:
                # A blank line terminates the parameter block.
                break
            for (pat, key, convert) in search_patterns:
                if pat % L:
                    self[key] = convert(pat[1])
                    break

        if 'nalpha_elec' not in self:
            # ROHF/RHF: electron counts are implied by the shell counts.
            # (For UHF the alpha/beta counts were read directly above.)
            self['nalpha_elec'] = self.nclosed_shells + self.nopen_shells
            self['nbeta_elec'] = self.nclosed_shells

Example #3

0

Show file

def fetch_gafqmc_run_output(rundirs, force_update=False, save_walkers=False):
    """Fetch the GAFQMC run output (INFO files and some additional files)
    in bulk.

    The files are put into the corresponding result subdirectory (the
    directory that contains each rundir link) for archival.
    Usually this tool is used for runs that fail to finish in time.

    Example usage for Ca+4H2 system:

        >>> fetch_gafqmc_run_output(glob.glob("part*/rundir"))

    where each rundir is a softlink to the local scratch where the QMC run
    output is stored temporarily.

    Arguments:
      rundirs      -- a single path (string) or a sequence of paths.
      force_update -- if false (default), a rundir is skipped unless its
                      remote INFO file is newer than the INFO (or INFO.lzma)
                      file already present in the destination.
      save_walkers -- if true, the walker file W_old_run_000 is fetched too.

    Raises:
      ValueError -- if a resolved path does not contain
                    ``/state/partition1/<user>/<jobid>.<host>.run``.
    """
    import re
    from wpylib.regexps import regex
    # The user name is escaped so that characters such as '.' in it are
    # matched literally.
    rundir_rx = regex(r'/state/partition1/' + re.escape(getusername()) +
                      r'/([0-9]+)\.([-_a-zA-Z0-9]+)\.run')
    if isinstance(rundirs, basestring):
        rundirs = [rundirs]
    files_to_fetch = [
        'INFO',
        'stdout',
        'fort.17',
        'gafqmc-00000.stat',
        'fort.15',
        '*.in',
    ]
    if save_walkers:
        files_to_fetch.append('W_old_run_000')
    # Joined into a {a,b,c} brace list for the remote side of rsync.
    xfiles_to_fetch = ",".join(files_to_fetch)
    for r in rundirs:
        destdir = os.path.dirname(r) or '.'
        destdir_abs = os.path.realpath(destdir)
        rlink = os.path.realpath(r)
        if not (rundir_rx % rlink):
            raise ValueError("Unparseable dirname: %s" % rlink)
        # Group 1 is the job id (not needed here); group 2 is the host that
        # holds the scratch directory.
        host = rundir_rx[2]
        if not force_update:
            # Check first if the INFO file have the same timestamp:
            old_INFO = _file_search(destdir_abs, ['INFO', 'INFO.lzma'])
            if old_INFO:
                new_INFO = os.path.join(rlink, 'INFO')
                # `test A -nt B` exits with 0 only when A is newer than B;
                # any other status means there is nothing new to fetch.
                if cnode_shell(host, 'test %s -nt %s' %
                               (new_INFO, old_INFO)) != 0:
                    print("Skipping: %s -> %s (result already up-to-date)" % (
                        r, rlink))
                    continue
        print("Fetching: %s -> %s" % (r, rlink))
        # the rsync flags follow those in run-gafqmc.sh
        sh.run('rsync', ('-ptvb', '%s:%s/{%s}' %
                         (host, rlink, xfiles_to_fetch), '%s/' % destdir))

Example #4

0

Show file

File: output.py Project: wirawan0/pyqmc

    def process_section_mcscf_(self):
        """**Internal routine**

        Process the output of 'mcscf' module.

        Reads the optional job title and the block of ``key : value`` lines
        that follows (up to the first blank line), storing recognized values
        in ``self``.  If no alpha electron count is known yet, the total,
        alpha and beta electron counts are derived from the inactive shells,
        active electrons and multiplicity.

        Raises PyqmcDataError if multiplicity and electron count are
        incompatible.
        """
        patterns = self.rx_
        stream = self.txt_

        # Each entry: (pattern, key to store under, converter for group 1)
        field_specs = [
            (regex(r'^\s*Basis functions *: *([0-9]+)'), 'nbasis_mcscf', int),
            (regex(r'^\s*Inactive shells\s*:\s*([0-9]+)\s*$'), 'nfc', int),
            (regex(r'^\s*Active shells\s*:\s*([0-9]+)\s*$'), 'nact_orb', int),
            (regex(r'^\s*Active electrons\s*:\s*([0-9]+)\s*$'), 'nact_elec',
             int),
            (regex(r'^\s*Symmetry\s*:\s*([A-Za-z0-9]+)\s*$'), 'symmetry_mcscf',
             str),
            (regex(r'^\s*Multiplicity\s*:\s*([0-9]+)\s*$'), 'mult_mcscf', int),
        ]

        # Optional job title: anything other than an underline is a title.
        self.skip_blank_lines_()
        first_line = stream.next()
        if patterns.underline % first_line:
            stream.file.push(first_line)
        else:
            # NOTE(review): the MCSCF title is stored under 'title_scf',
            # which would overwrite a title recorded by the SCF section --
            # possibly a copy-paste slip; confirm against callers.
            self['title_scf'] = first_line.strip()
            self.skip_blank_lines_()

        for line in stream:
            if not line.split():
                # A blank line terminates the parameter block.
                break
            for (rx, key, convert) in field_specs:
                if not (rx % line):
                    continue
                if isinstance(key, basestring):
                    self[key] = convert(rx[1])
                    break

        if 'nalpha_elec' in self:
            return

        # 2 electrons per inactive (doubly occupied) shell + active electrons
        self['nelec'] = 2 * self.nfc + self.nact_elec
        # multiplicity = nalpha - nbeta + 1  =>  2*nbeta = nelec - mult + 1
        twice_nbeta = self.nelec - self.mult_mcscf + 1
        if twice_nbeta % 2 != 0:
            raise PyqmcDataError(
                "Invalid combination of multiplicity and num of active electrons?")
        self['nbeta_elec'] = twice_nbeta // 2
        self['nalpha_elec'] = self.nbeta_elec + self.mult_mcscf - 1

Example #5

0

Show file

File: pwqmc_info.py Project: wirawan0/pyqmc

    def parse_INFO(self, INFO):
        """Get all the necessary info (calculation parameters) from the INFO file.

        This is a very old routine.
        We use this as temporary starting point.

        The current contents of ``self`` are cleared, the file named by
        ``INFO`` is scanned line by line, and every recognized line stores
        one or more fields in ``self``.  Defaults are filled in at the end
        for ``nwlkmax``, ``nwlkmin``, ``runtype``, ``constraint`` and
        ``projector``.  The input file is closed even if parsing fails.

        Returns ``self``.
        """
        # FIXME: comment_char is temporarily set to ASCII 0, which should be
        # an invalid character in this output file.
        info_file = text_input(INFO, comment_char='\0', skip_blank_lines=False)
        rslt = self
        try:
            self.clear()
            rslt['info_file'] = INFO
            rslt['info_mtime'] = time.localtime(os.stat(INFO).st_mtime)
            rx_runtype = regex(r'^\s*runtype\s*=\s*([0-9]+)')
            for L in info_file:
                Ls = L.strip()
                ls = Ls.lower()
                flds = Ls.split()
                if len(flds) == 0:
                    continue
                elif Ls.startswith("# of particles:"):
                    u = int(flds[3])
                    d = int(flds[4])
                    if u < d:
                        sys.stderr.write(
                            "pwqmc_info.parse_INFO:Warning: nup < ndn in info file `%s'; autofixing this mistake!\n"
                            % (INFO))
                        u, d = d, u
                    rslt['nup'] = u
                    rslt['ndn'] = d
                    rslt['nelec_up'] = u
                    rslt['nelec_dn'] = d
                elif ls.startswith("majority and minority det are coupled"):
                    rslt['udet'] = False
                elif ls.startswith("majority and minority det are independent"):
                    rslt['udet'] = True
                elif flds[0] == "Nbasis":
                    rslt['nbasis'] = int(flds[2])
                elif ls.startswith("input fft dimension ll ="):
                    rslt['LL'] = (int(flds[5]), int(flds[6]), int(flds[7]))
                elif ls.startswith("trial wf from input: "):
                    # 20 == length of the "trial wf from input:" label
                    rslt['trial_wfn_file'] = Ls[20:].strip()
                elif flds[0] == "Subtotal":
                    rslt["Evar_noconst"] = float(flds[2])
                elif flds[0] == "Variational" and flds[1] == "energy":
                    rslt["Evar"] = float(flds[3])
                    # Relies on the "Subtotal" line having been seen already.
                    rslt["H0"] = rslt["Evar"] - rslt["Evar_noconst"]
                elif flds[0] == "deltau,":
                    rslt["deltau"] = float(flds[3])
                elif flds[0] == "beta=":
                    rslt["betablk"] = float(flds[1])
                elif Ls.startswith("Using reduced k-pts:"):
                    kx = float(flds[3])
                    ky = float(flds[4])
                    if len(flds) > 5:
                        kz = float(flds[5])
                    else:
                        # The third component wrapped onto the next line.
                        kz = float(info_file.next().split()[0])
                    rslt["kpt"] = (kx, ky, kz)
                elif Ls.startswith("Input Etrial="):
                    rslt["Etrial_noconst"] = float(flds[2])  # no H0 yet
                elif Ls.startswith("New etrial to be used in El_bound:"):
                    rslt["Etrial_noconst"] = float(flds[7])  # no H0 yet
                elif Ls.startswith("read in new anorm + etrial:"):
                    rslt["anorm"] = float(flds[6])
                    if len(flds) > 7:
                        rslt["Etrial_noconst"] = float(flds[7])
                    else:
                        # The etrial value wrapped onto the next line.
                        rslt["Etrial_noconst"] = float(
                            info_file.next().split()[0])
                elif Ls.startswith("itv_Em="):
                    rslt["itv_Em"] = int(flds[1])
                elif Ls.startswith("itv_pc="):
                    rslt["itv_pc"] = int(flds[1])
                elif Ls.startswith("itv_pc_eq="):
                    rslt["itv_pc_eq"] = int(flds[1])
                elif Ls.startswith("nblk="):
                    rslt["nblk"] = int(flds[1])
                elif Ls.startswith("neq="):
                    rslt["neq"] = int(flds[1])
                elif Ls.startswith("ngrth="):
                    rslt["ngrth"] = int(flds[1])
                elif Ls.startswith("trial E fixed at input value"):
                    rslt["ngrth"] = 0
                elif Ls.startswith("nblkstep="):
                    rslt["nblkstep"] = int(flds[1])
                elif Ls.startswith("nwlk="):
                    rslt["nwlk"] = int(flds[1])
                elif Ls.startswith("nwlkmax="):
                    rslt["nwlkmax"] = int(flds[1])
                elif Ls.startswith("nwlkmin="):
                    rslt["nwlkmin"] = int(flds[1])
                elif rx_runtype % Ls:
                    runtype = int(rx_runtype[1])
                    rslt["runtype"] = runtype
                    runtype_rec = self.runtype_map[runtype]
                    rslt["constraint"], rslt["projector"] = runtype_rec[:2]

                # ---crystal and atom parameters, etc---
                # Wherever possible, we will use abinit-style keywords
                # to make this consistent with abinit.
                elif Ls.startswith("Volume of the unit cell ="):
                    rslt["vol"] = float(flds[6])  # in bohr**3
                elif Ls.startswith("nspec_in ="):
                    rslt["ntypat"] = int(flds[2])  # number of atomic species
                    rslt["has_pseudo"] = (rslt["ntypat"] > 0)

                # ---runtime info below---
                elif Ls.startswith("OpenMP support enabled using"):
                    rslt["num_threads"] = int(flds[4])
                elif Ls.startswith("parallel version using"):
                    rslt["num_tasks"] = int(flds[3])
                elif Ls.startswith("hostname:"):
                    rslt["run_host"] = flds[1]
                elif Ls.startswith("Program started on"):
                    # Date field plus the first six characters (HHMMSS) of
                    # the time field.
                    rslt["start_time"] = time.strptime(
                        flds[3] + " " + flds[5][:6], "%Y%m%d %H%M%S")
                elif Ls.startswith(
                        "Planewave-AFQMC calculation for system: General electronic system"
                ):
                    rslt["code_name"] = "pwqmc"

                # measurement and other complex data capture
                elif Ls.startswith("Measurement phase..."):
                    self.parse_measurement0(info_file, rslt)
        finally:
            # Do not leak the file handle when a line fails to parse.
            info_file.close()

        # Both defaults read rslt.nwlk, so "nwlk=" must have been present.
        rslt.setdefault("nwlkmax", rslt.nwlk * 2)
        rslt.setdefault("nwlkmin", max(rslt.nwlk // 2, 1))
        # fall back to original defaults:
        rslt.setdefault("runtype", 0)
        rslt.setdefault("constraint", "phaseless cosine")
        rslt.setdefault("projector", "Elocal")
        rslt["run_mpi"] = ("num_tasks" in rslt)
        rslt["run_openmp"] = ("num_threads" in rslt)
        return rslt

Example #6

0

Show file

File: datfile.py Project: wirawan0/pyqmc

    def read(self,
             infile,
             vec_select=1,
             verbose=0,
             nbasis=None,
             out=sys.stdout):
        """Reads off molecular orbital vectors from a $VEC block.

    Usage:
      movecs = pyqmc.gamess.datfile.movecs(fname, [options])
    Valid options:
      vec_select = <integer>  (default: 1; 1-based choice of $VEC block)
      verbose    = 0|1        (default: 0)
      nbasis     = <integer>  (default: autodetected)
      out        = destination for verbose messages (default: sys.stdout)

    Returns `self`, after setting the attributes `filename`, `vec_select`,
    `comments`, `udet`, `nbasis`, and one attribute per spin sector read
    (`alpha`, plus `beta` for UHF-type data).  Each spin-sector attribute is
    the transpose of the list of vectors read, i.e. a numpy array with one
    column per orbital.

    Raises PyqmcDataError on any inconsistency in the $VEC data, or if the
    requested $VEC block cannot be found.

    This routine was translated from Gamess::ReadGamessMOVecs routine
    in my Gamess.pm perl module.
    The latter routine was derived from C2_UHF_gamess.pl dated ~20070813.

    CAUTION:
    The resulting orbital (orbitals_alpha, orbitals_beta) arrays are 1-based,
    both in the orbital index and the basis index.
    NOTE(review): the code below stores plain numpy arrays under the
    attribute names `alpha`/`beta`; the 1-based statement and the
    `orbitals_*` names look inherited from the Perl original -- confirm.

    Strict vector ordering (1, 2, 3, ..., N) is required in the $VEC data.
    We will check orbital indices strictly.
    This requires the orbitals be strictly ordered, with no skipping, etc.
    Strict checking is necessary for proper reading when we have more than 99
    basis functions.

    In anticipating for large basis size, the rule for deducing UHF-type
    movecs is more complicated. For nbasis >= 100, the tag number rolls back
    to zero, unfortunately, which makes thing a bit difficult: when we see a
    tag of " 1" again, is it UHF beta sector, or movec #101?
    One way we can be assured that it IS an UHF movecs is prohibiting the size
    of movecs to be greater than the deduced nbasis, which is a reasonable
    restriction. Then, when we apparently encounter movec "101" when nbasis==100,
    we can be sure that the 101st vector is actually beta movec #1.
    Thus UHF movecs can be detected by the following rule:
    - ( old $VecTag != 0, or old $VecIndex == $nbasis already ) AND
      new $VecTag == 1 .

    FIXME:
    The solution above is still not foolproof in two cases:

    1) suppose we have an UHF-type movecs with nbasis=220,
    but each spin sector only has 100 orbitals listed.
    Then this will be interpreted as an RHF-type movecs with nbasis=220
    and norbitals=200.
    2) in spherical basis, maximum norbitals is <= nbasis.
    When this happens, then the deduced nbasis is not the right number of
    spherical basis functions (thus the nbasis deduced above is not right).

    But I haven't seen this case yet, so forget about them temporarily.
    """
        from wpylib.regexps import regex
        # MOVECS comments (always 3 lines preceding the $VEC block)
        rx_comment = regex(r'^--- ')
        # NOTE(review): the inline (?i) flag follows '^' in the next two
        # patterns; newer Python `re` versions require global flags at the
        # very start of the pattern -- relevant if this code is ever ported.
        rx_vec = regex(r'^(?i) \$vec')
        rx_endvec = regex(r'^(?i)  ?\$end')
        F = text_input(infile)
        comments = None
        found = False
        vec_blk_count = 0
        n_comment_lines = 0
        O = text_output(out, flush=True)
        spin = "alpha"
        udet = False
        # Maps spin sector name ("alpha"/"beta") -> MO coefficient array
        AllVecs = {}
        for txt in F:
            # A line starting with '--- ' opens a new 3-line comment header.
            if rx_comment % txt:
                comments = []
                n_comment_lines = 3
            # NOTE(review): if a skipped $VEC header appears fewer than three
            # lines after a '--- ' line, `comments` is reset to None below
            # while n_comment_lines is still > 0, and this append would fail.
            if n_comment_lines > 0:
                comments.append(txt)
                n_comment_lines -= 1
            if rx_vec % txt:
                vec_blk_count += 1
                if vec_blk_count < vec_select:
                    # the comments we just read (if any) are irrelevant, so
                    # remove them.
                    comments = None
                    continue

                found = True
                # This is the actual movecs reading loop ---
                # The $END marker for initial orbital guess (PUNMO=.TRUE.) is
                # buggy--we must tolerate that
                # NOTE(review): unlike the F.next() at the bottom of the loop,
                # this call is not guarded against StopIteration.
                txt = F.next()
                # VecIndex = MO index to identify the whole vector
                # VecTag = MO "tag" number
                # In general VecTag is equal to VecIndex except when we have >= 100
                # basis funcs (where VecTag has only the last two digits).
                # NOTE: VecTag is *always* a 2-character string!
                VecIndex = 1
                VecTag = " 1"
                # AmplIndex = number of amplitudes read so far for the
                # current vector; Ampl = those amplitudes; Vecs = all vectors
                # of the current spin sector (Ampl is its last element).
                AmplIndex = 0
                Ampl = []
                Vecs = [Ampl]
                while not (rx_endvec % txt):
                    # The first two characters of a data line are the tag.
                    NewVecTag = txt[0:2]
                    #print "H: $txt\n";
                    #print "V: $NewVecTag\n";

                    # We should safely assume that VecTag > 1 at the end
                    # of alpha orbitals; if that's not the case, that's
                    # YOUR fault (why doing 1-basis quantum chemistry?)
                    if NewVecTag != VecTag:
                        # Just in case, we are very pedantic in checking for errors here:
                        # NOTE(review): `!= None` works but `is not None` is
                        # the idiomatic spelling.
                        if nbasis != None:
                            # (1) nbasis must be consistent
                            if AmplIndex != nbasis:
                                raise PyqmcDataError, \
                                  ("%s:%d: Inconsistent nbasis " + \
                                   "(original guess was = %d, most recently deduced value = %d) " + \
                                   "for %s vector #%d") \
                                  % (infile, F.lineno, nbasis, AmplIndex, spin, VecIndex)
                        else:
                            # Deduce nbasis from the length of the first
                            # completed vector
                            nbasis = AmplIndex
                            if nbasis == 0:
                                raise PyqmcDataError, \
                                  ("%s:%d: nbasis detected as zero! " + \
                                   "Maybe there is corruption in the input file?") \
                                  % (infile, F.lineno)
                            if verbose > 0:
                                O("pyqmc.gamess.movecs.read:%s: Deduced nbasis = %d\n" \
                                  % (infile, nbasis))

                        # UHF-type vector detection scheme: see the notes above
                        if (VecTag != " 0"
                                or VecIndex == nbasis) and NewVecTag == " 1":
                            if verbose > 0:
                                O("pyqmc.gamess.movecs.read:%s: Found UHF-type movecs\n" \
                                  % (infile,))

                            # A second wrap-around to tag " 1" would mean a
                            # third spin sector, which is not allowed.
                            if udet:
                                raise PyqmcDataError, \
                                  ("%s:%d: alpha and beta orbitals were already defined?! " + \
                                   "Maybe there is a mistake with your $VEC data?") \
                                  % (infile, F.lineno)

                            # Store the finished alpha sector (one column per
                            # orbital after the transpose).
                            AllVecs[spin] = numpy.array(Vecs, dtype=float).T
                            # start all over with a new MO block
                            Ampl = []
                            Vecs = [Ampl]
                            spin = "beta"
                            udet = True
                            # Reset to 0: the increment below makes it 1.
                            VecIndex = 0
                            # end UHF-type detection scheme
                        else:
                            # Some additional error checking(s)
                            if VecIndex >= nbasis:  # and nbasis != 100:
                                # NOTE: We disallow norbitals > nbasis in our reader.
                                raise PyqmcDataError, \
                                  ("%s:%d: The $VEC block has more than %d orbitals, " + \
                                   "which is prohibited by this routine") \
                                  % (infile, F.lineno, nbasis)

                            Ampl = []
                            Vecs.append(Ampl)

                        AmplIndex = 0
                        # Start off a new vector
                        VecIndex += 1
                        VecTag = NewVecTag
                    # end new vector/new spin sector detection

                    # Strict index vs. tag checking:
                    # the tag must equal the last two digits of the index.
                    TagChk = "%2d" % (VecIndex % 100)
                    if TagChk != VecTag:
                        raise PyqmcDataError, \
                          ("%s:%d: Mismatch vector tag number in vector #%d " + \
                           "(wanted '%s', got '%s')") \
                          % (infile, F.lineno, VecIndex, TagChk, VecTag)

                    # the amplitudes are always stored in -n.nnnnnnnnE+nn fixed format
                    # (15 characters wide), starting after a 5-character prefix
                    txtdata = txt[5:].rstrip()
                    lendata = len(txtdata) // 15

                    Ampl += [
                        float(txtdata[i * 15:i * 15 + 15])
                        for i in xrange(0, lendata)
                    ]
                    AmplIndex += lendata

                    # TODO: $i < 5 is allowed ONLY on the last line;
                    # Make sure we catch that.
                    #print $VecIndex, " ", $AmplIndex, "\n";

                    try:
                        txt = F.next()
                    except StopIteration:
                        raise PyqmcDataError, \
                          ("%s:%d: Unexpected EOF while reading in $VEC data") \
                          % (infile, F.lineno)
                # end loop for reading in $VEC data

                # Finalization: do final checks, etc.

                # Store the last (or only) spin sector.
                AllVecs[spin] = numpy.array(Vecs, dtype=float).T

                # The very last vector is not followed by a tag change, so
                # its length has to be verified here.
                if AmplIndex != nbasis:
                    raise PyqmcDataError, \
                      ("%s:%d: Inconsistent nbasis " + \
                       "(original guess was = %d, most recently deduced value = %d) " + \
                       "for %s vector #%d") \
                      % (infile, F.lineno, nbasis, AmplIndex, spin, VecIndex)

                if udet:
                    if AllVecs['alpha'].shape != AllVecs['beta'].shape:
                        raise PyqmcDataError, \
                          ("%s:%d: Inconsistent shape of MO matrix: " + \
                           "(alpha = %s, beta = %s)") \
                          % (infile, F.lineno, \
                             AllVecs['alpha'].shape, \
                             AllVecs['beta'].shape, \
                            )
                if verbose > 0:
                    O("pyqmc.gamess.movecs.read:%s: Total MO vectors read = %s%s\n" \
                      % (infile, VecIndex, ifelse(udet, " (UHF-type)", "")))

                # stop reading if the desired vectors have been loaded
                break

        # end main text reading loop

        if not found:
            raise PyqmcDataError, \
              ("%s: Cannot find $VEC block number %s") \
              % (infile, vec_select)

        # Save the reading results to the "self" record:
        self.filename = infile
        self.vec_select = vec_select
        self.comments = comments
        self.udet = udet
        self.nbasis = nbasis
        # One attribute per spin sector: self.alpha (and self.beta for UHF)
        for (spin, mo) in AllVecs.iteritems():
            setattr(self, spin, mo)
        return self

Example #7

0

Show file

File: output.py Project: wirawan0/pyqmc

 class rx_:
     """Table of regular expressions used to recognize lines of the output
     file being parsed.

     NOTE(review): markers such as 'prteigrs' and 'DATASET' suggest this is
     the ABINIT output parser -- confirm against the enclosing class.
     """
     # This sneaky info can be used to determine # of planewaves:
     kpt_npw = regex(
         r'P newkpt: treating\s+(?P<nband>[0-9]+) bands with npw=\s*(?P<npw>[0-9]+) for ikpt=\s*(?P<ikpt>[0-9]+)'
     )
     # --- SCF iteration block ---
     scf_begin = regex(
         r'^\s*iter\s+Etot\(hartree\)\s+deltaE\(h\)')  # marker of SCF block
     # One "ETOT <n> <Etot> <Ediff>" line of the SCF block
     scf_line1 = regex(
         r'^\s*ETOT\s*[0-9\*]+\s+(?P<Etot>[-+eE0-9.]+)\s+(?P<Ediff>[-+eE0-9.]+)'
     )
     # Convergence message carrying the SCF step count
     scf_convg1 = regex(
         r'^\s*At SCF step\s+(?P<numscf>[0-9]+)\s*,\s*etot is converged')
     # --- energy components block: header line and dashed terminator ---
     E_begin = regex(r'^\s*Components of total free energy')
     E_end = regex(r'^\s*-+\s*$')
     # Each entry is (pattern, name, converter); group 1 of the pattern is
     # the numeric value.  Presumably consumed as result[name] =
     # converter(group 1) by the parser -- the consumer is not in this class.
     E_parts_list = [
         (regex(r'^\s*Kinetic energy\s*=\s*([-+eE0-9.]+)'), 'E_kinetic',
          float),
         (regex(r'^\s*Hartree energy\s*=\s*([-+eE0-9.]+)'), 'E_hartree',
          float),
         (regex(r'^\s*Ewald energy\s*=\s*([-+eE0-9.]+)'), 'E_ewald', float),
         (regex(r'^\s*PspCore energy\s*=\s*([-+eE0-9.]+)'), 'E_pspcore',
          float),
         (regex(r'^\s*Loc\.?\s+psp\.?\s+energy\s*=\s*([-+eE0-9.]+)'),
          'E_psploc', float),
         (regex(r'^\s*NL\s+psp\.?\s+energy\s*=\s*([-+eE0-9.]+)'),
          'E_pspnonloc', float),
         (regex(r'^\s*>+ Internal E\s*=\s*([-+eE0-9.]+)'), 'E_internal',
          float),
         (regex(r'^\s*-kT\*entropy\s*=\s*([-+eE0-9.]+)'), 'E_smear', float),
         (regex(r'^\s*>+ Etotal\s*=\s*([-+eE0-9.]+)'), 'Etotal', float),
     ]
     # --- eigenvalue listing ---
     # Header; the optional trailing ", SPIN <name>" is captured as 'spin'.
     eigen_begin = regex(
         r'^\s*Eigenvalues \(hartree\) for nkpt=\s*(?P<nkpt>[0-9]+)\s*k points(?:, SPIN (?P<spin>[A-Za-z]+))?:'
     )
     # Per-k-point header: index, band count, weight and the three
     # k-point coordinates.
     eigen_kpt1 = regex(
         r'^\s*kpt#\s*(?P<ikpt>[0-9]+),\s*nband=\s*(?P<nband>[0-9]+), wtk=\s*(?P<wtk>[.0-9]+), kpt=\s*(?P<kx>[-+.0-9]+)\s+(?P<ky>[-+.0-9]+)\s+(?P<kz>[-+.0-9]+)'
     )
     # Message printed when the k-point listing is truncated
     eigen_kpt_stop = regex(
         r'^\s*prteigrs\s*: .*do not print more k-points')
     # Header of the per-atom integrated density table
     densph_begin = regex(
         r'^\s*Atom\s+Sphere radius\s+Integrated_up_density\s+Integrated_dn_density\s+Total[^\s]+\s+Diff'
     )
     # A dataset ends where the next "== DATASET n ==" banner or the
     # "== END DATASET(S) ==" banner appears.
     dataset_end = regex(
         r'^\s*==\s*(?:DATASET\s+[0-9]+|END DATASET\(S\))\s+=+')

Example #8

0

Show file

File: output.py Project: wirawan0/pyqmc

 class rx_:
     """Table of regular expressions marking the top-level landmarks of an
     ABINIT output file (version banner, dataset banner, outvars echo)."""
     # Version banner; the version string is captured as 'version'.
     abinit_version = regex(
         r'^[. ]Version\s+(?P<version>[^\s]+)\s+of ABINIT')
     # "== DATASET n ==" banner; the dataset number is captured as 'dataset'.
     dataset_begin = regex(r'^\s*==\s*DATASET\s+(?P<dataset>[0-9]+)\s+=+')
     # Start of the echo of preprocessed input variables (not anchored, so
     # it may match anywhere in the line).
     outvars_begin = regex(
         r'-outvars: echo values of preprocessed input variables')

Example #9

0

Show file

File: output.py Project: wirawan0/pyqmc

    def parse_file_(self, filename):
        """Extracts information from an nwchem output file (from its stdout).

        Right now, this parser is only good for single-point calculations
        (i.e. no multijob or geometry optimization at this point).

        The current contents of ``self`` are cleared; the open text reader is
        kept in ``self.txt_`` so that the section processors can continue
        reading from it.  Section headers (input, SCF, MCSCF) dispatch to the
        corresponding ``process_section_*_`` method; any other line is
        checked against a table of energy patterns.

        Returns ``self``.
        Raises PyqmcDataError if the NWChem version banner cannot be found.
        """

        Rx = self.rx_
        txtfile = text_input(filename,
                             skip_blank_lines=False,
                             comment_char='\0',
                             superize=True)

        self.clear()
        rslt = self
        self.txt_ = txtfile

        # This will also serve as an initial screening of the file
        try:
            L = txtfile.seek_text(Rx.nwchem_version.rx)
        except Exception:
            # `except Exception` (not a bare `except`) so that
            # KeyboardInterrupt/SystemExit are not misreported as a data
            # error.
            raise PyqmcDataError(
                "Cannot determine nwchem version in `%s'; perhaps it is not an nwchem output file"
                % filename)
        rslt['info_code_version'] = (Rx.nwchem_version % L).group(1)
        rslt['info_mtime'] = time.localtime(os.path.getmtime(filename))

        # Each entry: (pattern, key to store under, converter for group 1)
        search_patterns = [
            (regex(r'^\s*Total SCF energy *= *([-+eE0-9.]+)'), 'E_SCF', float),
            (regex(r'^\s*Total MCSCF energy *= *([-+eE0-9.]+)'), 'E_MCSCF',
             float),
            (regex(r'^\s*Total DFT energy *= *([-+eE0-9.]+)'), 'E_DFT', float),
            (regex(r'^\s*CCSD total energy */ *hartree *= *([-+eE0-9.]+)'),
             'E_CCSD', float),
            (regex(
                r'^\s*CCSD\(T\) total energy */ *hartree *= *([-+eE0-9.]+)'),
             'E_CCSD_T', float),
            (regex(r'^\s*Nuclear repulsion energy *= *([-+eE0-9.]+)'),
             'E_nuclear', float),
            (regex(r'^\s*Total MP2 energy\s+([-+eE0-9.]+)'), 'E_MP2',
             float),  # old MP2 module

            # The patterns above are not context-aware.
            # We may want to use stricter scanner which takes into account
            # the context (which section) we are in.
        ]

        for L in txtfile:
            if len(L.split()) == 0:
                continue
            elif self.detect_section_(Rx.hdr_input, L=L):
                # Input module output
                self.process_section_input_()
            elif self.detect_section_(Rx.hdr_scf, L=L):
                # SCF module output
                self.process_section_scf_()
            elif self.detect_section_(Rx.hdr_mcscf, L=L):
                # MCSCF module output
                self.process_section_mcscf_()
            else:
                # Only the first matching pattern is applied to a line.
                for (pat, key, convert) in search_patterns:
                    if pat % L:
                        rslt[key] = convert(pat[1])
                        break

        if 'nalpha_elec' in self:
            # Publish the electron counts under the generic key names too.
            self['nelec_up'] = self.nalpha_elec
            self['nelec_dn'] = self.nbeta_elec

        return rslt

Example #10

0

Show file

File: output.py Project: wirawan0/pyqmc

 class rx_:
     """Table of regular expressions used to recognize landmarks in an
     NWChem output file (version banner, energies, section headers)."""
     # Version banner; group 1 is the version string.
     nwchem_version = regex(
         r'^\s*Northwest Computational Chemistry Package \(NWChem\)\s+([-0-9a-zA-Z_.]+)'
     )
     # Obsoleted (not used)
     e_scf = regex(r'^\s*Total (SCF|DFT) energy *= *([-+eE0-9.]+)')
     e_ccsd = regex(r'^\s*CCSD total energy */ *hartree *= *([-+eE0-9.]+)')
     e_ccsd_t = regex(
         r'^\s*CCSD\(T\) total energy */ *hartree *= *([-+eE0-9.]+)')
     e_nucl = regex(r'^\s*Nuclear repulsion energy *= *([-+eE0-9.]+)')
     # Section headers (anchored at the end of the line only)
     hdr_input = regex(r'\s*NWChem Input Module\s*$')
     hdr_scf = regex(r'\s*NWChem SCF Module\s*$')
     hdr_mcscf = regex(r'\s*NWChem Direct MCSCF Module\s*$')
     # A line consisting only of dashes (and optional whitespace)
     underline = regex(r'\s*-+\s*$')
     # Lines that tell us a section has NO job title.
     # The following are not entirely foolproof, but nobody would be foolish
     # enough to use these as a calculation title, would they?
     input_notitle = regex(
         r'^\s*Scaling coordinates for geometry ".*" by +[0-9.]+')
     scf_notitle = regex(r'^\s*ao basis += +".*"')