def get_gafqmc_run_snapshot(rundirs):
    """Fetch the GAFQMC run snapshot (the INFO files) in bulk.

    Every INFO file is copied with ``scp -p`` into its own ``partNNNN``
    subdirectory of a freshly created temporary directory under the current
    working directory, for subsequent examination.
    This tool is used for runs started by the run-gafqmc.sh script.

    Parameters:
      rundirs -- a single rundir name, or a sequence of rundir names.
        Each rundir is a softlink to the local scratch where the QMC run
        output is stored temporarily.

    Raises:
      ValueError -- if the resolved rundir path does not look like
        ``/state/partition1/<user>/<jobid>.<host>.run``.

    Example usage for Ca+4H2 system:

    >>> get_gafqmc_run_snapshot(glob.glob("part*/rundir"))
    """
    import re
    from wpylib.regexps import regex

    destbasedir = tempfile.mkdtemp(dir=os.getcwd())
    # Two spaces after the colon reproduce the historical output format.
    print("Snapshot data is located in subdir:  %s" % destbasedir)

    # This is the standard location for rundir on local scratch as
    # defined by run-gafqmc.sh.
    # The user name is escaped so that regex metacharacters in it are taken
    # literally.  Group 1 is the job ID, group 2 is the host name.
    rundir_rx = regex(r'/state/partition1/'
                      + re.escape(os.environ['USER'])
                      + r'/([0-9]+)\.([-_a-zA-Z0-9]+)\.run')

    if isinstance(rundirs, basestring):
        rundirs = [rundirs]

    for (i, r) in enumerate(rundirs):
        destdir = os.path.join(destbasedir, "part%04d" % i)
        os.mkdir(destdir)
        # Resolve the softlink to learn which host holds the scratch data.
        rlink = os.path.realpath(r)
        if not (rundir_rx % rlink):
            raise ValueError("Unparseable dirname: %s" % rlink)
        host = rundir_rx[2]
        print("Fetching: %s -> %s" % (r, rlink))
        sh.run('scp', ('-p',
                       '%s:%s/INFO' % (host, rlink),
                       '%s/INFO' % destdir))
def process_section_scf_(self):
    """**Internal routine**

    Process the output of the 'scf' module.

    Reads lines from ``self.txt_`` starting right after the section header:
    an optional job title, then a block of ``keyword = value`` lines which
    ends at the first blank line.  Recognized keywords are stored in
    ``self`` under the keys listed in ``search_patterns`` below.

    If no 'alpha electrons' line was seen (ROHF/RHF output), the electron
    counts ``nalpha_elec`` and ``nbeta_elec`` are derived from the numbers
    of closed and open shells.
    """
    Rx = self.rx_
    txtfile = self.txt_
    # Each entry: (pattern, result key, converter applied to group 1)
    search_patterns = [
        (regex(r'^\s*ao basis *= *"([^"]+)"'), 'ao_basis', str),
        (regex(r'^\s*functions\s*=\s*([0-9]+)\s*$'), 'nbasis', int),
        (regex(r'^\s*atoms\s*=\s*([0-9]+)\s*$'), 'natoms', int),
        (regex(r'^\s*closed shells\s*=\s*([0-9]+)\s*$'),
         'nclosed_shells', int),
        (regex(r'^\s*open shells\s*=\s*([0-9]+)\s*$'),
         'nopen_shells', int),
        (regex(r'^\s*alpha\s+electrons\s*=\s*([0-9]+)\s*$'),
         'nalpha_elec', int),
        (regex(r'^\s*beta\s+electrons\s*=\s*([0-9]+)\s*$'),
         'nbeta_elec', int),
        # NWChem prints the charge as a (possibly negative) decimal number,
        # e.g. "charge = -1.00", and the wavefunction type as a word, e.g.
        # "wavefunction = RHF"; integer-only patterns never match those.
        (regex(r'^\s*charge\s*=\s*([-+]?[0-9]+(?:\.[0-9]*)?)\s*$'),
         'charge', float),
        (regex(r'^\s*wavefunction\s*=\s*([A-Za-z0-9]+)\s*$'),
         'scf_type', str),
    ]

    # Look for job title, if any
    self.skip_blank_lines_()
    L = txtfile.next()
    if not (Rx.scf_notitle % L):
        self['title_scf'] = L.strip()
        self.skip_blank_lines_()
    else:
        # Not a title: give the line back so the loop below can parse it.
        txtfile.file.push(L)

    for L in txtfile:
        if not L.split():
            # A blank line terminates the parameter block.
            break
        for (pat, key, convert) in search_patterns:
            if pat % L:
                self[key] = convert(pat[1])
                break

    if 'nalpha_elec' not in self:
        # ROHF/RHF: electron counts are implied by the shell counts.
        # (For UHF the counts were read directly from the output.)
        self['nalpha_elec'] = self.nclosed_shells + self.nopen_shells
        self['nbeta_elec'] = self.nclosed_shells
def fetch_gafqmc_run_output(rundirs, force_update=False, save_walkers=False):
    """Fetch the GAFQMC run output (INFO and some additional files) in bulk.

    The files are copied with rsync into the directory that contains each
    rundir softlink (the corresponding result subdirectory) for archival.
    Usually this tool is used for runs that fail to finish in time.

    Parameters:
      rundirs -- a single rundir name, or a sequence of rundir names.
        Each rundir is a softlink to the local scratch where the QMC run
        output is stored temporarily.
      force_update -- if false (the default), a rundir is skipped when an
        INFO or INFO.lzma file is already present in the destination and
        the ``test ... -nt ...`` check on the scratch INFO file fails.
      save_walkers -- if true, the 'W_old_run_000' file is fetched as well.

    Raises:
      ValueError -- if the resolved rundir path does not look like
        ``/state/partition1/<user>/<jobid>.<host>.run``.

    Example usage for Ca+4H2 system:

    >>> fetch_gafqmc_run_output(glob.glob("part*/rundir"))
    """
    import re
    from wpylib.regexps import regex

    # The user name is escaped so that regex metacharacters in it are taken
    # literally.  Group 1 is the job ID, group 2 is the host name.
    rundir_rx = regex(r'/state/partition1/'
                      + re.escape(getusername())
                      + r'/([0-9]+)\.([-_a-zA-Z0-9]+)\.run')

    if isinstance(rundirs, basestring):
        rundirs = [rundirs]

    files_to_fetch = [
        'INFO',
        'stdout',
        'fort.17',
        'gafqmc-00000.stat',
        'fort.15',
        '*.in',
    ]
    if save_walkers:
        files_to_fetch += ['W_old_run_000']
    # Comma-separated list used inside a {a,b,c} brace expression below.
    xfiles_to_fetch = ",".join(files_to_fetch)

    for r in rundirs:
        destdir = os.path.dirname(r)
        if destdir == '':
            destdir = '.'
        destdir_abs = os.path.realpath(destdir)
        rlink = os.path.realpath(r)
        if not (rundir_rx % rlink):
            raise ValueError("Unparseable dirname: %s" % rlink)
        host = rundir_rx[2]

        if not force_update:
            # Check first if the INFO file have the same timestamp.
            # NOTE(review): _file_search presumably returns the path of the
            # first existing candidate (or a false value) -- confirm.
            old_INFO = _file_search(destdir_abs, ['INFO', 'INFO.lzma'])
            if old_INFO:
                new_INFO = os.path.join(rlink, 'INFO')
                # A nonzero status means the scratch copy is not newer than
                # what we already have.
                if cnode_shell(host, 'test %s -nt %s'
                               % (new_INFO, old_INFO)) != 0:
                    print("Skipping: %s -> %s (result already up-to-date)"
                          % (r, rlink))
                    continue

        print("Fetching: %s -> %s" % (r, rlink))
        # the rsync flags follow those in run-gafqmc.sh
        sh.run('rsync', ('-ptvb',
                         '%s:%s/{%s}' % (host, rlink, xfiles_to_fetch),
                         '%s/' % destdir))
def process_section_mcscf_(self):
    """**Internal routine**

    Process the output of the 'mcscf' module.

    Consumes lines from ``self.txt_``: an optional job title followed by a
    block of ``keyword : value`` lines that ends at the first blank line.
    Matched values are stored in ``self``.  When ``nalpha_elec`` is not
    known yet, the total/alpha/beta electron counts are derived from the
    inactive shells, the active electrons and the multiplicity.

    Raises PyqmcDataError if the electron count and the multiplicity have
    inconsistent parity.
    """
    rx_table = self.rx_
    reader = self.txt_
    # Each entry: (pattern, result key, converter applied to group 1)
    field_scanners = [
        (regex(r'^\s*Basis functions *: *([0-9]+)'), 'nbasis_mcscf', int),
        (regex(r'^\s*Inactive shells\s*:\s*([0-9]+)\s*$'), 'nfc', int),
        (regex(r'^\s*Active shells\s*:\s*([0-9]+)\s*$'), 'nact_orb', int),
        (regex(r'^\s*Active electrons\s*:\s*([0-9]+)\s*$'),
         'nact_elec', int),
        (regex(r'^\s*Symmetry\s*:\s*([A-Za-z0-9]+)\s*$'),
         'symmetry_mcscf', str),
        (regex(r'^\s*Multiplicity\s*:\s*([0-9]+)\s*$'), 'mult_mcscf', int),
    ]

    # Look for job title, if any
    self.skip_blank_lines_()
    first_line = reader.next()
    if rx_table.underline % first_line:
        # An underline is not a title: hand the line back to the reader.
        reader.file.push(first_line)
    else:
        # NOTE(review): the title is stored under 'title_scf', the same key
        # the SCF section uses -- confirm whether this is intended.
        self['title_scf'] = first_line.strip()
        self.skip_blank_lines_()

    for line in reader:
        if not line.split():
            # The parameter block ends at the first blank line.
            break
        for (pattern, key, convert) in field_scanners:
            if pattern % line:
                if isinstance(key, basestring):
                    self[key] = convert(pattern[1])
                break

    if 'nalpha_elec' in self:
        return

    # Inactive shells are doubly occupied.
    self['nelec'] = 2 * self.nfc + self.nact_elec
    # multiplicity = nalpha - nbeta + 1  ==>  2*nbeta = nelec - mult + 1
    twice_nbeta = self.nelec - self.mult_mcscf + 1
    if twice_nbeta % 2 != 0:
        raise PyqmcDataError(
            "Invalid combination of multiplicity and num of active electrons?")
    self['nbeta_elec'] = twice_nbeta // 2
    self['nalpha_elec'] = self.nbeta_elec + self.mult_mcscf - 1
def parse_INFO(self, INFO):
    '''Gets all the necessary info (calculation parameters) from the
    INFO file.
    This is a very old routine.
    We use this as temporary starting point.

    The current contents of `self` are cleared, then the file named by
    `INFO` is scanned line by line; every recognized line stores one or
    more entries in `self` (e.g. 'nup', 'ndn', 'nbasis', 'Evar', 'deltau',
    'nwlk', 'runtype', ...).  After the scan, defaults are filled in for
    'nwlkmax', 'nwlkmin', 'runtype', 'constraint' and 'projector', and the
    flags 'run_mpi' / 'run_openmp' are set.

    The input file is closed even when parsing fails.

    Returns `self`.
    '''
    # FIXME: comment_char is temporarily set to ASCII 0, which should be
    # an invalid character in this output file.
    info_file = text_input(INFO, comment_char='\0', skip_blank_lines=False)
    try:
        self.clear()
        rslt = self
        rslt['info_file'] = INFO
        rslt['info_mtime'] = time.localtime(os.stat(INFO).st_mtime)
        rx_runtype = regex(r'^\s*runtype\s*=\s*([0-9]+)')

        for L in info_file:
            Ls = L.strip()
            ls = Ls.lower()
            flds = Ls.split()
            if len(flds) == 0:
                continue
            elif Ls.startswith("# of particles:"):
                u = int(flds[3])
                d = int(flds[4])
                if u < d:
                    sys.stderr.write(
                        "pwqmc_info.parse_INFO:Warning: nup < ndn in info file `%s'; autofixing this mistake!\n"
                        % (INFO))
                    u, d = d, u
                rslt['nup'] = u
                rslt['ndn'] = d
                rslt['nelec_up'] = u
                rslt['nelec_dn'] = d
            elif ls.startswith("majority and minority det are coupled"):
                rslt['udet'] = False
            elif ls.startswith("majority and minority det are independent"):
                rslt['udet'] = True
            elif flds[0] == "Nbasis":
                rslt['nbasis'] = int(flds[2])
            elif ls.startswith("input fft dimension ll ="):
                rslt['LL'] = (int(flds[5]), int(flds[6]), int(flds[7]))
            elif ls.startswith("trial wf from input: "):
                rslt['trial_wfn_file'] = Ls[20:].strip()
            elif flds[0] == "Subtotal":
                rslt["Evar_noconst"] = float(flds[2])
            elif flds[:2] == ["Variational", "energy"]:
                # The slice comparison tolerates a line that has only the
                # single word "Variational".
                rslt["Evar"] = float(flds[3])
                rslt["H0"] = rslt["Evar"] - rslt["Evar_noconst"]
            elif flds[0] == "deltau,":
                rslt["deltau"] = float(flds[3])
            elif flds[0] == "beta=":
                rslt["betablk"] = float(flds[1])
            elif Ls.startswith("Using reduced k-pts:"):
                kx = float(flds[3])
                ky = float(flds[4])
                if len(flds) > 5:
                    kz = float(flds[5])
                else:
                    # The third component wrapped onto the next line.
                    kz = float(info_file.next().split()[0])
                rslt["kpt"] = (kx, ky, kz)
            elif Ls.startswith("Input Etrial="):
                rslt["Etrial_noconst"] = float(flds[2])  # no H0 yet
            elif Ls.startswith("New etrial to be used in El_bound:"):
                rslt["Etrial_noconst"] = float(flds[7])  # no H0 yet
            elif Ls.startswith("read in new anorm + etrial:"):
                rslt["anorm"] = float(flds[6])
                if len(flds) > 7:
                    rslt["Etrial_noconst"] = float(flds[7])
                else:
                    # The etrial value wrapped onto the next line.
                    rslt["Etrial_noconst"] = \
                        float(info_file.next().split()[0])
            elif Ls.startswith("itv_Em="):
                rslt["itv_Em"] = int(flds[1])
            elif Ls.startswith("itv_pc="):
                rslt["itv_pc"] = int(flds[1])
            elif Ls.startswith("itv_pc_eq="):
                rslt["itv_pc_eq"] = int(flds[1])
            elif Ls.startswith("nblk="):
                rslt["nblk"] = int(flds[1])
            elif Ls.startswith("neq="):
                rslt["neq"] = int(flds[1])
            elif Ls.startswith("ngrth="):
                rslt["ngrth"] = int(flds[1])
            elif Ls.startswith("trial E fixed at input value"):
                rslt["ngrth"] = 0
            elif Ls.startswith("nblkstep="):
                rslt["nblkstep"] = int(flds[1])
            elif Ls.startswith("nwlk="):
                rslt["nwlk"] = int(flds[1])
            elif Ls.startswith("nwlkmax="):
                rslt["nwlkmax"] = int(flds[1])
            elif Ls.startswith("nwlkmin="):
                rslt["nwlkmin"] = int(flds[1])
            elif rx_runtype % Ls:
                runtype = int(rx_runtype[1])
                rslt["runtype"] = runtype
                runtype_rec = self.runtype_map[runtype]
                rslt["constraint"], rslt["projector"] = runtype_rec[:2]
            # ---crystal and atom parameters, etc---
            # Wherever possible, we will use abinit-style keywords
            # to make this consistent with abinit.
            elif Ls.startswith("Volume of the unit cell ="):
                rslt["vol"] = float(flds[6])  # in bohr**3
            elif Ls.startswith("nspec_in ="):
                rslt["ntypat"] = int(flds[2])  # number of atomic species
                rslt["has_pseudo"] = (rslt["ntypat"] > 0)
            # ---runtime info below---
            elif Ls.startswith("OpenMP support enabled using"):
                rslt["num_threads"] = int(flds[4])
            elif Ls.startswith("parallel version using"):
                rslt["num_tasks"] = int(flds[3])
            elif Ls.startswith("hostname:"):
                rslt["run_host"] = flds[1]
            elif Ls.startswith("Program started on"):
                # Date comes from field 3, the first six characters of
                # field 5 are HHMMSS.
                rslt["start_time"] = time.strptime(
                    flds[3] + " " + flds[5][:6], "%Y%m%d %H%M%S")
            elif Ls.startswith(
                    "Planewave-AFQMC calculation for system: General electronic system"
            ):
                rslt["code_name"] = "pwqmc"
            # measurement and other complex data capture
            elif Ls.startswith("Measurement phase..."):
                self.parse_measurement0(info_file, rslt)
    finally:
        info_file.close()

    rslt.setdefault("nwlkmax", rslt.nwlk * 2)
    # Floor division is spelled out; nwlk is always parsed as an int.
    rslt.setdefault("nwlkmin", max(rslt.nwlk // 2, 1))
    # fall back to original defaults:
    rslt.setdefault("runtype", 0)
    rslt.setdefault("constraint", "phaseless cosine")
    rslt.setdefault("projector", "Elocal")
    rslt["run_mpi"] = ("num_tasks" in rslt)
    rslt["run_openmp"] = ("num_threads" in rslt)
    return rslt
def read(self, infile, vec_select=1, verbose=0, nbasis=None, out=sys.stdout):
    """Reads off molecular orbital vectors.

    Usage:
      movecs = pyqmc.gamess.datfile.movecs(fname, [options])

    Valid options:
      vec_select = <integer> (default: 1; 1-based choice of $VEC block)
      verbose = 0|1 (default: 0)
      nbasis = <integer> (default: autodetected)
      out = where verbose messages are written (default: sys.stdout)

    On return, the following attributes are set on `self`:
    filename, vec_select, comments, udet, nbasis, and `alpha` (plus `beta`
    for UHF-type data).  `alpha`/`beta` are ordinary 0-based numpy float
    arrays of shape (nbasis, norbitals), i.e. one orbital per column.
    The orbital numbers quoted in error messages are 1-based, following
    the numbering in the $VEC data.

    Returns `self`.

    Raises PyqmcDataError if the requested $VEC block cannot be found, the
    file ends inside the block, or the data is inconsistent (see below).

    This routine was translated from Gamess::ReadGamessMOVecs routine in
    my Gamess.pm perl module.
    The latter routine was derived from C2_UHF_gamess.pl dated ~20070813.

    Strict vector ordering (1, 2, 3, ..., N) is required in the $VEC data.
    We will check orbital indices strictly.
    This requires the orbitals be strictly ordered, with no skipping, etc.
    Strict checking is necessary for proper reading when we have more than
    99 basis functions.

    In anticipating for large basis size, the rule for deducing UHF-type
    movecs is more complicated.
    For nbasis >= 100, the tag number rolls back to zero, unfortunately,
    which makes thing a bit difficult: when we see a tag of " 1" again, is
    it UHF beta sector, or movec #101?
    One way we can be assured that it IS an UHF movecs is prohibiting the
    size of movecs to be greater than the deduced nbasis, which is a
    reasonable restriction.
    Then, when we apparently encounter movec "101" when nbasis==100, we
    can be sure that the 101st vector is actually beta movec #1.

    Thus UHF movecs can be detected by the following rule:
    - ( old $VecTag != 0, or old $VecIndex == $nbasis already )
      AND new $VecTag == 1 .

    FIXME: The solution above is still not foolproof in two cases:

    1) suppose we have an UHF-type movecs with nbasis=220, but each spin
    sector only has 100 orbitals listed.
    Then this will be interpreted as an RHF-type movecs with nbasis=220
    and norbitals=200.

    2) in spherical basis, maximum norbitals is <= nbasis.
    When this happens, then the deduced nbasis is not the right number of
    spherical basis functions (thus the nbasis deduced above is not right).

    But I haven't seen this case yet, so forget about them temporarily.
    """
    from wpylib.regexps import regex
    # MOVECS comments (always 3 lines preceding the $VEC block)
    rx_comment = regex(r'^--- ')
    # The inline case-insensitivity flag must lead the pattern.
    rx_vec = regex(r'(?i)^ \$vec')
    rx_endvec = regex(r'(?i)^ ?\$end')
    F = text_input(infile)
    comments = None
    found = False
    vec_blk_count = 0
    n_comment_lines = 0
    O = text_output(out, flush=True)
    spin = "alpha"
    udet = False
    AllVecs = {}
    for txt in F:
        if rx_comment % txt:
            comments = []
            n_comment_lines = 3
        if n_comment_lines > 0:
            comments.append(txt)
            n_comment_lines -= 1
        if rx_vec % txt:
            vec_blk_count += 1
            if vec_blk_count < vec_select:
                # the comments we just read (if any) are irrelevant, so
                # remove them.
                comments = None
                # Stop collecting too, otherwise the next line would be
                # appended to a nonexistent list.
                n_comment_lines = 0
                continue

            found = True
            # This is the actual movecs reading loop ---
            # The $END marker for initial orbital guess (PUNMO=.TRUE.) is
            # buggy--we must tolerate that
            try:
                txt = F.next()
            except StopIteration:
                raise PyqmcDataError(
                    ("%s:%d: Unexpected EOF while reading in $VEC data")
                    % (infile, F.lineno))
            # VecIndex = MO index to identify the whole vector
            # VecTag = MO "tag" number
            # In general VecTag is equal to VecIndex except when we have
            # >= 100 basis funcs (where VecTag has only the last two
            # digits).
            # NOTE: VecTag is *always* a 2-character string!
            VecIndex = 1
            VecTag = " 1"
            AmplIndex = 0
            Ampl = []
            Vecs = [Ampl]
            while not (rx_endvec % txt):
                NewVecTag = txt[0:2]
                # We should safely assume that VecTag > 1 at the end
                # of alpha orbitals; if that's not the case, that's
                # YOUR fault (why doing 1-basis quantum chemistry?)
                if NewVecTag != VecTag:
                    # Just in case, we are very pedantic in checking for
                    # errors here:
                    if nbasis is not None:
                        # (1) nbasis must be consistent
                        if AmplIndex != nbasis:
                            raise PyqmcDataError(
                                ("%s:%d: Inconsistent nbasis "
                                 "(original guess was = %d, most recently deduced value = %d) "
                                 "for %s vector #%d")
                                % (infile, F.lineno, nbasis, AmplIndex,
                                   spin, VecIndex))
                    else:
                        # Deduce nbasis
                        nbasis = AmplIndex
                        if nbasis == 0:
                            raise PyqmcDataError(
                                ("%s:%d: nbasis detected as zero! "
                                 "Maybe there is corruption in the input file?")
                                % (infile, F.lineno))
                        if verbose > 0:
                            O("pyqmc.gamess.movecs.read:%s: Deduced nbasis = %d\n"
                              % (infile, nbasis))

                    # UHF-type vector detection scheme: see the notes above
                    if (VecTag != " 0" or VecIndex == nbasis) \
                            and NewVecTag == " 1":
                        if verbose > 0:
                            O("pyqmc.gamess.movecs.read:%s: Found UHF-type movecs\n"
                              % (infile,))
                        if udet:
                            raise PyqmcDataError(
                                ("%s:%d: alpha and beta orbitals were already defined?! "
                                 "Maybe there is a mistake with your $VEC data?")
                                % (infile, F.lineno))
                        AllVecs[spin] = numpy.array(Vecs, dtype=float).T
                        # start all over with a new MO block
                        Ampl = []
                        Vecs = [Ampl]
                        spin = "beta"
                        udet = True
                        VecIndex = 0
                    # end UHF-type detection scheme
                    else:
                        # Some additional error checking(s)
                        if VecIndex >= nbasis:
                            # NOTE: We disallow norbitals > nbasis in our
                            # reader.
                            raise PyqmcDataError(
                                ("%s:%d: The $VEC block has more than %d orbitals, "
                                 "which is prohibited by this routine")
                                % (infile, F.lineno, nbasis))
                        Ampl = []
                        Vecs.append(Ampl)

                    AmplIndex = 0
                    # Start off a new vector
                    VecIndex += 1
                    VecTag = NewVecTag
                # end new vector/new spin sector detection

                # Strict index vs. tag checking:
                TagChk = "%2d" % (VecIndex % 100)
                if TagChk != VecTag:
                    raise PyqmcDataError(
                        ("%s:%d: Mismatch vector tag number in vector #%d "
                         "(wanted '%s', got '%s')")
                        % (infile, F.lineno, VecIndex, TagChk, VecTag))

                # the amplitudes are always stored in -n.nnnnnnnnE+nn fixed
                # format (15 characters wide)
                txtdata = txt[5:].rstrip()
                lendata = len(txtdata) // 15
                Ampl += [
                    float(txtdata[i * 15:i * 15 + 15])
                    for i in range(lendata)
                ]
                AmplIndex += lendata
                # TODO: lendata < 5 is allowed ONLY on the last line of a
                # vector; make sure we catch that.

                try:
                    txt = F.next()
                except StopIteration:
                    raise PyqmcDataError(
                        ("%s:%d: Unexpected EOF while reading in $VEC data")
                        % (infile, F.lineno))
            # end loop for reading in $VEC data

            # Finalization: do final checks, etc.
            AllVecs[spin] = numpy.array(Vecs, dtype=float).T
            if nbasis is None:
                # The block held a single vector, so no vector boundary was
                # ever crossed and nbasis could not be deduced inside the
                # loop: deduce it from the only vector read.
                nbasis = AmplIndex
                if nbasis == 0:
                    raise PyqmcDataError(
                        ("%s:%d: nbasis detected as zero! "
                         "Maybe there is corruption in the input file?")
                        % (infile, F.lineno))
                if verbose > 0:
                    O("pyqmc.gamess.movecs.read:%s: Deduced nbasis = %d\n"
                      % (infile, nbasis))
            if AmplIndex != nbasis:
                raise PyqmcDataError(
                    ("%s:%d: Inconsistent nbasis "
                     "(original guess was = %d, most recently deduced value = %d) "
                     "for %s vector #%d")
                    % (infile, F.lineno, nbasis, AmplIndex, spin, VecIndex))
            if udet:
                if AllVecs['alpha'].shape != AllVecs['beta'].shape:
                    raise PyqmcDataError(
                        ("%s:%d: Inconsistent shape of MO matrix: "
                         "(alpha = %s, beta = %s)")
                        % (infile, F.lineno,
                           AllVecs['alpha'].shape,
                           AllVecs['beta'].shape))

            if verbose > 0:
                O("pyqmc.gamess.movecs.read:%s: Total MO vectors read = %s%s\n"
                  % (infile, VecIndex, (" (UHF-type)" if udet else "")))
            # stop reading if the desired vectors have been loaded
            break
    # end main text reading loop

    if not found:
        raise PyqmcDataError(
            ("%s: Cannot find $VEC block number %s")
            % (infile, vec_select))

    # Save the reading results to the "self" record:
    self.filename = infile
    self.vec_select = vec_select
    self.comments = comments
    self.udet = udet
    self.nbasis = nbasis
    for (spin, mo) in AllVecs.items():
        setattr(self, spin, mo)

    return self
class rx_:
    """Regular expressions used to scan a program's text output.

    Attributes cover the k-point/planewave report, the SCF iteration
    block, the "Components of total free energy" block, the eigenvalue
    listing, the integrated-density table and the end-of-dataset marker.
    NOTE(review): the matched text (newkpt, ETOT, prteigrs, DATASET) looks
    like ABINIT output -- confirm.
    """
    # This sneaky info can be used to determine # of planewaves:
    # (captures named groups nband, npw and ikpt)
    kpt_npw = regex(
        r'P newkpt: treating\s+(?P<nband>[0-9]+) bands with npw=\s*(?P<npw>[0-9]+) for ikpt=\s*(?P<ikpt>[0-9]+)'
    )
    scf_begin = regex(
        r'^\s*iter\s+Etot\(hartree\)\s+deltaE\(h\)')  # marker of SCF block
    # One SCF iteration line; captures named groups Etot and Ediff.
    scf_line1 = regex(
        r'^\s*ETOT\s*[0-9\*]+\s+(?P<Etot>[-+eE0-9.]+)\s+(?P<Ediff>[-+eE0-9.]+)'
    )
    # SCF convergence message; captures the step number as numscf.
    scf_convg1 = regex(
        r'^\s*At SCF step\s+(?P<numscf>[0-9]+)\s*,\s*etot is converged')
    # Start marker and closing dashed line of the energy components block.
    E_begin = regex(r'^\s*Components of total free energy')
    E_end = regex(r'^\s*-+\s*$')
    # Energy components; each entry is
    # (pattern, result key, converter applied to group 1).
    E_parts_list = [
        (regex(r'^\s*Kinetic energy\s*=\s*([-+eE0-9.]+)'), 'E_kinetic',
         float),
        (regex(r'^\s*Hartree energy\s*=\s*([-+eE0-9.]+)'), 'E_hartree',
         float),
        (regex(r'^\s*Ewald energy\s*=\s*([-+eE0-9.]+)'), 'E_ewald', float),
        (regex(r'^\s*PspCore energy\s*=\s*([-+eE0-9.]+)'), 'E_pspcore',
         float),
        (regex(r'^\s*Loc\.?\s+psp\.?\s+energy\s*=\s*([-+eE0-9.]+)'),
         'E_psploc', float),
        (regex(r'^\s*NL\s+psp\.?\s+energy\s*=\s*([-+eE0-9.]+)'),
         'E_pspnonloc', float),
        (regex(r'^\s*>+ Internal E\s*=\s*([-+eE0-9.]+)'), 'E_internal',
         float),
        (regex(r'^\s*-kT\*entropy\s*=\s*([-+eE0-9.]+)'), 'E_smear', float),
        (regex(r'^\s*>+ Etotal\s*=\s*([-+eE0-9.]+)'), 'Etotal', float),
    ]
    # Header of the eigenvalue listing; captures nkpt and, when the
    # optional ", SPIN xxx" part is present, spin.
    eigen_begin = regex(
        r'^\s*Eigenvalues \(hartree\) for nkpt=\s*(?P<nkpt>[0-9]+)\s*k points(?:, SPIN (?P<spin>[A-Za-z]+))?:'
    )
    # Per-k-point header; captures ikpt, nband, wtk and the three k-point
    # components kx, ky, kz.
    eigen_kpt1 = regex(
        r'^\s*kpt#\s*(?P<ikpt>[0-9]+),\s*nband=\s*(?P<nband>[0-9]+), wtk=\s*(?P<wtk>[.0-9]+), kpt=\s*(?P<kx>[-+.0-9]+)\s+(?P<ky>[-+.0-9]+)\s+(?P<kz>[-+.0-9]+)'
    )
    # Notice that no more k-points will be printed.
    eigen_kpt_stop = regex(
        r'^\s*prteigrs\s*: .*do not print more k-points')
    # Column header of the per-atom integrated density table.
    densph_begin = regex(
        r'^\s*Atom\s+Sphere radius\s+Integrated_up_density\s+Integrated_dn_density\s+Total[^\s]+\s+Diff'
    )
    # Either the header of another dataset or the "END DATASET(S)" line.
    dataset_end = regex(
        r'^\s*==\s*(?:DATASET\s+[0-9]+|END DATASET\(S\))\s+=+')
class rx_:
    """Regular expressions for the top-level layout of an ABINIT output."""
    # Version banner; captures the version string as named group `version`.
    abinit_version = regex(
        r'^[. ]Version\s+(?P<version>[^\s]+)\s+of ABINIT')
    # Header of a dataset section; captures the dataset number as named
    # group `dataset`.
    dataset_begin = regex(r'^\s*==\s*DATASET\s+(?P<dataset>[0-9]+)\s+=+')
    # Marker line that introduces the echo of preprocessed input variables.
    outvars_begin = regex(
        r'-outvars: echo values of preprocessed input variables')
def parse_file_(self, filename):
    """Extracts information from an nwchem output file (from its stdout).

    Right now, this parser is only good for single-point calculations
    (i.e. no multijob or geometry optimization at this point).

    The current contents of `self` are cleared first.  The open text
    reader is kept in `self.txt_` so that the section processors
    (`process_section_input_`, `process_section_scf_`,
    `process_section_mcscf_`) can continue reading from it.  Lines outside
    those sections are matched against a list of energy patterns.

    Raises PyqmcDataError if the NWChem version banner cannot be found.

    Returns `self`.
    """
    Rx = self.rx_
    txtfile = text_input(filename,
                         skip_blank_lines=False,
                         comment_char='\0',
                         superize=True)
    self.clear()
    rslt = self
    self.txt_ = txtfile

    # This will also serve as an initial screening of the file
    try:
        L = txtfile.seek_text(Rx.nwchem_version.rx)
    except Exception:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit must not be reported as a bad input file.
        raise PyqmcDataError(
            "Cannot determine nwchem version in `%s'; perhaps it is not an nwchem output file"
            % filename)
    rslt['info_code_version'] = (Rx.nwchem_version % L).group(1)
    rslt['info_mtime'] = time.localtime(os.path.getmtime(filename))

    # Each entry: (pattern, result key, converter applied to group 1)
    search_patterns = [
        (regex(r'^\s*Total SCF energy *= *([-+eE0-9.]+)'), 'E_SCF', float),
        (regex(r'^\s*Total MCSCF energy *= *([-+eE0-9.]+)'), 'E_MCSCF',
         float),
        (regex(r'^\s*Total DFT energy *= *([-+eE0-9.]+)'), 'E_DFT', float),
        (regex(r'^\s*CCSD total energy */ *hartree *= *([-+eE0-9.]+)'),
         'E_CCSD', float),
        (regex(
            r'^\s*CCSD\(T\) total energy */ *hartree *= *([-+eE0-9.]+)'),
         'E_CCSD_T', float),
        (regex(r'^\s*Nuclear repulsion energy *= *([-+eE0-9.]+)'),
         'E_nuclear', float),
        (regex(r'^\s*Total MP2 energy\s+([-+eE0-9.]+)'), 'E_MP2',
         float),  # old MP2 module
        # The following are not that great.
        # We may want to use stricter scanner which takes into account the
        # context (which section) we are in
        #(regex(r'^\s*functions\s*=\s*([0-9]+)\s*$'), 'nbasis', int),
    ]

    for L in txtfile:
        flds = L.split()
        if len(flds) == 0:
            continue
        elif self.detect_section_(Rx.hdr_input, L=L):
            # Input module output
            self.process_section_input_()
        elif self.detect_section_(Rx.hdr_scf, L=L):
            # SCF module output
            self.process_section_scf_()
        elif self.detect_section_(Rx.hdr_mcscf, L=L):
            # MCSCF module output
            self.process_section_mcscf_()
        else:
            for (pat, act, arg1) in search_patterns:
                if pat % L:
                    if isinstance(act, basestring):
                        rslt[act] = arg1(pat[1])
                    break

    # Provide the electron counts under the nelec_up/nelec_dn keys as well.
    if 'nalpha_elec' in self:
        self['nelec_up'] = self.nalpha_elec
        self['nelec_dn'] = self.nbeta_elec

    return rslt
class rx_: nwchem_version = regex( r'^\s*Northwest Computational Chemistry Package \(NWChem\)\s+([-0-9a-zA-Z_.]+)' ) # Obsoleted (not used) e_scf = regex(r'^\s*Total (SCF|DFT) energy *= *([-+eE0-9.]+)') e_ccsd = regex(r'^\s*CCSD total energy */ *hartree *= *([-+eE0-9.]+)') e_ccsd_t = regex( r'^\s*CCSD\(T\) total energy */ *hartree *= *([-+eE0-9.]+)') e_nucl = regex(r'^\s*Nuclear repulsion energy *= *([-+eE0-9.]+)') # Section headers hdr_input = regex(r'\s*NWChem Input Module\s*$') hdr_scf = regex(r'\s*NWChem SCF Module\s*$') hdr_mcscf = regex(r'\s*NWChem Direct MCSCF Module\s*$') underline = regex(r'\s*-+\s*$') # The following are not too foolproof, but nobody is so foolish to # use these as calculation title, isn't it? input_notitle = regex( r'^\s*Scaling coordinates for geometry ".*" by +[0-9.]+') scf_notitle = regex(r'^\s*ao basis += +".*"')