def parse(self):
    """
    Parse ``self.file`` with the grammar rules read from ``self.parsing_filename``.

    Both files are opened with ``file2`` and handed to ``Grammar``; the
    resulting ``parsed_container`` is stored in ``self.G_parsed``.
    Both files are closed even when the grammar parser raises.

    :return: None
    """
    GI = file2(self.parsing_filename)
    try:
        FI = file2(self.file)
        try:
            G = Grammar(GI, FI)
            G.parse()
            self.G_parsed = G.parsed_container
            # Debugging aid (disabled): round-trip the parsed container through a pickle.
            # import pickle
            # pickle.dump(G.parsed_container, open('Interface/G.p', 'wb'))
            # self.G_parsed = pickle.load(open('Interface/G.p', 'rb'))
        finally:
            # Same closing order as before: data file first, then the rules file.
            FI.close()
    finally:
        GI.close()
    log.debug('%s parsed successfully' % (self.file))
    return
def LookUpChkInLog(self):
    """
    Look for the checkpoint file name (a ``%chk=...`` line) in ``self.fileL``.

    :return: the text after the first '=' on the matched line, prefixed with
             the directory part of ``self.fileL`` (everything up to and
             including the last '/'); an empty string when no usable
             ``%chk`` line is found.
    """
    FL = file2(self.fileL)
    try:
        # A falsy result means "not found".
        # NOTE(review): presumably skip_until returns the index of the matched
        # pattern, so reaching the '^ #' line first (index 0) also counts as
        # "not found" -- confirm against file2.skip_until.
        found = FL.skip_until(['^ #', '(?i)%chk'], regexp=True)
        # Grab the matched line before the file is closed.
        line = FL.s if found else ''
    finally:
        # The handle used to be left open on every path.
        FL.close()

    # Split on the first '=' only, so that a name containing '=' is not truncated.
    _, sep, chk_name = line.strip().partition('=')
    if not found or not sep:
        # A matched line without '=' used to raise IndexError; treat it as not found.
        log.debug('Checkpoint file name not found in %s' % (self.fileL))
        return ''

    log.debug('Checkpoint file name found in %s' % (self.fileL))
    slash = self.fileL.rfind('/') + 1
    path = self.fileL[:slash]
    return path + chk_name
def postprocess(self):
    """
    Run the post-processing rules over the parsed data.

    A ``Processing`` object is built from the rules file
    ``self.processing_filename`` and ``self.G_parsed``, stored in ``self.P``
    and executed.  When the processed data holds a value for 'gbasis',
    ``self.postprocess_basis()`` is called afterwards.
    """
    self.P = Processing(PI=file2(self.processing_filename),
                        parsed=self.G_parsed)
    self.P.postprocess()

    # GAMESS-specific edits
    basis_defined = self.P.parsed.last_value('gbasis') is not None
    if basis_defined:
        self.postprocess_basis()

    log.debug('%s postprocessed successfully' % self.file)
def parse(self):
    """
    Parses Gaussian log file, step by step

    After ``FI.skip_n(2)``, ``ArchiveGauStep`` objects are read one after
    another; each is parsed and postprocessed, and appended to
    ``self.steps`` until a step reports ``blanc``.  The file is closed
    even when a step raises.

    :return: None
    """
    FI = file2(self.file)
    try:
        FI.skip_n(2)
        while True:
            step = ArchiveGauStep(FI)
            step.parse()
            step.postprocess()
            # A blank step marks the end of the data; it is not stored.
            if step.blanc:
                break
            self.steps.append(step)
    finally:
        FI.close()

    log.debug('%s parsed successfully' % (self.file))
    return
def parse(self):
    """
    Parses Orca file, step by step

    ``OrcaStep`` objects are read one after another; each is parsed and
    postprocessed, and appended to ``self.steps`` until a step reports
    ``blanc``.

    When the file cannot be opened, the error is logged and the method
    returns without touching ``self.steps``.  The file is closed even when
    a step raises.

    :return: None
    """
    try:
        FI = file2(self.file)
    except Exception:
        # Previously execution continued past this point and failed on the
        # unbound name FI; stop here instead.
        log.error('Cannot open %s for reading' %(self.file))
        return
    log.debug('%s was opened for reading' %(self.file))

    try:
        while True:
            step = OrcaStep(FI)
            step.parse()
            step.postprocess()
            # A blank step marks the end of the data; it is not stored.
            if step.blanc:
                break
            self.steps.append(step)
    finally:
        FI.close()

    log.debug('%s parsed successfully' % (self.file))
    return
if '->' in s:  # has to be the last_value one
    self.command_assign(s)
return
# NOTE(review): the statements above are the tail of a definition whose header
# lies outside the visible source; they are reproduced unchanged.


def postprocess(self):
    """
    Feed every line of ``self.PI`` to ``self.process_command``.

    Everything from the first '#' on is dropped and trailing whitespace is
    stripped before a line is handed over, so comment-only and blank lines
    reach ``process_command`` as empty strings.

    :return: None
    """
    for s_full in self.PI:
        # Text after '#' is treated as a comment.
        s_nocomment = s_full.split('#')[0].rstrip()
        self.process_command(s_nocomment)
    return


if __name__ == "__main__":
    # Ad-hoc driver: applies the GAMESS processing rules to a previously
    # pickled parse result and prints the outcome.
    import sys
    sys.path.append('..')
    from Settings import Settings
    Top.settings = Settings()
    Top.settings.detailed_print = True

    from Tools.file2 import file2
    PI = file2('../../parsing-rules/gamess_processing.txt')
    # NOTE(review): unpickling runs code embedded in the file -- only load a
    # 'G.p' that was produced locally.  The file handle is never closed.
    G_parsed = pickle.load( open('G.p', 'rb') )
    P = Processing(PI=PI, parsed=G_parsed)
    P.postprocess()
    print(P)
def parse(self):
    """
    Parse the NBO analysis section of the output.

    Input is read from ``self.FI`` when it is set, otherwise ``self.file``
    is opened with ``file2``.  The method fills ``self.NBO_version``,
    ``self.options`` and ``self.comments``, then one result set
    (``self.setAB``) or, when an 'Alpha spin orbitals' header is found,
    two of them (``self.setA`` and ``self.setB``).  For every result set it
    reads the Rydberg and non-Lewis populations, the NBOs, the hybrid
    bond-bending deviations, second-order perturbation entries and, when a
    reordering table is present, the ``chk_index`` of each NBO.

    Returns early (after logging) when the output asks the user to check
    the input data or when the NBO analysis was skipped by request.

    :return: None
    """
    # NOTE(review): a file opened here (self.FI not set) is never closed.
    if self.FI:
        FI = self.FI
    else:
        FI = file2(self.file)

    # Get NBO version
    FI.skip_until('***')
    self.NBO_version = re.search(r'\*+(.*?)\*+', FI.s).groups()[0].strip()

    # Options
    self.options = Tools.HTML.brn.join(
        FI.find_text_block(start_match='/ : '))

    # Density
    self.comments += FI.nstrip() + Tools.HTML.brn

    # Job title
    FI.skip_n()
    title = FI.nstrip()
    # NOTE(review): this assignment replaces the density line appended to
    # self.comments just above -- confirm whether '+=' was intended.
    self.comments = title[title.find(':') + 1:].strip()

    # Read some atomic information
    self.setAB.read_atomic_info(FI)

    # Population of Rydberg orbitals
    FI.skip_until('Natural Rydberg Basis')
    self.setAB.NatRydbergPop = FI.s.split()[3]

    # 'where' is compared against the position of each pattern in the list.
    where = FI.skip_until([
        'Alpha spin orbitals', 'Total non-Lewis',
        'Please check you input data', 'NBO analysis skipped by request'
    ])

    # If 'Alpha spin orbitals' then redo the analysis;
    # Expect beta spin block
    if where == 0:
        self.OpenShell = True
        setnames = ('setA', 'setB')
        log.debug(
            'Open-shell molecule, NBO analysis will be performed separately for alpha- and beta-orbitals'
        )
    elif where == 1:
        setnames = ('setAB', )
    elif where == 2:
        log.warning('Please check your NBO file')
        return
    elif where == 3:
        log.info('Truncated NBO analysis detected')
        return
    # NOTE(review): any other value of 'where' leaves 'setnames' unbound and
    # the loop below fails -- confirm what skip_until yields when nothing matches.

    # Raw strings keep the regex escapes (\D, \d, \s, ...) out of Python's own
    # string-escape processing; the patterns themselves are unchanged.
    directionality_re = re.compile(
        r'^\D*(\d+)\.(?:.*?\.){4}.\s+(\S+).*?\s+(\S+)$')
    sopta_re = re.compile(
        r'^\D*(\d+)\..*?\/(\d+)\..*?(\S+)\s+(\S+)\s+(\S+)$')

    # Here, start a huge loop over A/B sets (or just an A+B set if we have closed-shell)
    for rSet in setnames:
        if self.OpenShell:
            setattr(self, rSet, nbo_result())
            nr = getattr(self, rSet)
            nr.read_atomic_info(FI)
            FI.skip_until('Natural Rydberg Basis')
            nr.NatRydbergPop = FI.s.split()[3]
        else:
            nr = self.setAB

        # Read in non-Lewis population
        # (a falsy result, which includes index 0 = 'skipped by request',
        # moves on to the next set)
        if not FI.skip_until(
                ['NBO analysis skipped by request', 'Total non-Lewis']):
            continue
        nr.NonLewis = FI.s.split()[2]

        # Read in NBOs
        FI.skip_until('Bond orbital/ Coefficients/ Hybrids', offset=2)
        # The orbital list ends at the first blank line.
        while FI.s.rstrip():
            nbo = nbo_orb()
            nbo.parse(FI, nr)
            nr.orbs.append(nbo)

        # Read in directionality
        d = FI.find_text_block(
            start_match='NHO Directionality and "Bond Bending"',
            start_offset=10)
        for s in d:
            rs = directionality_re.search(s)
            if rs:
                nbo_id, h1_dev, h2_dev = rs.groups()
                nbo = nr.nboByID(nbo_id)
                nbo.nhos[0].bond_bending = h1_dev
                nbo.nhos[1].bond_bending = h2_dev

        # Second order perturbation analysis
        #d = FI.find_text_block(
        #StartMatch = 'Second Order Perturbation Theory Analysis of Fock Matrix in NBO Basis', StartOffset = 9,
        #EndMatch = 'Natural Bond Orbitals (Summary):'
        #)
        # NOTE(review): with the block search above disabled, this loop scans
        # the directionality lines in 'd' again -- confirm this is intended.
        for s in d:
            rs = sopta_re.search(s)
            if not rs:
                continue
            sopta = nbo_sopta()
            sopta.parse(rs, nr)
            nr.sopta.append(sopta)

        # Trying to understand where we are at
        try:
            i = FI.skip_until([
                'Leave Link 607', 'Reordering of NBOs for storage',
                'Beta spin orbitals'
            ])
        except StopIteration:
            # EOF reached
            break

        if i == 1:
            d = FI.find_text_block(end_match='Labels of output orbitals')
            reordered = []
            for s in d:
                # Only the part after the first ':' holds the NBO ids.
                colon_pos = s.find(':') + 1
                reordered.extend(s[colon_pos:].strip().split())
            # The 1-based position in the table is stored as a string in chk_index.
            for position, stored_id in enumerate(reordered, 1):
                nbo = nr.nboByID(stored_id)
                nbo.chk_index = str(position)
    return
def parse(self):
    """
    Here, .fchk will be parsed as a text file
    Probably, we start here, because .fchk contains valuable information which might be used

    The first line is stored in ``self.comments``; fixed columns of the
    second line give ``self.JobType``, ``self.lot`` and ``self.basis``.
    Every following labeled item is stored in ``self.parsedProps`` under its
    label: scalars as a stripped string, arrays as a list of stripped strings.

    When the file cannot be opened, the error is logged and the method
    returns with an empty ``self.parsedProps``.  The file is closed even
    when parsing raises.

    File layout (http://www.gaussian.com/g_tech/g_ur/f_formchk.htm):

    Scalar values appear on the same line as their data label: a string
    describing the data item, a flag indicating the data type, and the value.
        Integer scalars:          Name,I,IValue  format A40,3X,A1,5X,I12
        Real scalars:             Name,R,Value   format A40,3X,A1,5X,E22.15
        Character string scalars: Name,C,Value   format A40,3X,A1,5X,A12
        Logical scalars:          Name,L,Value   format A40,3X,A1,5X,L1

    Vector and array data sections begin with a line naming the data and
    giving the type and number of values (format A40,3X,A1,3X,'N=',I12; the
    'N=' marks an array), followed by the data on succeeding lines:
        Integer arrays:                  Name,I,Num  elements in format 6I12
        Real arrays:                     Name,R,Num  elements in format 5E16.8
        Character string arrays (1st):   Name,C,Num  elements in format 5A12
        Character string arrays (2nd):   Name,H,Num  elements in format 9A8
        Logical arrays:                  Name,L,Num  elements in format 72L1
    The Real format ensures at least one space between elements.

    All quantities are in atomic units and in the standard orientation, if
    that was determined by the Gaussian run.

    :return: None
    """
    # Type flag -> [values per line, width of one value], from the formats above.
    # The per-line counts stay floats so the line-count division is never integer division.
    format_arrays = {
        'I' : [6.,12],
        'R' : [5.,16],
        'C' : [5.,12],
        'H' : [9.,8],
        'L' : [72.,1],
    }

    def split_array(s,reclength):
        # Cut one data line into fixed-width records.  The line terminator is
        # removed first so that '\r\n' endings do not yield an empty extra record.
        s = s.rstrip('\r\n')
        nrec = int(math.ceil(len(s)/float(reclength)))
        return [s[reclength*i:reclength*(i+1)].strip() for i in range(nrec)]

    self.parsedProps = {}

    try:
        FI = file2(self.file)
    except Exception:
        # Previously execution continued and failed on the unbound name FI.
        log.error('Cannot open %s for reading' %(self.file))
        return

    try:
        self.comments = next(FI).rstrip()
        s = next(FI).rstrip()
        self.JobType, self.lot, self.basis = s[0:10],s[10:20],s[70:80]
        while True:
            s = next(FI)
            if FI.eof:
                break
            s = s.rstrip()
            # 'N=' in columns 47-48 marks an array header.
            array_mark = (s[47:49] == 'N=')
            if array_mark:
                value = []
                prop, vtype, nrec = s[:40].strip(), s[43], int(s[49:])
                fa = format_arrays[vtype]
                nlines = int(math.ceil(nrec/fa[0]))
                for _ in range(nlines):
                    s = next(FI)
                    value.extend(split_array(s,fa[1]))
            else:
                prop, vtype, value = s[:40].strip(), s[43], s[49:].strip()
            self.parsedProps[prop] = value
    except StopIteration:
        log.warning('Unexpected EOF')
    finally:
        FI.close()

    log.debug('%s parsed successfully' % (self.file))
    return
def parse(self):
    """
    Here, .fchk will be parsed as a text file
    Probably, we start here, because .fchk contains valuable information which might be used

    The first line is stored in ``self.comments``; fixed columns of the
    second line give ``self.JobType``, ``self.lot`` and ``self.basis``.
    Every following labeled item is stored in ``self.parsedProps`` under its
    label: scalars as a stripped string, arrays as a list of stripped strings.

    When the file cannot be opened, the error is logged and the method
    returns with an empty ``self.parsedProps``.  The file is closed even
    when parsing raises.

    File layout (http://www.gaussian.com/g_tech/g_ur/f_formchk.htm):

    Scalar values appear on the same line as their data label: a string
    describing the data item, a flag indicating the data type, and the value.
        Integer scalars:          Name,I,IValue  format A40,3X,A1,5X,I12
        Real scalars:             Name,R,Value   format A40,3X,A1,5X,E22.15
        Character string scalars: Name,C,Value   format A40,3X,A1,5X,A12
        Logical scalars:          Name,L,Value   format A40,3X,A1,5X,L1

    Vector and array data sections begin with a line naming the data and
    giving the type and number of values (format A40,3X,A1,3X,'N=',I12; the
    'N=' marks an array), followed by the data on succeeding lines:
        Integer arrays:                  Name,I,Num  elements in format 6I12
        Real arrays:                     Name,R,Num  elements in format 5E16.8
        Character string arrays (1st):   Name,C,Num  elements in format 5A12
        Character string arrays (2nd):   Name,H,Num  elements in format 9A8
        Logical arrays:                  Name,L,Num  elements in format 72L1
    The Real format ensures at least one space between elements.

    All quantities are in atomic units and in the standard orientation, if
    that was determined by the Gaussian run.

    :return: None
    """
    # Type flag -> [values per line, width of one value], from the formats above.
    # The per-line counts stay floats so the line-count division is never integer division.
    format_arrays = {
        'I': [6., 12],
        'R': [5., 16],
        'C': [5., 12],
        'H': [9., 8],
        'L': [72., 1],
    }

    def split_array(s, reclength):
        # Cut one data line into fixed-width records.  The line terminator is
        # removed first so that '\r\n' endings do not yield an empty extra record.
        s = s.rstrip('\r\n')
        nrec = int(math.ceil(len(s) / float(reclength)))
        return [
            s[reclength * i:reclength * (i + 1)].strip() for i in range(nrec)
        ]

    self.parsedProps = {}

    try:
        FI = file2(self.file)
    except Exception:
        # Previously execution continued and failed on the unbound name FI.
        log.error('Cannot open %s for reading' % (self.file))
        return

    try:
        self.comments = next(FI).rstrip()
        s = next(FI).rstrip()
        self.JobType, self.lot, self.basis = s[0:10], s[10:20], s[70:80]
        while True:
            s = next(FI)
            if FI.eof:
                break
            s = s.rstrip()
            # 'N=' in columns 47-48 marks an array header.
            array_mark = (s[47:49] == 'N=')
            if array_mark:
                value = []
                prop, vtype, nrec = s[:40].strip(), s[43], int(s[49:])
                fa = format_arrays[vtype]
                nlines = int(math.ceil(nrec / fa[0]))
                for _ in range(nlines):
                    s = next(FI)
                    value.extend(split_array(s, fa[1]))
            else:
                prop, vtype, value = s[:40].strip(), s[43], s[49:].strip()
            self.parsedProps[prop] = value
    except StopIteration:
        log.warning('Unexpected EOF')
    finally:
        FI.close()

    log.debug('%s parsed successfully' % (self.file))
    return