def process_input(raw_input, print_level=1): """Function to preprocess *raw input*, the text of the input file, then parse it, validate it for format, and convert it into legitimate Python. *raw_input* is printed to the output file unless *print_level* =0. Does a series of regular expression filters, where the matching portion of the input is replaced by the output of the corresponding function (in this module) call. Returns a string concatenating module import lines, a copy of the user's .psi4rc files, a setting of the scratch directory, a dummy molecule, and the processed *raw_input*. """ # Check if the infile is actually an outfile (yeah we did) psi4_id = re.compile(r'Psi4: An Open-Source Ab Initio Electronic Structure Package') if re.search(psi4_id, raw_input): input_lines = raw_input.split("\n") input_re = re.compile(r'^\s*?\=\=> Input File <\=\=') input_start = -1 for line_count in range(len(input_lines)): line = input_lines[line_count] if re.match(input_re, line): input_start = line_count + 3 break stop_re = re.compile(r'^-{74}') input_stop = -1 for line_count in range(input_start, len(input_lines)): line = input_lines[line_count] if re.match(stop_re, line): input_stop = line_count break if input_start == -1 or input_stop == -1: message = ('Cannot extract infile from outfile.') raise TestComparisonError(message) raw_input = '\n'.join(input_lines[input_start:input_stop]) raw_input += '\n' # Echo the infile on the outfile if print_level > 0: core.print_out("\n ==> Input File <==\n\n") core.print_out("--------------------------------------------------------------------------\n") core.print_out(raw_input) core.print_out("--------------------------------------------------------------------------\n") core.flush_outfile() #NOTE: If adding mulitline data to the preprocessor, use ONLY the following syntax: # function [objname] { ... } # which has the regex capture group: # # r'^(\s*?)FUNCTION\s*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE # # your function is in capture group #1 # your objname is in capture group #2 # your data is in capture group #3 # Sections that are truly to be taken literally (spaces included) # Must be stored then subbed in the end to escape the normal processing # Process "cfour name? { ... }" cfour = re.compile(r'^(\s*?)cfour[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(cfour, process_cfour_command, raw_input) # Return from handling literal blocks to normal processing # Nuke all comments comment = re.compile(r'(^|[^\\])#.*') temp = re.sub(comment, '', temp) # Now, nuke any escapes from comment lines comment = re.compile(r'\\#') temp = re.sub(comment, '#', temp) # Check the brackets and parentheses match up, as long as this is not a pickle input file #if not re.search(r'pickle_kw', temp): # check_parentheses_and_brackets(temp, 1) # First, remove everything from lines containing only spaces blankline = re.compile(r'^\s*$') temp = re.sub(blankline, '', temp, re.MULTILINE) # Look for things like # set matrix [ # [ 1, 2 ], # [ 3, 4 ] # ] # and put them on a single line temp = process_multiline_arrays(temp) # Process all "set name? { ... }" set_commands = re.compile(r'^(\s*?)set\s*([-,\w]*?)[\s=]*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(set_commands, process_set_commands, temp) # Process all individual "set (module_list) key {[value_list] or $value or value}" # N.B. We have to be careful here, because \s matches \n, leading to potential problems # with undesired multiline matches. Better the double-negative [^\S\n] instead, which # will match any space, tab, etc., except a newline set_command = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)[^\S\n]+)?(\w+)(?:[^\S\n]|=)+((\[.*\])|(\$?[-+,*()\.\w]+))\s*$', re.MULTILINE | re.IGNORECASE) temp = re.sub(set_command, process_set_command, temp) # Process "molecule name? { ... }" molecule = re.compile(r'^(\s*?)molecule[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(molecule, process_molecule_command, temp) # Process "external name? { ... }" external = re.compile(r'^(\s*?)external[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(external, process_external_command, temp) # Process "pcm name? { ... }" pcm = re.compile(r'^(\s*?)pcm[=\s]*(\w*?)\s*\{(.*?)^\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(pcm, process_pcm_command, temp) # Then remove repeated newlines multiplenewlines = re.compile(r'\n+') temp = re.sub(multiplenewlines, '\n', temp) # Process " extract" extract = re.compile(r'(\s*?)(\w+)\s*=\s*\w+\.extract_subsets.*', re.IGNORECASE) temp = re.sub(extract, process_extract_command, temp) # Process "print" and transform it to "core.print_out()" #print_string = re.compile(r'(\s*?)print\s+(.*)', re.IGNORECASE) #temp = re.sub(print_string, process_print_command, temp) # Process "memory ... " memory_string = re.compile(r'(\s*?)memory\s+([+-]?\d*\.?\d+)\s+([KMG]i?B)', re.IGNORECASE) temp = re.sub(memory_string, process_memory_command, temp) # Process "basis name? { ... }" basis_block = re.compile(r'^(\s*?)(basis|df_basis_scf|df_basis_mp2|df_basis_cc|df_basis_sapt)[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(basis_block, process_basis_block, temp) # Process literal blocks by substituting back in lit_block = re.compile(r'literals_psi4_yo-(\d*\d)') temp = re.sub(lit_block, process_literal_blocks, temp) future_imports = [] def future_replace(m): future_imports.append(m.group(0)) return '' future_string = re.compile('^from __future__ import .*$', flags=re.MULTILINE) temp = re.sub(future_string, future_replace, temp) # imports imports = '\n'.join(future_imports) + '\n' imports += 'import psi4\n' imports += 'from psi4 import *\n' imports += 'from psi4.core import *\n' imports += 'from psi4.driver.diatomic import anharmonicity\n' imports += 'from psi4.driver.gaussian_n import *\n' imports += 'from psi4.driver.aliases import *\n' imports += 'from psi4.driver.driver_cbs import xtpl_highest_1, scf_xtpl_helgaker_2, scf_xtpl_helgaker_3, corl_xtpl_helgaker_2\n' imports += 'from psi4.driver.wrapper_database import database, db, DB_RGT, DB_RXN\n' imports += 'from psi4.driver.wrapper_autofrag import auto_fragments\n' imports += 'from psi4.driver.p4const.physconst import *\n' imports += 'psi4_io = core.IOManager.shared_object()\n' # psirc (a baby PSIthon script that might live in ~/.psi4rc) psirc_file = os.path.expanduser('~') + os.path.sep + '.psi4rc' if os.path.isfile(psirc_file): fh = open(psirc_file) psirc = fh.read() fh.close() psirc = psirc.replace('psi4.IOManager', 'psi4.core.IOManager') else: psirc = '' # Override scratch directory if user specified via env_var scratch = '' scratch_env = core.get_environment('PSI_SCRATCH') if len(scratch_env): scratch += 'psi4_io.set_default_path("%s")\n' % (scratch_env) blank_mol = 'geometry("""\n' blank_mol += '0 1\nH\nH 1 0.74\n' blank_mol += '""","blank_molecule_psi4_yo")\n' temp = imports + psirc + scratch + blank_mol + temp # Move up the psi4.core namespace for func in dir(core): temp = temp.replace("psi4." + func, "psi4.core." + func) return temp
def process_input(raw_input, print_level=1): """Function to preprocess *raw input*, the text of the input file, then parse it, validate it for format, and convert it into legitimate Python. *raw_input* is printed to the output file unless *print_level* =0. Does a series of regular expression filters, where the matching portion of the input is replaced by the output of the corresponding function (in this module) call. Returns a string concatenating module import lines, a copy of the user's .psi4rc files, a setting of the scratch directory, a dummy molecule, and the processed *raw_input*. """ # Check if the infile is actually an outfile (yeah we did) psi4_id = re.compile( r'Psi4: An Open-Source Ab Initio Electronic Structure Package') if re.search(psi4_id, raw_input): input_lines = raw_input.split("\n") input_re = re.compile(r'^\s*?\=\=> Input File <\=\=') input_start = -1 for line_count in range(len(input_lines)): line = input_lines[line_count] if re.match(input_re, line): input_start = line_count + 3 break stop_re = re.compile(r'^-{74}') input_stop = -1 for line_count in range(input_start, len(input_lines)): line = input_lines[line_count] if re.match(stop_re, line): input_stop = line_count break if input_start == -1 or input_stop == -1: message = ('Cannot extract infile from outfile.') raise TestComparisonError(message) raw_input = '\n'.join(input_lines[input_start:input_stop]) raw_input += '\n' # Echo the infile on the outfile if print_level > 0: core.print_out("\n ==> Input File <==\n\n") core.print_out( "--------------------------------------------------------------------------\n" ) core.print_out(raw_input) core.print_out( "--------------------------------------------------------------------------\n" ) core.flush_outfile() #NOTE: If adding mulitline data to the preprocessor, use ONLY the following syntax: # function [objname] { ... } # which has the regex capture group: # # r'^(\s*?)FUNCTION\s*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE # # your function is in capture group #1 # your objname is in capture group #2 # your data is in capture group #3 # Sections that are truly to be taken literally (spaces included) # Must be stored then subbed in the end to escape the normal processing # Process "cfour name? { ... }" cfour = re.compile(r'^(\s*?)cfour[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(cfour, process_cfour_command, raw_input) # Return from handling literal blocks to normal processing # Nuke all comments comment = re.compile(r'(^|[^\\])#.*') temp = re.sub(comment, '', temp) # Now, nuke any escapes from comment lines comment = re.compile(r'\\#') temp = re.sub(comment, '#', temp) # Check the brackets and parentheses match up, as long as this is not a pickle input file #if not re.search(r'pickle_kw', temp): # check_parentheses_and_brackets(temp, 1) # First, remove everything from lines containing only spaces blankline = re.compile(r'^\s*$') temp = re.sub(blankline, '', temp, re.MULTILINE) # Look for things like # set matrix [ # [ 1, 2 ], # [ 3, 4 ] # ] # and put them on a single line temp = process_multiline_arrays(temp) # Process all "set name? { ... }" set_commands = re.compile(r'^(\s*?)set\s*([-,\w]*?)[\s=]*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(set_commands, process_set_commands, temp) # Process all individual "set (module_list) key {[value_list] or $value or value}" # N.B. We have to be careful here, because \s matches \n, leading to potential problems # with undesired multiline matches. Better the double-negative [^\S\n] instead, which # will match any space, tab, etc., except a newline set_command = re.compile( r'^(\s*?)set\s+(?:([-,\w]+)[^\S\n]+)?(\w+)(?:[^\S\n]|=)+((\[.*\])|(\$?[-+,*()\.\w]+))\s*$', re.MULTILINE | re.IGNORECASE) temp = re.sub(set_command, process_set_command, temp) # Process "molecule name? { ... }" molecule = re.compile(r'^(\s*?)molecule[=\s]*(\S*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(molecule, process_molecule_command, temp) # Process "external name? { ... }" external = re.compile(r'^(\s*?)external[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(external, process_external_command, temp) # Process "pcm name? { ... }" pcm = re.compile(r'^(\s*?)pcm[=\s]*(\w*?)\s*\{(.*?)^\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(pcm, process_pcm_command, temp) # Then remove repeated newlines multiplenewlines = re.compile(r'\n+') temp = re.sub(multiplenewlines, '\n', temp) # Process " extract" extract = re.compile(r'(\s*?)(\w+)\s*=\s*\w+\.extract_subsets.*', re.IGNORECASE) temp = re.sub(extract, process_extract_command, temp) # Process "print" and transform it to "core.print_out()" #print_string = re.compile(r'(\s*?)print\s+(.*)', re.IGNORECASE) #temp = re.sub(print_string, process_print_command, temp) # Process "memory ... " memory_string = re.compile(r'(\s*?)memory\s+(\d*\.?\d+)\s*([KMGTPBE]i?B)', re.IGNORECASE) temp = re.sub(memory_string, process_memory_command, temp) # Process "basis name? { ... }" basis_block = re.compile( r'^(\s*?)(basis|df_basis_scf|df_basis_mp2|df_basis_cc|df_basis_sapt)[=\s]*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE) temp = re.sub(basis_block, process_basis_block, temp) # Process literal blocks by substituting back in lit_block = re.compile(r'literals_psi4_yo-(\w{8})') temp = re.sub(lit_block, process_literal_blocks, temp) future_imports = [] def future_replace(m): future_imports.append(m.group(0)) return '' future_string = re.compile('^from __future__ import .*$', flags=re.MULTILINE) temp = re.sub(future_string, future_replace, temp) # imports imports = '\n'.join(future_imports) + '\n' imports += 'import psi4\n' imports += 'from psi4 import *\n' imports += 'from psi4.core import *\n' imports += 'from psi4.driver.diatomic import anharmonicity\n' imports += 'from psi4.driver.gaussian_n import *\n' imports += 'from psi4.driver.aliases import *\n' imports += 'from psi4.driver.driver_cbs import *\n' imports += 'from psi4.driver.wrapper_database import database, db, DB_RGT, DB_RXN\n' imports += 'from psi4.driver.wrapper_autofrag import auto_fragments\n' imports += 'from psi4.driver.constants.physconst import *\n' imports += 'psi4_io = core.IOManager.shared_object()\n' # psirc (a baby PSIthon script that might live in ~/.psi4rc) psirc_file = os.path.expanduser('~') + os.path.sep + '.psi4rc' if os.path.isfile(psirc_file): fh = open(psirc_file) psirc = fh.read() fh.close() psirc = psirc.replace('psi4.IOManager', 'psi4.core.IOManager') else: psirc = '' # Override scratch directory if user specified via env_var scratch = '' scratch_env = core.get_environment('PSI_SCRATCH') if len(scratch_env): scratch += 'psi4_io.set_default_path("%s")\n' % (scratch_env) blank_mol = 'geometry("""\n' blank_mol += '0 1\nH\nH 1 0.74\n' blank_mol += '""","blank_molecule_psi4_yo")\n' temp = imports + psirc + scratch + blank_mol + temp # Move up the psi4.core namespace for func in dir(core): temp = temp.replace("psi4." + func, "psi4.core." + func) # Move pseudonamespace for physconst into proper namespace from psi4.driver.p4util import constants for pc in dir(constants.physconst): if not pc.startswith('__'): temp = temp.replace('psi_' + pc, 'psi4.constants.' + pc) return temp