Ejemplo n.º 1
0
def process_input(raw_input, print_level=1):
    """Preprocess PSIthon input text and convert it into plain Python source.

    If *raw_input* turns out to be a Psi4 output file, the input that was
    echoed into it is extracted first. The text is then run through a series
    of regular expression filters, where each matching portion is replaced by
    the output of the corresponding handler function in this module.

    Parameters
    ----------
    raw_input : str
        Text of the input file (or of an output file containing an echoed
        input section).
    print_level : int, optional
        The input is echoed to the output file unless this is 0 (or less).

    Returns
    -------
    str
        Concatenation of the module import lines, the contents of the user's
        ``~/.psi4rc`` file (if present), a dummy molecule, and the processed
        input.

    Raises
    ------
    TestComparisonError
        If *raw_input* is an output file but the echoed input section cannot
        be located in it.

    """
    # Check if the infile is actually an outfile (yeah we did)
    psi4_id = re.compile(
        r'Psi4: An Open-Source Ab Initio Electronic Structure Package')
    if psi4_id.search(raw_input):
        input_lines = raw_input.split("\n")

        # The echo written below emits the header, a blank line and a dashed
        # rule before the input itself, hence the offset of 3.
        input_re = re.compile(r'^\s*?\=\=> Input File <\=\=')
        input_start = -1
        for line_count, line in enumerate(input_lines):
            if input_re.match(line):
                input_start = line_count + 3
                break

        # The echoed input ends at the next line starting with 74 dashes.
        # Only scan when the header was found; otherwise a start of -1 would
        # index from the end of the list.
        stop_re = re.compile(r'^-{74}')
        input_stop = -1
        if input_start != -1:
            for line_count, line in enumerate(input_lines[input_start:],
                                              start=input_start):
                if stop_re.match(line):
                    input_stop = line_count
                    break

        if input_start == -1 or input_stop == -1:
            message = ('Cannot extract infile from outfile.')
            raise TestComparisonError(message)

        raw_input = '\n'.join(input_lines[input_start:input_stop])
        raw_input += '\n'

    # Echo the infile on the outfile
    if print_level > 0:
        core.print_out("\n  ==> Input File <==\n\n")
        core.print_out(
            "--------------------------------------------------------------------------\n"
        )
        core.print_out(raw_input)
        core.print_out(
            "--------------------------------------------------------------------------\n"
        )
        core.flush_outfile()

    #NOTE: If adding multiline data to the preprocessor, use ONLY the following syntax:
    #   function [objname] { ... }
    #   which has the regex capture group:
    #
    #   r'^(\s*?)FUNCTION\s*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE
    #
    #   your function is in capture group #1
    #   your objname is in capture group #2
    #   your data is in capture group #3

    # Sections that are truly to be taken literally (spaces included)
    #   Must be stored then subbed in the end to escape the normal processing

    # Process "cfour name? { ... }"
    cfour = re.compile(r'^(\s*?)cfour[=\s]*(\w*?)\s*\{(.*?)\}',
                       re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = cfour.sub(process_cfour_command, raw_input)

    # Return from handling literal blocks to normal processing

    # Nuke all comments
    # NOTE(review): the non-backslash character matched just before '#' is
    # part of the match, so it is removed together with the comment.
    comment = re.compile(r'(^|[^\\])#.*')
    temp = comment.sub('', temp)
    # Now, nuke any escapes from comment lines
    escaped_comment = re.compile(r'\\#')
    temp = escaped_comment.sub('#', temp)

    # Check the brackets and parentheses match up, as long as this is not a pickle input file
    #if not re.search(r'pickle_kw', temp):
    #    check_parentheses_and_brackets(temp, 1)

    # First, remove everything from lines containing only spaces.
    # The flag must be given at compile time: the fourth positional argument
    # of re.sub() is *count*, not *flags*. [^\S\n] matches any whitespace
    # except a newline, so the line structure is left untouched.
    blankline = re.compile(r'^[^\S\n]+$', re.MULTILINE)
    temp = blankline.sub('', temp)

    # Look for things like
    # set matrix [
    #              [ 1, 2 ],
    #              [ 3, 4 ]
    #            ]
    # and put them on a single line
    temp = process_multiline_arrays(temp)

    # Process all "set name? { ... }"
    set_commands = re.compile(r'^(\s*?)set\s*([-,\w]*?)[\s=]*\{(.*?)\}',
                              re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = set_commands.sub(process_set_commands, temp)

    # Process all individual "set (module_list) key  {[value_list] or $value or value}"
    # N.B. We have to be careful here, because \s matches \n, leading to potential problems
    # with undesired multiline matches.  Better the double-negative [^\S\n] instead, which
    # will match any space, tab, etc., except a newline
    set_command = re.compile(
        r'^(\s*?)set\s+(?:([-,\w]+)[^\S\n]+)?(\w+)(?:[^\S\n]|=)+((\[.*\])|(\$?[-+,*()\.\w]+))\s*$',
        re.MULTILINE | re.IGNORECASE)
    temp = set_command.sub(process_set_command, temp)

    # Process "molecule name? { ... }"
    molecule = re.compile(r'^(\s*?)molecule[=\s]*(\S*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = molecule.sub(process_molecule_command, temp)

    # Process "external name? { ... }"
    external = re.compile(r'^(\s*?)external[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = external.sub(process_external_command, temp)

    # Process "pcm name? { ... }"
    pcm = re.compile(r'^(\s*?)pcm[=\s]*(\w*?)\s*\{(.*?)^\}',
                     re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = pcm.sub(process_pcm_command, temp)

    # Then remove repeated newlines
    multiplenewlines = re.compile(r'\n+')
    temp = multiplenewlines.sub('\n', temp)

    # Process " extract"
    extract = re.compile(r'(\s*?)(\w+)\s*=\s*\w+\.extract_subsets.*',
                         re.IGNORECASE)
    temp = extract.sub(process_extract_command, temp)

    # Process "print" and transform it to "core.print_out()"
    #print_string = re.compile(r'(\s*?)print\s+(.*)', re.IGNORECASE)
    #temp = re.sub(print_string, process_print_command, temp)

    # Process "memory ... "
    memory_string = re.compile(r'(\s*?)memory\s+(\d*\.?\d+)\s*([KMGTPBE]i?B)',
                               re.IGNORECASE)
    temp = memory_string.sub(process_memory_command, temp)

    # Process "basis name? { ... }"
    basis_block = re.compile(
        r'^(\s*?)(basis|df_basis_scf|df_basis_mp2|df_basis_cc|df_basis_sapt|df_basis_sad)[=\s]*(\w*?)\s*\{(.*?)\}',
        re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = basis_block.sub(process_basis_block, temp)

    # Process literal blocks by substituting back in
    lit_block = re.compile(r'literals_psi4_yo-(\w{8})')
    temp = lit_block.sub(process_literal_blocks, temp)

    # "from __future__" imports have to come first in the generated script,
    # so pull them out of the body and re-emit them ahead of everything else.
    future_imports = []

    def future_replace(m):
        future_imports.append(m.group(0))
        return ''

    future_string = re.compile('^from __future__ import .*$',
                               flags=re.MULTILINE)
    temp = future_string.sub(future_replace, temp)

    # imports
    imports = '\n'.join(future_imports) + '\n'
    imports += ''.join((
        'import psi4\n',
        'from psi4 import *\n',
        'from psi4.core import *\n',
        'from psi4.driver.diatomic import anharmonicity\n',
        'from psi4.driver.gaussian_n import *\n',
        'from psi4.driver.frac import ip_fitting, frac_traverse\n',
        'from psi4.driver.aliases import *\n',
        'from psi4.driver.driver_cbs import *\n',
        'from psi4.driver.wrapper_database import database, db, DB_RGT, DB_RXN\n',
        'from psi4.driver.wrapper_autofrag import auto_fragments\n',
        'psi4_io = core.IOManager.shared_object()\n',
    ))

    # psirc (a baby PSIthon script that might live in ~/.psi4rc)
    psirc_file = os.path.join(os.path.expanduser('~'), '.psi4rc')
    if os.path.isfile(psirc_file):
        # Context manager closes the handle even if the read fails.
        with open(psirc_file) as fh:
            psirc = fh.read()
        psirc = psirc.replace('psi4.IOManager', 'psi4.core.IOManager')
    else:
        psirc = ''

    blank_mol = 'geometry("""\n'
    blank_mol += '0 1\nH 0 0 0\nH 0.74 0 0\n'
    blank_mol += '""","blank_molecule_psi4_yo")\n'

    temp = imports + psirc + blank_mol + temp

    # Move up the psi4.core namespace
    for func in dir(core):
        temp = temp.replace("psi4." + func, "psi4.core." + func)

    # Move pseudonamespace for physconst into proper namespace
    from psi4.driver import constants
    for pc in dir(constants):
        if not pc.startswith('__'):
            temp = temp.replace('psi_' + pc, 'psi4.constants.' + pc)

    return temp
Ejemplo n.º 2
0
def process_input(raw_input, print_level=1):
    """Preprocess PSIthon input text and convert it into plain Python source.

    If *raw_input* turns out to be a Psi4 output file, the input that was
    echoed into it is extracted first. The text is then run through a series
    of regular expression filters, where each matching portion is replaced by
    the output of the corresponding handler function in this module.

    Parameters
    ----------
    raw_input : str
        Text of the input file (or of an output file containing an echoed
        input section).
    print_level : int, optional
        The input is echoed to the output file unless this is 0 (or less).

    Returns
    -------
    str
        Concatenation of the module import lines, the contents of the user's
        ``~/.psi4rc`` file (if present), a scratch-directory setting (only
        when the ``PSI_SCRATCH`` environment value is non-empty), a dummy
        molecule, and the processed input.

    Raises
    ------
    TestComparisonError
        If *raw_input* is an output file but the echoed input section cannot
        be located in it.

    """
    # Check if the infile is actually an outfile (yeah we did)
    psi4_id = re.compile(r'Psi4: An Open-Source Ab Initio Electronic Structure Package')
    if psi4_id.search(raw_input):
        input_lines = raw_input.split("\n")

        # The echo written below emits the header, a blank line and a dashed
        # rule before the input itself, hence the offset of 3.
        input_re = re.compile(r'^\s*?\=\=> Input File <\=\=')
        input_start = -1
        for line_count, line in enumerate(input_lines):
            if input_re.match(line):
                input_start = line_count + 3
                break

        # The echoed input ends at the next line starting with 74 dashes.
        # Only scan when the header was found; otherwise a start of -1 would
        # index from the end of the list.
        stop_re = re.compile(r'^-{74}')
        input_stop = -1
        if input_start != -1:
            for line_count, line in enumerate(input_lines[input_start:],
                                              start=input_start):
                if stop_re.match(line):
                    input_stop = line_count
                    break

        if input_start == -1 or input_stop == -1:
            message = ('Cannot extract infile from outfile.')
            raise TestComparisonError(message)

        raw_input = '\n'.join(input_lines[input_start:input_stop])
        raw_input += '\n'

    # Echo the infile on the outfile
    if print_level > 0:
        core.print_out("\n  ==> Input File <==\n\n")
        core.print_out("--------------------------------------------------------------------------\n")
        core.print_out(raw_input)
        core.print_out("--------------------------------------------------------------------------\n")
        core.flush_outfile()

    #NOTE: If adding multiline data to the preprocessor, use ONLY the following syntax:
    #   function [objname] { ... }
    #   which has the regex capture group:
    #
    #   r'^(\s*?)FUNCTION\s*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE
    #
    #   your function is in capture group #1
    #   your objname is in capture group #2
    #   your data is in capture group #3

    # Sections that are truly to be taken literally (spaces included)
    #   Must be stored then subbed in the end to escape the normal processing

    # Process "cfour name? { ... }"
    cfour = re.compile(r'^(\s*?)cfour[=\s]*(\w*?)\s*\{(.*?)\}',
                       re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = cfour.sub(process_cfour_command, raw_input)

    # Return from handling literal blocks to normal processing

    # Nuke all comments
    # NOTE(review): the non-backslash character matched just before '#' is
    # part of the match, so it is removed together with the comment.
    comment = re.compile(r'(^|[^\\])#.*')
    temp = comment.sub('', temp)
    # Now, nuke any escapes from comment lines
    escaped_comment = re.compile(r'\\#')
    temp = escaped_comment.sub('#', temp)

    # Check the brackets and parentheses match up, as long as this is not a pickle input file
    #if not re.search(r'pickle_kw', temp):
    #    check_parentheses_and_brackets(temp, 1)

    # First, remove everything from lines containing only spaces.
    # The flag must be given at compile time: the fourth positional argument
    # of re.sub() is *count*, not *flags*. [^\S\n] matches any whitespace
    # except a newline, so the line structure is left untouched.
    blankline = re.compile(r'^[^\S\n]+$', re.MULTILINE)
    temp = blankline.sub('', temp)

    # Look for things like
    # set matrix [
    #              [ 1, 2 ],
    #              [ 3, 4 ]
    #            ]
    # and put them on a single line
    temp = process_multiline_arrays(temp)

    # Process all "set name? { ... }"
    set_commands = re.compile(r'^(\s*?)set\s*([-,\w]*?)[\s=]*\{(.*?)\}',
                              re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = set_commands.sub(process_set_commands, temp)

    # Process all individual "set (module_list) key  {[value_list] or $value or value}"
    # N.B. We have to be careful here, because \s matches \n, leading to potential problems
    # with undesired multiline matches.  Better the double-negative [^\S\n] instead, which
    # will match any space, tab, etc., except a newline
    set_command = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)[^\S\n]+)?(\w+)(?:[^\S\n]|=)+((\[.*\])|(\$?[-+,*()\.\w]+))\s*$',
                             re.MULTILINE | re.IGNORECASE)
    temp = set_command.sub(process_set_command, temp)

    # Process "molecule name? { ... }"
    molecule = re.compile(r'^(\s*?)molecule[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = molecule.sub(process_molecule_command, temp)

    # Process "external name? { ... }"
    external = re.compile(r'^(\s*?)external[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = external.sub(process_external_command, temp)

    # Process "pcm name? { ... }"
    pcm = re.compile(r'^(\s*?)pcm[=\s]*(\w*?)\s*\{(.*?)^\}',
                     re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = pcm.sub(process_pcm_command, temp)

    # Then remove repeated newlines
    multiplenewlines = re.compile(r'\n+')
    temp = multiplenewlines.sub('\n', temp)

    # Process " extract"
    extract = re.compile(r'(\s*?)(\w+)\s*=\s*\w+\.extract_subsets.*',
                         re.IGNORECASE)
    temp = extract.sub(process_extract_command, temp)

    # Process "print" and transform it to "core.print_out()"
    #print_string = re.compile(r'(\s*?)print\s+(.*)', re.IGNORECASE)
    #temp = re.sub(print_string, process_print_command, temp)

    # Process "memory ... "
    memory_string = re.compile(r'(\s*?)memory\s+([+-]?\d*\.?\d+)\s+([KMG]i?B)',
                               re.IGNORECASE)
    temp = memory_string.sub(process_memory_command, temp)

    # Process "basis name? { ... }"
    basis_block = re.compile(r'^(\s*?)(basis|df_basis_scf|df_basis_mp2|df_basis_cc|df_basis_sapt)[=\s]*(\w*?)\s*\{(.*?)\}',
                             re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = basis_block.sub(process_basis_block, temp)

    # Process literal blocks by substituting back in
    lit_block = re.compile(r'literals_psi4_yo-(\d*\d)')
    temp = lit_block.sub(process_literal_blocks, temp)

    # "from __future__" imports have to come first in the generated script,
    # so pull them out of the body and re-emit them ahead of everything else.
    future_imports = []

    def future_replace(m):
        future_imports.append(m.group(0))
        return ''

    future_string = re.compile('^from __future__ import .*$', flags=re.MULTILINE)
    temp = future_string.sub(future_replace, temp)

    # imports
    imports = '\n'.join(future_imports) + '\n'
    imports += ''.join((
        'import psi4\n',
        'from psi4 import *\n',
        'from psi4.core import *\n',
        'from psi4.driver.diatomic import anharmonicity\n',
        'from psi4.driver.gaussian_n import *\n',
        'from psi4.driver.aliases import *\n',
        'from psi4.driver.driver_cbs import xtpl_highest_1, scf_xtpl_helgaker_2, scf_xtpl_helgaker_3, corl_xtpl_helgaker_2\n',
        'from psi4.driver.wrapper_database import database, db, DB_RGT, DB_RXN\n',
        'from psi4.driver.wrapper_autofrag import auto_fragments\n',
        'from psi4.driver.p4const.physconst import *\n',
        'psi4_io = core.IOManager.shared_object()\n',
    ))

    # psirc (a baby PSIthon script that might live in ~/.psi4rc)
    psirc_file = os.path.join(os.path.expanduser('~'), '.psi4rc')
    if os.path.isfile(psirc_file):
        # Context manager closes the handle even if the read fails.
        with open(psirc_file) as fh:
            psirc = fh.read()
        psirc = psirc.replace('psi4.IOManager', 'psi4.core.IOManager')
    else:
        psirc = ''

    # Override scratch directory if user specified via env_var
    # NOTE(review): the value is interpolated unescaped into a double-quoted
    # Python literal; quotes or backslashes in it would alter the generated
    # code.
    scratch = ''
    scratch_env = core.get_environment('PSI_SCRATCH')
    if scratch_env:
        scratch += 'psi4_io.set_default_path("%s")\n' % (scratch_env)

    blank_mol = 'geometry("""\n'
    blank_mol += '0 1\nH\nH 1 0.74\n'
    blank_mol += '""","blank_molecule_psi4_yo")\n'

    temp = imports + psirc + scratch + blank_mol + temp

    # Move up the psi4.core namespace
    for func in dir(core):
        temp = temp.replace("psi4." + func, "psi4.core." + func)

    return temp