예제 #1
0
def convert(input_file, gen_dir, mech_dir, mcm_dir):
    """
    Convert an MCM mechanism (.fac) file into the five files read by AtChem2's Fortran code.

    - 'Generic Rate Coefficients' and 'Complex reactions' go to gen_dir/mechanism.f90 with little more than
      formatting changes - each line is replicated in full but with each named rate converted to an element
      of the vector q.
    - The rates defined in 'Reaction definitions' also go to gen_dir/mechanism.f90, as elements of the vector p.
    - The species involved as reactants (respectively products) in 'Reaction definitions' are split up into
      individual species, and their species and reaction numbers go to mechanism.reac (respectively
      mechanism.prod).
    - The numbers and names of all species encountered go to mechanism.species.
    - The numbers and names of all RO2 species in 'Peroxy radicals' go to mechanism.ro2.

    The input file is first passed to fix_mechanism_fac.fix_fac_full_file, which is given the path of the
    input file itself.

    :param input_file: string containing a relative or absolute reference to the mcm file to be processed.
    :param gen_dir: string containing a relative or absolute reference to the directory in which the function
      should place mechanism.f90.
    :param mech_dir: string containing a relative or absolute reference to the directory in which the function
      should place mechanism.{prod,reac,ro2,species}.
    :param mcm_dir: string containing a relative or absolute reference to the directory housing the reference
      file peroxy-radicals_v3.3.1.
    :raises AssertionError: if the input file does not exist, or if more than four section headers are found.
    :raises ValueError: if a rate line does not contain exactly one '=', or a reaction line does not contain
      exactly one ':' and one '='.
    """
    # Resolve the input location once, and check that the file exists before touching it.
    input_directory = os.path.dirname(os.path.abspath(input_file))
    input_filename = os.path.basename(input_file)
    input_path = os.path.join(input_directory, input_filename)
    assert os.path.isfile(input_path), 'The input file ' + str(input_path) + ' does not exist.'
    # Single-argument print(...) gives the same output as the Python 2 print statement and is also valid Python 3.
    print(input_directory)

    # Fix the input contents of any errant newlines
    fix_mechanism_fac.fix_fac_full_file(input_path)

    # Read in the input file. A separate name is used for the handle so the input_file argument is not rebound.
    print('Reading input file')
    with open(input_path, 'r') as fac_file:
        s = fac_file.readlines()

    # Split the lines into the following sections:
    # - Ignore everything up to Generic Rate Coefficients
    # - Generic Rate Coefficients
    # - Complex reactions
    # - Peroxy radicals
    # - Reaction definitions
    section_headers = ['Generic Rate Coefficients', 'Complex reactions', 'Peroxy radicals', 'Reaction definitions']
    generic_rate_coefficients = []
    complex_reactions = []
    peroxy_radicals = []
    reaction_definitions = []
    section_lines = [generic_rate_coefficients, complex_reactions, peroxy_radicals, reaction_definitions]

    section = 0
    for line in s:
        # Every header found on a line advances the counter; the header line itself is stored as the first
        # line of the section it opens.
        section += sum(1 for header in section_headers if header in line)
        if 1 <= section <= len(section_lines):
            section_lines[section - 1].append(line)
        else:
            assert section == 0, "Error, section is not in [0,4]"

    # Convert peroxy_radicals to a list of strings, each of the RO2 species from 'Peroxy radicals'
    ro2List = []
    for item in peroxy_radicals:
        if not item.startswith('*'):
            # The first line contains an equals sign: split against = and take the last element, which is either
            # the right-hand side of the first line or the whole of any other line.
            # The final line ends with a semi-colon: split against ; and take the first element.
            # Then split by + and collect every stripped name into ro2List.
            ro2List.extend([elem.strip() for elem in item.split('=')[-1].split(';')[0].strip().split('+')])
    # Remove empty strings. A real list is built (rather than filter(), which is a one-shot iterator on
    # Python 3) because ro2List is iterated twice below.
    ro2List = [name for name in ro2List if name]

    # Read in the reference RO2 species from the peroxy-radicals_v3.3.1 file
    with open(os.path.join(mcm_dir, 'peroxy-radicals_v3.3.1'), 'r') as RO2List_file:
        RO2List_reference = set(r.rstrip() for r in RO2List_file)

    # Check each of the RO2s from 'Peroxy radicals' are in the reference RO2 list. If not, print a warning at the
    # top of mechanism.f90 for each errant species.
    # TODO: This will break the expected format when mechanism.f90 is replaced by a parsable format.
    print('looping over inputted ro2s')

    with open(os.path.join(gen_dir, 'mechanism.f90'), 'w') as mech_rates_file:
        mech_rates_file.write("""! Note that this file is generated by tools/mech_converter.py
! based upon the file tools/mcm_example.fac
! Any manual edits to this file will be overwritten when
! calling tools/mech_converter.py

""")
        for ro2_species in ro2List:
            if ro2_species not in RO2List_reference:
                print(' ****** Warning: ' + ro2_species + ' NOT found in RO2List ****** ')
                mech_rates_file.write('! ' + ro2_species +
                                      ' is not in the MCM list of RO2 species. Should it be in the RO2 sum?\n')

    # Process sections 1 and 2
    # - copy comment lines across
    # - other lines are reformatted to be Fortran syntax, then edited to convert individual rate names to
    #   elements in a vector q.
    mechanism_rates_coeff_list = []
    variablesDict = dict()
    reactionNumber = 0
    for line in generic_rate_coefficients + complex_reactions:
        # Comments (beginning with a !) and blank lines are copied verbatim
        if line.startswith('!') or line.isspace():
            mechanism_rates_coeff_list.append(line)
        # Lines starting with either ; or * are written as comments
        elif line.startswith((';', '*')):
            mechanism_rates_coeff_list.append('!' + line)
        # Otherwise assume all remaining lines are in the correct format, and so process them
        else:
            # reactionNumber keeps track of the equation we are processing
            reactionNumber += 1

            # This matches anything like @-dd.d and replaces with **(-dd.d). (?<=@) is a lookbehind assertion,
            # followed by - and any combination of digits and decimal points; the negative number is replaced
            # by its bracketed version. All @ are then converted to **.
            line2 = re.sub(r'(?<=@)-[0-9.]*',
                           r'(\g<0>)',
                           line.replace(';', '').strip()
                           ).replace('@', '**')
            # Append _DP to the end of all digits that aren't followed by more digits or letters
            # (targets a few too many)
            line2 = re.sub(r'[0-9]+(?![a-zA-Z0-9\.])',
                           r'\g<0>_DP',
                           line2)
            # Undo the suffix _DP for any species names and for LOG10
            line2 = re.sub(r'\b(?P<speciesnames>[a-zA-Z][a-zA-Z0-9]*)_DP',
                           r'\g<speciesnames>',
                           line2)
            # Rewrite the exponent marker of any numbers like 1D7 or 2.3D-8 as e, keeping the _DP suffix
            line2 = re.sub(r'\b(?P<doubles1>[0-9][0-9\.]*)[dDeE](?P<doubles2>[+-]*[0-9]+)_DP',
                           r'\g<doubles1>e\g<doubles2>_DP',
                           line2)
            # Add .0 to any literals that don't have a decimal place - this is necessary as it seems you can't
            # use extended precision on such a number - gfortran complains about an unknown integer kind, when
            # it should really be a real kind.
            # NOTE(review): inside the character class, '+-d' is a range from '+' to 'd', so the lookbehind
            # excludes far more characters (e.g. '/', '=', ',', upper-case letters, '_') than the six that
            # appear to be intended. The pattern is kept unchanged here because altering it changes the
            # generated Fortran; confirm the intended set before tightening it.
            line2 = re.sub(r'(?<![\.0-9+-dDeE])(?P<doubles>[0-9]+)_DP',
                           r'\g<doubles>.0_DP',
                           line2)

            # strip whitespace, ; and %
            cleaned_line = line2.strip().strip('%;').strip()

            # Process the assignment: split by = into variable name and value (exactly one = is required)
            lhs, rhs = cleaned_line.split('=')
            variable_name = lhs.strip()
            value = rhs.strip()

            # TODO: check for duplicates
            variablesDict[variable_name] = reactionNumber

            # Replace any variables declared so far with references to q, with each new variable assigned
            # to a new element of q.
            new_rhs = tokenise_and_process(value, variablesDict)

            # Save the resulting string to mechanism_rates_coeff_list
            mechanism_rates_coeff_list.append(
                'q(' + str(variablesDict[variable_name]) + ') = ' + new_rhs + '  !' + cleaned_line + '\n')

    # Save the number of such equations to be output to mechanism.{prod,reac}
    numberOfGenericComplex = reactionNumber

    # Process 'Reaction definitions'. We process this before writing 'Peroxy radicals' because that relies on
    # the species list generated here.
    # - copy comment lines across
    # - other lines are split into their constituent parts:
    #   - rateConstants are the reaction rates, processed further below to use the vector q where needed.
    #   - the reactants and products of each reaction are collected up and numbered, and their placements
    #     output to mechanism.{prod,reac,species}
    speciesList = []
    # Maps each species name to its 1-based number so that lookups do not rescan speciesList
    species_index = {}
    rateConstants = []
    mech_reac_list = []
    mech_prod_list = []
    reactionNumber = 0
    # rateConstants receives exactly one entry per line of reaction_definitions, so the two stay aligned
    for line in reaction_definitions:
        # Comments (beginning with a !) and blank lines are copied verbatim
        if line.startswith('!') or line.isspace():
            rateConstants.append(line)
        # Lines starting with either ; or * are written as comments
        elif line.startswith((';', '*')):
            rateConstants.append('!' + line)
        # Otherwise assume all remaining lines are in the correct format, and so process them
        else:
            # reactionNumber keeps track of the reaction we are processing
            reactionNumber += 1

            # strip whitespace, ; and %
            reaction_line = line.strip().strip('%;').strip()

            # split by the colon: lhs is reaction rate, rhs is reaction equation
            lhs, rhs = reaction_line.split(':')

            # Add reaction rate to rateConstants
            rateConstants.append(lhs)

            # Process the reaction: split by = into reactants and products
            reactantsList, productsList = rhs.split('=')

            # Split each side by + and strip. Empty names are dropped, so a side that is empty or contains
            # only whitespace contributes no species (previously a whitespace-only side registered '' as a
            # species).
            reactantNums = _species_numbers(_split_species_names(reactantsList), speciesList, species_index)
            productNums = _species_numbers(_split_species_names(productsList), speciesList, species_index)

            # Record 'reaction species' pairs for mechanism.reac and mechanism.prod
            mech_reac_list.extend([str(reactionNumber) + ' ' + str(z) + '\n' for z in reactantNums])
            mech_prod_list.extend([str(reactionNumber) + ' ' + str(z) + '\n' for z in productNums])

    # First line of both mechanism.prod and mechanism.reac: number of species, reactions and q equations
    counts_header = (str(len(speciesList)) + ' ' + str(reactionNumber) + ' ' + str(numberOfGenericComplex) +
                     ' numberOfSpecies numberOfReactions numberOfGenericComplex\n')

    with open(os.path.join(mech_dir, 'mechanism.prod'), 'w') as prod_file:
        prod_file.write(counts_header)
        prod_file.writelines(mech_prod_list)

    with open(os.path.join(mech_dir, 'mechanism.reac'), 'w') as reac_file:
        reac_file.write(counts_header)
        reac_file.writelines(mech_reac_list)

    # Write speciesList to mechanism.species, indexed by (1 to len(speciesList))
    with open(os.path.join(mech_dir, 'mechanism.species'), 'w') as species_file:
        for species_number, species_name in enumerate(speciesList, 1):
            species_file.write(str(species_number) + ' ' + str(species_name) + '\n')

    # Build the rate coefficient lines (elements of the vector p)
    i = 0
    mech_rates_list = []
    for x, original_line in zip(rateConstants, reaction_definitions):
        if x.startswith('!') or x.isspace():
            mech_rates_list.append(x)
        else:
            i += 1
            # This matches anything like @-dd.d and replaces with **(-dd.d). (?<=@) is a lookbehind assertion,
            # followed by - and any combination of digits and decimal points; the negative number is replaced
            # by its bracketed version.
            string = re.sub(r'(?<=@)-[0-9.]*', r'(\g<0>)', x)
            # Now convert all @ to ** etc.
            string = string.replace('@', '**')
            string = string.replace('<', '(')
            string = string.replace('>', ')')
            # Replace any float-type numbers (xxx.xxxE+xx) with double-type - (xxx.xxxD+xx)
            string = re.sub(r'(?P<single>[0-9]+\.[0-9]+)[eE]',
                            r'\g<single>D',
                            string)
            # The unmodified source line (including its newline) is appended as a trailing Fortran comment
            mech_rates_list.append('p(' + str(i) + ') = ' +
                                   tokenise_and_process(string, variablesDict) + '  !' + original_line)

    # Append the rate module to the header (and any RO2 warnings) already written to mechanism.f90
    with open(os.path.join(gen_dir, 'mechanism.f90'), 'a') as mech_rates_coeff_file:
        mech_rates_coeff_file.write("""
module mechanism_mod
    use, intrinsic :: iso_c_binding
contains

    subroutine update_p(p, q, TEMP, N2, O2, M, RH, H2O, DEC, BLHEIGHT, DILUTE, JFAC, ROOFOPEN, J, RO2) bind(c,name='update_p')
        implicit none

        integer, parameter :: DP = selected_real_kind( p = 15, r = 307 )
           real(c_double), intent(inout) :: p(:), q(:)
        real(c_double), intent(in) :: TEMP, N2, O2, M, RH, H2O, DEC, BLHEIGHT, DILUTE, JFAC, ROOFOPEN, J(:), RO2
        """)
        # Write out Generic Rate Coefficients and Complex reactions
        mech_rates_coeff_file.writelines(mechanism_rates_coeff_list)
        # Write out Reaction definitions
        mech_rates_coeff_file.writelines(mech_rates_list)
        mech_rates_coeff_file.write("""
    end subroutine update_p
end module mechanism_mod
""")

    # Finally, now that we have the full species list, we can output the RO2s to mechanism.ro2, using the
    # species number of each RO2
    print('adding RO2 to model/configuration/mechanism.ro2')
    with open(os.path.join(mech_dir, 'mechanism.ro2'), 'w') as ro2_file:
        ro2_file.write("""! Note that this file is generated by tools/mech_converter.py based upon the file tools/mcm_example.fac. Any manual edits to this file will be overwritten when calling tools/mech_converter.py
""")

        for ro2List_i in ro2List:
            speciesNumber = species_index.get(ro2List_i.strip())
            if speciesNumber is not None:
                ro2_file.write(str(speciesNumber) + ' !' + ro2List_i.strip() + '\n')
            else:
                # The RO2 does not appear in any reaction, so it has no species number
                ro2_file.write('0 ! error RO2 not in mechanism: ' + ro2List_i + '\n')


def _split_species_names(side):
    """Split one side of a reaction (e.g. ' A + B ') into stripped species names, dropping empty entries."""
    return [name for name in (item.strip() for item in side.split('+')) if name]


def _species_numbers(names, species_list, species_index):
    """Return the 1-based number of each name, appending unseen names to species_list and species_index."""
    numbers = []
    for name in names:
        if name not in species_index:
            species_list.append(name)
            species_index[name] = len(species_list)
        numbers.append(species_index[name])
    return numbers
예제 #2
0
def convert(input_file):
    script_directory = os.path.dirname(os.path.abspath(__file__))
    input_directory = os.path.dirname(os.path.abspath(input_file))
    input_filename = os.path.basename(input_file)
    assert os.path.isfile(os.path.join(
        input_directory, input_filename)), 'The input file ' + str(
            os.path.join(input_directory, input_filename)) + ' does not exist.'
    print input_directory
    # Fix the input contents of any errant newlines
    fix_mechanism_fac.fix_fac_full_file(
        os.path.join(input_directory, input_filename))

    # Read in the input file
    print 'Reading input file'
    with open(os.path.join(input_directory, input_filename),
              'r') as input_file:
        s = input_file.readlines()
        # print s

    # split the lines into the following sections:
    # - Ignore everything up to Generic Rate Coefficients
    # - Generic Rate Coefficients
    # - Complex reactions
    # - Peroxy radicals
    # - Reaction definitions
    section_headers_indices = [0, 1, 2, 3]
    section_headers = [
        'Generic Rate Coefficients', 'Complex reactions', 'Peroxy radicals',
        'Reaction definitions'
    ]
    generic_rate_coefficients = []
    complex_reactions = []
    peroxy_radicals = []
    reaction_definitions = []

    section = 0
    for line in s:
        for header_index in section_headers_indices:
            if section_headers[header_index] in line:
                section += 1
        if section == 1:
            # print line
            generic_rate_coefficients.append(line)
        elif section == 2:
            # print line
            complex_reactions.append(line)
        elif section == 3:
            # print line
            peroxy_radicals.append(line)
        elif section == 4:
            # print line
            reaction_definitions.append(line)
        else:
            assert section == 0, "Error, section is not in [0,4]"
            # print line

    # print 'generic_rate_coefficients'
    # print generic_rate_coefficients
    # print 'complex_reactions'
    # print complex_reactions
    # print 'peroxy_radicals'
    # print peroxy_radicals
    # print 'reaction_definitions'
    # print reaction_definitions
    # Initialise a few variables
    speciesList = []
    rateConstants = []
    reactionNumber = 0

    with open(os.path.join(input_directory, 'mechanism.reactemp'),
              'w') as reac_temp_file, open(
                  os.path.join(input_directory, 'mechanism.prod'),
                  'w') as prod_file:
        # Loop over all lines in the reaction_definitions section of the input file
        for line in reaction_definitions:

            # Check for comments (beginning with a !), or blank lines
            if (re.match('!', line) is not None) | (line.isspace()):
                rateConstants.append(line)
            # Check for lines starting with either ; or *, and write these as comments
            elif (re.match(';', line) is not None) | (re.match('[*]', line)
                                                      is not None):
                rateConstants.append('!' + line)
            # Otherwise assume all remaining lines are in the correct format, and so process them
            else:
                # reactionNumber keeps track of the line we are processing
                reactionNumber += 1
                # print 'line =', line
                # strip whitespace, ; and %
                line = line.strip().strip('%').strip(';').strip()

                print ''
                print 'line =', line
                # split by the semi-colon : a[0] is reaction rate, a[1] is reaction equation
                a = re.split(':', line)
                # print 'a = ', a

                # Add reaction rate to rateConstants
                rateConstants.append(a[0])
                # print 'rate =', a[0]

                # Process the reaction: split by = into reactants and products
                # print 'reaction = ', a[1]

                reaction_parts = re.split('=', a[1])
                # print reaction_parts

                reactantsList = reaction_parts[0]
                productsList = reaction_parts[1]

                # print 'reactantlist = ', reactantsList
                # print 'productlist = ', productsList

                # Process each of reactants and products by splitting by +. Strip each at this stage.
                reactants = [
                    item.strip() for item in re.split('[+]', reactantsList)
                ]
                products = [
                    item.strip() for item in re.split('[+]', productsList)
                ]

                # print 'reactants =', reactants
                # print 'products =', products

                # Ignore empty reactantsList
                if not reactantsList == '':
                    # Compare each reactant against known species.
                    reactantNums = []
                    for x in reactants:
                        j = 0
                        for y in speciesList:
                            # Check for equality: if equality, then the reactant is a known species and its number should be
                            # added to the reactantNums variable
                            if x == y:
                                reactantNums.append(j + 1)
                                # print 'found:', y + ', j =', j
                                break
                            j += 1
                        # This code only executes if the break is NOT called, i.e. if the loop runs to completion without
                        # the reactant being found in the known species
                        else:
                            # Add reactant to speciesList, and add this number to
                            # reactantNums to record this reaction.
                            speciesList.append(x)
                            reactantNums.append(len(speciesList))
                            print 'adding', x, 'to speciesList'

                    # Write the reactants to mechanism.reactemp
                    for z in reactantNums:
                        reac_temp_file.write(
                            str(reactionNumber) + ' ' + str(z) + '\n')

                if not productsList == '':
                    # Compare each product against known species.
                    productNums = []
                    for x in products:
                        j = 0
                        for y in speciesList:
                            # Check for equality: if equality, then the product is a known species and its number should be
                            # added to the productNums variable
                            if x == y:
                                productNums.append(j + 1)
                                # print 'found:', y + ', j =', j
                                break
                            j += 1
                        # This code only executes if the break is NOT called, i.e. if the loop runs to completion without
                        # the product being found in the known species
                        else:
                            # Add product to speciesList, add this number to
                            # productNums to record this reaction.
                            speciesList.append(x)
                            productNums.append(len(speciesList))
                            print 'adding', x, 'to speciesList'

                    # Write the products to mechanism.prod
                    for z in productNums:
                        prod_file.write(
                            str(reactionNumber) + ' ' + str(z) + '\n')

        # Mark end of file with zeros
        reac_temp_file.write('0\t0\t0\t0 \n')
        prod_file.write('0\t0\t0\t0')
        # Output number of species and number of reactions
        reac_temp_file.write(
            str(len(speciesList)) + ' ' + str(reactionNumber) +
            ' numberOfSpecies numberOfReactions\n')

    # Copy mechanism.reactemp to mechanism.reac in a different order to make it readable by the model (move the last line to
    # the first line).
    with open(os.path.join(input_directory,
                           'mechanism.reactemp')) as reac_temp_file, open(
                               os.path.join(input_directory, 'mechanism.reac'),
                               'w') as reac_file:
        st = reac_temp_file.readlines()
        # Write last line
        reac_file.write(st[len(st) - 1])
        # Write all other lines
        for line in st[:-1]:
            reac_file.write(line)

    # Write speciesList to mechanism.species, indexed by (1 to len(speciesList))
    with open(os.path.join(input_directory, 'mechanism.species'),
              'w') as species_file:
        for i, x in zip(range(1, len(speciesList) + 1), speciesList):
            species_file.write(str(i) + ' ' + str(x) + '\n')

    # Write out rate coefficients
    i = 1
    with open(
            os.path.join(input_directory, 'mechanism-rate-coefficients.ftemp'),
            'w') as mech_rates_temp_file:
        for rate_counter, x in zip(range(len(s)), rateConstants):
            if (re.match('!', x) is not None) | (x.isspace()):
                mech_rates_temp_file.write(str(x))
            else:
                # This matches anything like @-dd.d and replaces with **(-dd.d). This uses (?<=@) as a lookbehind assertion,
                # then matches - and any combination of digits and decimal points. This replaces the negative number by its
                # bracketed version.
                string = re.sub('(?<=@)-[0-9.]*', '(\g<0>)', x)
                # Now convert all @ to ** etc.
                string = string.replace('@', '**')
                string = string.replace('<', '(')
                string = string.replace('>', ')')
                mech_rates_temp_file.write('  p(' + str(i) + ') = ' + string +
                                           '  !' +
                                           reaction_definitions[rate_counter])
                i += 1

    # Write RO2 data to file
    in_RO2_lines = False
    ro2_input = []
    for item in peroxy_radicals:
        if not in_RO2_lines:
            # Check to see whether we are entering the 'Reaction definitions' section
            if 'RO2 = ' in item:
                in_RO2_lines = True
        if in_RO2_lines:
            if not re.match('\*', item):
                ro2_input.append(item)
            else:
                in_RO2_lines = False

    ro2List = []
    for l in ro2_input:
        # We have an equals sign on the first line. Handle this by splitting against =, then taking the last element of the
        # resulting list, which will either be the right-hand side of the first line, or the whole of any other line.
        # Then split by +.
        strArray = l.split('=')[-1].split('+')

        # print strArray
        # For each element, remove any semi-colons, strip, and then append if non-empty.
        for x in strArray:
            x = x.replace(';', '').strip()
            if x == '':
                pass
                # print 'doing nothing'
            else:
                # print x
                ro2List.append(x)

    # check RO2s are in RO2 list
    with open(os.path.join(script_directory, 'RO2listv3.3.1')) as RO2List_file:
        RO2List_input = RO2List_file.readlines()

    for r in RO2List_input:
        r = r.strip()
        # print r

    # Check that each species is in the RO2 list. If so, just print to screen. Otherwise, print a warning at the top of
    # mechanism-rate-coefficients.f90 for each errant species.
    print 'looping over inputted ro2s'
    # print 'The RO2List is: ', ro2List

    with open(os.path.join(input_directory, 'mechanism-rate-coefficients.f90'),
              'w') as mech_rates_file:
        mech_rates_file.write(
            """! Note that this file is generated by tools/mech_converter.py,
! based upon the file tools/mcm_subset.fac. Any manual edits to this file will be overwritten
! when calling tools/mech_converter.py
""")
        for ro2List_i in ro2List:
            for ro2List_input_j in RO2List_input:
                if ro2List_i.strip() == ro2List_input_j.strip():
                    # print ro2List_i.strip() + ' found in RO2List'
                    break
            # This code only executes if the break is NOT called, i.e. if the loop runs to completion without the species
            # being found in the RO2 list
            else:
                print ' ****** Warning: ' + ro2List_i.strip(
                ) + ' NOT found in RO2List ****** '
                mech_rates_file.write(
                    '! ' + ro2List_i.strip() +
                    ' is not in the MCM list of RO2 species. Should it be in the RO2 sum?\n'
                )

        # loop over RO2 and write the necessary line to mechanism-rate-coefficients.f90, using the species number of the RO2
        mech_rates_file.write('  ro2 = 0.00e+00\n')
        print 'adding RO2 to mechanism-rate-coefficients.f90'
        for ro2List_i in ro2List:
            # print 'ro2List_i: ' + ro2List_i
            for speciesNumber, y in zip(range(1,
                                              len(speciesList) + 1),
                                        speciesList):
                if ro2List_i.strip() == y.strip():
                    mech_rates_file.write('  ro2 = ro2 + y(' +
                                          str(speciesNumber) + ')!' +
                                          ro2List_i.strip() + '\n')
                    # Exit loop early if species found
                    break
            # This code only executes if the break is NOT called, i.e. if the loop runs to completion without the RO2 being
            # found in the species list
            else:
                mech_rates_file.write('\t ! error RO2 not in mechanism: ' +
                                      ro2List_i + '\n')

        mech_rates_file.write('\n\n')
        # Read in NOY data, which is arrange as 'NOY = blah + blah + blah \n + blah + blah ;'
        # with open('./NOY.fac') as NOY_fac_file
        #   NOY_input = NOY_fac_file.readlines()
        #
        # NOYList = []
        # for n in NOY_input:
        # # We have an equals sign on the first line. Handle this by splitting against =, then taking the last element of the
        # # resulting list, which will either be the right-hand side of the first line, or the whole of any other line.
        # # Then split by +
        #   strArray = n.split('=')[-1].split('+')
        #
        #	print strArray
        # # For each element, remove any semi-colons, strip, and then append if non-empty.
        #	for x in strArray:
        #		x = x.replace(';', '').strip()
        #		if x == '':
        #			print 'doing nothing'
        #		else:
        #			print x
        #			NOYList.append(x)
        #
        # # loop over NOY and write the necessary line to mechanism-rate-coefficients.f90, using the species number of the NOY
        # mech_rates_file.write('\tNOY = 0.00e+00\n')
        # for NOYList_i in NOYList:
        #	print 'NOYList_i: ' + NOYList_i
        #	for speciesNumber, y in zip(range(1, len(speciesList)+1), speciesList):
        #		if NOYList_i.strip() == y.strip():
        #			mech_rates_file.write('  NOY = NOY + y(' + str(speciesNumber) + ')!' + NOYList_i.strip() + '\n')
        #			# Exit loop early if species found
        #		    break
        # # This code only executes if the break is NOT called, i.e. if the loop runs to completion without the NOY being
        # # found in the species list
        #   else:
        #		mech_rates_file.write('\t !error NOY not in mechanism: ' + NOYList_i + '\n')
        #
        # mech_rates_file.write('\n\n')
        # # Combine mechanism rates and RO2 / NOY sum files
        with open(
                os.path.join(input_directory,
                             'mechanism-rate-coefficients.ftemp')
        ) as mech_rates_temp_file:
            rs = mech_rates_temp_file.readlines()

        for r in rs:
            mech_rates_file.write(r)

    os.remove(os.path.join(input_directory, 'mechanism.reactemp'))
    os.remove(
        os.path.join(input_directory, 'mechanism-rate-coefficients.ftemp'))

    mechanism_rates_list = [
        """
! Note that this file is generated by tools/mech_converter.py,
! based upon the file tools/mcm_subset.fac. Any manual edits to this file will be overwritten
! when calling tools/mech_converter.py

SUBROUTINE mechanism_rates (p, t, y, mnsp)
  USE photolysisRates
  USE zenithData1
  USE constraints
  USE envVars, ONLY: ro2

  IMPLICIT NONE

  ! calculates rate constants from arrhenius information
  DOUBLE PRECISION, INTENT (out) :: p(*)
  DOUBLE PRECISION, INTENT (in) :: t
  INTEGER, INTENT (in) :: mnsp
  DOUBLE PRECISION, INTENT (in) :: y(mnsp)
  DOUBLE PRECISION :: temp, pressure, dummy
"""
    ]
    mechanism_rates_list.append("""
  ! declare variables missed in MCM definition
  INTEGER :: i
  DOUBLE PRECISION :: photoRateAtT

  INCLUDE 'modelConfiguration/mechanism-rate-declarations.f90'

  CALL ro2sum (ro2, y)
  dummy = y(1)

  dec = -1e16

  CALL getEnvVarsAtT (t, temp, rh, h2o, dec, pressure, m, blh, dilute, jfac, roofOpen)

  CALL atmosphere (o2, n2, m)

  !O2 = 0.2095*m
  !N2 = 0.7809*m

  ! * **** SIMPLE RATE COEFFICIENTS *****                     *""")
    coeffSpeciesList = [
        'N2', 'O2', 'M', 'RH', 'H2O', 'DEC', 'BLH', 'DILUTE', 'JFAC',
        'ROOFOPEN'
    ]
    reactionNumber = 0
    # P
    for line in generic_rate_coefficients + complex_reactions:
        # Check for comments (beginning with a !), or blank lines
        if (re.match('!', line) is not None) | (line.isspace()):
            mechanism_rates_list.append('  ' + line)
        # Check for lines starting with either ; or *, and write these as comments
        elif (re.match(';', line) is not None) | (re.match('[*]', line)
                                                  is not None):
            mechanism_rates_list.append('  !' + line)
        # Otherwise assume all remaining lines are in the correct format, and so process them
        else:
            # This matches anything like @-dd.d and replaces with **(-dd.d). This uses (?<=@) as a lookbehind assertion,
            # then matches - and any combination of digits and decimal points. This replaces the negative number by its
            # bracketed version.
            # It also then converts all @ to ** etc.
            # Save the resulting string to mechanism_rates_list
            mechanism_rates_list.append(
                '  ' +
                re.sub('(?<=@)-[0-9.]*', '(\g<0>)',
                       line.replace(';', '').strip()).replace('@', '**') +
                '\n')

            # Now we need to find the list of all species that are used in these equations, so we can declare them
            # at the top of the Fortran source file.

            # reactionNumber keeps track of the line we are processing
            reactionNumber += 1
            # print 'line =', line
            # strip whitespace, ; and %
            line = line.strip().strip('%').strip(';').strip()
            print 'line =', line
            # No split by semi-colon is performed here: a is the whole stripped line, which is split by = below
            a = line
            # print 'a = ', a

            # Add reaction rate to rateConstants
            # rateConstant = a[0]
            # rateConstants.append(rateConstant)
            # print rateConstant

            # Process the reaction: split by = into reactants and products
            # print 'reaction =', a

            reaction_parts = re.split('=', a)
            # print reaction_parts

            LHSList = reaction_parts[0]
            RHSList = reaction_parts[1]

            # print 'reactantlist = ', LHSList
            # print 'productlist = ', RHSList

            # Strip each side at this stage. Splitting by + is currently disabled (see the commented-out code at the
            # end of each statement), so each side is kept as a single string.
            reactant = LHSList.strip(
            )  # [item.strip() for item in re.split('[+]', LHSList)]
            products = RHSList.strip(
            )  # [item.strip() for item in re.split('[+]', RHSList)]

            # print 'reactants = ', reactants
            # print 'products = ', products

            # Compare reactant against known species.
            if reactant in coeffSpeciesList:
                pass
                # print 'found:', reactant
            else:
                # Add reactant to coeffSpeciesList, so that it gets declared
                # at the top of the Fortran source file.
                coeffSpeciesList.append(reactant)
                print 'adding', reactant, 'to coeffSpeciesList'

            if not RHSList.isspace():
                # Compare each product against known species.
                productNums = []
                # print RHSList
                # Replace all math characters and brackets with spaces, and split the remaining string by spaces.
                # Now, each string in the sublist will:
                # - start with a digit
                # - be a 'reserved word' i.e. LOG10, EXP, TEMP, PRESSURE
                # - otherwise, be a species
                RHSList_sub = re.sub('[()\-+*@/]', ' ', RHSList).split(' ')
                # print RHSList_sub
                RHSList_sub = [item.upper() for item in RHSList_sub]
                for x in RHSList_sub:
                    # Filter out spaces, numbers, and maths symbols
                    if (not re.match('[0-9]', x)) and (not x == ''):
                        # Filter out our 'reserved words'
                        if not any(x == reserved for reserved in
                                   ['EXP', 'TEMP', 'PRESSURE', 'LOG10', 'T']):
                            # print x
                            if x in coeffSpeciesList:
                                pass
                                # print 'found: ', x
                            else:
                                coeffSpeciesList.append(x)
                                print 'adding', x, 'to coeffSpeciesList'

    # Recombine the species found into lines of 10 in the right format to declare them as Fortran variables.
    # Begin the first line with the declaration prefix
    newline = '  DOUBLE PRECISION ::'
    mechanism_rates_decl = []
    # Loop over all species
    for i, item in zip(range(1, len(coeffSpeciesList) + 1), coeffSpeciesList):
        # Add the next species
        newline += ' ' + item.strip()
        # If it's the last species, then exit the loop with some extra newlines
        if i == len(coeffSpeciesList):
            mechanism_rates_decl.append(newline + '\n\n\n')
            continue
        # Otherwise, every tenth species gets rounded off with a newline and a prefix to the next line
        if i % 10 == 0:
            mechanism_rates_decl.append(newline + '\n')
            newline = '  DOUBLE PRECISION ::'
        else:
            # If not, add a spacer
            newline += ','

    # Insert the list generated above into the right place in the master list
    mechanism_rates_list = list(
        mechanism_rates_list[0]) + mechanism_rates_decl + list(
            mechanism_rates_list[1:])

    mechanism_rates_list.append("""  DO i = 1, nrOfPhotoRates
     IF (useConstantValues==0) THEN
        IF (cosx<1.00d-10) THEN
           j(ck(i)) = 1.0d-30
        ELSE
           j(ck(i)) = cl(i)*cosx**(cmm(i))*EXP(-cnn(i)*secx)*transmissionFactor(i)*roofOpen*jfac
        ENDIF
     ELSE
        j(ck(i)) = cl(i)
     ENDIF
  ENDDO

  DO i = 1, numConPhotoRates
     CALL getConstrainedQuantAtT2D (t, photoX, photoY, photoY2, photoNumberOfPoints(i), photoRateAtT, 2, i, &
          maxNumberOfDataPoints, numConPhotoRates)
     j(constrainedPhotoRatesNumbers(i)) = photoRateAtT
  ENDDO

  INCLUDE 'modelConfiguration/mechanism-rate-coefficients.f90'
  RETURN
END SUBROUTINE mechanism_rates

INCLUDE 'modelConfiguration/extraOutputSubroutines.f90'
""")
    # print mechanism_rates_list
    with open(os.path.join(script_directory, '../mechanism-rates.f90'),
              'w+') as mr2_file:
        for item in mechanism_rates_list:
            mr2_file.write(item)