Example #1
0
def eqn_interr(num_eqn, naked_list_eqn, rindx, rstoi, pindx, pstoi,
               chem_scheme_markers, reac_coef, spec_namelist, spec_name,
               spec_smil, spec_list, Pybel_objects, nreac, nprod, comp_num,
               phase):
    """Parse chemical-scheme equation lines into index and stoichiometry arrays.

    Each line is split into its equation and rate-coefficient sections using
    the markers in chem_scheme_markers, the rate coefficient is converted by
    the formatting module, and the reactants and products are recorded per
    equation.

    inputs: ---------------------------------------------------------------
    num_eqn - number of equations (scalar)
    naked_list_eqn - equations in strings
    rindx - to hold indices of reactants (columns appended here if needed)
    rstoi - to hold stoichiometries of reactants (columns appended if needed)
    pindx - to hold indices of products (columns appended here if needed)
    pstoi - to hold stoichiometries of products (columns appended if needed)
    chem_scheme_markers - markers for separating sections of the chemical
        scheme; elements 9, 10 and 11 are used here (start of rate
        coefficient, start of equation, closing marker)
    reac_coef - list that reaction rate coefficient strings are appended to
    spec_namelist - name strings of components present in the scheme
        (not SMILES), appended to in place
    spec_name - name string of components in xml file (not SMILES)
    spec_smil - SMILES from xml file
    spec_list - SMILES of components present in scheme, appended to in place
    Pybel_objects - list containing pybel objects, appended to in place
    nreac - to hold number of (unique) reactants per equation
    nprod - to hold number of (unique) products per equation
    comp_num - number of unique components in reactions across all phases
    phase - marker for the phase being considered: 0 for gas, 1 for
        particulates (not used inside this function)
    -----------------------------------------------------------------------

    Returns the tuple (rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist,
    spec_list, Pybel_objects, nreac, nprod, comp_num).

    Calls sys.exit with a message if a line lacks the equation or rate
    coefficient marker, if the converted rate coefficient still contains
    'EXP', or if a component name is absent from spec_name.
    """

    # optional leading stoichiometric number of a component, e.g. the "2" in
    # "2NO2" or the "0.5" in "0.5HO2"; always matches (possibly empty)
    stoich_pattern = re.compile(r"^\d*\.\d*|^\d*")

    # loop through equations line by line and extract the required information
    for eqn_step in range(num_eqn):

        eqn, rate_ex = _split_eqn_line(naked_list_eqn[eqn_step],
                                       chem_scheme_markers)

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # reactants with stoichiometric number; rule out the photon
        reactants = [
            term for term in eqn_split[:eqmark_pos]
            if term not in ('+', 'hv')
        ]
        # products with stoichiometric number
        products = [
            term for term in eqn_split[eqmark_pos + 1:] if term != '+'
        ]

        # append columns if this equation has more reactants/products than
        # the arrays can hold (arrays never shrink, so comparing against the
        # current equation is the same as comparing against the running
        # maximum over all equations so far)
        while len(reactants) > min(rindx.shape[1], rstoi.shape[1]):
            rindx = np.append(rindx, np.zeros((num_eqn, 1), dtype=int),
                              axis=1)
            rstoi = np.append(rstoi, np.zeros((num_eqn, 1)), axis=1)
        while len(products) > min(pindx.shape[1], pstoi.shape[1]):
            pindx = np.append(pindx, np.zeros((num_eqn, 1), dtype=int),
                              axis=1)
            pstoi = np.append(pstoi, np.zeros((num_eqn, 1)), axis=1)

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if 'EXP' in rate_ex:
            # an unconverted 'EXP' means the expression was not made Python
            # readable; abort with a message rather than a silent exit
            sys.exit(
                str('Error: inside eqn_parser, reaction rate coefficient ' +
                    'expression could not be converted: ' + str(rate_ex)))

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # left hand side of equations (losses)
        reactant_count, comp_num = _record_eqn_side(
            reactants, eqn_step, rindx, rstoi, stoich_pattern, spec_namelist,
            spec_name, spec_smil, spec_list, Pybel_objects, comp_num)
        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_count)

        # right hand side of equations (gains)
        product_count, comp_num = _record_eqn_side(
            products, eqn_step, pindx, pstoi, stoich_pattern, spec_namelist,
            spec_name, spec_smil, spec_list, Pybel_objects, comp_num)
        # number of products in this equation
        nprod[eqn_step] = int(product_count)

    return (rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist, spec_list,
            Pybel_objects, nreac, nprod, comp_num)


def _split_eqn_line(line, chem_scheme_markers):
    """Return (equation, rate coefficient) strings cut from one scheme line."""
    # the leading backslash ensures each marker is treated literally
    rrc_mark = '\\' + chem_scheme_markers[9]  # start of rate coefficient
    eqn_mark = '\\' + chem_scheme_markers[10]  # start of equation
    end_mark = '\\' + chem_scheme_markers[11]  # closing marker

    # .* is greedy, so each match ends just after the last occurrence of the
    # marker in the line; the end of the span gives that position
    eqn_match = re.match('.*' + eqn_mark, line)
    rrc_match = re.match('.*' + rrc_mark, line)
    if eqn_match is None or rrc_match is None:
        sys.exit(
            str('Error: inside eqn_parser, equation or reaction rate ' +
                'coefficient marker not found in line: ' + str(line)))

    if eqn_match.span()[1] > rrc_match.span()[1]:
        # equation is second part, so the rate coefficient part ends where
        # the equation part starts
        eqn_regex = eqn_mark + '.*' + end_mark
        rate_regex = rrc_mark + '.*' + eqn_mark
    else:
        # equation is first part, so it ends where the rate coefficient
        # part starts
        eqn_regex = eqn_mark + '.*' + rrc_mark
        rate_regex = rrc_mark + '.*' + end_mark

    # [0] takes the first matching section and [1:-1] removes the bounding
    # marker characters
    eqn = re.findall(eqn_regex, line)[0][1:-1].strip()
    rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()
    return eqn, rate_ex


def _component_index(name_only, spec_namelist, spec_name, spec_smil,
                     spec_list, Pybel_objects, comp_num):
    """Return (index of name_only, updated comp_num), registering new names."""
    if name_only in spec_namelist:
        # component already encountered: reuse its existing index
        return spec_namelist.index(name_only), comp_num

    spec_namelist.append(name_only)  # add to chemical scheme name list

    # convert chemical scheme name to SMILES via the xml file lists
    if name_only not in spec_name:
        sys.exit(
            str('Error: inside eqn_parser, chemical scheme name ' +
                str(name_only) + ' not found in xml file'))
    name_SMILE = spec_smil[spec_name.index(name_only)]

    spec_list.append(name_SMILE)  # list SMILES of parsed component
    # generate pybel object and append to Pybel object list
    Pybel_objects.append(pybel.readstring('smi', name_SMILE))

    # the new component takes the current count as its index
    return comp_num, comp_num + 1


def _record_eqn_side(terms, eqn_step, indx, stoi, stoich_pattern,
                     spec_namelist, spec_name, spec_smil, spec_list,
                     Pybel_objects, comp_num):
    """Fill row eqn_step of indx/stoi from terms; return (count, comp_num)."""
    step = 0  # number of unique components recorded so far on this side

    for term in terms:
        # split the optional leading stoichiometric number from the name
        prefix = stoich_pattern.match(term).group(0)
        if prefix != '':
            stoich_num = float(prefix)
            name_only = term[len(prefix):]  # name with no stoich number
        else:
            stoich_num = 1.0
            name_only = term

        # store stoichiometry (moved below if the component is a duplicate)
        stoi[eqn_step, step] = stoich_num

        name_indx, comp_num = _component_index(
            name_only, spec_namelist, spec_name, spec_smil, spec_list,
            Pybel_objects, comp_num)

        # check if index already present on this side of this equation,
        # i.e. the component appears more than once
        exist_indx = np.flatnonzero(indx[eqn_step, 0:step] == int(name_indx))
        if exist_indx.size > 0:
            # add to pre-existing stoichiometry
            stoi[eqn_step, exist_indx] += stoi[eqn_step, step]
            # remove stoichiometry added above; step is not advanced so the
            # duplicate does not occupy a column of its own
            stoi[eqn_step, step] = 0
        else:
            indx[eqn_step, step] = int(name_indx)
            step += 1

    return step, comp_num
Example #2
0
def eqn_interr(num_eqn, eqn_list, aqeqn_list, chem_scheme_markers, comp_name,
               comp_smil, num_sb, wall_on):

    # inputs: ----------------------------------------------------------------------------
    # num_eqn - number of equations
    # eqn_list - gas-phase equations in list of strings
    # aqeqn_list - aqueous-phase equations in list of strings
    # chem_scheme_markers - markers for separating sections of the chemical scheme
    # comp_name - name string of components in xml file (not SMILES)
    # comp_smil - SMILES from xml file
    # num_sb - number of size bins
    # wall_on - marker for whether to include wall partitioning
    # ------------------------------------------------------------------------------------

    # preparatory part ----------------------------------------------------
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = np.zeros((num_eqn[0], 1)).astype(int)
    # matrix of indices to arrange reactant concentrations when
    # reaction rate coefficient calculated
    y_arr = (np.ones((num_eqn[0], 1)).astype(int)) * -9999
    # array to arrange reaction rates so they align with reactant stoichiometries
    rr_arr = np.empty((0))
    # same but for products
    rr_arr_p = np.empty((0))
    # index array for extracting required reactant concentrations for the
    # reaction rate coefficient calculation
    y_rind = np.empty((0))
    # index array for identifying products when assigning gains from reactions
    y_pind = np.empty((0))
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn[0], 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn[0], 1))
    jac_stoi = np.zeros((num_eqn[0], 1))
    # 1D array to record stoichiometries of reactants per equarion
    rstoi_flat = np.empty((0))
    # 1D array to record stoichiometries of products per equarion
    pstoi_flat = np.empty((0))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn[0], 1))
    # arrays to store number of reactants and products in gas-phase equations
    nreac = np.empty(num_eqn[0], dtype=np.int8)
    nprod = np.empty(num_eqn[0], dtype=np.int8)
    # colptrs for sparse matrix
    reac_col = np.empty(num_eqn[0], dtype=np.int8)
    prod_col = np.empty(num_eqn[0], dtype=np.int8)
    # list for equation reaction rate coefficients
    reac_coef = []
    # matrix containing index of components who are denominators in the
    # calculation of equation derivatives in the Jacobian
    jac_den_indx = np.zeros((num_eqn[0], 1))
    # total number of Jacobian elements per equation
    njac = np.zeros((num_eqn[0], 1))
    # indices of Jacobian to affect per equation (rows)
    jac_indx = np.zeros((num_eqn[0], 1))
    # a new list for the name strings of components presented in the scheme (not SMILES)
    comp_namelist = []
    comp_list = [
    ]  # list for the SMILE strings of components present in the chemical scheme
    # list of Pybel objects of components in chemical scheme
    Pybel_objects = []
    comp_num = 0  # count the number of unique components in the chemical scheme
    RO_indx = []  # empty list for holding indices of alkoxy components
    # ---------------------------------------------------------------------

    max_no_reac = 0.  # log maximum number of reactants in a reaction
    max_no_prod = 0.  # log maximum number of products in a reaction

    # loop through gas-phase equations line by line and extract the required information
    for eqn_step in range(num_eqn[0]):

        line = eqn_list[eqn_step]  # extract this line

        # work out whether equation or reaction rate coefficient part comes first
        eqn_start = str('.*\\' + chem_scheme_markers[10])
        rrc_start = str('.*\\' + chem_scheme_markers[9])
        # get index of these markers, note span is the property of the match object that
        # gives the location of the marker
        eqn_start_indx = (re.match(eqn_start, line)).span()[1]
        rrc_start_indx = (re.match(rrc_start, line)).span()[1]

        if (eqn_start_indx > rrc_start_indx):
            eqn_sec = 1  # equation is second part
        else:
            eqn_sec = 0  # equation is first part

        # split the line into 2 parts: equation and rate coefficient
        # . means match with anything except a new line character., when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, so final part is stating the character(s) we
        # are specifically looking for, \\ ensures the marker is recognised
        if eqn_sec == 1:
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[11])
        else:  # end of equation part is start of reaction rate coefficient part
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[9])

        # extract the equation as a string ([0] extracts the equation section and
        # [1:-1] removes the bounding markers)
        eqn = re.findall(eqn_markers, line)[0][1:-1].strip()

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # reactants with stoichiometry number and omit any photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        # products with stoichiometry number
        products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+']

        # record maximum number of reactants across all equations
        max_no_reac = np.maximum(len(reactants), max_no_reac)
        # record maximum number of products across all equations
        max_no_prod = np.maximum(len(products), max_no_prod)

        # append columns if needed because maximum number of reactants increases
        while (max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1])):
            rindx = np.append(rindx, (np.zeros((num_eqn[0], 1))).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn[0], 1))), axis=1)
            y_arr = np.append(y_arr, (np.ones(
                (num_eqn[0], 1)) * -9999).astype(int),
                              axis=1)
            y_arr_fixer = ((np.arange(0, num_eqn[0],
                                      dtype='int')).reshape(-1, 1))
            y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac)))
            y_arr[y_arr !=
                  -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999]

        while (max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1])):
            pindx = np.append(pindx, (np.zeros((num_eqn[0], 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn[0], 1))), axis=1)
        while ((len(reactants)**2.0 + len(reactants) * len(products)) >
               jac_indx.shape[1]):
            jac_indx = np.append(jac_indx, (np.zeros((num_eqn[0], 1))), axis=1)
            jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[0], 1))),
                                     axis=1)
            jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[0], 1))), axis=1)

        # .* means occurs anywhere in line and, first \ means second \ can be interpreted
        # and second \ ensures recognition of marker
        rate_coeff_start_mark = str('\\' + chem_scheme_markers[9])
        # . means match with anything except a new line character, when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, \\ ensures the marker
        # is recognised
        if eqn_sec == 1:  # end of reaction rate coefficient part is start of equation part
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10])
        else:  # end of reaction rate coefficient part is end of line
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11])

        # rate coefficient starts and end punctuation
        rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark)
        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if (rate_ex.find('EXP') != -1):
            print('Error in reaction rate coefficient expression: ', rate_ex)
            sys.exit()

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # extract the stoichiometric number of the component in current equation
        reactant_step = 0
        product_step = 0
        stoich_regex = r"^\d*\.\d*|^\d*"
        numr = len(reactants)  # number of reactants in this equation

        # left hand side of equations (losses)
        for reactant in reactants:

            if (re.findall(stoich_regex, reactant)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, reactant)[0])
                # name with no stoich number
                name_only = re.sub(stoich_regex, '', reactant)
            elif (re.findall(stoich_regex, reactant)[0] == ''):
                stoich_num = 1.
                name_only = reactant

            # store stoichiometry
            rstoi[eqn_step, reactant_step] = stoich_num
            jac_stoi[eqn_step, reactant_step] = -1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(
                    name_only)  # add to chemical scheme name list

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                name_indx = comp_name.index(name_only)
                name_SMILE = comp_smil[name_indx]  # SMILES of component

                comp_list.append(name_SMILE)  # list SMILE names
                name_indx = comp_num  # allocate index to this species
                # generate pybel object
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it is a component already encountered it will be in comp_list
                # existing index
                name_indx = comp_namelist.index(name_only)

            # store reactant index
            # check if index already present - i.e. component appears more than once
            if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step]
                jac_stoi[eqn_step,
                         exist_indx] += -1 * rstoi[eqn_step, reactant_step]
                # remove stoichiometry added above
                rstoi[eqn_step, reactant_step] = 0
                jac_stoi[eqn_step, reactant_step] = 0
                reactant_step -= 1  # ignore this duplicate
            else:
                rindx[eqn_step, reactant_step] = int(name_indx)
                y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) +
                                                     reactant_step)
                y_rind = np.append(y_rind, int(name_indx))
                rr_arr = np.append(rr_arr, int(eqn_step))

            reactant_step += 1

        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_step)

        # record 1D array of stoichiometries per equation
        rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step,
                                                 0:int(reactant_step)])

        # right hand side of equations (gains)
        for product in products:

            if (re.findall(stoich_regex, product)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, product)[0])
                name_only = re.sub(stoich_regex, '',
                                   product)  # name with no stoich number

            elif (re.findall(stoich_regex, product)[0] == ''):
                stoich_num = 1.
                name_only = product

            # store stoichiometry
            pstoi[eqn_step, product_step] = stoich_num
            jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(name_only)

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                name_indx = comp_name.index(name_only)
                name_SMILE = comp_smil[name_indx]

                comp_list.append(
                    name_SMILE)  # list SMILE string of parsed species
                name_indx = comp_num  # allocate index to this species
                # Generate pybel

                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered
                # index of component already listed
                name_indx = comp_namelist.index(name_only)

            # store product index
            # check if index already present - i.e. component appears more than once
            if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    pindx[eqn_step, 0:product_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step]
                jac_stoi[eqn_step, reactant_step +
                         exist_indx] += 1 * pstoi[eqn_step, product_step]
                # remove stoichiometry added above
                pstoi[eqn_step, product_step] = 0
                jac_stoi[eqn_step, reactant_step + product_step] = 0
                product_step -= 1  # ignore this duplicate
            else:
                pindx[eqn_step, product_step] = int(name_indx)
                rr_arr_p = np.append(rr_arr_p, int(eqn_step))
                y_pind = np.append(y_pind, int(name_indx))

            product_step += 1

        # number of products in this equation
        nprod[eqn_step] = int(product_step)
        # record 1D array of stoichiometries per equation
        pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step,
                                                 0:int(product_step)])

        # now that total number of components (reactants and products)
        # in an equation is known, replicate the reactant indices over all
        # components
        tot_comp = nreac[eqn_step] + nprod[eqn_step]
        for i in range(nreac[eqn_step]):
            jac_den_indx[eqn_step,
                         i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i]
            # also replicate the stoichiometries for every reactant
            if (i > 0):
                jac_stoi[eqn_step, i * tot_comp:(i + 1) *
                         tot_comp] = jac_stoi[eqn_step, 0:tot_comp]
# number of Jacobian elements affected by this equation
        njac[eqn_step, 0] = tot_comp * nreac[eqn_step]

    # remove fillers and flatten index for arranging concentrations
    # ready for reaction rate coefficient calculation
    y_arr_g = y_arr[y_arr != -9999]
    y_rind_g = y_rind.astype(int)  # ensure integer type
    uni_y_rind_g = (np.unique(y_rind)).astype(int)  # unique index of reactants
    y_pind_g = y_pind.astype(int)  # ensure integer type
    uni_y_pind_g = (np.unique(y_pind)).astype(int)  # unique index of products
    rr_arr_g = rr_arr.astype(int)  # ensure integer type
    rr_arr_p_g = rr_arr_p.astype(int)  # ensure integer type
    # colptrs for sparse matrix of the change to reactants per equation
    reac_col_g = np.cumsum(nreac) - nreac
    # colptrs for sparse matrix of the change to products per equation
    prod_col_g = np.cumsum(nprod) - nprod
    if (len(reac_col_g) > 0):  # if gas-phase reaction present
        # include final columns
        reac_col_g = np.append(reac_col_g, reac_col_g[-1] + nreac[-1])
        prod_col_g = np.append(prod_col_g, prod_col_g[-1] + nprod[-1])

    # tag other gas-phase arrays
    rindx_g = rindx
    pindx_g = pindx
    rstoi_g = rstoi
    pstoi_g = pstoi
    jac_stoi_g = jac_stoi
    rstoi_flat_g = rstoi_flat
    pstoi_flat_g = pstoi_flat
    nreac_g = nreac
    nprod_g = nprod
    reac_coef_g = reac_coef
    jac_den_indx_g = jac_den_indx.astype(int)
    njac_g = njac.astype(int)
    jac_indx_g = jac_indx
    jac_indx_g = jac_indx_g.astype(int)

    # same for aqueous-phase reactions ----------------------------------
    # preparatory part ----------------------------------------------------
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = (np.ones((num_eqn[1], 1)) * -2).astype(int)
    # matrix of indices to arrange reactant concentrations when
    # reaction rate coefficient calculated
    y_arr = (np.ones((num_eqn[1], 1)).astype(int)) * -9999
    # array to arrange reaction rates so they align with reactant stoichiometries
    rr_arr = np.empty((0))
    # same but for products
    rr_arr_p = np.empty((0))
    # index array for extracting required reactant concentrations for the
    # reaction rate coefficient calculation
    y_rind = np.empty((0))
    # index array for identifying products when assigning gains from reactions
    y_pind = np.empty((0))
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn[1], 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn[1], 1))
    jac_stoi = np.zeros((num_eqn[1], 1))
    # 1D array to record stoichiometries of reactants per equation
    rstoi_flat = np.empty((0))
    # 1D array to record stoichiometries of products per equation
    pstoi_flat = np.empty((0))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn[1], 1))
    # arrays to store number of reactants and products of equations
    nreac = np.empty(num_eqn[1], dtype=np.int8)
    nprod = np.empty(num_eqn[1], dtype=np.int8)
    # list for equation reaction rate coefficients
    reac_coef = []
    # matrix containing index of components who are denominators in the
    # calculation of equation derivatives in the Jacobian
    jac_den_indx = np.zeros((num_eqn[1], 1))
    # total number of Jacobian elements per equation
    njac = np.zeros((num_eqn[1], 1))
    # indices of Jacobian to affect per equation (rows)
    jac_indx = np.zeros((num_eqn[1], 1))
    # ---------------------------------------------------------------------

    max_no_reac = 0.  # log maximum number of reactants in a reaction
    max_no_prod = 0.  # log maximum number of products in a reaction

    # loop through aqueous-phase equations line by line and extract the required information
    for eqn_step in range(num_eqn[1]):

        line = aqeqn_list[eqn_step]  # extract this line

        # work out whether equation or reaction rate coefficient part comes first
        eqn_start = str('.*\\' + chem_scheme_markers[10])
        rrc_start = str('.*\\' + chem_scheme_markers[9])
        # get index of these markers, note span is the property of the match object that
        # gives the location of the marker
        eqn_start_indx = (re.match(eqn_start, line)).span()[1]
        rrc_start_indx = (re.match(rrc_start, line)).span()[1]

        if eqn_start_indx > rrc_start_indx:
            eqn_sec = 1  # equation is second part
        else:
            eqn_sec = 0  # equation is first part

        # split the line into 2 parts: equation and rate coefficient
        # . means match with anything except a new line character., when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, so final part is stating the character(s) we
        # are specifically looking for, \\ ensures the marker is recognised
        if eqn_sec == 1:
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[11])
        else:  # end of equation part is start of reaction rate coefficient part
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[9])

        # extract the equation as a string ([0] extracts the equation section and
        # [1:-1] removes the bounding markers)
        eqn = re.findall(eqn_markers, line)[0][1:-1].strip()

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # with stoich number; rule out the photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        products = [t for t in eqn_split[eqmark_pos + 1:]
                    if t != '+']  # with stoich number

        # record maximum number of reactants across all equations
        max_no_reac = np.maximum(len(reactants), max_no_reac)
        # record maximum number of products across all equations
        max_no_prod = np.maximum(len(products), max_no_prod)

        # append columns if needed
        while max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1]):
            rindx = np.append(rindx, (np.ones(
                (num_eqn[1], 1)) * -2).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn[1], 1))), axis=1)
            y_arr = np.append(y_arr, (np.ones(
                (num_eqn[1], 1)) * -9999).astype(int),
                              axis=1)
            y_arr_fixer = ((np.arange(0, num_eqn[1],
                                      dtype='int')).reshape(-1, 1))
            y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac)))
            y_arr[y_arr !=
                  -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999]
        while max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1]):
            pindx = np.append(pindx, (np.zeros((num_eqn[1], 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn[1], 1))), axis=1)
        while ((len(reactants)**2.0 + len(reactants) * len(products)) >
               jac_indx.shape[1]):
            jac_indx = np.append(jac_indx, (np.zeros((num_eqn[1], 1))), axis=1)
            jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[1], 1))),
                                     axis=1)
            jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[1], 1))), axis=1)

        # .* means occurs anywhere in line and, first \ means second \ can be interpreted
        # and second \ ensures recognition of marker
        rate_coeff_start_mark = str('\\' + chem_scheme_markers[9])
        # . means match with anything except a new line character, when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, \\ ensures the marker
        # is recognised
        if eqn_sec == 1:  # end of reaction rate coefficient part is start of equation part
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10])
        else:  # end of reaction rate coefficient part is end of line
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11])

        # rate coefficient starts and end punctuation
        rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark)
        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if (rate_ex.find('EXP') != -1):
            print('Error in reaction rate coefficient expression: ', rate_ex)
            sys.exit()

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # extract the stoichiometric number of the component in current equation
        reactant_step = 0
        product_step = 0
        stoich_regex = r"^\d*\.\d*|^\d*"
        numr = len(reactants)  # number of reactants in this equation

        # left hand side of equations (losses)
        for reactant in reactants:

            if (re.findall(stoich_regex, reactant)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, reactant)[0])
                # name with no stoich number
                name_only = re.sub(stoich_regex, '', reactant)
            elif (re.findall(stoich_regex, reactant)[0] == ''):
                stoich_num = 1.0
                name_only = reactant

            # store stoichiometry
            rstoi[eqn_step, reactant_step] = stoich_num
            jac_stoi[eqn_step, reactant_step] = -1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(
                    name_only)  # add to chemical scheme name list

                # convert MCM chemical names to SMILES
                if name_only in comp_name:
                    # index where xml file name matches reaction component name
                    name_indx = comp_name.index(name_only)
                    name_SMILE = comp_smil[name_indx]  # SMILES of component
                else:
                    print(
                        str('Error: inside eqn_parser, chemical scheme name ' +
                            str(name_only) + ' not found in xml file'))
                    sys.exit()

                comp_list.append(name_SMILE)  # list SMILE names
                name_indx = comp_num  # allocate index to this species
                # Generate pybel
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered it will be in comp_list
                # existing index
                name_indx = comp_namelist.index(name_only)

            # store reactant index
            # check if index already present - i.e. component appears more than once
            # as a reactant in this reaction
            if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step]
                jac_stoi[eqn_step,
                         exist_indx] += -1 * rstoi[eqn_step, reactant_step]
                # remove stoichiometry added above
                rstoi[eqn_step, reactant_step] = 0
                jac_stoi[eqn_step, reactant_step] = 0
                reactant_step -= 1  # ignore this duplicate
            else:
                rindx[eqn_step, reactant_step] = int(name_indx)
                y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) +
                                                     reactant_step)
                y_rind = np.append(y_rind, int(name_indx))
                rr_arr = np.append(rr_arr, int(eqn_step))

            reactant_step += 1

        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_step)

        # record 1D array of stoichiometries per equation
        rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step,
                                                 0:int(reactant_step)])

        # right hand side of equations (gains)
        for product in products:

            if (re.findall(stoich_regex, product)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, product)[0])
                name_only = re.sub(stoich_regex, '',
                                   product)  # name with no stoich number

            elif (re.findall(stoich_regex, product)[0] == ''):
                stoich_num = 1.0
                name_only = product

            # store stoichiometry
            pstoi[eqn_step, product_step] = stoich_num
            jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num
            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(name_only)

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                if name_only in comp_name:
                    name_indx = comp_name.index(name_only)
                    name_SMILE = comp_smil[name_indx]
                else:
                    print('Error: inside eqn_interr, chemical scheme name ' +
                          str(name_only) + ' not found in xml file')
                    sys.exit()

                comp_list.append(
                    name_SMILE)  # list SMILE string of parsed species
                name_indx = comp_num  # allocate index to this species

                # generate pybel object
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered
                # index of component already listed
                name_indx = comp_namelist.index(name_only)

            # store product index
            # check if index already present - i.e. component appears more than once
            if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    pindx[eqn_step, 0:product_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step]
                jac_stoi[eqn_step, reactant_step +
                         exist_indx] += 1 * pstoi[eqn_step, product_step]
                # remove stoichiometry added above
                pstoi[eqn_step, product_step] = 0
                jac_stoi[eqn_step, reactant_step + product_step] = 0
                product_step -= 1  # ignore this duplicate
            else:
                pindx[eqn_step, product_step] = int(name_indx)
                rr_arr_p = np.append(rr_arr_p, int(eqn_step))
                y_pind = np.append(y_pind, int(name_indx))

            product_step += 1

        # number of products in this equation
        nprod[eqn_step] = int(product_step)
        # record 1D array of stoichiometries per equation
        pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step,
                                                 0:int(product_step)])

        # now that total number of components (reactants and products)
        # in an equation is known, replicate the reactant indices over all
        # components
        tot_comp = nreac[eqn_step] + nprod[eqn_step]
        for i in range(nreac[eqn_step]):
            jac_den_indx[eqn_step,
                         i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i]
            # also replicate the stoichiometries for every reactant
            if (i > 0):
                jac_stoi[eqn_step, i * tot_comp:(i + 1) *
                         tot_comp] = jac_stoi[eqn_step, 0:tot_comp]
# number of Jacobian elements affected by this equation
        njac[eqn_step, 0] = tot_comp * nreac[eqn_step]

    # account for gas-phase in Jacobian denominator index
    jac_den_indx += (comp_num + 2)

    # remove fillers and flatten index for arranging concentrations ready for reaction rate coefficient calculation
    y_arr_aq = y_arr[y_arr != -9999]  # remove fillers
    y_rind_aq = y_rind.astype(int)  # ensure integer type
    uni_y_rind_aq = (np.unique(y_rind)).astype(
        int)  # unique index of reactants
    y_pind_aq = y_pind.astype(int)  # ensure integer type
    uni_y_pind_aq = (np.unique(y_pind)).astype(int)  # unique index of products
    rr_arr_aq = rr_arr.astype(int)  # ensure integer type
    rr_arr_p_aq = rr_arr_p.astype(int)  # ensure integer type
    # colptrs for sparse matrix of the change to reactants per equation
    reac_col_aq = np.cumsum(nreac) - nreac
    # colptrs for sparse matrix of the change to products per equation
    prod_col_aq = np.cumsum(nprod) - nprod
    if (len(reac_col_aq) > 0):  # if aqueous-phase reaction present
        # include final columns
        reac_col_aq = np.append(reac_col_aq, reac_col_aq[-1] + nreac[-1])
        prod_col_aq = np.append(prod_col_aq, prod_col_aq[-1] + nprod[-1])

    # tag other aqueous-phase arrays
    rindx_aq = rindx
    pindx_aq = pindx
    rstoi_aq = rstoi
    pstoi_aq = pstoi
    jac_stoi_aq = jac_stoi
    rstoi_flat_aq = rstoi_flat
    pstoi_flat_aq = pstoi_flat
    nreac_aq = nreac
    nprod_aq = nprod
    reac_coef_aq = reac_coef
    jac_den_indx_aq = jac_den_indx.astype(int)
    njac_aq = njac.astype(int)
    jac_indx_aq = jac_indx
    jac_indx_aq = jac_indx_aq.astype(int)

    return (rindx_g, rstoi_g, pindx_g, pstoi_g, reac_coef_g, nreac_g, nprod_g,
            jac_stoi_g, jac_den_indx_g, njac_g, jac_indx_g, y_arr_g, y_rind_g,
            uni_y_rind_g, y_pind_g, uni_y_pind_g, reac_col_g, prod_col_g,
            rstoi_flat_g, pstoi_flat_g, rr_arr_g, rr_arr_p_g, rindx_aq,
            rstoi_aq, pindx_aq, pstoi_aq, reac_coef_aq, nreac_aq, nprod_aq,
            jac_stoi_aq, jac_den_indx_aq, njac_aq, jac_indx_aq, y_arr_aq,
            y_rind_aq, uni_y_rind_aq, y_pind_aq, uni_y_pind_aq, reac_col_aq,
            prod_col_aq, rstoi_flat_aq, pstoi_flat_aq, rr_arr_aq, rr_arr_p_aq,
            comp_namelist, comp_list, Pybel_objects, comp_num, RO_indx)
Example #3
0
def extract_mechanism(filename, xmlname, TEMP, PInit, Comp0, testf):
    """Parse a chemical scheme file into reaction index/stoichiometry arrays.

    Every line kept by ``formatting.remove_comments`` is treated as one
    reaction of the form ``% rate coefficient : reactants = products ;``.
    Component names are translated to SMILES through the xml file and a
    pybel object is generated for every unique component.

    inputs:
    filename - path to the chemical scheme (equation) file
    xmlname - path to the xml file relating component names to SMILES
    TEMP - temperature used for the concentration of M (presumably K, given
        the gas constant used below - TODO confirm)
    PInit - pressure used for the concentration of M (presumably Pa - TODO
        confirm)
    Comp0 - names (chemical scheme names) of components present at start;
        each must be present in the xml file, otherwise ValueError is raised
    testf - flag for operating in normal mode (0) or testing mode (1); in
        testing mode a tuple of 19 zeros is returned and no file is read

    outputs (returned as a tuple in this order):
    rindx - matrix of reactant indices (cols) in each equation (rows)
    pindx - matrix of product indices (cols) in each equation (rows)
    rstoi - matrix of reactant stoichiometries (cols) per equation (rows)
    pstoi - matrix of product stoichiometries (cols) per equation (rows)
    reac_coef - list of reaction rate coefficient expressions (strings)
    spec_list - SMILES (or unconverted name) of each unique component
    Pybel_objects - list of pybel objects, aligned with spec_list
    num_eqn - number of equations
    species_step - number of unique components
    RO2_indices - result of write_RO2_indices(spec_namelist)
    nreac - number of reactants in each equation
    nprod - number of products in each equation
    prodn - number of columns in pindx
    reacn - number of columns in rindx
    M_val - gas-phase concentration of M (molecules/cc (air))
    N2_val - gas-phase concentration of nitrogen (molecules/cc (air))
    O2_val - gas-phase concentration of oxygen (molecules/cc (air))
    init_SMIL - SMILES string for each component in Comp0
    spec_namelist - unique equation tokens exactly as written in the
        scheme, i.e. still carrying any leading stoichiometric number
    """

    if testf == 1:
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

    def _widen(matrix, ncols):
        # right-pad matrix with zero-filled columns until it has ncols columns
        missing = ncols - matrix.shape[1]
        if missing <= 0:
            return matrix
        filler = np.zeros((matrix.shape[0], missing), dtype=matrix.dtype)
        return np.append(matrix, filler, axis=1)

    # read the file and store everything into a list (reaction rates have
    # units /s); the context manager closes the file even if reading fails
    with open(filename, mode='r') as f_open_eqn:
        total_list_eqn = f_open_eqn.readlines()

    # equation list without comments
    naked_list_eqn = formatting.remove_comments(total_list_eqn)

    # calculate gas-phase concentrations of M, N2 and O2 (molecules/cc (air))
    # 1.0e-6 converts from molecules/m3 to molecules/cc
    M_val = (PInit / (8.3144598 * TEMP) * 6.0221409e+23) * 1.0e-6
    N2_val = M_val * 0.79
    O2_val = M_val * 0.2096

    # naked_list_eqn contains everything except the comments
    print('Now parsing the eqn info...\n')
    num_eqn = len(naked_list_eqn)

    # --open and initialise the xml file for converting chemical names to SMILES-----
    with open(xmlname) as fd:
        doc = xmltodict.parse(fd.read())

    a = doc['mechanism']['species_defs']['species']
    if not isinstance(a, list):
        # xmltodict gives a single mapping, not a list, for a lone <species>
        a = [a]

    spec_name = []  # chemical scheme names in xml file order
    spec_smil = []  # SMILES aligned with spec_name
    smiles_by_name = {}  # name -> SMILES, first occurrence wins
    for entry in a:
        name = entry['@species_name']
        if "smiles" in entry:
            smil = entry['smiles']
        elif name.startswith(('O', 'H')):
            # no SMILES supplied: bracket names beginning with O or H
            smil = '[' + name + ']'
        else:
            smil = name
        spec_name.append(name)
        spec_smil.append(smil)
        smiles_by_name.setdefault(name, smil)

    # convert input component names for components present in gas phase at
    # experiment start from chemical scheme names to SMILES
    init_SMIL = [spec_smil[spec_name.index(comp)] for comp in Comp0]

    # initialising lists

    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = np.zeros((num_eqn, 1)).astype(int)
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn, 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn, 1))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn, 1))
    # arrays to store number of reactants and products in an equation
    nreac = np.zeros((num_eqn))
    nprod = np.zeros((num_eqn))
    # list for equation reaction rate coefficients
    reac_coef = []
    # list for species
    spec_list = []
    # list of Pybel objects
    Pybel_objects = []
    # name strings of species present in the scheme (not SMILES)
    spec_namelist = []

    # hashed companions of spec_namelist and spec_list so that membership and
    # index look-ups do not rescan the growing lists for every component
    seen_tokens = set()
    index_by_smiles = {}

    eqn_pattern = re.compile(r"\:.*\;")  # eqn starts with a : and ends with a ;
    rate_pattern = re.compile(r"\%.*\:")  # rate coef starts with a % and ends with a :
    stoich_pattern = re.compile(r"^\d*\.\d*|^\d*")  # leading stoichiometric number

    def _register(token):
        # log one equation token and return (stoichiometry, component index)
        if token not in seen_tokens:
            seen_tokens.add(token)
            spec_namelist.append(token)

        # the pattern can match the empty string, so a match always exists
        found = stoich_pattern.match(token)
        if found.group(0) != '':
            stoich_num = float(found.group(0))
            name_only = token[found.end():]  # name with no stoich number
        else:
            stoich_num = 1.0
            name_only = token

        # convert MCM chemical name to SMILES when the xml file knows it
        name_only = smiles_by_name.get(name_only, name_only)

        name_indx = index_by_smiles.get(name_only)
        if name_indx is None:  # new component encountered
            name_indx = len(spec_list)  # allocate index to this species
            index_by_smiles[name_only] = name_indx
            spec_list.append(name_only)  # log parsed species
            # generate pybel object and append to Pybel object list
            Pybel_objects.append(pybel.readstring('smi', name_only))

        return stoich_num, name_indx

    # Loop through the equations line by line and extract the information
    for eqn_step, line in enumerate(naked_list_eqn):

        # split the line into 2 parts: equation; rate coef
        # (fac format doesnt have id for each equation)
        # extract the equation (in a string), dropping the bounding markers
        eqn = eqn_pattern.findall(line)[0][1:-1].strip()

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # with stoich number; rule out the photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        # with stoich number
        products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+']

        # append columns if this equation has more components than any before
        rindx = _widen(rindx, len(reactants))
        rstoi = _widen(rstoi, len(reactants))
        pindx = _widen(pindx, len(products))
        pstoi = _widen(pstoi, len(products))

        # rate coefficient expression in a string
        rate_ex = rate_pattern.findall(line)[0][1:-1].strip()
        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)

        # store the reaction rate for this equation (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # left hand side of equations (losses)
        for reactant_step, reactant in enumerate(reactants):
            stoich_num, name_indx = _register(reactant)
            rstoi[eqn_step, reactant_step] = stoich_num
            rindx[eqn_step, reactant_step] = int(name_indx)

        # number of reactants in this equation
        nreac[eqn_step] = len(reactants)

        # right hand side of equations (gains)
        for product_step, product in enumerate(products):
            stoich_num, name_indx = _register(product)
            pstoi[eqn_step, product_step] = stoich_num
            pindx[eqn_step, product_step] = int(name_indx)

        # number of products in this equation
        nprod[eqn_step] = len(products)

    species_step = len(spec_list)  # number of unique species

    # number of columns in rindx and pindx
    reacn = rindx.shape[1]
    prodn = pindx.shape[1]

    # create a 2 column array, the first column with the RO2 list index of any RO2 species
    # that appear in the species list, the second column for its index in the species list
    RO2_indices = write_RO2_indices(spec_namelist)

    # print the brief info for the simulation to the screen
    print('Briefing:')
    print('Total number of equations: %i' % (num_eqn))
    print('Total number of species: %i\n' % (species_step))

    return (rindx, pindx, rstoi, pstoi, reac_coef, spec_list, Pybel_objects,
            num_eqn, species_step, RO2_indices, nreac, nprod, prodn, reacn,
            M_val, N2_val, O2_val, init_SMIL, spec_namelist)
Example #4
0
def sch_interr(total_list_eqn, chm_sch_mrk):
    """Sort the lines of a chemical scheme file into its separate parts.

    Every line is stripped of bounding white space and then tested against
    the markers in chm_sch_mrk to decide whether it holds a generic reaction
    rate coefficient, part of the peroxy radical (RO2) list, a gas-phase
    reaction or an aqueous-phase reaction.

    Markers (other than the RO2 start marker) are compared as literal text
    using string methods rather than being spliced into regular expressions.
    This keeps multi-character markers literal and avoids the re.error that a
    pattern ending in a lone backslash raises when a marker is empty.

    inputs: ------------------------------------------------------------------
    total_list_eqn - all lines from the chemical scheme file
    chm_sch_mrk - markers to denote different sections of the chemical
        scheme, indexed as used in this function:
        [0] start of a gas-phase reaction line
        [1] start of the peroxy radical list (used as a regular expression
            anchored at the start of the line)
        [2] separator between entries of the peroxy radical list
        [3], [4] text stripped from the start and end of each peroxy
            radical name
        [5] end of the peroxy radical list
        [6] peroxy radical list continues onto the next line
        [7] end of a generic reaction rate coefficient line
        [8] start of an aqueous-phase reaction line ('' to bypass)
        [9] start of the reaction rate coefficient part of an equation line
        [11] end of an equation line

    outputs: -----------------------------------------------------------------
    eqn_list - stripped gas-phase reaction lines
    aqeqn_list - stripped aqueous-phase reaction lines
    eqn_num - numpy array holding the lengths of eqn_list and aqeqn_list
    rrc - generic reaction rate coefficient expressions after formatting
    rrc_name - the text preceding '=' in each entry of rrc
    RO2_names - names collected from the peroxy radical list
    --------------------------------------------------------------------------
    """

    eqn_list = []  # gas-phase reaction equations
    aqeqn_list = []  # particle-phase reaction equations
    RO2_names = []  # peroxy radicals
    rrc = []  # reaction rate coefficients
    rrc_name = []  # reaction rate coefficient labels
    pr_flag = 0  # don't collate peroxy radicals until seen

    for line in total_list_eqn:

        line1 = line.strip()  # remove bounding white space

        # --------------------------------------------------------------------
        # generic reaction rate coefficients part
        # could be a generic reaction rate coefficient if just one = in line
        if line1.count('=') == 1:
            lhs, rhs = (part.strip() for part in line1.split('='))

            rrc_flag = 1
            # don't record if nothing preceding '=' (can occur in KPP files,
            # e.g. =IGNORE)
            if lhs == '':
                rrc_flag = 0
            # don't record if this is just an IGNORE command
            if rhs[0:6] == 'IGNORE':
                rrc_flag = 0
            # don't record if the marker for the end of generic reaction rate
            # coefficient lines is supplied but the line does not end with it
            if (len(chm_sch_mrk[7]) > 0
                    and not line1.endswith(chm_sch_mrk[7])):
                rrc_flag = 0

            # lhs is non-empty whenever rrc_flag is still 1, so lhs[0] is safe;
            # exclude the start of the peroxy radical list and lines starting
            # with the gas-phase or aqueous-phase reaction marker
            if (rrc_flag == 1 and lhs != chm_sch_mrk[1]
                    and lhs[0] != chm_sch_mrk[0]
                    and lhs[0] != chm_sch_mrk[8]):
                # remove end characters
                line2 = line1.replace(str(chm_sch_mrk[7]), '')
                # remove all white space
                line2 = line2.replace(' ', '')
                # convert fortran-type scientific notation to python type
                line2 = formatting.SN_conversion(line2)
                # ensure rate coefficient is python readable
                line2 = formatting.convert_rate_mcm(line2)
                rrc.append(line2.strip())
                # get just name of generic reaction rate coefficient
                rrc_name.append((line2.split('=')[0]).strip())

        # --------------------------------------------------------------------
        # peroxy radical part
        # if starting marker for peroxy radical list seen, check whether
        # recording is needed (in case chm_sch_mrk[1] is not unique)
        if re.match('^' + chm_sch_mrk[1], line1) is not None:
            end_mark = chm_sch_mrk[5]
            cont_mark = chm_sch_mrk[6]

            # a one-line RO2 list carries its end marker on the same line
            if len(end_mark.strip()) > 0 and end_mark in line1:
                pr_flag = 1

            # a multi-line RO2 list carries the continuation marker; an empty
            # continuation marker cannot confirm anything, so it is skipped
            if cont_mark != '' and cont_mark in line1:
                pr_flag = 1

            # if neither end nor continuation marker is supplied then assume
            # the RO2 start marker is unique
            if (len(end_mark.strip()) == 0
                    and len(cont_mark.strip()) == 0):
                pr_flag = 1

        if pr_flag == 1:
            # loop through the elements separated by the RO2 separator
            for line3 in line1.split(chm_sch_mrk[2]):
                if len(line3.split('=')) > 1:  # in case of RO2 = ...
                    line3 = (line3.split('='))[1]
                if len(line3.split(';')) > 1:  # list finishing with ...;
                    line3 = (line3.split(';'))[0]
                if len(line3.split('&')) > 1:  # list finishing with &
                    line3 = (line3.split('&'))[0]

                line3 = line3.strip()
                # don't include white space or ampersands
                if line3 == '' or line3 == '&':
                    continue

                # strip the marks surrounding the name
                if line3[0:len(chm_sch_mrk[3])] == chm_sch_mrk[3]:
                    line3 = line3[len(chm_sch_mrk[3])::]
                if line3[-len(chm_sch_mrk[4])::] == chm_sch_mrk[4]:
                    line3 = line3[0:-len(chm_sch_mrk[4])]

                RO2_names.append(line3)

            # check for end of RO2 list - given either by the end marker or,
            # when no end marker is supplied, by the absence of the marker
            # for continuation onto the next line
            if len(chm_sch_mrk[5].strip()) > 0:
                if chm_sch_mrk[5] in line1:
                    pr_flag = 0
            elif chm_sch_mrk[6] == '' or chm_sch_mrk[6] not in line1:
                # an empty continuation marker means the list cannot continue
                pr_flag = 0

        # --------------------------------------------------------------------
        # gas-phase reaction equation part
        # first check is whether equation start marker begins the line,
        # second check is whether the markers for the start of the reaction
        # rate coefficient part and for the end of equation lines are present
        if line1.startswith(chm_sch_mrk[0]):
            if chm_sch_mrk[9] in line1 and chm_sch_mrk[11] in line1:
                eqn_list.append(line1)  # store reaction equations

        # aqueous-phase reaction equation part, bypassed if no marker given
        if chm_sch_mrk[8] != '' and line1.startswith(chm_sch_mrk[8]):
            if chm_sch_mrk[9] in line1 and chm_sch_mrk[11] in line1:
                aqeqn_list.append(line1)  # store reaction equations

    # number of equations
    eqn_num = np.array((len(eqn_list), len(aqeqn_list)))

    return (eqn_list, aqeqn_list, eqn_num, rrc, rrc_name, RO2_names)
Example #5
0
def extract_mechanism(filename, xmlname, PInit, testf, RH, start_sim_time, lat,
                      lon, act_flux_path, DayOfYear, chem_scheme_markers,
                      photo_par_file):
    """Parse a chemical scheme file and its xml file into reaction arrays.

    The chemical scheme file is read line by line and separated into generic
    reaction rate coefficients, the peroxy radical (RO2) list, gas-phase
    reactions and aqueous-phase reactions using chem_scheme_markers. The xml
    file supplies the SMILES string for each named component. Both sets of
    reactions are passed to eqn_interr, RO2 indices are found with
    write_RO2_indices and the rate coefficient files are written with
    write_rate_file.

    Markers (other than the RO2 start marker) are compared as literal text.
    An empty marker is never placed in a regular expression, since a pattern
    ending in a lone backslash raises re.error.

    inputs: ------------------------------------------------------------------
    filename - path to the chemical scheme file
    xmlname - path to the xml file relating component names to SMILES
    PInit, RH, start_sim_time, lat, lon, act_flux_path, DayOfYear - not used
        inside this function
    testf - flag for operating in normal mode (0) or testing mode (1)
    chem_scheme_markers - markers for different sections of the chemical
        scheme, indexed as used in this function:
        [0] start of a gas-phase reaction line
        [1] start of the peroxy radical list (used as a regular expression
            that may match anywhere in the line)
        [2] separator between entries of the peroxy radical list
        [3], [4] text stripped from the start and end of each peroxy
            radical name
        [5] end of the peroxy radical list
        [6] peroxy radical list continues onto the next line
        [7] end of a generic reaction rate coefficient line
        [8] start of an aqueous-phase reaction line ('' to bypass)
        [9] start of the reaction rate coefficient part of an equation line
        [11] end of an equation line
    photo_par_file - path to the file containing photolysis information

    outputs: -----------------------------------------------------------------
    In testing mode a tuple of 19 zeros. Otherwise the tuple
    (rindx, pindx, rstoi, pstoi, reac_coef, spec_list, Pybel_objects,
    num_eqn, comp_num, RO2_indices, nreac, nprod, prodn, reacn,
    spec_namelist, Jlen, rindx_aq, pindx_aq, rstoi_aq, pstoi_aq,
    reac_coef_aq, nreac_aq, nprod_aq, prodn_aq, reacn_aq) where
    num_eqn - numbers of gas-phase and aqueous-phase equations
    prodn, reacn - number of columns in pindx and rindx
    prodn_aq, reacn_aq - number of columns in pindx_aq and rindx_aq
    Jlen - 62 for the default MCM photolysis file, otherwise one more than
        the number of consecutively numbered 'J_<n>_axs' lines found in
        photo_par_file
    and the remaining items are as returned by eqn_interr and
    write_RO2_indices.
    --------------------------------------------------------------------------
    """

    if testf == 1:  # for just testing mode
        # NOTE(review): 19 placeholders here against 25 values in the normal
        # return below - confirm what callers unpack in testing mode
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

    print('Now parsing the equation information ... \n')

    # read the chemical scheme file and store everything into a list, the
    # with statement guarantees the file is closed
    # reaction rates have units /s
    with open(filename, mode='r') as f_open_eqn:
        total_list_eqn = f_open_eqn.readlines()

    naked_list_eqn = []  # empty list for gas-phase equation reactions
    naked_list_peqn = []  # empty list for other equation reactions
    RO2_names = []  # empty list for peroxy radicals
    rrc = []  # empty list for reaction rate coefficients
    rrc_name = []  # empty list for reaction rate coefficient labels

    pr_flag = 0  # don't collate peroxy radicals until seen

    # obtain lists for reaction rate coefficients, peroxy radicals and
    # equation reactions using markers for separating chemical scheme elements
    for line in total_list_eqn:

        line1 = line.strip()  # remove bounding white space

        # --------------------------------------------------------------------
        # generic reaction rate coefficients part
        # could be generic reaction rate coefficient if just one = in line
        if line1.count('=') == 1:
            lhs, rhs = (part.strip() for part in line1.split('='))

            rrc_flag = 1
            # don't record if nothing preceding '=' (can occur in KPP files,
            # e.g. =IGNORE)
            if lhs == '':
                rrc_flag = 0
            # don't record if this is just an IGNORE command
            if rhs[0:6] == 'IGNORE':
                rrc_flag = 0
            # don't record if the marker for the end of generic reaction rate
            # coefficient lines is supplied but the line does not end with it
            if (len(chem_scheme_markers[7]) > 0
                    and not line1.endswith(chem_scheme_markers[7])):
                rrc_flag = 0

            # lhs is non-empty whenever rrc_flag is still 1, so lhs[0] is safe;
            # exclude the start of the peroxy radical list and lines starting
            # with the gas-phase or aqueous-phase reaction marker
            if (rrc_flag == 1 and lhs != chem_scheme_markers[1]
                    and lhs[0] != chem_scheme_markers[0]
                    and lhs[0] != chem_scheme_markers[8]):
                # remove end characters
                line2 = line1.replace(str(chem_scheme_markers[7]), '')
                # remove all white space
                line2 = line2.replace(' ', '')
                # convert fortran-type scientific notation to python type
                line2 = formatting.SN_conversion(line2)
                # ensure rate coefficient is python readable
                line2 = formatting.convert_rate_mcm(line2)
                rrc.append(line2.strip())
                # get just name of generic reaction rate coefficient
                rrc_name.append((line2.split('=')[0]).strip())

        # --------------------------------------------------------------------
        # peroxy radical part
        # if starting marker for peroxy radical list seen, check whether
        # recording is needed (in case chem_scheme_markers[1] is not unique)
        if re.match('.*' + chem_scheme_markers[1], line1) is not None:

            # a one-line RO2 list carries its end marker on the same line
            if (len(chem_scheme_markers[5].strip()) > 0
                    and chem_scheme_markers[5] in line1):
                pr_flag = 1

            # a multi-line RO2 list carries the continuation marker; an empty
            # continuation marker cannot confirm anything, so it is skipped
            if (chem_scheme_markers[6] != ''
                    and chem_scheme_markers[6] in line1):
                pr_flag = 1

        if pr_flag == 1:
            # loop through the elements separated by the RO2 separator
            for line3 in line1.split(chem_scheme_markers[2]):
                if len(line3.split('=')) > 1:  # in case of RO2 = ...
                    line3 = (line3.split('='))[1]
                if len(line3.split(';')) > 1:  # list finishing with ...;
                    line3 = (line3.split(';'))[0]
                if len(line3.split('&')) > 1:  # list finishing with &
                    line3 = (line3.split('&'))[0]

                line3 = line3.strip()
                # don't include white space or ampersands
                if line3 == '' or line3 == '&':
                    continue

                # strip the marks surrounding the name
                if (line3[0:len(chem_scheme_markers[3])] ==
                        chem_scheme_markers[3]):
                    line3 = line3[len(chem_scheme_markers[3])::]
                if (line3[-len(chem_scheme_markers[4])::] ==
                        chem_scheme_markers[4]):
                    line3 = line3[0:-len(chem_scheme_markers[4])]

                RO2_names.append(line3)

            # check for end of RO2 list - given either by the end marker or,
            # when no end marker is supplied, by the absence of the marker
            # for continuation onto the next line
            if len(chem_scheme_markers[5].strip()) > 0:
                if chem_scheme_markers[5] in line1:
                    pr_flag = 0
            elif (chem_scheme_markers[6] == ''
                  or chem_scheme_markers[6] not in line1):
                # an empty continuation marker means the list cannot continue
                pr_flag = 0

        # --------------------------------------------------------------------
        # gas-phase reaction equation part
        # first check is whether equation start marker begins the line,
        # second check is whether the markers for the start of the reaction
        # rate coefficient part and for the end of equation lines are present
        if line1.startswith(chem_scheme_markers[0]):
            if (chem_scheme_markers[9] in line1
                    and chem_scheme_markers[11] in line1):
                naked_list_eqn.append(line1)  # store reaction equations

        # aqueous-phase reaction equation part, bypassed if no marker given
        if (chem_scheme_markers[8] != ''
                and line1.startswith(chem_scheme_markers[8])):
            if (chem_scheme_markers[9] in line1
                    and chem_scheme_markers[11] in line1):
                naked_list_peqn.append(line1)  # store reaction equations

    # get number of equations for phases
    num_eqn = np.array((len(naked_list_eqn), len(naked_list_peqn)))

    # --open and initialise the xml file for converting chemical names to SMILES
    with open(xmlname) as fd:
        doc = xmltodict.parse(fd.read())

    a = doc['mechanism']['species_defs']['species']
    spec_name = []
    spec_smil = []

    for species in a:
        name = species['@species_name']
        if "smiles" in species:
            smil = species['smiles']
        elif name[0] == 'O' or name[0] == 'H':
            # no SMILES given and name starts with O or H: bracket the name
            smil = '[' + name + ']'
        else:
            smil = name
        spec_name.append(name)
        spec_smil.append(smil)

    # initialising variables for equation interrogator (eqn_interr)

    comp_num = 0  # count the number of unique components
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = np.zeros((num_eqn[0], 1)).astype(int)
    rindx_p = np.zeros((num_eqn[1], 1)).astype(int)
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn[0], 1)).astype(int)
    pindx_p = np.zeros((num_eqn[1], 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn[0], 1))
    rstoi_p = np.zeros((num_eqn[1], 1))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn[0], 1))
    pstoi_p = np.zeros((num_eqn[1], 1))
    # arrays to store number of reactants and products in each equation
    nreac = np.empty(num_eqn[0], dtype=np.int8)
    nprod = np.empty(num_eqn[0], dtype=np.int8)
    nreac_p = np.empty(num_eqn[1], dtype=np.int8)
    nprod_p = np.empty(num_eqn[1], dtype=np.int8)
    # list for equation reaction rate coefficients
    reac_coef = []
    reac_coef_p = []
    # list for components' SMILE strings
    spec_list = []
    # list of Pybel objects
    Pybel_objects = []
    # a new list for the name strings of species presented in the scheme (not SMILES)
    spec_namelist = []

    # get equation information for gas-phase reactions
    [
        rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist, spec_list,
        Pybel_objects, nreac, nprod, comp_num
    ] = eqn_interr(num_eqn[0], naked_list_eqn, rindx, rstoi, pindx, pstoi,
                   chem_scheme_markers, reac_coef, spec_namelist, spec_name,
                   spec_smil, spec_list, Pybel_objects, nreac, nprod, comp_num,
                   0)
    # get equation information for aqueous-phase reactions
    [
        rindx_aq, rstoi_aq, pindx_aq, pstoi_aq, reac_coef_aq, spec_namelist,
        spec_list, Pybel_objects, nreac_aq, nprod_aq, comp_num
    ] = eqn_interr(num_eqn[1], naked_list_peqn, rindx_p, rstoi_p, pindx_p,
                   pstoi_p, chem_scheme_markers, reac_coef_p, spec_namelist,
                   spec_name, spec_smil, spec_list, Pybel_objects, nreac_p,
                   nprod_p, comp_num, 1)

    if len(spec_list) != len(spec_namelist):
        sys.exit(
            'Error: inside eqn_parser, length of spec_list is different to length of spec_namelist and the SMILES in the former should align with the chemical scheme names in the latter'
        )

    # number of columns in rindx and pindx
    reacn = rindx.shape[1]
    prodn = pindx.shape[1]
    reacn_aq = rindx_aq.shape[1]
    prodn_aq = pindx_aq.shape[1]

    # create a 2 column array, the first column with the RO2 list index of any RO2 species
    # that appears in the species list, the second column for its index in the species
    # list
    RO2_indices = write_RO2_indices(spec_namelist, RO2_names)

    # automatically generate the Rate_coeffs module that will allow rate coefficients to
    # be calculated inside ode_gen module (/s) for gas phase
    write_rate_file(reac_coef, rrc, rrc_name, testf)
    # repeat for aqueous phase - creates a different file to gas phase one
    write_rate_file(reac_coef_aq, rrc, rrc_name, 3)

    # number of photolysis reactions, if this relevant
    cwd = os.getcwd()  # address of current working directory
    if photo_par_file == str(cwd + '/PyCHAM/photofiles/MCMv3.2'):
        Jlen = 62  # for MCM (default name of photolysis parameters)
    else:  # need to find out number of photolysis reactions
        # use Fortran indexing to be consistent with MCM photochemical reaction numbers
        Jlen = 1
        # the with statement closes the file once every line has been read
        with open(str(photo_par_file), 'r') as f:
            for line in f:  # loop through lines
                if line.strip() == str('J_' + str(Jlen) + '_axs'):
                    Jlen += 1

    # print the brief info for the simulation to the screen
    print('Briefing:')
    print('Total number of gas-phase equations: %i' % (num_eqn[0]))
    print('Total number of aqueous-phase equations: %i' % (num_eqn[1]))
    print('Total number of components found in chemical scheme file: %i\n' %
          (comp_num))

    return (rindx, pindx, rstoi, pstoi, reac_coef, spec_list, Pybel_objects,
            num_eqn, comp_num, RO2_indices, nreac, nprod, prodn, reacn,
            spec_namelist, Jlen, rindx_aq, pindx_aq, rstoi_aq, pstoi_aq,
            reac_coef_aq, nreac_aq, nprod_aq, prodn_aq, reacn_aq)