def _section_patterns(line, chem_scheme_markers):
    """Return the (equation, rate coefficient) regexes for one scheme line.

    The equation and the rate coefficient can appear in either order on a
    line, so the order is detected first and the closing marker of each
    section is chosen accordingly.
    """
    # a leading backslash makes the regex treat the marker literally
    rate_mark = '\\' + chem_scheme_markers[9]   # opens the rate coefficient
    eqn_mark = '\\' + chem_scheme_markers[10]   # opens the equation
    end_mark = '\\' + chem_scheme_markers[11]   # closes whichever part is last

    # '.*' is greedy, so each match ends just after the marker's last occurrence
    eqn_mark_end = re.match('.*' + eqn_mark, line).end()
    rate_mark_end = re.match('.*' + rate_mark, line).end()

    if eqn_mark_end > rate_mark_end:
        # equation is the second part: it runs to the end marker and the
        # rate coefficient runs up to the start of the equation
        return eqn_mark + '.*' + end_mark, rate_mark + '.*' + eqn_mark
    # equation is the first part: it runs up to the rate coefficient marker
    # and the rate coefficient runs to the end marker
    return eqn_mark + '.*' + rate_mark, rate_mark + '.*' + end_mark


def _split_stoich(token):
    """Split a token such as '2NO2' into (stoichiometry, name), e.g. (2.0, 'NO2')."""
    # leading digits, optionally with a decimal point; always matches (maybe '')
    prefix = re.match(r"^\d*\.\d*|^\d*", token).group()
    if prefix:
        return float(prefix), token[len(prefix):]
    return 1.0, token  # no leading number means a stoichiometry of one


def _component_index(name_only, spec_namelist, spec_name, spec_smil,
                     spec_list, Pybel_objects, comp_num):
    """Return (index of the component, updated comp_num), registering it if new."""
    if name_only in spec_namelist:
        # component already encountered: reuse its existing index
        return spec_namelist.index(name_only), comp_num

    spec_namelist.append(name_only)  # add to chemical scheme name list
    # convert the chemical scheme name to SMILES via the xml file lists
    if name_only not in spec_name:
        sys.exit(
            str('Error: inside eqn_parser, chemical scheme name ' +
                str(name_only) + ' not found in xml file'))
    name_SMILE = spec_smil[spec_name.index(name_only)]
    spec_list.append(name_SMILE)
    Pybel_objects.append(pybel.readstring('smi', name_SMILE))
    # the new component takes the next free index
    return comp_num, comp_num + 1


def _record_side(tokens, eqn_step, indx, stoi, spec_namelist, spec_name,
                 spec_smil, spec_list, Pybel_objects, comp_num):
    """Fill row eqn_step of indx/stoi from one side of an equation.

    Returns (number of unique components on this side, updated comp_num).
    """
    step = 0  # next free column in this row
    for token in tokens:
        stoich_num, name_only = _split_stoich(token)
        stoi[eqn_step, step] = stoich_num  # provisional, may be merged below

        name_indx, comp_num = _component_index(
            name_only, spec_namelist, spec_name, spec_smil, spec_list,
            Pybel_objects, comp_num)

        # columns (if any) already holding this component in this equation
        exist_indx = np.where(indx[eqn_step, 0:step] == int(name_indx))[0]
        if exist_indx.size > 0:
            # duplicate: fold its stoichiometry into the existing column and
            # clear the provisional entry; the column counter does not advance
            stoi[eqn_step, exist_indx] += stoi[eqn_step, step]
            stoi[eqn_step, step] = 0
        else:
            indx[eqn_step, step] = int(name_indx)
            step += 1
    return step, comp_num


def eqn_interr(num_eqn, naked_list_eqn, rindx, rstoi, pindx, pstoi,
               chem_scheme_markers, reac_coef, spec_namelist, spec_name,
               spec_smil, spec_list, Pybel_objects, nreac, nprod, comp_num,
               phase):
    """Interrogate chemical scheme equations for one phase.

    Each line is split into its equation and its rate coefficient
    expression. Reactant/product indices and stoichiometries are recorded,
    and components not seen before are registered.

    inputs: -------------------------------------------------------------
    num_eqn - number of equations (scalar)
    naked_list_eqn - equations in strings
    rindx - to hold indices of reactants (equations in rows)
    rstoi - to hold stoichiometries of reactants
    pindx - to hold indices of products
    pstoi - to hold stoichiometries of products
    chem_scheme_markers - markers for separating sections of the chemical
        scheme; elements 9, 10 and 11 are used here
    reac_coef - list to hold reaction rate coefficient expressions
    spec_namelist - name strings of components present in the scheme
        (not SMILES)
    spec_name - name string of components in xml file (not SMILES)
    spec_smil - SMILES from xml file
    spec_list - SMILES of components present in scheme
    Pybel_objects - list containing pybel objects
    nreac - to hold number of unique reactants per equation
    nprod - to hold number of unique products per equation
    comp_num - number of unique components registered so far
    phase - marker for the phase being considered (not used by this
        function)
    ---------------------------------------------------------------------

    returns: (rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist,
    spec_list, Pybel_objects, nreac, nprod, comp_num). The four matrices
    are replaced by widened copies whenever an equation needs more columns,
    so callers must use the returned arrays. The lists, nreac and nprod
    are modified in place.

    raises: SystemExit if a component name is missing from spec_name or a
    converted rate expression still contains 'EXP'. A line lacking a marker
    or the '=' sign fails with the underlying AttributeError, IndexError or
    ValueError.
    """
    max_no_reac = 0  # log maximum number of reactants in a reaction
    max_no_prod = 0  # log maximum number of products in a reaction

    # loop through equations line by line and extract the required information
    for eqn_step in range(num_eqn):
        line = naked_list_eqn[eqn_step]
        eqn_regex, rate_regex = _section_patterns(line, chem_scheme_markers)

        # [0] takes the first match; [1:-1] drops one character at each end,
        # i.e. the bounding markers, assuming they are single characters
        eqn = re.findall(eqn_regex, line)[0][1:-1].strip()
        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # tokens keep any stoichiometry prefix; omit '+' signs and the photon
        reactants = [
            tok for tok in eqn_split[:eqmark_pos] if tok not in ('+', 'hv')
        ]
        products = [tok for tok in eqn_split[eqmark_pos + 1:] if tok != '+']

        # record maximum number of reactants/products across all equations
        max_no_reac = max(len(reactants), max_no_reac)
        max_no_prod = max(len(products), max_no_prod)

        # append columns if needed
        while max_no_reac > min(rindx.shape[1], rstoi.shape[1]):
            rindx = np.append(rindx, np.zeros((num_eqn, 1)).astype(int), axis=1)
            rstoi = np.append(rstoi, np.zeros((num_eqn, 1)), axis=1)
        while max_no_prod > min(pindx.shape[1], pstoi.shape[1]):
            pindx = np.append(pindx, np.zeros((num_eqn, 1)).astype(int), axis=1)
            pstoi = np.append(pstoi, np.zeros((num_eqn, 1)), axis=1)

        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()
        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expression into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if 'EXP' in rate_ex:
            # an 'EXP' left over after conversion is treated as fatal; exit
            # with a message so the failure is reported as an error
            sys.exit('Error in reaction rate coefficient expression: ' +
                     str(rate_ex))
        reac_coef.append(rate_ex)

        # left hand side of equations (losses)
        reactant_step, comp_num = _record_side(
            reactants, eqn_step, rindx, rstoi, spec_namelist, spec_name,
            spec_smil, spec_list, Pybel_objects, comp_num)
        nreac[eqn_step] = int(reactant_step)

        # right hand side of equations (gains)
        product_step, comp_num = _record_side(
            products, eqn_step, pindx, pstoi, spec_namelist, spec_name,
            spec_smil, spec_list, Pybel_objects, comp_num)
        nprod[eqn_step] = int(product_step)

    return (rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist, spec_list,
            Pybel_objects, nreac, nprod, comp_num)
def eqn_interr(num_eqn, eqn_list, aqeqn_list, chem_scheme_markers, comp_name, comp_smil, num_sb, wall_on): # inputs: ---------------------------------------------------------------------------- # num_eqn - number of equations # eqn_list - gas-phase equations in list of strings # aqeqn_list - aqueous-phase equations in list of strings # chem_scheme_markers - markers for separating sections of the chemical scheme # comp_name - name string of components in xml file (not SMILES) # comp_smil - SMILES from xml file # num_sb - number of size bins # wall_on - marker for whether to include wall partitioning # ------------------------------------------------------------------------------------ # preparatory part ---------------------------------------------------- # matrix to record indices of reactants (cols) in each equation (rows) rindx = np.zeros((num_eqn[0], 1)).astype(int) # matrix of indices to arrange reactant concentrations when # reaction rate coefficient calculated y_arr = (np.ones((num_eqn[0], 1)).astype(int)) * -9999 # array to arrange reaction rates so they align with reactant stoichiometries rr_arr = np.empty((0)) # same but for products rr_arr_p = np.empty((0)) # index array for extracting required reactant concentrations for the # reaction rate coefficient calculation y_rind = np.empty((0)) # index array for identifying products when assigning gains from reactions y_pind = np.empty((0)) # matrix to record indices of products (cols) in each equation (rows) pindx = np.zeros((num_eqn[0], 1)).astype(int) # matrix to record stoichiometries of reactants (cols) in each equation (rows) rstoi = np.zeros((num_eqn[0], 1)) jac_stoi = np.zeros((num_eqn[0], 1)) # 1D array to record stoichiometries of reactants per equarion rstoi_flat = np.empty((0)) # 1D array to record stoichiometries of products per equarion pstoi_flat = np.empty((0)) # matrix to record stoichiometries of products (cols) in each equation (rows) pstoi = np.zeros((num_eqn[0], 1)) # arrays to store number 
of reactants and products in gas-phase equations nreac = np.empty(num_eqn[0], dtype=np.int8) nprod = np.empty(num_eqn[0], dtype=np.int8) # colptrs for sparse matrix reac_col = np.empty(num_eqn[0], dtype=np.int8) prod_col = np.empty(num_eqn[0], dtype=np.int8) # list for equation reaction rate coefficients reac_coef = [] # matrix containing index of components who are denominators in the # calculation of equation derivatives in the Jacobian jac_den_indx = np.zeros((num_eqn[0], 1)) # total number of Jacobian elements per equation njac = np.zeros((num_eqn[0], 1)) # indices of Jacobian to affect per equation (rows) jac_indx = np.zeros((num_eqn[0], 1)) # a new list for the name strings of components presented in the scheme (not SMILES) comp_namelist = [] comp_list = [ ] # list for the SMILE strings of components present in the chemical scheme # list of Pybel objects of components in chemical scheme Pybel_objects = [] comp_num = 0 # count the number of unique components in the chemical scheme RO_indx = [] # empty list for holding indices of alkoxy components # --------------------------------------------------------------------- max_no_reac = 0. # log maximum number of reactants in a reaction max_no_prod = 0. 
# log maximum number of products in a reaction # loop through gas-phase equations line by line and extract the required information for eqn_step in range(num_eqn[0]): line = eqn_list[eqn_step] # extract this line # work out whether equation or reaction rate coefficient part comes first eqn_start = str('.*\\' + chem_scheme_markers[10]) rrc_start = str('.*\\' + chem_scheme_markers[9]) # get index of these markers, note span is the property of the match object that # gives the location of the marker eqn_start_indx = (re.match(eqn_start, line)).span()[1] rrc_start_indx = (re.match(rrc_start, line)).span()[1] if (eqn_start_indx > rrc_start_indx): eqn_sec = 1 # equation is second part else: eqn_sec = 0 # equation is first part # split the line into 2 parts: equation and rate coefficient # . means match with anything except a new line character., when followed by a * # means match zero or more times (so now we match with all characters in the line # except for new line characters, so final part is stating the character(s) we # are specifically looking for, \\ ensures the marker is recognised if eqn_sec == 1: eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' + chem_scheme_markers[11]) else: # end of equation part is start of reaction rate coefficient part eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' + chem_scheme_markers[9]) # extract the equation as a string ([0] extracts the equation section and # [1:-1] removes the bounding markers) eqn = re.findall(eqn_markers, line)[0][1:-1].strip() eqn_split = eqn.split() eqmark_pos = eqn_split.index('=') # reactants with stoichiometry number and omit any photon reactants = [ i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv' ] # products with stoichiometry number products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+'] # record maximum number of reactants across all equations max_no_reac = np.maximum(len(reactants), max_no_reac) # record maximum number of products across all equations max_no_prod 
= np.maximum(len(products), max_no_prod) # append columns if needed because maximum number of reactants increases while (max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1])): rindx = np.append(rindx, (np.zeros((num_eqn[0], 1))).astype(int), axis=1) rstoi = np.append(rstoi, (np.zeros((num_eqn[0], 1))), axis=1) y_arr = np.append(y_arr, (np.ones( (num_eqn[0], 1)) * -9999).astype(int), axis=1) y_arr_fixer = ((np.arange(0, num_eqn[0], dtype='int')).reshape(-1, 1)) y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac))) y_arr[y_arr != -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999] while (max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1])): pindx = np.append(pindx, (np.zeros((num_eqn[0], 1))).astype(int), axis=1) pstoi = np.append(pstoi, (np.zeros((num_eqn[0], 1))), axis=1) while ((len(reactants)**2.0 + len(reactants) * len(products)) > jac_indx.shape[1]): jac_indx = np.append(jac_indx, (np.zeros((num_eqn[0], 1))), axis=1) jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[0], 1))), axis=1) jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[0], 1))), axis=1) # .* means occurs anywhere in line and, first \ means second \ can be interpreted # and second \ ensures recognition of marker rate_coeff_start_mark = str('\\' + chem_scheme_markers[9]) # . 
means match with anything except a new line character, when followed by a * # means match zero or more times (so now we match with all characters in the line # except for new line characters, \\ ensures the marker # is recognised if eqn_sec == 1: # end of reaction rate coefficient part is start of equation part rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10]) else: # end of reaction rate coefficient part is end of line rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11]) # rate coefficient starts and end punctuation rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark) # rate coefficient expression in a string rate_ex = re.findall(rate_regex, line)[0][1:-1].strip() # convert fortran-type scientific notation to python type rate_ex = formatting.SN_conversion(rate_ex) # convert the rate coefficient expressions into Python readable commands rate_ex = formatting.convert_rate_mcm(rate_ex) if (rate_ex.find('EXP') != -1): print('Error in reaction rate coefficient expression: ', rate_ex) sys.exit() # store the reaction rate coefficient for this equation # (/s once any inputs applied) reac_coef.append(rate_ex) # extract the stoichiometric number of the component in current equation reactant_step = 0 product_step = 0 stoich_regex = r"^\d*\.\d*|^\d*" numr = len(reactants) # number of reactants in this equation # left hand side of equations (losses) for reactant in reactants: if (re.findall(stoich_regex, reactant)[0] != ''): stoich_num = float(re.findall(stoich_regex, reactant)[0]) # name with no stoich number name_only = re.sub(stoich_regex, '', reactant) elif (re.findall(stoich_regex, reactant)[0] == ''): stoich_num = 1. 
name_only = reactant # store stoichiometry rstoi[eqn_step, reactant_step] = stoich_num jac_stoi[eqn_step, reactant_step] = -1 * stoich_num if name_only not in comp_namelist: # if new component encountered comp_namelist.append( name_only) # add to chemical scheme name list # convert MCM chemical names to SMILES # index where xml file name matches reaction component name name_indx = comp_name.index(name_only) name_SMILE = comp_smil[name_indx] # SMILES of component comp_list.append(name_SMILE) # list SMILE names name_indx = comp_num # allocate index to this species # generate pybel object Pybel_object = pybel.readstring('smi', name_SMILE) # append to Pybel object list Pybel_objects.append(Pybel_object) # check if alkoxy radical present in this component and that component is organic if ('[O]' in name_SMILE): if ('C' in name_SMILE or 'C' in name_SMILE): if (name_SMILE != 'C[O]'): # ensure it's not carbon monoxide # if it is an organic alkoxy radical add its index to list RO_indx.append(comp_num) comp_num += 1 # number of unique species else: # if it is a component already encountered it will be in comp_list # existing index name_indx = comp_namelist.index(name_only) # store reactant index # check if index already present - i.e. 
component appears more than once if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0: # get existing index of this component exist_indx = (np.where( rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0] # add to existing stoichiometry rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step] jac_stoi[eqn_step, exist_indx] += -1 * rstoi[eqn_step, reactant_step] # remove stoichiometry added above rstoi[eqn_step, reactant_step] = 0 jac_stoi[eqn_step, reactant_step] = 0 reactant_step -= 1 # ignore this duplicate else: rindx[eqn_step, reactant_step] = int(name_indx) y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) + reactant_step) y_rind = np.append(y_rind, int(name_indx)) rr_arr = np.append(rr_arr, int(eqn_step)) reactant_step += 1 # number of reactants in this equation nreac[eqn_step] = int(reactant_step) # record 1D array of stoichiometries per equation rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step, 0:int(reactant_step)]) # right hand side of equations (gains) for product in products: if (re.findall(stoich_regex, product)[0] != ''): stoich_num = float(re.findall(stoich_regex, product)[0]) name_only = re.sub(stoich_regex, '', product) # name with no stoich number elif (re.findall(stoich_regex, product)[0] == ''): stoich_num = 1. 
name_only = product # store stoichiometry pstoi[eqn_step, product_step] = stoich_num jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num if name_only not in comp_namelist: # if new component encountered comp_namelist.append(name_only) # convert MCM chemical names to SMILES # index where xml file name matches reaction component name name_indx = comp_name.index(name_only) name_SMILE = comp_smil[name_indx] comp_list.append( name_SMILE) # list SMILE string of parsed species name_indx = comp_num # allocate index to this species # Generate pybel Pybel_object = pybel.readstring('smi', name_SMILE) # append to Pybel object list Pybel_objects.append(Pybel_object) # check if alkoxy radical present in this component and that component is organic if ('[O]' in name_SMILE): if ('C' in name_SMILE or 'C' in name_SMILE): if (name_SMILE != 'C[O]'): # ensure it's not carbon monoxide # if it is an organic alkoxy radical add its index to list RO_indx.append(comp_num) comp_num += 1 # number of unique species else: # if it's a species already encountered # index of component already listed name_indx = comp_namelist.index(name_only) # store product index # check if index already present - i.e. 
component appears more than once if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0: # get existing index of this component exist_indx = (np.where( pindx[eqn_step, 0:product_step] == (int(name_indx))))[0] # add to existing stoichiometry pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step] jac_stoi[eqn_step, reactant_step + exist_indx] += 1 * pstoi[eqn_step, product_step] # remove stoichiometry added above pstoi[eqn_step, product_step] = 0 jac_stoi[eqn_step, reactant_step + product_step] = 0 product_step -= 1 # ignore this duplicate else: pindx[eqn_step, product_step] = int(name_indx) rr_arr_p = np.append(rr_arr_p, int(eqn_step)) y_pind = np.append(y_pind, int(name_indx)) product_step += 1 # number of products in this equation nprod[eqn_step] = int(product_step) # record 1D array of stoichiometries per equation pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step, 0:int(product_step)]) # now that total number of components (reactants and products) # in an equation is known, replicate the reactant indices over all # components tot_comp = nreac[eqn_step] + nprod[eqn_step] for i in range(nreac[eqn_step]): jac_den_indx[eqn_step, i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i] # also replicate the stoichiometries for every reactant if (i > 0): jac_stoi[eqn_step, i * tot_comp:(i + 1) * tot_comp] = jac_stoi[eqn_step, 0:tot_comp] # number of Jacobian elements affected by this equation njac[eqn_step, 0] = tot_comp * nreac[eqn_step] # remove fillers and flatten index for arranging concentrations # ready for reaction rate coefficient calculation y_arr_g = y_arr[y_arr != -9999] y_rind_g = y_rind.astype(int) # ensure integer type uni_y_rind_g = (np.unique(y_rind)).astype(int) # unique index of reactants y_pind_g = y_pind.astype(int) # ensure integer type uni_y_pind_g = (np.unique(y_pind)).astype(int) # unique index of products rr_arr_g = rr_arr.astype(int) # ensure integer type rr_arr_p_g = rr_arr_p.astype(int) # ensure integer type # colptrs for sparse 
matrix of the change to reactants per equation reac_col_g = np.cumsum(nreac) - nreac # colptrs for sparse matrix of the change to products per equation prod_col_g = np.cumsum(nprod) - nprod if (len(reac_col_g) > 0): # if gas-phase reaction present # include final columns reac_col_g = np.append(reac_col_g, reac_col_g[-1] + nreac[-1]) prod_col_g = np.append(prod_col_g, prod_col_g[-1] + nprod[-1]) # tag other gas-phase arrays rindx_g = rindx pindx_g = pindx rstoi_g = rstoi pstoi_g = pstoi jac_stoi_g = jac_stoi rstoi_flat_g = rstoi_flat pstoi_flat_g = pstoi_flat nreac_g = nreac nprod_g = nprod reac_coef_g = reac_coef jac_den_indx_g = jac_den_indx.astype(int) njac_g = njac.astype(int) jac_indx_g = jac_indx jac_indx_g = jac_indx_g.astype(int) # same for aqueous-phase reactions ---------------------------------- # preparatory part ---------------------------------------------------- # matrix to record indices of reactants (cols) in each equation (rows) rindx = (np.ones((num_eqn[1], 1)) * -2).astype(int) # matrix of indices to arrange reactant concentrations when # reaction rate coefficient calculated y_arr = (np.ones((num_eqn[1], 1)).astype(int)) * -9999 # array to arrange reaction rates so they align with reactant stoichiometries rr_arr = np.empty((0)) # same but for products rr_arr_p = np.empty((0)) # index array for extracting required reactant concentrations for the # reaction rate coefficient calculation y_rind = np.empty((0)) # index array for identifying products when assigning gains from reactions y_pind = np.empty((0)) # matrix to record indices of products (cols) in each equation (rows) pindx = np.zeros((num_eqn[1], 1)).astype(int) # matrix to record stoichiometries of reactants (cols) in each equation (rows) rstoi = np.zeros((num_eqn[1], 1)) jac_stoi = np.zeros((num_eqn[1], 1)) # 1D array to record stoichiometries of reactants per equation rstoi_flat = np.empty((0)) # 1D array to record stoichiometries of products per equation pstoi_flat = np.empty((0)) # 
matrix to record stoichiometries of products (cols) in each equation (rows) pstoi = np.zeros((num_eqn[1], 1)) # arrays to store number of reactants and products of equations nreac = np.empty(num_eqn[1], dtype=np.int8) nprod = np.empty(num_eqn[1], dtype=np.int8) # list for equation reaction rate coefficients reac_coef = [] # matrix containing index of components who are denominators in the # calculation of equation derivatives in the Jacobian jac_den_indx = np.zeros((num_eqn[1], 1)) # total number of Jacobian elements per equation njac = np.zeros((num_eqn[1], 1)) # indices of Jacobian to affect per equation (rows) jac_indx = np.zeros((num_eqn[1], 1)) # --------------------------------------------------------------------- max_no_reac = 0. # log maximum number of reactants in a reaction max_no_prod = 0. # log maximum number of products in a reaction # loop through aqueous-phase equations line by line and extract the required information for eqn_step in range(num_eqn[1]): line = aqeqn_list[eqn_step] # extract this line # work out whether equation or reaction rate coefficient part comes first eqn_start = str('.*\\' + chem_scheme_markers[10]) rrc_start = str('.*\\' + chem_scheme_markers[9]) # get index of these markers, note span is the property of the match object that # gives the location of the marker eqn_start_indx = (re.match(eqn_start, line)).span()[1] rrc_start_indx = (re.match(rrc_start, line)).span()[1] if eqn_start_indx > rrc_start_indx: eqn_sec = 1 # equation is second part else: eqn_sec = 0 # equation is first part # split the line into 2 parts: equation and rate coefficient # . 
means match with anything except a new line character., when followed by a * # means match zero or more times (so now we match with all characters in the line # except for new line characters, so final part is stating the character(s) we # are specifically looking for, \\ ensures the marker is recognised if eqn_sec == 1: eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' + chem_scheme_markers[11]) else: # end of equation part is start of reaction rate coefficient part eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' + chem_scheme_markers[9]) # extract the equation as a string ([0] extracts the equation section and # [1:-1] removes the bounding markers) eqn = re.findall(eqn_markers, line)[0][1:-1].strip() eqn_split = eqn.split() eqmark_pos = eqn_split.index('=') # with stoich number; rule out the photon reactants = [ i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv' ] products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+'] # with stoich number # record maximum number of reactants across all equations max_no_reac = np.maximum(len(reactants), max_no_reac) # record maximum number of products across all equations max_no_prod = np.maximum(len(products), max_no_prod) # append columns if needed while max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1]): rindx = np.append(rindx, (np.ones( (num_eqn[1], 1)) * -2).astype(int), axis=1) rstoi = np.append(rstoi, (np.zeros((num_eqn[1], 1))), axis=1) y_arr = np.append(y_arr, (np.ones( (num_eqn[1], 1)) * -9999).astype(int), axis=1) y_arr_fixer = ((np.arange(0, num_eqn[1], dtype='int')).reshape(-1, 1)) y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac))) y_arr[y_arr != -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999] while max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1]): pindx = np.append(pindx, (np.zeros((num_eqn[1], 1))).astype(int), axis=1) pstoi = np.append(pstoi, (np.zeros((num_eqn[1], 1))), axis=1) while ((len(reactants)**2.0 + len(reactants) * len(products)) > 
jac_indx.shape[1]): jac_indx = np.append(jac_indx, (np.zeros((num_eqn[1], 1))), axis=1) jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[1], 1))), axis=1) jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[1], 1))), axis=1) # .* means occurs anywhere in line and, first \ means second \ can be interpreted # and second \ ensures recognition of marker rate_coeff_start_mark = str('\\' + chem_scheme_markers[9]) # . means match with anything except a new line character, when followed by a * # means match zero or more times (so now we match with all characters in the line # except for new line characters, \\ ensures the marker # is recognised if eqn_sec == 1: # end of reaction rate coefficient part is start of equation part rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10]) else: # end of reaction rate coefficient part is end of line rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11]) # rate coefficient starts and end punctuation rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark) # rate coefficient expression in a string rate_ex = re.findall(rate_regex, line)[0][1:-1].strip() # convert fortran-type scientific notation to python type rate_ex = formatting.SN_conversion(rate_ex) # convert the rate coefficient expressions into Python readable commands rate_ex = formatting.convert_rate_mcm(rate_ex) if (rate_ex.find('EXP') != -1): print('Error in reaction rate coefficient expression: ', rate_ex) sys.exit() # store the reaction rate coefficient for this equation # (/s once any inputs applied) reac_coef.append(rate_ex) # extract the stoichiometric number of the component in current equation reactant_step = 0 product_step = 0 stoich_regex = r"^\d*\.\d*|^\d*" numr = len(reactants) # number of reactants in this equation # left hand side of equations (losses) for reactant in reactants: if (re.findall(stoich_regex, reactant)[0] != ''): stoich_num = float(re.findall(stoich_regex, reactant)[0]) # name with no stoich number name_only = 
re.sub(stoich_regex, '', reactant) elif (re.findall(stoich_regex, reactant)[0] == ''): stoich_num = 1.0 name_only = reactant # store stoichiometry rstoi[eqn_step, reactant_step] = stoich_num jac_stoi[eqn_step, reactant_step] = -1 * stoich_num if name_only not in comp_namelist: # if new component encountered comp_namelist.append( name_only) # add to chemical scheme name list # convert MCM chemical names to SMILES if name_only in comp_name: # index where xml file name matches reaction component name name_indx = comp_name.index(name_only) name_SMILE = comp_smil[name_indx] # SMILES of component else: print( str('Error: inside eqn_parser, chemical scheme name ' + str(name_only) + ' not found in xml file')) sys.exit() comp_list.append(name_SMILE) # list SMILE names name_indx = comp_num # allocate index to this species # Generate pybel Pybel_object = pybel.readstring('smi', name_SMILE) # append to Pybel object list Pybel_objects.append(Pybel_object) # check if alkoxy radical present in this component and that component is organic if ('[O]' in name_SMILE): if ('C' in name_SMILE or 'C' in name_SMILE): if (name_SMILE != 'C[O]'): # ensure it's not carbon monoxide # if it is an organic alkoxy radical add its index to list RO_indx.append(comp_num) comp_num += 1 # number of unique species else: # if it's a species already encountered it will be in comp_list # existing index name_indx = comp_namelist.index(name_only) # store reactant index # check if index already present - i.e. 
component appears more than once # as a reactant in this reaction if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0: # get existing index of this component exist_indx = (np.where( rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0] # add to existing stoichiometry rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step] jac_stoi[eqn_step, exist_indx] += -1 * rstoi[eqn_step, reactant_step] # remove stoichiometry added above rstoi[eqn_step, reactant_step] = 0 jac_stoi[eqn_step, reactant_step] = 0 reactant_step -= 1 # ignore this duplicate else: rindx[eqn_step, reactant_step] = int(name_indx) y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) + reactant_step) y_rind = np.append(y_rind, int(name_indx)) rr_arr = np.append(rr_arr, int(eqn_step)) reactant_step += 1 # number of reactants in this equation nreac[eqn_step] = int(reactant_step) # record 1D array of stoichiometries per equation rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step, 0:int(reactant_step)]) # right hand side of equations (gains) for product in products: if (re.findall(stoich_regex, product)[0] != ''): stoich_num = float(re.findall(stoich_regex, product)[0]) name_only = re.sub(stoich_regex, '', product) # name with no stoich number elif (re.findall(stoich_regex, product)[0] == ''): stoich_num = 1.0 name_only = product # store stoichiometry pstoi[eqn_step, product_step] = stoich_num jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num if name_only not in comp_namelist: # if new component encountered comp_namelist.append(name_only) # convert MCM chemical names to SMILES # index where xml file name matches reaction component name if name_only in comp_name: name_indx = comp_name.index(name_only) name_SMILE = comp_smil[name_indx] else: print('Error: inside eqn_interr, chemical scheme name ' + str(name_only) + ' not found in xml file') sys.exit() comp_list.append( name_SMILE) # list SMILE string of parsed species name_indx = comp_num # allocate index to this 
species # generate pybel object Pybel_object = pybel.readstring('smi', name_SMILE) # append to Pybel object list Pybel_objects.append(Pybel_object) # check if alkoxy radical present in this component and that component is organic if ('[O]' in name_SMILE): if ('C' in name_SMILE or 'C' in name_SMILE): if (name_SMILE != 'C[O]'): # ensure it's not carbon monoxide # if it is an organic alkoxy radical add its index to list RO_indx.append(comp_num) comp_num += 1 # number of unique species else: # if it's a species already encountered # index of component already listed name_indx = comp_namelist.index(name_only) # store product index # check if index already present - i.e. component appears more than once if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0: # get existing index of this component exist_indx = (np.where( pindx[eqn_step, 0:product_step] == (int(name_indx))))[0] # add to existing stoichiometry pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step] jac_stoi[eqn_step, reactant_step + exist_indx] += 1 * pstoi[eqn_step, product_step] # remove stoichiometry added above pstoi[eqn_step, product_step] = 0 jac_stoi[eqn_step, reactant_step + product_step] = 0 product_step -= 1 # ignore this duplicate else: pindx[eqn_step, product_step] = int(name_indx) rr_arr_p = np.append(rr_arr_p, int(eqn_step)) y_pind = np.append(y_pind, int(name_indx)) product_step += 1 # number of products in this equation nprod[eqn_step] = int(product_step) # record 1D array of stoichiometries per equation pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step, 0:int(product_step)]) # now that total number of components (reactants and products) # in an equation is known, replicate the reactant indices over all # components tot_comp = nreac[eqn_step] + nprod[eqn_step] for i in range(nreac[eqn_step]): jac_den_indx[eqn_step, i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i] # also replicate the stoichiometries for every reactant if (i > 0): jac_stoi[eqn_step, i * tot_comp:(i + 1) * 
tot_comp] = jac_stoi[eqn_step, 0:tot_comp] # number of Jacobian elements affected by this equation njac[eqn_step, 0] = tot_comp * nreac[eqn_step] # account for gas-phase in Jacobian denominator index jac_den_indx += (comp_num + 2) # remove fillers and flatten index for arranging concentrations ready for reaction rate coefficient calculation y_arr_aq = y_arr[y_arr != -9999] # remove fillers y_rind_aq = y_rind.astype(int) # ensure integer type uni_y_rind_aq = (np.unique(y_rind)).astype( int) # unique index of reactants y_pind_aq = y_pind.astype(int) # ensure integer type uni_y_pind_aq = (np.unique(y_pind)).astype(int) # unique index of products rr_arr_aq = rr_arr.astype(int) # ensure integer type rr_arr_p_aq = rr_arr_p.astype(int) # ensure integer type # colptrs for sparse matrix of the change to reactants per equation reac_col_aq = np.cumsum(nreac) - nreac # colptrs for sparse matrix of the change to products per equation prod_col_aq = np.cumsum(nprod) - nprod if (len(reac_col_aq) > 0): # if aqueous-phase reaction present # include final columns reac_col_aq = np.append(reac_col_aq, reac_col_aq[-1] + nreac[-1]) prod_col_aq = np.append(prod_col_aq, prod_col_aq[-1] + nprod[-1]) # tag other aqueous-phase arrays rindx_aq = rindx pindx_aq = pindx rstoi_aq = rstoi pstoi_aq = pstoi jac_stoi_aq = jac_stoi rstoi_flat_aq = rstoi_flat pstoi_flat_aq = pstoi_flat nreac_aq = nreac nprod_aq = nprod reac_coef_aq = reac_coef jac_den_indx_aq = jac_den_indx.astype(int) njac_aq = njac.astype(int) jac_indx_aq = jac_indx jac_indx_aq = jac_indx_aq.astype(int) return (rindx_g, rstoi_g, pindx_g, pstoi_g, reac_coef_g, nreac_g, nprod_g, jac_stoi_g, jac_den_indx_g, njac_g, jac_indx_g, y_arr_g, y_rind_g, uni_y_rind_g, y_pind_g, uni_y_pind_g, reac_col_g, prod_col_g, rstoi_flat_g, pstoi_flat_g, rr_arr_g, rr_arr_p_g, rindx_aq, rstoi_aq, pindx_aq, pstoi_aq, reac_coef_aq, nreac_aq, nprod_aq, jac_stoi_aq, jac_den_indx_aq, njac_aq, jac_indx_aq, y_arr_aq, y_rind_aq, uni_y_rind_aq, y_pind_aq, 
uni_y_pind_aq, reac_col_aq, prod_col_aq, rstoi_flat_aq, pstoi_flat_aq, rr_arr_aq, rr_arr_p_aq, comp_namelist, comp_list, Pybel_objects, comp_num, RO_indx)
def _parse_equation_side(components, eqn_step, indx, stoi, spec_name,
                         spec_smil, spec_list, spec_namelist, Pybel_objects):
    # helper for extract_mechanism: records one side (reactants or products)
    # of a single equation
    # inputs: ------------------------------------------------------------------
    # components - tokens for this side of the equation, each optionally
    #              prefixed by a stoichiometric number (e.g. '2NO2')
    # eqn_step - row (equation index) to fill in indx and stoi
    # indx - matrix of component indices, filled in place
    # stoi - matrix of stoichiometries, filled in place
    # spec_name - component names from the xml file
    # spec_smil - SMILES from the xml file, aligned with spec_name
    # spec_list - SMILES of components met so far, appended to in place
    # spec_namelist - chemical scheme names met so far, appended to in place
    # Pybel_objects - Pybel objects of components met so far, appended to in
    #                 place
    # --------------------------------------------------------------------------
    # an optional leading number (with or without decimal point)
    stoich_regex = r"^\d*\.\d*|^\d*"

    for col, component in enumerate(components):
        stoich_text = re.findall(stoich_regex, component)[0]
        if stoich_text != '':
            stoich_num = float(stoich_text)
            # name with no stoich number
            name_only = re.sub(stoich_regex, '', component)
        else:
            stoich_num = 1.0
            name_only = component

        # log the bare scheme name; the stoichiometric prefix is not part of
        # the name, so '2NO2' and 'NO2' must not be listed as two components
        if name_only not in spec_namelist:
            spec_namelist.append(name_only)

        # store stoichiometry
        stoi[eqn_step, col] = stoich_num

        # convert the chemical scheme name to SMILES when the xml file knows
        # it, otherwise the name itself is used as the SMILES string
        if name_only in spec_name:
            name_only = spec_smil[spec_name.index(name_only)]

        if name_only not in spec_list:
            # new component: its index is the number of components so far
            name_indx = len(spec_list)
            spec_list.append(name_only)
            Pybel_objects.append(pybel.readstring('smi', name_only))
        else:
            # component already encountered
            name_indx = spec_list.index(name_only)

        # store component index
        indx[eqn_step, col] = int(name_indx)


def extract_mechanism(filename, xmlname, TEMP, PInit, Comp0, testf):
    # parses a chemical scheme file (lines holding '% rate : equation ;')
    # and an xml file of component names/SMILES into index and stoichiometry
    # matrices
    # inputs: ------------------------------------------------------------------
    # filename - path to the chemical scheme (equation) file
    # xmlname - path to the xml file relating component names to SMILES
    # TEMP - temperature (presumably K given the gas constant used - confirm)
    # PInit - pressure (presumably Pa given the gas constant used - confirm)
    # Comp0 - chemical scheme names of components present at experiment start
    # testf - flag for operating in normal mode (0) or testing mode (1)
    # --------------------------------------------------------------------------

    if testf == 1:  # testing mode: return fillers, one per normal output
        return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

    # read the file and store everything into a list, the context manager
    # guarantees the file is closed even if reading fails
    # reaction rates have units /s
    with open(filename, mode='r') as f_open_eqn:
        total_list_eqn = f_open_eqn.readlines()

    # equation list without comments
    naked_list_eqn = formatting.remove_comments(total_list_eqn)

    # calculate gas-phase concentrations of M, N2 and O2 (molecules/cc (air))
    # 8.3144598 is the gas constant, 6.0221409e+23 is Avogadro's number and
    # 1.0e-6 converts from molecules/m3 to molecules/cc
    M_val = (PInit / (8.3144598 * TEMP) * 6.0221409e+23) * 1.0e-6
    N2_val = M_val * 0.79
    O2_val = M_val * 0.2096

    # format the equation list
    # naked_list_eqn contains everything except the comments starting with //
    print('Now parsing the eqn info...\n')
    num_eqn = len(naked_list_eqn)

    # --open and initialise the xml file for converting chemical names to
    # SMILES-----
    with open(xmlname) as fd:
        doc = xmltodict.parse(fd.read())

    spec_name = []  # component names in xml file
    spec_smil = []  # SMILES aligned with spec_name
    for entry in doc['mechanism']['species_defs']['species']:
        name = entry['@species_name']
        if "smiles" in entry:
            smil = entry['smiles']
        elif name[0] == 'O' or name[0] == 'H':
            # no SMILES supplied: bracket names starting with O or H
            smil = '[' + name + ']'
        else:
            # no SMILES supplied: fall back on the name itself
            smil = name
        spec_name.append(name)
        spec_smil.append(smil)

    # convert input component names for components present in gas phase at
    # experiment start from chemical scheme names to SMILES
    # (raises ValueError if a name in Comp0 is absent from the xml file)
    init_SMIL = [spec_smil[spec_name.index(comp)] for comp in Comp0]

    # initialising lists
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = np.zeros((num_eqn, 1)).astype(int)
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn, 1)).astype(int)
    # matrix to record stoichometries of reactants (cols) in each equation
    # (rows)
    rstoi = np.zeros((num_eqn, 1))
    # matrix to record stoichometries of products (cols) in each equation
    # (rows)
    pstoi = np.zeros((num_eqn, 1))
    # arrays to store number of reactants and products in an equation
    nreac = np.zeros((num_eqn))
    nprod = np.zeros((num_eqn))
    # list for equation reaction rate coefficients
    reac_coef = []
    # list for species (SMILES)
    spec_list = []
    # list of Pybel objects
    Pybel_objects = []
    # list for the name strings of species present in the scheme (not SMILES)
    spec_namelist = []

    eqn_regex = r"\:.*\;"  # eqn starts with a : and ends with a ;
    rate_regex = r"\%.*\:"  # rate coef starts with a % and ends with a :

    # Loop through the equations line by line and extract the information
    for eqn_step, line in enumerate(naked_list_eqn):

        # split the line into 2 parts: equation; rate coef
        # (fac format doesnt have id for each equation)
        # extract the equation (in a string), [1:-1] removes bounding markers
        eqn = re.findall(eqn_regex, line)[0][1:-1].strip()
        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # with stoich number; rule out the photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        # with stoich number
        products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+']

        # append columns if this equation has more reactants or products
        # than any equation before it
        while len(reactants) > rindx.shape[1]:
            rindx = np.append(rindx, (np.zeros((num_eqn, 1))).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn, 1))), axis=1)
        while len(products) > pindx.shape[1]:
            pindx = np.append(pindx, (np.zeros((num_eqn, 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn, 1))), axis=1)

        # rate_ex: rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()
        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable
        # commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        # store the reaction rate for this equation (/s once any inputs
        # applied)
        reac_coef.append(rate_ex)

        # left hand side of equations (losses)
        _parse_equation_side(reactants, eqn_step, rindx, rstoi, spec_name,
                             spec_smil, spec_list, spec_namelist,
                             Pybel_objects)
        # number of reactants in this equation
        nreac[eqn_step] = len(reactants)

        # right hand side of equations (gains)
        _parse_equation_side(products, eqn_step, pindx, pstoi, spec_name,
                             spec_smil, spec_list, spec_namelist,
                             Pybel_objects)
        # number of products in this equation
        nprod[eqn_step] = len(products)

    # number of unique species
    species_step = len(spec_list)

    # number of columns in rindx and pindx
    reacn = rindx.shape[1]
    prodn = pindx.shape[1]

    # create a 2 column array, the first column with the RO2 list index of
    # any RO2 species that appear in the species list, the second column for
    # its index in the species list
    RO2_indices = write_RO2_indices(spec_namelist)

    # print the brief info for the simulation to the screen
    print('Briefing:')
    print('Total number of equations: %i' % (num_eqn))
    print('Total number of species: %i\n' % (species_step))

    # outputs (in order of return): --------------------------------------------
    # rindx - matrix of reactant indices (cols) in each equation (rows)
    # pindx - matrix of product indices (cols) in each equation (rows)
    # rstoi - matrix of reactant stoichometries (cols) per equation (rows)
    # pstoi - matrix of product stoichometries (cols) per equation (rows)
    # reac_coef - list for equation reaction rate coefficients
    # spec_list - list for species (SMILES)
    # Pybel_objects - list of Pybel objects
    # num_eqn - number of equations
    # species_step - number of species
    # RO2_indices - result of write_RO2_indices for spec_namelist
    # nreac - number of reactants in each equation
    # nprod - number of products per equation
    # prodn - number of columns in pindx
    # reacn - rindx number of columns
    # M_val - gas-phase concentration of M (molecules/cc (air))
    # N2_val - gas-phase concentration of nitrogen (molecules/cc (air))
    # O2_val - gas-phase concentration of oxygen (molecules/cc (air))
    # init_SMIL - SMILE string for each component in Comp0
    # spec_namelist - names of components as given in the equation file
    #                 (without stoichiometric numbers)
    # --------------------------------------------------------------------------
    return (rindx, pindx, rstoi, pstoi, reac_coef, spec_list, Pybel_objects,
            num_eqn, species_step, RO2_indices, nreac, nprod, prodn, reacn,
            M_val, N2_val, O2_val, init_SMIL, spec_namelist)
def _line_has_marker(marker, line):
    # helper for sch_interr: True when marker occurs in line
    # an empty marker is treated as absent, since '.*\\' on its own ends in a
    # lone backslash, which is not a valid regular expression
    if marker == '':
        return False
    # .* allows search across all elements of line, \\ ensures marker is
    # recognised as string
    return re.match(str('.*\\' + marker), line) is not None


def sch_interr(total_list_eqn, chm_sch_mrk):
    # sorts the lines of a chemical scheme file into reaction equations,
    # generic reaction rate coefficients and peroxy radical names
    # inputs: ------------------------------------------------------------------
    # total_list_eqn - all lines from the chemical scheme file
    # chm_sch_mrk - markers to denote different section of the chemical scheme,
    #   as used below:
    #   [0] start of gas-phase equation lines
    #   [1] start of peroxy radical (RO2) list
    #   [2] separator between names in RO2 list
    #   [3], [4] text stripped from the start and end of each RO2 name
    #   [5] end of RO2 list ('' if none)
    #   [6] RO2 list continuing onto next line ('' if none)
    #   [7] end of generic reaction rate coefficient lines ('' if none)
    #   [8] start of aqueous-phase equation lines ('' if none)
    #   [9] start of reaction rate coefficient part of equation lines
    #   [11] end of equation lines
    # --------------------------------------------------------------------------

    # preparatory part ---------------------------------------------------------
    eqn_list = []  # empty list for gas-phase reaction equation
    aqeqn_list = []  # empty list for particle-phase reaction equation
    RO2_names = []  # empty list for peroxy radicals
    rrc = []  # empty list for reaction rate coefficients
    rrc_name = []  # empty list for reaction rate coefficient labels
    pr_flag = 0  # don't collate peroxy radicals until seen

    # patterns depend only on the markers, so prepare them once rather than
    # for every line
    # marker at end of generic reaction rate coefficient lines
    # the first \ allows python to interpret the second \ as a dash
    # to use in regex which means an escape in case the marker is a
    # regex special character
    # the $ means occurs at end of string
    end_mark = str('\\' + chm_sch_mrk[7] + '$')
    # ^ means occurs at start of line
    RO2_start_mark = str('^' + chm_sch_mrk[1])
    gas_start_mark = str('^\\' + chm_sch_mrk[0])
    aq_start_mark = str('^\\' + chm_sch_mrk[8])
    # markers for starting reaction rate coefficients part, and for end of
    # equation lines
    eqn_markers = [
        str('.*\\' + chm_sch_mrk[9]),
        str('.*\\' + chm_sch_mrk[11])
    ]
    # whether a marker for the end of the RO2 list has been supplied
    RO2_end_given = len(chm_sch_mrk[5].strip()) > 0
    # whether a marker for the RO2 list continuing onto next line is supplied
    RO2_cont_given = len(chm_sch_mrk[6].strip()) > 0
    # --------------------------------------------------------------------------

    # obtain lists for reaction rate coefficients, peroxy radicals
    # and equation reactions using markers for separating chemical scheme
    # elements
    for line in total_list_eqn:

        line1 = line.strip()  # remove bounding white space

        # ----------------------------------------------------------------------
        # generic reaction rate coefficients part
        # could be generic reaction coefficient if just one = in line
        sides = line1.split('=')
        if len(sides) == 2:
            lhs = sides[0].strip()
            rhs = sides[1].strip()

            rrc_flag = 1
            # don't record if nothing preceding '=' (can occur in KPP files,
            # e.g. =IGNORE)
            if len(lhs) == 0:
                rrc_flag = 0
            # don't record if this just an IGNORE command
            if rhs[0:6] == 'IGNORE':
                rrc_flag = 0
            # don't record if marker (if one present) for end of generic
            # reaction rate coefficient lines not present
            if (len(chm_sch_mrk[7]) > 0
                    and re.search(end_mark, line1) is None):
                rrc_flag = 0

            # don't consider if start of peroxy radical list, or if a
            # gas-phase or aqueous-phase chemical scheme reaction
            # (lhs is never empty here because rrc_flag is then 0)
            if (rrc_flag == 1 and lhs != chm_sch_mrk[1]
                    and lhs[0] != chm_sch_mrk[0]
                    and lhs[0] != chm_sch_mrk[8]):
                # remove end characters
                line2 = line1.replace(str(chm_sch_mrk[7]), '')
                # remove all white space
                line2 = line2.replace(' ', '')
                # convert fortran-type scientific notation to python type
                line2 = formatting.SN_conversion(line2)
                # ensure rate coefficient is python readable
                line2 = formatting.convert_rate_mcm(line2)
                rrc.append(line2.strip())
                # get just name of generic reaction rate coefficient
                rrc_name.append((line2.split('=')[0]).strip())

        # ----------------------------------------------------------------------
        # peroxy radical part
        # if starting marker for peroxy radical list seen, flag that recording
        # needed
        if re.match(RO2_start_mark, line1) is not None:
            # double check that recording needed for peroxy radicals (in case
            # chm_sch_mrk[1] is not unique)
            # first check whether the RO2 list comprises just one line, as
            # this will mean its end marker is present
            if RO2_end_given and _line_has_marker(chm_sch_mrk[5], line1):
                pr_flag = 1
            # presence of marker for RO2 list continuing onto next line
            # confirms this is the RO2 list when it covers more than one line
            if _line_has_marker(chm_sch_mrk[6], line1):
                pr_flag = 1
            # if line end or continuation marker not supplied then assume the
            # RO2 start marker is unique
            if not RO2_end_given and not RO2_cont_given:
                pr_flag = 1

        if pr_flag == 1:
            # loop through the elements in line separated by peroxy radical
            # separator
            for line3 in line1.split(chm_sch_mrk[2]):
                if len(line3.split('=')) > 1:  # in case of RO2 = ...
                    line3 = (line3.split('='))[1]
                if len(line3.split(';')) > 1:  # list finishing with ...;
                    line3 = (line3.split(';'))[0]
                if len(line3.split('&')) > 1:  # list finishing with &
                    line3 = (line3.split('&'))[0]

                # remove any white space
                line3 = line3.strip()

                # don't include white space or ampersands
                if line3 == '' or line3 == '&':
                    continue

                # strip surrounding marks
                if line3.startswith(chm_sch_mrk[3]):
                    line3 = line3[len(chm_sch_mrk[3]):]
                # an empty end mark leaves the name untouched
                if chm_sch_mrk[4] and line3.endswith(chm_sch_mrk[4]):
                    line3 = line3[0:-len(chm_sch_mrk[4])]

                RO2_names.append(line3)

            # check for end of RO2 list - given either by marker for end or
            # absence of marker for continuation onto next line of RO2
            if RO2_end_given:
                if _line_has_marker(chm_sch_mrk[5], line1):
                    pr_flag = 0
            elif not _line_has_marker(chm_sch_mrk[6], line1):
                # no continuation marker in this line (or none supplied at
                # all), so the RO2 list finishes here
                pr_flag = 0

        # ----------------------------------------------------------------------
        # gas-phase reaction equation part
        # first check is whether equation start marker is present, second
        # check is whether markers for starting reaction rate coefficients
        # part, and markers for end of equation lines, are present
        if (re.match(gas_start_mark, line1) is not None
                and re.match(eqn_markers[0], line1) is not None
                and re.match(eqn_markers[1], line1) is not None):
            eqn_list.append(line1)  # store reaction equations

        # aqueous-phase reaction equation part
        # bypass if no marker given for aqueous-phase equations
        if chm_sch_mrk[8] != '':
            if (re.match(aq_start_mark, line1) is not None
                    and re.match(eqn_markers[0], line1) is not None
                    and re.match(eqn_markers[1], line1) is not None):
                aqeqn_list.append(line1)  # store reaction equations

    # number of equations (gas-phase, aqueous-phase)
    eqn_num = np.array((len(eqn_list), len(aqeqn_list)))

    # outputs: -----------------------------------------------------------------
    # eqn_list - gas-phase reaction equation lines
    # aqeqn_list - aqueous-phase reaction equation lines
    # eqn_num - number of gas-phase and aqueous-phase equations
    # rrc - generic reaction rate coefficient expressions
    # rrc_name - names of the generic reaction rate coefficients
    # RO2_names - names of peroxy radicals
    # --------------------------------------------------------------------------
    return (eqn_list, aqeqn_list, eqn_num, rrc, rrc_name, RO2_names)
def extract_mechanism(filename, xmlname, PInit, testf, RH, start_sim_time, lat, lon, act_flux_path, DayOfYear, chem_scheme_markers, photo_par_file): # inputs: ---------------------------------------------------------------------------- # testf - flag for operating in normal mode (0) or testing mode (1) # chem_scheme_markers - markers for different sections of the chemical scheme, # default input is for the kinetic pre-processor (KPP) format # photo_par_file - path (from PyCHAM home directory) to file containing photolysis # information (absorption cross sections and quantum yields) # ------------------------------------------------------------------------------------ if testf == 1: # for just testing mode return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) print('Now parsing the equation information ... \n') # open the chemical scheme file f_open_eqn = open(filename, mode='r') # read the file and store everything into a list # reaction rates have units /s total_list_eqn = f_open_eqn.readlines() f_open_eqn.close() if (f_open_eqn.closed == False): print('IOError') print('Eqn file not closed') sys.exit() naked_list_eqn = [] # empty list for gas-phase equation reactions naked_list_peqn = [] # empty list for other equation reactions RO2_names = [] # empty list for peroxy radicals rrc = [] # empty list for reaction rate coefficients rrc_name = [] # empty list for reaction rate coefficient labels eqn_flag = 0 # don't collate reaction equations until seen pr_flag = 0 # don't collate peroxy radicals until seen RO2_count = 0 # count on number of lines considered in peroxy radical list # obtain lists for reaction rate coefficients, peroxy radicals and equation reactions # using markers for separating chemical scheme elements for line in total_list_eqn: line1 = line.strip() # remove bounding white space # -------------------------------------------------------------------------------- # generic reaction rate coefficients part # marker at end of generic reaction 
rate coefficient lines end_mark = str('$\\' + chem_scheme_markers[7]) # look out for start of generic reaction rate coefficients # could be generic reaction coefficient if just one = in line if len(line1.split('=')) == 2: rrc_flag = 1 # don't record if nothing preceding '=' (can occur in KPP files, e.g. # =IGNORE) if (len((line1.split('=')[0]).strip()) == 0): rrc_flag = 0 # don't record if this just an IGNORE command if len((line1.split('=')[1]).strip()) >= 6: if (line1.split('=')[1]).strip()[0:6] == 'IGNORE': rrc_flag = 0 # don't record if marker (if one present) for end of generic reaction rate # coefficient lines not present if len(chem_scheme_markers[7]) > 0: if re.match(end_mark, line1) == None: rrc_flag = 0 if rrc_flag == 1: # dont consider if start of peroxy radical list if (line1.split('=')[0]).strip() != chem_scheme_markers[1]: # don't consider if a gas-phase chemical scheme reaction if ((line1.split('=')[0]).strip() )[0] != chem_scheme_markers[0]: # don't consider if an aqueous-phase chemical scheme reaction if ((line1.split('=')[0]).strip() )[0] != chem_scheme_markers[8]: # remove end characters line2 = line1.replace(str(chem_scheme_markers[7]), '') # remove all white space line2 = line2.replace(' ', '') # convert fortran-type scientific notation to python type line2 = formatting.SN_conversion(line2) # ensure rate coefficient is python readable line2 = formatting.convert_rate_mcm(line2) rrc.append(line2.strip()) # get just name of generic reaction rate coefficient rrc_name.append((line2.split('=')[0]).strip()) # -------------------------------------------------------------------------------- # peroxy radical part # now start logging peroxy radicals RO2_start_mark = str('.*' + chem_scheme_markers[1]) # if starting marker for peroxy radical list seen, flag that recording needed if (re.match(RO2_start_mark, line1) != None): # to double check that recording needed for peroxy radicals (in case # chem_scheme_markers[1] is not unique # first check whether the 
RO2 list comprises just one line, as this will # mean its end marker is present if (len(chem_scheme_markers[5].strip()) > 0): # .* allows search across all elements of line, \\ ensures marker is # recognised as string mark = str('.*\\' + chem_scheme_markers[5]) if (re.match(mark, line1) != None): pr_flag = 1 # look for presence of marker for RO2 list continuing onto next line, which # confirms this is the RO2 list when it covers more than one line # .* allows search across all elements of line, \\ ensures marker is # recognised as string mark = str('.*\\' + chem_scheme_markers[6]) if (re.match(mark, line1) != None): pr_flag = 1 if (pr_flag == 1): # get the elements in line separated by peroxy radical separator line2 = line1.split(chem_scheme_markers[2]) RO2_count += 1 # count on number of lines considered in peroxy radical list for line3 in line2: # loop through elements in line if len(line3.split('=')) > 1: # in case of RO2 = ... line3 = (line3.split('='))[1] if len(line3.split( ';')) > 1: # in case of RO2 list finishing with ...; line3 = (line3.split(';'))[0] if len(line3.split( '&')) > 1: # in case of RO2 list finishing with & line3 = (line3.split('&'))[0] # remove any white space line3 = line3.strip() # don't include white space or ampersands if (line3 == '' or line3 == '&'): continue else: # if not these, then first strip surrounding marks if line3[0:len(chem_scheme_markers[3] )] == chem_scheme_markers[3]: line3 = line3[len(chem_scheme_markers[3])::] if line3[-len(chem_scheme_markers[4] )::] == chem_scheme_markers[4]: line3 = line3[0:-len(chem_scheme_markers[4])] RO2_names.append(line3) # check for end of RO2 list - given either by marker for end or absence of # marker for continuation onto next line of RO2 # check for marker for end of RO2 list if (len(chem_scheme_markers[5].strip()) > 0): # .* allows search across all elements of line, \\ ensures marker is # recognised as string mark = str('.*\\' + chem_scheme_markers[5]) if (re.match(mark, line1) != None): 
pr_flag = 0 else: # look for absence of marker for RO2 list continuing onto next line # .* allows search across all elements of line, \\ ensures marker is # recognised as string mark = str('.*\\' + chem_scheme_markers[6]) if (re.match(mark, line1) == None): pr_flag = 0 # -------------------------------------------------------------------------------- # gas-phase reaction equation part # ^ means occurs at start of line and, first \ means second \ can be interpreted # and second \ ensures recognition of marker marker = str('^\\' + chem_scheme_markers[0]) # first check is whether equation start marker is present if (re.match(marker, line1) != None): # second check is whether markers for starting reaction rate coefficients # part, and markers for end of equation lines, are present eqn_markers = [ str('.*\\' + chem_scheme_markers[9]), str('.*\\' + chem_scheme_markers[11]) ] if (re.match(eqn_markers[0], line1) != None and re.match(eqn_markers[1], line1) != None): naked_list_eqn.append(line1) # store reaction equations # aqueous-phase reaction equation part # ^ means occurs at start of line and, first \ means second \ can be interpreted # and second \ ensures recognition of marker # first, check if a marker given, if not bypass if chem_scheme_markers[8] == '': continue else: marker = str('^\\' + chem_scheme_markers[8]) if (re.match(marker, line1) != None): # second check is whether markers for starting reaction rate coefficients # part, and markers for end of equation lines, are present eqn_markers = [ str('.*\\' + chem_scheme_markers[9]), str('.*\\' + chem_scheme_markers[11]) ] if (re.match(eqn_markers[0], line1) != None and re.match(eqn_markers[1], line1) != None): naked_list_peqn.append(line1) # store reaction equations # -------------------------------------------------------------------------------- # format the equation list # get number of equations for phases num_eqn = np.array((len(naked_list_eqn), len(naked_list_peqn))) # --open and initialise the xml file for 
converting chemical names to SMILES----- with open(xmlname) as fd: doc = xmltodict.parse(fd.read()) a = doc['mechanism']['species_defs']['species'] spec_numb = list(('0', ) * len(a)) spec_name = list(('0', ) * len(a)) spec_smil = list(('0', ) * len(a)) for i in range(len(a)): spec_numb[i] = a[i]['@species_number'] spec_name[i] = a[i]['@species_name'] if "smiles" in a[i]: spec_smil[i] = a[i]['smiles'] elif spec_name[i][0] == 'O' or spec_name[i][0] == 'H': spec_smil[i] = '[' + spec_name[i] + ']' else: spec_smil[i] = spec_name[i] # initialising variables for equation interrogator (eqn_interr) comp_num = 0 # count the number of unique components # matrix to record indices of reactants (cols) in each equation (rows) rindx = np.zeros((num_eqn[0], 1)).astype(int) rindx_p = np.zeros((num_eqn[1], 1)).astype(int) # matrix to record indices of products (cols) in each equation (rows) pindx = np.zeros((num_eqn[0], 1)).astype(int) pindx_p = np.zeros((num_eqn[1], 1)).astype(int) # matrix to record stoichiometries of reactants (cols) in each equation (rows) rstoi = np.zeros((num_eqn[0], 1)) rstoi_p = np.zeros((num_eqn[1], 1)) # matrix to record stoichiometries of products (cols) in each equation (rows) pstoi = np.zeros((num_eqn[0], 1)) pstoi_p = np.zeros((num_eqn[1], 1)) # arrays to store number of reactants and products in gas-phase equations nreac = np.empty(num_eqn[0], dtype=np.int8) nprod = np.empty(num_eqn[0], dtype=np.int8) nreac_p = np.empty(num_eqn[1], dtype=np.int8) nprod_p = np.empty(num_eqn[1], dtype=np.int8) # list for equation reaction rate coefficients reac_coef = [] reac_coef_p = [] # list for components' SMILE strings spec_list = [] # list of Pybel objects Pybel_objects = [] # a new list for the name strings of species presented in the scheme (not SMILES) spec_namelist = [] # get equation information for gas-phase reactions [ rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist, spec_list, Pybel_objects, nreac, nprod, comp_num ] = eqn_interr(num_eqn[0], 
naked_list_eqn, rindx, rstoi, pindx, pstoi, chem_scheme_markers, reac_coef, spec_namelist, spec_name, spec_smil, spec_list, Pybel_objects, nreac, nprod, comp_num, 0) # get equation information for aqueous-phase reactions [ rindx_aq, rstoi_aq, pindx_aq, pstoi_aq, reac_coef_aq, spec_namelist, spec_list, Pybel_objects, nreac_aq, nprod_aq, comp_num ] = eqn_interr(num_eqn[1], naked_list_peqn, rindx_p, rstoi_p, pindx_p, pstoi_p, chem_scheme_markers, reac_coef_p, spec_namelist, spec_name, spec_smil, spec_list, Pybel_objects, nreac_p, nprod_p, comp_num, 1) if len(spec_list) != len(spec_namelist): sys.exit( 'Error: inside eqn_parser, length of spec_list is different to length of spec_namelist and the SMILES in the former should align with the chemical scheme names in the latter' ) # number of columns in rindx and pindx reacn = rindx.shape[1] prodn = pindx.shape[1] reacn_aq = rindx_aq.shape[1] prodn_aq = pindx_aq.shape[1] # create a 2 column array, the first column with the RO2 list index of any RO2 species # that appears in the species list, the second column for its index in the species # list RO2_indices = write_RO2_indices(spec_namelist, RO2_names) # automatically generate the Rate_coeffs module that will allow rate coefficients to # be calculated inside ode_gen module (/s) for gas phase write_rate_file(reac_coef, rrc, rrc_name, testf) # repeat for aqueous phase - creates a different file to gas phase one write_rate_file(reac_coef_aq, rrc, rrc_name, 3) # number of photolysis reactions, if this relevant cwd = os.getcwd() # address of current working directory if photo_par_file == str(cwd + '/PyCHAM/photofiles/MCMv3.2'): Jlen = 62 # for MCM (default name of photolysis parameters) else: # need to find out number of photolysis reactions # use Fortran indexing to be consistent with MCM photochemical reaction numbers Jlen = 1 # open file to read f = open(str(photo_par_file), 'r') for line in f: # loop through line if line.strip() == str('J_' + str(Jlen) + '_axs'): Jlen += 1 # 
print the brief info for the simulation to the screen print('Briefing:') print('Total number of gas-phase equations: %i' % (num_eqn[0])) print('Total number of aqueous-phase equations: %i' % (num_eqn[1])) print('Total number of components found in chemical scheme file: %i\n' % (comp_num)) # outputs: --------------------------------------------------------------------------- # rindx - matrix to record indices of reactants (cols) in each equation (rows) # pindx - indices of equation products (cols) in each equation (rows) # rstoi - matrix to record stoichometries of reactants (cols) in each equation (rows) # pstoi - matrix to record stoichometries of products (cols) in each equation (rows) # reac_coef - list for equation reaction rate coefficients # comp_num - list for components' SMILE strings # Pybel_objects - list of Pybel objects # species_step - number of species # num_eqn - number of equations # nreac - number of reactants in each equation # max_no_jaci - number of columns for Jacobian index matrix # nprod - number of products per equation # prodn - number of columns in pindx # reacn - rindx number of columns # spec_namelist - list of component names used in the chemical reaction file # ------------------------------------------------------------------------------------ return (rindx, pindx, rstoi, pstoi, reac_coef, spec_list, Pybel_objects, num_eqn, comp_num, RO2_indices, nreac, nprod, prodn, reacn, spec_namelist, Jlen, rindx_aq, pindx_aq, rstoi_aq, pstoi_aq, reac_coef_aq, nreac_aq, nprod_aq, prodn_aq, reacn_aq)