def high_p(rxn_str, ea_units, a_units):
    """ Read the high-pressure Arrhenius parameters from the line of a
        reaction string that holds the chemical equation, and convert the
        units of A and Ea.

        :param rxn_str: raw Chemkin string for a single reaction
        :type rxn_str: str
        :param ea_units: units of activation energy
        :type ea_units: str
        :param a_units: units of rate constants; either 'moles' or 'molecules'
        :type a_units: str
        :return params: Arrhenius fitting parameters for high-P rates, wrapped
            in an outer list; None if the pattern captured nothing
        :rtype: list(list(float))
    """
    pattern = _first_line_pattern(
        rct_ptt=SPECIES_NAMES_PATTERN,
        prd_ptt=SPECIES_NAMES_PATTERN,
        param_ptt=app.capturing(COEFF_PATTERN))
    captures = apf.all_captures(pattern, rxn_str)

    if not captures:
        return None

    # Every capture is cast, but only the first parameter set is kept
    cast_sets = [list(ap_cast(capture.split())) for capture in captures]
    first_set = cast_sets[0]

    # Convert the units of Ea (index 2) and A (index 0)
    ea_factor = get_ea_conv_factor(ea_units)
    a_factor = get_a_conv_factor(rxn_str, a_units)
    first_set[2] = first_set[2] * ea_factor
    first_set[0] = first_set[0] * a_factor

    # Return the parameter set as a list inside a list
    return [first_set]
def _expand_group(group_str):
    """ Expand a group string into a list of its parts.

        If `group_str` matches `group_ptt`, the first capture is cast to a
        (count, part) pair and `part` is repeated `count` times. Otherwise
        the string is returned unchanged as a one-element list.

        :param group_str: the group string to expand
        :return: list of parts
        :rtype: list
    """
    if not apf.has_match(group_ptt, group_str):
        return [group_str]

    count, part = ap_cast(apf.first_capture(group_ptt, group_str))
    return [part] * count
def high_p_parameters(rxn_dstr, ea_units, a_units):
    """ Read the high-pressure Arrhenius parameters from the line of a
        reaction data string that holds the chemical equation, and convert
        the units of A and Ea.

        :param rxn_dstr: data string for species in reaction block
        :type rxn_dstr: str
        :param ea_units: units of activation energy, passed to
            `get_ea_conv_factor`
        :param a_units: units of rate constants, passed to
            `get_a_conv_factor`
        :return params: Arrhenius fitting parameters for high-P rates;
            None if the pattern captured nothing
        :rtype: list(float)
    """
    pattern = _first_line_pattern(
        rct_ptt=SPECIES_NAMES_PATTERN,
        prd_ptt=SPECIES_NAMES_PATTERN,
        param_ptt=app.capturing(COEFF_PATTERN))
    captures = apf.all_captures(pattern, rxn_dstr)

    if not captures:
        return None

    # Every capture is cast, but only the first parameter set is kept
    cast_sets = [list(ap_cast(capture.split())) for capture in captures]
    first_set = cast_sets[0]

    # Convert the units of Ea (index 2) and A (index 0)
    ea_factor = get_ea_conv_factor(rxn_dstr, ea_units)
    a_factor = get_a_conv_factor(rxn_dstr, a_units)
    first_set[2] = first_set[2] * ea_factor
    first_set[0] = first_set[0] * a_factor

    return first_set
def _bond_stereo_parities(lyr_dct, one_indexed=False):
    """ Parse bond stereo parities from the 'b' entry of a layer dictionary

        :param lyr_dct: layer dictionary; an empty result is returned when it
            has no 'b' key
        :param one_indexed: keep the indices as written in the layer? If
            False, each index is shifted down by one
        :type one_indexed: bool
        :returns: parities keyed by bond (a frozenset of two indices); True
            for '+', False otherwise
        :rtype: dict[frozenset: bool]
    """
    if 'b' not in lyr_dct:
        return {}

    # Grammar: comma-delimited terms, each "<int>-<int>" followed by a sign
    number = pp.Word(pp.nums)
    bond = number + pp.Suppress('-') + number
    sign = pp.Or(['+', '-'])
    term = pp.Group(pp.Group(bond) + sign)
    parser = pp.Opt(pp.delimitedList(term, delim=','))

    # Parse the layer and cast numeric strings
    terms = ap_cast(parser.parseString(lyr_dct['b']).asList())

    # Build the dictionary, applying the index shift
    shift = 0 if one_indexed else -1
    bnd_ste_dct = {}
    for (key1, key2), par in terms:
        bnd_key = frozenset({key1 + shift, key2 + shift})
        bnd_ste_dct[bnd_key] = (par == '+')
    return bnd_ste_dct
def stereo_bonds(ich, iso=True, one_indexed=False):
    """ Parse the stereo bonds from the stereochemistry layer.

        :param ich: InChI string
        :type ich: str
        :param iso: Include isotope stereochemistry?
        :type iso: bool
        :param one_indexed: Return indices in one-indexing?
        :type one_indexed: bool
        :returns: the stereo bonds as pairs of atom indices; an empty tuple
            if there is no 'b' sublayer or nothing was captured from it
        :raises NotImplementedError: if `ich` splits into more than one
            component
    """
    if len(split(ich)) > 1:
        # Trailing space keeps the two sentences apart once the adjacent
        # literals are concatenated
        raise NotImplementedError("Multicomponent InChIs not implemented. "
                                  "Call inchi.split() first")

    bnd_ptt = '-'.join([app.capturing(app.UNSIGNED_INTEGER)] * 2)

    ste_dct = stereo_sublayers(ich)
    iso_dct = isotope_sublayers(ich)

    # Concatenate the regular and (optionally) isotopic bond sublayers
    blyr = ''
    if 'b' in ste_dct:
        blyr += ste_dct['b']
    if iso and 'b' in iso_dct:
        blyr += ',' + iso_dct['b']

    if not blyr:
        return ()

    bnds = ap_cast(apf.all_captures(bnd_ptt, blyr))

    # Handle a missing result *before* iterating over it; previously this
    # check came after the index shift and could never prevent the failure
    if bnds is None:
        return ()

    if not one_indexed:
        bnds = tuple((i - 1, j - 1) for i, j in bnds)

    return bnds
def high_p_parameters(rxn_dstr):
    """ high-pressure parameters

        Gathers the cast values from every capture of the first-line pattern
        in the reaction data string into a single flat list.

        :param rxn_dstr: reaction data string
        :return: the cast parameter values, in capture order
        :rtype: list
    """
    pattern = _first_line_pattern(
        rct_ptt=SPECIES_NAMES_PATTERN,
        prd_ptt=SPECIES_NAMES_PATTERN,
        coeff_ptt=app.capturing(COEFF_PATTERN))

    vals = []
    for capture in apf.all_captures(pattern, rxn_dstr):
        vals.extend(ap_cast(capture.split()))
    return vals
def high_p_parameters(rxn_dstr):
    """ high-pressure parameters

        Reads the first capture of the first-line pattern in the reaction
        data string and casts its whitespace-separated values.

        :param rxn_dstr: reaction data string
        :return: the cast parameter values; None if nothing was captured
        :rtype: list
    """
    pattern = _first_line_pattern(
        rct_ptt=SPECIES_NAMES_PATTERN,
        prd_ptt=SPECIES_NAMES_PATTERN,
        coeff_ptt=app.capturing(COEFF_PATTERN))

    capture = apf.first_capture(pattern, rxn_dstr)
    if capture is None:
        return None

    return list(ap_cast(capture.split()))
def hydrogen_valences(chi, one_indexed=False):
    """ Determine the hydrogen valences of backbone atoms in a ChI string

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: a dictionary of hydrogen valences, keyed by canonical index
        :rtype: dict[int: int]
    """
    # Grammar: blocks of indices (joined by '-' or ',') ending in 'H' and an
    # optional count
    number = pp.Word(pp.nums)
    delim = '-' | pp.Suppress(',')
    block = number + pp.ZeroOrMore(delim + number) + 'H' + pp.Opt(number)
    parser = pp.Opt(pp.Group(block) + pp.ZeroOrMore(delim + pp.Group(block)))

    # Parse the 'h' layer (empty string if the layer is absent)
    lyr_dct = main_layers(chi)
    hyd_lyr = lyr_dct['h'] if 'h' in lyr_dct else ''
    blocks = ap_cast(parser.parseString(hyd_lyr).asList())

    # Start from zero hydrogens on every canonical index
    shift = 0 if one_indexed else -1
    all_idxs = canonical_indices(chi, one_indexed=one_indexed)
    nhyd_dct = dict_.by_key({}, all_idxs, fill_val=0)

    for blk in blocks:
        # A trailing integer is the hydrogen count (drop it and the 'H');
        # otherwise the count is 1 (drop only the 'H')
        if isinstance(blk[-1], int):
            nhyd, idx_part = blk[-1], blk[:-2]
        else:
            nhyd, idx_part = 1, blk[:-1]

        # Split at '-' and fill in the indices between consecutive segments
        segments = list(map(list, automol.util.breakby(idx_part, '-')))
        idxs = segments.pop(0)
        for segment in segments:
            idxs.extend(range(idxs[-1] + 1, segment[0]))
            idxs.extend(segment)

        nhyd_dct.update({idx + shift: nhyd for idx in idxs})

    return nhyd_dct
def _atom_stereo_parities(lyr_dct, one_indexed=False):
    """ Parse atom stereo parities from the 't' entry of a layer dictionary

        :param lyr_dct: layer dictionary; an empty result is returned when it
            has no 't' key
        :param one_indexed: keep the indices as written in the layer? If
            False, each index is shifted down by one
        :type one_indexed: bool
        :returns: parities keyed by atom index; True for '+', False otherwise
        :rtype: dict[int: bool]
    """
    if 't' not in lyr_dct:
        return {}

    # Grammar: comma-delimited terms, each "<int>" followed by a sign
    number = pp.Word(pp.nums)
    sign = pp.Or(['+', '-'])
    term = pp.Group(number + sign)
    parser = pp.Opt(pp.delimitedList(term, delim=','))

    # Parse the layer and cast numeric strings
    terms = ap_cast(parser.parseString(lyr_dct['t']).asList())

    # Build the dictionary, applying the index shift
    shift = 0 if one_indexed else -1
    atm_ste_dct = {}
    for key, par in terms:
        atm_ste_dct[key + shift] = (par == '+')
    return atm_ste_dct
def stereo_atoms(ich, iso=True, one_indexed=False):
    """ Parse the stereo atoms from the stereochemistry layer.

        :param ich: InChI string
        :type ich: str
        :param iso: Include isotope stereochemistry?
        :type iso: bool
        :param one_indexed: Return indices in one-indexing?
        :type one_indexed: bool
        :returns: the stereo atom indices; an empty tuple if there is no 't'
            sublayer or nothing was captured from it
        :raises NotImplementedError: if `ich` splits into more than one
            component
    """
    if len(split(ich)) > 1:
        # Trailing space keeps the two sentences apart once the adjacent
        # literals are concatenated
        raise NotImplementedError("Multicomponent InChIs not implemented. "
                                  "Call inchi.split() first")

    # An unsigned integer (captured) followed by a '+' or '-' sign
    atm_ptt = (app.capturing(app.UNSIGNED_INTEGER) +
               app.one_of_these(list(map(app.escape, '+-'))))

    ste_dct = stereo_sublayers(ich)
    iso_dct = isotope_sublayers(ich)

    # Concatenate the regular and (optionally) isotopic atom sublayers
    tlyr = ''
    if 't' in ste_dct:
        tlyr += ste_dct['t']
    if iso and 't' in iso_dct:
        tlyr += ',' + iso_dct['t']

    if not tlyr:
        return ()

    atms = ap_cast(apf.all_captures(atm_ptt, tlyr))

    # Handle a missing result *before* iterating over it; previously this
    # check came after the index shift and could never prevent the failure
    if atms is None:
        return ()

    if not one_indexed:
        atms = tuple(i - 1 for i in atms)

    return atms
def bonds(chi, one_indexed=False):
    """ Determine bonds between backbone atoms in a ChI string

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: the bonds, each a frozenset of two atom indices; an empty
            set if the connection layer is absent or empty
        :rtype: set[frozenset[int]]
    """
    # Set up the pyparsing parser
    integer = pp.Word(pp.nums)
    chain = pp.delimitedList(integer, delim='-')
    chains = chain + pp.ZeroOrMore(',' + chain)
    side_chain = pp.nestedExpr('(', ')', content=chains)
    parser = pp.Opt(chain + pp.ZeroOrMore(side_chain + chain))

    # Do the parsing. This produces a nested list of numbers and commas
    # mirroring the connection layer
    main_lyr_dct = main_layers(chi)
    conn_lyr = main_lyr_dct['c'] if 'c' in main_lyr_dct else ''
    conn_lst = list(ap_cast(parser.parseString(conn_lyr).asList()))

    # Nothing parsed means no bonds. The recursion below pops the first
    # element unconditionally, so it must not be entered with an empty list.
    if not conn_lst:
        return set()

    shift = 0 if one_indexed else -1

    def _recurse_find_bonds(bnds, conn_lst):
        # Pop the current idx
        idx = conn_lst.pop(0) + shift

        # If there are elements left, continue
        if conn_lst:
            # Look at the next element
            obj = conn_lst[0]

            # Deal with the case where obj is a sequence
            if isinstance(obj, abc.Sequence):
                # In this case, we have multiple branches

                # Pop the sequence
                obj = conn_lst.pop(0)

                # Split the sequence at commas
                lsts = automol.util.breakby(obj, ',')

                # Add bonds to the first element and continue the recursion
                # for each sub list from the split (the shared `bnds` set is
                # mutated in place, so the return value is not needed here)
                for lst in map(list, lsts):
                    nei = lst[0] + shift
                    bnds.add(frozenset({idx, nei}))
                    _recurse_find_bonds(bnds, lst)

                # Now that the list has been dealt with, continue with the
                # element following it, which is also bonded to `idx`
                nei = conn_lst[0] + shift

                # Check that this is an integer (it should always be)
                assert isinstance(nei, int), (
                    f"Something is wrong. {nei} should be an integer.")

                # Add the bond
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)
            # Deal with the case where obj is a number
            else:
                # In this case, we are continuing along a chain

                # Add the bond
                nei = obj + shift
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)

        return bnds

    bnds = _recurse_find_bonds(set(), conn_lst)

    return bnds
def _parse_sort_order_from_aux_info(aux_info):
    """ Parse the numbers that follow '/N:' in an AuxInfo string

        The captured series is split into groups at ';' and each group into
        individual number strings at ',' before casting.

        :param aux_info: the AuxInfo string
        :type aux_info: str
        :returns: the cast groups of numbers
    """
    num_series_ptt = app.series(
        app.UNSIGNED_INTEGER, app.one_of_these(',;'))
    ptt = app.escape('/N:') + app.capturing(num_series_ptt)

    capture = apf.first_capture(ptt, aux_info)
    groups = tuple(group.split(',') for group in capture.split(';'))
    return ap_cast(groups)