def _bond_stereo_parities(lyr_dct, one_indexed=False): """ Parse bond stereo parities from a given layer dictionary """ if 'b' not in lyr_dct: bnd_ste_dct = {} else: lyr = lyr_dct['b'] # Set up the parser integer = pp.Word(pp.nums) bond = integer + pp.Suppress('-') + integer parity = pp.Or(['+', '-']) term = pp.Group(pp.Group(bond) + parity) parser = pp.Opt(pp.delimitedList(term, delim=',')) # Do the parsing lst = ap_cast(parser.parseString(lyr).asList()) # Interpret the list shift = 0 if one_indexed else -1 bnd_ste_dct = { frozenset({k1 + shift, k2 + shift}): (p == '+') for (k1, k2), p in lst } return bnd_ste_dct
def hydrogen_valences(chi, one_indexed=False):
    """ Determine the hydrogen valences of backbone atoms in a ChI string

        Every canonical index starts with a count of zero; the counts given
        in the 'h' main layer (if there is one) are then filled in.

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: a dictionary of hydrogen valences, keyed by canonical index
        :rtype: dict[int: int]
    """
    # Grammar: a group is a list of indices joined by '-' (kept, marks a
    # range) or ',' (dropped), followed by 'H' and an optional count
    number = pp.Word(pp.nums)
    delim = '-' | pp.Suppress(',')
    group = number + pp.ZeroOrMore(delim + number) + 'H' + pp.Opt(number)
    grammar = pp.Opt(
        pp.Group(group) + pp.ZeroOrMore(delim + pp.Group(group)))

    # Fetch the hydrogen layer, treating a missing layer as empty
    layers = main_layers(chi)
    if 'h' in layers:
        h_layer = layers['h']
    else:
        h_layer = ''
    parsed_groups = ap_cast(grammar.parseString(h_layer).asList())

    # Start every canonical index off at zero hydrogens
    offset = 0 if one_indexed else -1
    keys = canonical_indices(chi, one_indexed=one_indexed)
    nhyd_dct = dict_.by_key({}, keys, fill_val=0)

    for tokens in parsed_groups:
        # A trailing integer is the explicit count; otherwise the group ends
        # in the bare 'H' marker and the count is one
        last = tokens[-1]
        if isinstance(last, int):
            count = last
            body = tokens[:-2]
        else:
            count = 1
            body = tokens[:-1]

        # Split at the range markers, then fill in the gap between the end of
        # one segment and the start of the next
        segments = list(map(list, automol.util.breakby(body, '-')))
        idxs = segments.pop(0)
        for segment in segments:
            gap = range(idxs[-1] + 1, segment[0])
            idxs.extend(gap)
            idxs.extend(segment)

        nhyd_dct.update(dict.fromkeys((k + offset for k in idxs), count))

    return nhyd_dct
def _atom_stereo_parities(lyr_dct, one_indexed=False): """ Parse atom stereo parities from a given layer dictionary """ if 't' not in lyr_dct: atm_ste_dct = {} else: lyr = lyr_dct['t'] # Set up the parser integer = pp.Word(pp.nums) parity = pp.Or(['+', '-']) term = pp.Group(integer + parity) parser = pp.Opt(pp.delimitedList(term, delim=',')) # Do the parsing lst = ap_cast(parser.parseString(lyr).asList()) # Interpret the list shift = 0 if one_indexed else -1 atm_ste_dct = {k + shift: (p == '+') for k, p in lst} return atm_ste_dct
def bonds(chi, one_indexed=False):
    """ Determine bonds between backbone atoms in a ChI string

        The 'c' main layer is parsed into a nested list of atom numbers and
        commas, which is then walked recursively: consecutive numbers in a
        chain are bonded, and each comma-separated branch inside parentheses
        is bonded to the atom preceding the parentheses.

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: the bonds, each as a frozenset of two atom indices; an
            empty set if the connection layer is missing or empty
        :rtype: set[frozenset[int]]
    """
    # Set up the pyparsing parser
    integer = pp.Word(pp.nums)
    chain = pp.delimitedList(integer, delim='-')
    chains = chain + pp.ZeroOrMore(',' + chain)
    side_chain = pp.nestedExpr('(', ')', content=chains)
    parser = pp.Opt(chain + pp.ZeroOrMore(side_chain + chain))

    # Do the parsing. This produces a nested list of numbers and commas
    # mirroring the connection layer
    main_lyr_dct = main_layers(chi)
    conn_lyr = main_lyr_dct['c'] if 'c' in main_lyr_dct else ''
    conn_lst = list(ap_cast(parser.parseString(conn_lyr).asList()))

    # A missing or empty connection layer parses to an empty list. There are
    # no bonds in that case, and the recursion below cannot start because it
    # pops the first element unconditionally.
    if not conn_lst:
        return set()

    shift = 0 if one_indexed else -1

    def _recurse_find_bonds(bnds, conn_lst):
        """ Consume `conn_lst` from the front, adding the bonds found to
            `bnds` (which is modified in place and also returned)
        """
        # Pop the current idx
        idx = conn_lst.pop(0) + shift

        # If there are elements left, continue
        if conn_lst:
            # Look at the next element
            obj = conn_lst[0]

            # Deal with the case where obj is a sequence
            if isinstance(obj, abc.Sequence):
                # In this case, we have multiple branches

                # Pop the sequence
                obj = conn_lst.pop(0)

                # Split the sequence at commas
                lsts = automol.util.breakby(obj, ',')

                # Add bonds to the first element and continue the recursion
                # for each sub list from the split
                for lst in map(list, lsts):
                    nei = lst[0] + shift
                    bnds.add(frozenset({idx, nei}))
                    _recurse_find_bonds(bnds, lst)

                # Now that the list has been dealt with, continue with the
                # element following it, which is also bonded to `idx`
                nxt = conn_lst[0]

                # Check that this is an integer (it should always be). The
                # check is made on the raw element, before the shift is
                # added, so that a non-integer is reported by this message
                # rather than by a TypeError from the addition.
                assert isinstance(nxt, int), (
                    f"Something is wrong. {nxt} should be an integer.")
                nei = nxt + shift

                # Add the bond
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)
            # Deal with the case where obj is a number
            else:
                # In this case, we are continuing along a chain

                # Add the bond
                nei = obj + shift
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)

        return bnds

    bnds = _recurse_find_bonds(set(), conn_lst)

    return bnds
return pp.MatchFirst( define_numeric_word(name, value) for name, value in zip(names, values)) units = define_numeric_word_range( "one two three four five six seven eight nine", 1, 9).set_name("units") teens = define_numeric_word_range( "ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen", 10, 19, ).set_name("teens") tens = define_numeric_word_range( "twenty thirty forty fifty sixty seventy eighty ninety", 20, 90, step=10).set_name("tens") opt_dash = pp.Opt(pp.Suppress("-")) twenty_to_99 = tens + pp.Opt(opt_dash + units) one_to_99 = (units | teens | twenty_to_99).set_name("1-99") # for expressions that parse multiple values, add them up one_to_99.add_parse_action(sum) numeric_expression = one_to_99 if __name__ == "__main__": numeric_expression.run_tests(""" one seven twelve twenty six
# Replaces each `ligature_map` key found in a string with one alternative
# picked at random from that key's whitespace-separated entry
ligature_transformer = pp.oneOf(ligature_map).add_parse_action(
    lambda t: random.choice(ligature_map[t[0]].split()))


def make_mixed_font(t):
    """ Parse action: rewrite a matched identifier with randomly chosen
        substitute characters

        :param t: parse results whose first token is the identifier string
        :returns: the rewritten identifier
        :rtype: str
    """
    t_0 = t[0][0]

    # A leading underscore is always kept as a plain '_'; any other leading
    # character gets a random substitute from `ident_char_map` (or is kept
    # as is when it has no entry)
    ret = ['_' if t_0 == '_'
           else random.choice(ident_char_map.get(t_0, t_0))]

    # Apply ligature substitutions to the remainder first, then substitute
    # the resulting characters one by one
    t_rest = ligature_transformer.transform_string(t[0][1:])
    ret.extend(random.choice(ident_char_map.get(c, c)) for c in t_rest)
    return ''.join(ret)


# `add_parse_action` modifies the expression it is called on, and
# `pp.pyparsing_common.identifier` is a single object shared by every user of
# pyparsing in the process. Attach the action to a copy so that the library's
# own expression is left untouched.
identifier = pp.pyparsing_common.identifier.copy()
identifier.add_parse_action(make_mixed_font)

# A quoted string (single-line, or triple-quoted and possibly multiline) with
# an optional f/F prefix; the prefix and the body get separate results names
python_quoted_string = pp.Opt(pp.Char("fF")("f_string_prefix")) + (
    pp.quotedString
    | pp.QuotedString('"""', multiline=True, unquoteResults=False)
    | pp.QuotedString("'''", multiline=True, unquoteResults=False)
)("quoted_string_body")


def mix_fstring_expressions(t):
    """ Parse action: transform the `{...}` fields of an f-string

        :param t: parse results carrying the `f_string_prefix` and
            `quoted_string_body` results names
        :returns: the prefix followed by the body with every brace-delimited
            field passed through `transformer`, or None when the string has
            no f/F prefix
        :rtype: str or None
    """
    # Not an f-string: return None explicitly
    # NOTE(review): presumably pyparsing then keeps the matched tokens
    # unchanged -- confirm against the pyparsing parse-action documentation
    if not t.f_string_prefix:
        return None

    # Match each `{...}` span and put the braces back around the transformed
    # contents. `transformer` is looked up when the action runs; it is not
    # defined in this part of the file.
    fstring_arg = pp.QuotedString("{", end_quote_char="}")
    fstring_arg.add_parse_action(
        lambda tt: "{" + transformer.transform_string(tt[0]) + "}")

    ret = t.f_string_prefix + fstring_arg.transform_string(
        t.quoted_string_body)
    return ret