Beispiel #1
0
def _bond_stereo_parities(lyr_dct, one_indexed=False):
    """ Read the bond stereo parities out of a layer dictionary

        The 'b' layer is parsed as a comma-separated list of terms, each
        consisting of two integers joined by '-' and followed by a '+' or
        '-' sign.

        :param lyr_dct: layer dictionary; only its 'b' entry is used
        :param one_indexed: keep the indices as they appear in the layer,
            rather than shifting each of them down by one?
        :type one_indexed: bool
        :returns: a dictionary keyed by the frozenset of the two indices of
            each term, holding True for a '+' sign and False for a '-' sign;
            empty when there is no 'b' layer
        :rtype: dict
    """
    # Nothing to parse without a 'b' layer
    if 'b' not in lyr_dct:
        return {}

    # Grammar for the layer: `<int>-<int><sign>` terms separated by commas
    number = pp.Word(pp.nums)
    atom_pair = pp.Group(number + pp.Suppress('-') + number)
    sign = pp.Or(['+', '-'])
    entry = pp.Group(atom_pair + sign)
    grammar = pp.Opt(pp.delimitedList(entry, delim=','))

    # Parse the layer and cast the resulting nested list
    parsed = ap_cast(grammar.parseString(lyr_dct['b']).asList())

    # Build the dictionary one term at a time
    offset = 0 if one_indexed else -1
    parities = {}
    for (idx1, idx2), sign_chr in parsed:
        key = frozenset({idx1 + offset, idx2 + offset})
        parities[key] = (sign_chr == '+')
    return parities
Beispiel #2
0
def hydrogen_valences(chi, one_indexed=False):
    """ Work out how many hydrogens sit on each backbone atom of a ChI string

        The counts are read from the 'h' main layer, if there is one.

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: a dictionary of hydrogen valences, keyed by canonical index
        :rtype: dict[int: int]
    """
    # Grammar: blocks of integers joined by ',' or '-', each block ending in
    # 'H' plus an optional count; the commas are dropped, the dashes are kept
    number = pp.Word(pp.nums)
    joiner = '-' | pp.Suppress(',')
    h_block = number + pp.ZeroOrMore(joiner + number) + 'H' + pp.Opt(number)
    grammar = pp.Opt(
        pp.Group(h_block) + pp.ZeroOrMore(joiner + pp.Group(h_block)))

    # Fetch the hydrogen layer (empty string if absent) and parse it
    layers = main_layers(chi)
    if 'h' in layers:
        h_layer = layers['h']
    else:
        h_layer = ''
    blocks = ap_cast(grammar.parseString(h_layer).asList())

    # Start from the dictionary that dict_.by_key builds over all canonical
    # indices with a fill value of 0
    offset = 0 if one_indexed else -1
    valences = dict_.by_key(
        {}, canonical_indices(chi, one_indexed=one_indexed), fill_val=0)

    for block in blocks:
        # A trailing integer is the hydrogen count and is preceded by the
        # 'H'; without it the block ends in a bare 'H', meaning one hydrogen
        if isinstance(block[-1], int):
            count = block[-1]
            atoms = block[:-2]
        else:
            count = 1
            atoms = block[:-1]

        # Split at the dashes; each dash stands for the run of indices
        # lying between its two neighbours, which is filled in here
        runs = [list(run) for run in automol.util.breakby(atoms, '-')]
        keys = runs[0]
        for run in runs[1:]:
            keys.extend(range(keys[-1] + 1, run[0]))
            keys.extend(run)

        valences.update({key + offset: count for key in keys})

    return valences
Beispiel #3
0
def _atom_stereo_parities(lyr_dct, one_indexed=False):
    """ Read the atom stereo parities out of a layer dictionary

        The 't' layer is parsed as a comma-separated list of terms, each an
        integer followed by a '+' or '-' sign.

        :param lyr_dct: layer dictionary; only its 't' entry is used
        :param one_indexed: keep the indices as they appear in the layer,
            rather than shifting each of them down by one?
        :type one_indexed: bool
        :returns: a dictionary keyed by the index of each term, holding True
            for a '+' sign and False for a '-' sign; empty when there is no
            't' layer
        :rtype: dict
    """
    # Nothing to parse without a 't' layer
    if 't' not in lyr_dct:
        return {}

    # Grammar for the layer: `<int><sign>` terms separated by commas
    number = pp.Word(pp.nums)
    sign = pp.Or(['+', '-'])
    entry = pp.Group(number + sign)
    grammar = pp.Opt(pp.delimitedList(entry, delim=','))

    # Parse the layer and cast the resulting nested list
    parsed = ap_cast(grammar.parseString(lyr_dct['t']).asList())

    # Build the dictionary one term at a time
    offset = 0 if one_indexed else -1
    parities = {}
    for idx, sign_chr in parsed:
        parities[idx + offset] = (sign_chr == '+')
    return parities
Beispiel #4
0
def bonds(chi, one_indexed=False):
    """ Determine bonds between backbone atoms in a ChI string

        The bonds are read from the 'c' (connection) main layer.

        :param chi: ChI string
        :type chi: str
        :param one_indexed: use one-indexing?
        :type one_indexed: bool
        :returns: the bonds found in the connection layer, each one a
            frozenset holding the two atom indices; an empty set when the
            connection layer is missing or empty
        :rtype: set[frozenset[int]]
    """
    # Set up the pyparsing parser
    integer = pp.Word(pp.nums)
    chain = pp.delimitedList(integer, delim='-')
    chains = chain + pp.ZeroOrMore(',' + chain)
    side_chain = pp.nestedExpr('(', ')', content=chains)
    parser = pp.Opt(chain + pp.ZeroOrMore(side_chain + chain))

    # Do the parsing. This produces a nested list of numbers and commas
    # mirroring the connection layer
    main_lyr_dct = main_layers(chi)
    conn_lyr = main_lyr_dct['c'] if 'c' in main_lyr_dct else ''
    conn_lst = list(ap_cast(parser.parseString(conn_lyr).asList()))

    # Without a connection layer nothing is parsed, so there are no bonds.
    # Return early: the recursion below pops the first element
    # unconditionally and would raise an IndexError on an empty list.
    if not conn_lst:
        return set()

    shift = 0 if one_indexed else -1

    def _recurse_find_bonds(bnds, conn_lst):
        # Walk `conn_lst` from the front, consuming it in place and adding
        # every bond encountered to `bnds`

        # Pop the current idx
        idx = conn_lst.pop(0) + shift

        # If there are elements left, continue
        if conn_lst:
            # Look at the next element
            obj = conn_lst[0]

            # Deal with the case where obj is a sequence
            if isinstance(obj, abc.Sequence):
                # In this case, we have multiple branches

                # Pop the sequence
                obj = conn_lst.pop(0)

                # Split the sequence at commas
                lsts = automol.util.breakby(obj, ',')

                # Add bonds to the first element and continue the recursion for
                # each sub list from the split
                for lst in map(list, lsts):
                    nei = lst[0] + shift
                    bnds.add(frozenset({idx, nei}))

                    _recurse_find_bonds(bnds, lst)

                # Now that the list has been dealt with, continue with the
                # element following it, which is also bonded to `idx`
                nxt = conn_lst[0]

                # Check that this is an integer (it should always be). The
                # check comes before the shift is applied, so that a
                # non-integer is reported here instead of failing in the
                # addition
                assert isinstance(
                    nxt,
                    int), (f"Something is wrong. {nxt} should be an integer.")

                # Add the bond
                nei = nxt + shift
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)
            # Deal with the case where obj is a number
            else:
                # In this case, we are continuing along a chain

                # Add the bond
                nei = obj + shift
                bnds.add(frozenset({idx, nei}))

                # Continue the recursion
                bnds = _recurse_find_bonds(bnds, conn_lst)

        return bnds

    bnds = _recurse_find_bonds(set(), conn_lst)

    return bnds
    return pp.MatchFirst(
        define_numeric_word(name, value) for name, value in zip(names, values))


# Expressions for the number words 1-9, 10-19 and the multiples of ten from
# 20 to 90, as produced by define_numeric_word_range
units = define_numeric_word_range(
    "one two three four five six seven eight nine",
    1,
    9,
)
units.set_name("units")

teens = define_numeric_word_range(
    "ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen",
    10,
    19,
)
teens.set_name("teens")

tens = define_numeric_word_range(
    "twenty thirty forty fifty sixty seventy eighty ninety",
    20,
    90,
    step=10,
)
tens.set_name("tens")

# A tens word may be followed by a units word, with or without a dash
# between them; the dash itself is dropped from the results
opt_dash = pp.Opt(pp.Suppress("-"))
twenty_to_99 = tens + pp.Opt(opt_dash + units)

# for expressions that parse multiple values, add them up
one_to_99 = (
    (units | teens | twenty_to_99)
    .set_name("1-99")
    .add_parse_action(sum)
)

numeric_expression = one_to_99

if __name__ == "__main__":
    numeric_expression.run_tests("""
        one
        seven
        twelve
        twenty six
Beispiel #6
0
# Matches any key of ligature_map and replaces it with one of the
# whitespace-separated alternatives in the mapped value, picked at random
ligature_transformer = pp.oneOf(ligature_map)
ligature_transformer.add_parse_action(
    lambda toks: random.choice(ligature_map[toks[0]].split()))


def make_mixed_font(t):
    """ Rewrite the first token with randomly chosen substitute characters

        The leading character is looked up in `ident_char_map` on its own,
        except that a leading underscore is kept as it is. The rest of the
        token is first passed through `ligature_transformer`, and then each
        resulting character is looked up in `ident_char_map`. Characters
        without an entry in the map stay unchanged.

        :param t: parsed tokens; only the first one is used
        :returns: the rewritten token
        :rtype: str
    """
    word = t[0]
    head = word[0]

    # Leading character: a leading underscore is never substituted
    if head == '_':
        pieces = ['_']
    else:
        pieces = [random.choice(ident_char_map.get(head, head))]

    # Remaining characters: ligatures first, then per-character substitution
    tail = ligature_transformer.transform_string(word[1:])
    for char in tail:
        pieces.append(random.choice(ident_char_map.get(char, char)))

    return ''.join(pieces)


# Identifiers are rewritten by make_mixed_font.
# NOTE(review): add_parse_action modifies the expression in place, so the
# parse action ends up on the shared pp.pyparsing_common.identifier object
# itself -- confirm that this is intended rather than working on a copy.
identifier = pp.pyparsing_common.identifier.add_parse_action(make_mixed_font)

# A string literal with an optional f/F prefix. The prefix and the string
# are stored under the results names used by mix_fstring_expressions; the
# triple-quoted alternatives may span lines and keep their quote characters.
python_quoted_string = (
    pp.Opt(pp.Char("fF").set_results_name("f_string_prefix"))
    + (
        pp.quotedString
        | pp.QuotedString('"""', multiline=True, unquoteResults=False)
        | pp.QuotedString("'''", multiline=True, unquoteResults=False)
    ).set_results_name("quoted_string_body")
)


def mix_fstring_expressions(t):
    """ Apply `transformer` to the `{...}` fields of a prefixed string

        :param t: parsed tokens carrying the `f_string_prefix` and
            `quoted_string_body` results names
        :returns: the prefix followed by the string body, in which the text
            inside each pair of braces has been passed through
            `transformer`; None if there is no prefix
    """
    prefix = t.f_string_prefix
    if not prefix:
        return None

    # Match each brace-delimited field and put the braces back around the
    # transformed contents
    braced_field = pp.QuotedString("{", end_quote_char="}")
    braced_field.add_parse_action(
        lambda inner: "{" + transformer.transform_string(inner[0]) + "}")

    return prefix + braced_field.transform_string(t.quoted_string_body)