Exemple #1
0
def test_states():
    """Check the ``states``, ``defaults`` and ``max_char`` attributes of a DFA.

    Two automata are built with the same accept flags: one whose states carry
    only explicit character arcs, and one whose second state carries only a
    ``DEFAULT`` arc.
    """
    expectations = (
        # (state dicts, expected .states, expected .defaults, expected .max_char)
        ([{"\x00": 1}, {"\x01": 0}], "\x01\xff\xff\x00", "\xff\xff", 2),
        ([{"\x00": 1}, {DEFAULT: 0}], "\x01\x00", "\xff\x00", 1),
    )
    for arcs, packed_states, packed_defaults, width in expectations:
        automaton = DFA(arcs, [False, True])
        assert automaton.states == packed_states
        assert automaton.defaults == packed_defaults
        assert automaton.max_char == width
Exemple #2
0
def test_nonascii():
    """Exercise ``NON_ASCII`` arcs on UTF-8 encoded input.

    Covers a ``DFA`` and a ``NonGreedyDFA`` using ``NON_ASCII`` arcs, and
    checks that building a ``DFA`` with a raw ``"\\x81"`` arc raises
    ``ValueError``.
    """
    greedy = DFA([{"a": 1}, {NON_ASCII: 1}], [False, True])
    encoded = u"aüüüü".encode("utf-8")
    # The whole encoded input is consumed.
    assert greedy.recognize(encoded) == len(encoded)
    assert greedy.recognize("c") == -1
    # State 0 only has an arc for "a", so this input is rejected.
    assert greedy.recognize("ü") == -1

    lazy = NonGreedyDFA([{NON_ASCII: 0, "b": 1}, {"b": 0}], [False, True])
    encoded = u"üübbbb".encode("utf-8")
    # Expected match length is that of the prefix up to the first "b".
    shortest = u"üüb".encode("utf-8")
    assert lazy.recognize(encoded) == len(shortest)
    assert lazy.recognize("c") == -1

    with pytest.raises(ValueError):
        DFA([{"\x81": 2}], [True])
Exemple #3
0
def makePyEndDFAMap():
    """Build the map from string-opening text to the DFA matching its end.

    Four automata are constructed, each from its own fresh NFA state list:
    single-quoted and double-quoted (``DFA``), then triple-single-quoted and
    triple-double-quoted (``NonGreedyDFA``).  The returned dict maps ``'`` and
    ``"`` to the first two, maps the bare prefix letters ``r R u U b B`` to
    ``None``, and maps every ``[uUbB]?[rR]?`` prefix followed by a triple
    quote to the matching triple-quote automaton.

    The combinator calls below are kept in the same left-to-right order as a
    single nested expression would evaluate them, because each call appends
    to the shared NFA list.
    """

    def short_string_end(quote, stop_chars):
        # Rest of a one-quote string, ending with the closing ``quote``.
        nfa = []
        plain_run = any(nfa, notGroupStr(nfa, stop_chars))
        escaped_runs = any(
            nfa,
            chain(nfa, newArcPair(nfa, "\\"),
                  newArcPair(nfa, DEFAULT),
                  any(nfa, notGroupStr(nfa, stop_chars))))
        closing = newArcPair(nfa, quote)
        fragment = chain(nfa, plain_run, escaped_runs, closing)
        return DFA(*nfaToDfa(nfa, *fragment))

    def long_string_end(quote, stop_chars, two_quotes, three_quotes):
        # Rest of a triple-quoted string, ending with ``three_quotes``.
        nfa = []
        plain_run = any(nfa, notGroupStr(nfa, stop_chars))
        backslash_pair = chain(nfa, newArcPair(nfa, "\\"),
                               newArcPair(nfa, DEFAULT))
        lone_quote = chain(nfa, newArcPair(nfa, quote),
                           notChainStr(nfa, two_quotes))
        special = group(nfa, backslash_pair, lone_quote)
        special_runs = any(
            nfa,
            chain(nfa, special, any(nfa, notGroupStr(nfa, stop_chars))))
        closing = chainStr(nfa, three_quotes)
        fragment = chain(nfa, plain_run, special_runs, closing)
        return NonGreedyDFA(*nfaToDfa(nfa, *fragment))

    single_end = short_string_end("'", "'\\")
    double_end = short_string_end('"', '"\\')
    triple_single_end = long_string_end("'", "'\\", "''", "'''")
    triple_double_end = long_string_end('"', '"\\', '""', '"""')

    end_dfas = {"'": single_end, '"': double_end}
    # Bare prefix letters are present as keys but carry no automaton.
    for lone_prefix in ("r", "R", "u", "U", "b", "B"):
        end_dfas[lone_prefix] = None
    for text_prefix in ("", "u", "U", "b", "B"):
        for raw_prefix in ("", "r", "R"):
            opener = text_prefix + raw_prefix
            end_dfas[opener + "'''"] = triple_single_end
            end_dfas[opener + '"""'] = triple_double_end
    return end_dfas
Exemple #4
0
def makePyPseudoDFA():
    """Build the DFA that matches one "pseudo token" of Python source.

    Every NFA fragment is created inside one shared ``states`` list by the
    combinator helpers (``chain``, ``group``, ``any``, ``maybe``,
    ``atleastonce``, ``groupStr``, ``notGroupStr``, ``chainStr``,
    ``newArcPair``), all of which are defined outside this function.  ``any``
    is that combinator, not the builtin: it is called with two arguments.
    NOTE(review): from their names, ``any`` / ``maybe`` / ``atleastonce`` are
    presumably zero-or-more / optional / one-or-more -- confirm against the
    automata helpers.

    The final pattern is ``makeWhitespace()`` followed by a ``group`` of: an
    ``EMPTY`` arc, the "extras" (line continuation, comment, triple-quote
    opener), a number, a "funny" token (operator, bracket or special
    character), a one-line string, or a name.

    Returns the ``DFA`` built from the two values produced by ``nfaToDfa``.
    """
    import string
    states = []

    # NOTE(review): fragments that are used more than once are wrapped in
    # functions and rebuilt at each use, presumably because every fragment
    # needs its own entries in ``states`` -- confirm.
    def makeEOL():
        # Either "\n", or "\r" with maybe(...) a following "\n".
        return group(
            states, newArcPair(states, "\n"),
            chain(states, newArcPair(states, "\r"),
                  maybe(states, newArcPair(states, "\n"))))

    # ____________________________________________________________
    def makeLineCont():
        # A backslash followed by an end-of-line fragment.
        return chain(states, newArcPair(states, "\\"), makeEOL())

    # ____________________________________________________________
    # Ignore stuff
    def makeWhitespace():
        # Space, form feed and tab characters.
        return any(states, groupStr(states, " \f\t"))

    # ____________________________________________________________
    def makeComment():
        # "#" followed by characters other than "\r" and "\n".
        return chain(states, newArcPair(states, "#"),
                     any(states, notGroupStr(states, "\r\n")))

    # ____________________________________________________________
    #ignore = chain(states,
    #               makeWhitespace(),
    #               any(states, chain(states,
    #                                 makeLineCont(),
    #                                 makeWhitespace())),
    #               maybe(states, makeComment()))
    # ____________________________________________________________
    # Names
    # First character from string.letters or "_"; later characters may also
    # come from string.digits.
    name = chain(
        states, groupStr(states, string.letters + "_"),
        any(states, groupStr(states, string.letters + string.digits + "_")))

    # ____________________________________________________________
    # Digits
    def makeDigits():
        return groupStr(states, "0123456789")

    # ____________________________________________________________
    # Integer numbers
    # Each integer form ends with maybe(...) an "l"/"L" suffix.
    hexNumber = chain(
        states, newArcPair(states, "0"), groupStr(states, "xX"),
        atleastonce(states, groupStr(states, "0123456789abcdefABCDEF")),
        maybe(states, groupStr(states, "lL")))
    # "0", then maybe(...) an "o"/"O" marker with one octal digit, then
    # further octal digits.
    octNumber = chain(
        states, newArcPair(states, "0"),
        maybe(
            states,
            chain(states, groupStr(states, "oO"), groupStr(states,
                                                           "01234567"))),
        any(states, groupStr(states, "01234567")),
        maybe(states, groupStr(states, "lL")))
    binNumber = chain(states, newArcPair(states, "0"), groupStr(states, "bB"),
                      atleastonce(states, groupStr(states, "01")),
                      maybe(states, groupStr(states, "lL")))
    # Decimal literals start with a digit from "1" to "9".
    decNumber = chain(states, groupStr(states, "123456789"),
                      any(states, makeDigits()),
                      maybe(states, groupStr(states, "lL")))
    intNumber = group(states, hexNumber, octNumber, binNumber, decNumber)

    # ____________________________________________________________
    # Exponents
    def makeExp():
        # "e"/"E", maybe(...) a sign, then digits.
        return chain(states, groupStr(states, "eE"),
                     maybe(states, groupStr(states, "+-")),
                     atleastonce(states, makeDigits()))

    # ____________________________________________________________
    # Floating point numbers
    def makeFloat():
        # pointFloat: digits "." digits-or-nothing, or "." digits, each with
        # maybe(...) an exponent.  expFloat: digits followed by an exponent.
        pointFloat = chain(
            states,
            group(
                states,
                chain(states, atleastonce(states, makeDigits()),
                      newArcPair(states, "."), any(states, makeDigits())),
                chain(states, newArcPair(states, "."),
                      atleastonce(states, makeDigits()))),
            maybe(states, makeExp()))
        expFloat = chain(states, atleastonce(states, makeDigits()), makeExp())
        return group(states, pointFloat, expFloat)

    # ____________________________________________________________
    # Imaginary numbers
    # Digits or a float, followed by "j"/"J".
    imagNumber = group(
        states,
        chain(states, atleastonce(states, makeDigits()),
              groupStr(states, "jJ")),
        chain(states, makeFloat(), groupStr(states, "jJ")))
    # ____________________________________________________________
    # Any old number.
    number = group(states, imagNumber, makeFloat(), intNumber)
    # ____________________________________________________________
    # Funny
    # Operators: the two-character forms "**", ">>", "<<" and "//" and the
    # single characters in "+-*/%&|^=<>" may each be followed by "=";
    # "<>", "!=" and "~" are listed on their own.
    operator = group(
        states,
        chain(states, chainStr(states, "**"),
              maybe(states, newArcPair(states, "="))),
        chain(states, chainStr(states, ">>"),
              maybe(states, newArcPair(states, "="))),
        chain(states, chainStr(states, "<<"),
              maybe(states, newArcPair(states, "="))), chainStr(states, "<>"),
        chainStr(states, "!="),
        chain(states, chainStr(states, "//"),
              maybe(states, newArcPair(states, "="))),
        chain(states, groupStr(states, "+-*/%&|^=<>"),
              maybe(states, newArcPair(states, "="))), newArcPair(states, "~"))
    bracket = groupStr(states, "[](){}")
    special = group(states, makeEOL(), groupStr(states, "@:;.,`"))
    funny = group(states, operator, bracket, special)

    # ____________________________________________________________
    def makeStrPrefix():
        # maybe(...) one of "uUbB", then maybe(...) one of "rR".
        return chain(states, maybe(states, groupStr(states, "uUbB")),
                     maybe(states, groupStr(states, "rR")))

    # ____________________________________________________________
    # One-line strings, once per quote character: prefix, opening quote,
    # characters other than "\r", "\n", the quote and backslash, with
    # backslash + DEFAULT pairs in between, ending in either the closing
    # quote or a line continuation.
    contStr = group(
        states,
        chain(
            states, makeStrPrefix(), newArcPair(states, "'"),
            any(states, notGroupStr(states, "\r\n'\\")),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, "\r\n'\\")))),
            group(states, newArcPair(states, "'"), makeLineCont())),
        chain(
            states, makeStrPrefix(), newArcPair(states, '"'),
            any(states, notGroupStr(states, '\r\n"\\')),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, '\r\n"\\')))),
            group(states, newArcPair(states, '"'), makeLineCont())))
    # Only the opening of a triple-quoted string (prefix plus three quotes)
    # is matched here.
    triple = chain(
        states, makeStrPrefix(),
        group(states, chainStr(states, "'''"), chainStr(states, '"""')))
    pseudoExtras = group(states, makeLineCont(), makeComment(), triple)
    pseudoToken = chain(
        states, makeWhitespace(),
        group(states, newArcPair(states, EMPTY), pseudoExtras, number, funny,
              contStr, name))
    # Convert the accumulated NFA, using the pseudoToken fragment as the
    # extra arguments, and wrap the result in a DFA.
    dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
    return DFA(dfaStates, dfaAccepts)
Exemple #5
0
def makePyEndDFAMap():
    """Build the end-of-string automata together with their state tables.

    Returns a dict keyed by the four quote openers (``'``, ``"``, ``'''`` and
    ``\"\"\"``).  Each value is a ``(dfa, dfa_states)`` pair, where
    ``dfa_states`` is the state list returned by ``nfaToDfa`` -- the very
    object that was handed to the ``DFA`` / ``NonGreedyDFA`` constructor.

    Each automaton gets its own fresh NFA list.  The combinator calls are
    issued in the same order as a single nested expression would evaluate
    them, because each call appends to that list.
    """

    def compile_fragment(dfa_cls, nfa, fragment):
        # Determinize and keep the state table next to the automaton.
        dfa_states, accepts = nfaToDfa(nfa, *fragment)
        return dfa_cls(dfa_states, accepts), dfa_states

    def short_string_end(quote, stop_chars):
        # Rest of a one-quote string, ending with the closing ``quote``.
        nfa = []
        plain_run = any(nfa, notGroupStr(nfa, stop_chars))
        escaped_runs = any(
            nfa,
            chain(nfa, newArcPair(nfa, "\\"),
                  newArcPair(nfa, DEFAULT),
                  any(nfa, notGroupStr(nfa, stop_chars))))
        closing = newArcPair(nfa, quote)
        fragment = chain(nfa, plain_run, escaped_runs, closing)
        return compile_fragment(DFA, nfa, fragment)

    def long_string_end(quote, stop_chars, two_quotes, three_quotes):
        # Rest of a triple-quoted string, ending with ``three_quotes``.
        nfa = []
        plain_run = any(nfa, notGroupStr(nfa, stop_chars))
        backslash_pair = chain(nfa, newArcPair(nfa, "\\"),
                               newArcPair(nfa, DEFAULT))
        lone_quote = chain(nfa, newArcPair(nfa, quote),
                           notChainStr(nfa, two_quotes))
        special = group(nfa, backslash_pair, lone_quote)
        special_runs = any(
            nfa,
            chain(nfa, special, any(nfa, notGroupStr(nfa, stop_chars))))
        closing = chainStr(nfa, three_quotes)
        fragment = chain(nfa, plain_run, special_runs, closing)
        return compile_fragment(NonGreedyDFA, nfa, fragment)

    # Built in the order: single, double, triple-single, triple-double.
    single_entry = short_string_end("'", "'\\")
    double_entry = short_string_end('"', '"\\')
    single3_entry = long_string_end("'", "'\\", "''", "'''")
    double3_entry = long_string_end('"', '"\\', '""', '"""')

    return {
        "'": single_entry,
        '"': double_entry,
        "'''": single3_entry,
        '"""': double3_entry
    }
Exemple #6
0
def test_states():
    states = [{"\x00": 1}, {"\x01": 0}]
    d = DFA(states[:], [False, True])
    assert output('test', DFA, d, states) == """\
Exemple #7
0
def makePyPseudoDFA():
    """Build the pseudo-token DFA and return it with its state table.

    Every NFA fragment is created inside one shared ``states`` list by the
    combinator helpers (``chain``, ``group``, ``any``, ``maybe``,
    ``atleastonce``, ``groupStr``, ``notGroupStr``, ``chainStr``,
    ``newArcPair``), all of which are defined outside this function.  ``any``
    is that combinator, not the builtin: it is called with two arguments.
    NOTE(review): from their names, ``any`` / ``maybe`` / ``atleastonce`` are
    presumably zero-or-more / optional / one-or-more -- confirm against the
    automata helpers.

    The final pattern is ``makeWhitespace()`` followed by a ``group`` of: an
    ``EMPTY`` arc, the "extras" (line continuation, comment, triple-quote
    opener), a number, a "funny" token, a one-line string, or a name.  The
    "funny" alternatives are generated from ``pytoken.python_opmap`` rather
    than written out by hand.

    Returns a ``(DFA, dfaStates)`` tuple, where ``dfaStates`` is the state
    list produced by ``nfaToDfa`` and passed to the ``DFA`` constructor.
    """
    import string
    states = []

    # NOTE(review): fragments that are used more than once are wrapped in
    # functions and rebuilt at each use, presumably because every fragment
    # needs its own entries in ``states`` -- confirm.
    def makeEOL():
        # Either "\n", or "\r" with maybe(...) a following "\n".
        return group(
            states, newArcPair(states, "\n"),
            chain(states, newArcPair(states, "\r"),
                  maybe(states, newArcPair(states, "\n"))))

    # ____________________________________________________________
    def makeLineCont():
        # A backslash followed by an end-of-line fragment.
        return chain(states, newArcPair(states, "\\"), makeEOL())

    # ____________________________________________________________
    # Ignore stuff
    def makeWhitespace():
        # Space, form feed and tab characters.
        return any(states, groupStr(states, " \f\t"))

    # ____________________________________________________________
    def makeComment():
        # "#" followed by characters other than "\r" and "\n".
        return chain(states, newArcPair(states, "#"),
                     any(states, notGroupStr(states, "\r\n")))

    # ____________________________________________________________
    #ignore = chain(states,
    #               makeWhitespace(),
    #               any(states, chain(states,
    #                                 makeLineCont(),
    #                                 makeWhitespace())),
    #               maybe(states, makeComment()))
    # ____________________________________________________________
    # Names
    # First character from string.letters or "_"; later characters may also
    # come from string.digits.
    name = chain(
        states, groupStr(states, string.letters + "_"),
        any(states, groupStr(states, string.letters + string.digits + "_")))

    # ____________________________________________________________
    # Digits
    def makeDigits():
        return groupStr(states, "0123456789")

    # ____________________________________________________________
    # Integer numbers
    # Each integer form ends with maybe(...) an "l"/"L" suffix.
    hexNumber = chain(
        states, newArcPair(states, "0"), groupStr(states, "xX"),
        atleastonce(states, groupStr(states, "0123456789abcdefABCDEF")),
        maybe(states, groupStr(states, "lL")))
    # "0", then maybe(...) an "o"/"O" marker with one octal digit, then
    # further octal digits.
    octNumber = chain(
        states, newArcPair(states, "0"),
        maybe(
            states,
            chain(states, groupStr(states, "oO"), groupStr(states,
                                                           "01234567"))),
        any(states, groupStr(states, "01234567")),
        maybe(states, groupStr(states, "lL")))
    binNumber = chain(states, newArcPair(states, "0"), groupStr(states, "bB"),
                      atleastonce(states, groupStr(states, "01")),
                      maybe(states, groupStr(states, "lL")))
    # Decimal literals start with a digit from "1" to "9".
    decNumber = chain(states, groupStr(states, "123456789"),
                      any(states, makeDigits()),
                      maybe(states, groupStr(states, "lL")))
    intNumber = group(states, hexNumber, octNumber, binNumber, decNumber)

    # ____________________________________________________________
    # Exponents
    def makeExp():
        # "e"/"E", maybe(...) a sign, then digits.
        return chain(states, groupStr(states, "eE"),
                     maybe(states, groupStr(states, "+-")),
                     atleastonce(states, makeDigits()))

    # ____________________________________________________________
    # Floating point numbers
    def makeFloat():
        # pointFloat: digits "." digits-or-nothing, or "." digits, each with
        # maybe(...) an exponent.  expFloat: digits followed by an exponent.
        pointFloat = chain(
            states,
            group(
                states,
                chain(states, atleastonce(states, makeDigits()),
                      newArcPair(states, "."), any(states, makeDigits())),
                chain(states, newArcPair(states, "."),
                      atleastonce(states, makeDigits()))),
            maybe(states, makeExp()))
        expFloat = chain(states, atleastonce(states, makeDigits()), makeExp())
        return group(states, pointFloat, expFloat)

    # ____________________________________________________________
    # Imaginary numbers
    # Digits or a float, followed by "j"/"J".
    imagNumber = group(
        states,
        chain(states, atleastonce(states, makeDigits()),
              groupStr(states, "jJ")),
        chain(states, makeFloat(), groupStr(states, "jJ")))
    # ____________________________________________________________
    # Any old number.
    number = group(states, imagNumber, makeFloat(), intNumber)
    # ____________________________________________________________
    # Funny
    # generate from pytoken
    # ``funny`` starts as a list of alternatives and is rebound to the
    # combined ``group`` fragment at the end of this section.
    funny = []
    for op in sorted(pytoken.python_opmap):
        # "$NUM" is not taken literally; it is covered by revdb_metavar below.
        if op == "$NUM":
            continue
        funny.append(chainStr(states, op))
    # "$" followed by digits.
    revdb_metavar = chain(states, groupStr(states, "$"),
                          atleastonce(states, makeDigits()))
    funny.append(revdb_metavar)
    funny.append(makeEOL())
    funny = group(states, *funny)

    # ____________________________________________________________
    def makeStrPrefix():
        # maybe(...) one of "uUbB", then maybe(...) one of "rR".
        return chain(states, maybe(states, groupStr(states, "uUbB")),
                     maybe(states, groupStr(states, "rR")))

    # ____________________________________________________________
    # One-line strings, once per quote character: prefix, opening quote,
    # characters other than "\r", "\n", the quote and backslash, with
    # backslash + DEFAULT pairs in between, ending in either the closing
    # quote or a line continuation.
    contStr = group(
        states,
        chain(
            states, makeStrPrefix(), newArcPair(states, "'"),
            any(states, notGroupStr(states, "\r\n'\\")),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, "\r\n'\\")))),
            group(states, newArcPair(states, "'"), makeLineCont())),
        chain(
            states, makeStrPrefix(), newArcPair(states, '"'),
            any(states, notGroupStr(states, '\r\n"\\')),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, '\r\n"\\')))),
            group(states, newArcPair(states, '"'), makeLineCont())))
    # Only the opening of a triple-quoted string (prefix plus three quotes)
    # is matched here.
    triple = chain(
        states, makeStrPrefix(),
        group(states, chainStr(states, "'''"), chainStr(states, '"""')))
    pseudoExtras = group(states, makeLineCont(), makeComment(), triple)
    pseudoToken = chain(
        states, makeWhitespace(),
        group(states, newArcPair(states, EMPTY), pseudoExtras, number, funny,
              contStr, name))
    # Convert the accumulated NFA, using the pseudoToken fragment as the
    # extra arguments; the DFA state list is returned alongside the DFA.
    dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
    return DFA(dfaStates, dfaAccepts), dfaStates
Exemple #8
0
def test_recognize():
    """Check ``recognize`` results for ``DFA`` and ``NonGreedyDFA``.

    Each automaton has two states with accept flags ``[False, True]``.  For
    every case the given text must yield the expected result, and the input
    ``"c"`` must yield ``-1``.
    """
    cases = (
        # (automaton class, state dicts, input text, expected result)
        (DFA, [{"a": 1}, {"b": 0}], "ababab", 5),
        (DFA, [{"a": 1}, {DEFAULT: 0}], "a,a?ab", 5),
        (NonGreedyDFA, [{"a": 1}, {"b": 0}], "ababab", 1),
        (NonGreedyDFA, [{"a": 1}, {DEFAULT: 0}], "a,a?ab", 1),
    )
    for automaton_cls, table, text, expected in cases:
        matcher = automaton_cls(table, [False, True])
        assert matcher.recognize(text) == expected
        assert matcher.recognize("c") == -1