def test_states():
    """Check the ``states``, ``defaults`` and ``max_char`` attributes of DFA.

    Two small two-state automata are built: one whose states only carry
    explicit character arcs, and one whose second state only carries a
    DEFAULT arc.
    """
    # Only explicit character arcs.
    explicit_only = DFA([{"\x00": 1}, {"\x01": 0}], [False, True])
    assert explicit_only.states == "\x01\xff\xff\x00"
    assert explicit_only.defaults == "\xff\xff"
    assert explicit_only.max_char == 2

    # Second state falls back to a DEFAULT arc.
    with_default = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True])
    assert with_default.states == "\x01\x00"
    assert with_default.defaults == "\xff\x00"
    assert with_default.max_char == 1
def test_nonascii():
    """Exercise NON_ASCII arcs against UTF-8 encoded input.

    Also checks that constructing a DFA with an explicit "\\x81" arc
    raises ValueError.
    """
    # Greedy automaton: "a" followed by NON_ASCII arcs looping on state 1.
    greedy = DFA([{"a": 1}, {NON_ASCII: 1}], [False, True])
    encoded = u"aüüüü".encode("utf-8")
    assert greedy.recognize(encoded) == len(encoded)
    assert greedy.recognize("c") == -1
    assert greedy.recognize("ü") == -1

    # Non-greedy automaton: expected to stop right after the first "b".
    lazy = NonGreedyDFA([{NON_ASCII: 0, "b": 1}, {"b": 0}], [False, True])
    encoded = u"üübbbb".encode("utf-8")
    expected_end = len(u"üüb".encode("utf-8"))
    assert lazy.recognize(encoded) == expected_end
    assert lazy.recognize("c") == -1

    pytest.raises(ValueError, DFA, [{"\x81": 2}], [True])
def makePyEndDFAMap():
    """Build the map from a string-opening token to its "end of string" DFA.

    Keys are the two quote characters, the bare prefix letters (mapped to
    None) and every prefix + triple-quote combination.  One-line strings
    use DFA; triple-quoted strings use NonGreedyDFA.

    NOTE(review): the combinators (chain, group, any, notGroupStr, ...)
    are defined outside this section and are read here by name.
    """
    def oneLineEndDFA(quote):
        # Every automaton is built on its own, initially empty, state list.
        nfa = []
        ordinary = quote + "\\"
        # Run of characters that are neither the quote nor a backslash.
        plainRun = any(nfa, notGroupStr(nfa, ordinary))
        # A backslash, any character, then another plain run.
        escapedPiece = chain(nfa,
                             newArcPair(nfa, "\\"),
                             newArcPair(nfa, DEFAULT),
                             any(nfa, notGroupStr(nfa, ordinary)))
        escapedRuns = any(nfa, escapedPiece)
        terminator = newArcPair(nfa, quote)
        ends = chain(nfa, plainRun, escapedRuns, terminator)
        return DFA(*nfaToDfa(nfa, *ends))

    def tripleEndDFA(quote):
        nfa = []
        ordinary = quote + "\\"
        plainRun = any(nfa, notGroupStr(nfa, ordinary))
        # Either a backslash pair ...
        backslashPair = chain(nfa,
                              newArcPair(nfa, "\\"),
                              newArcPair(nfa, DEFAULT))
        # ... or a quote that notChainStr says is not followed by two more.
        strayQuote = chain(nfa,
                           newArcPair(nfa, quote),
                           notChainStr(nfa, quote * 2))
        interruption = group(nfa, backslashPair, strayQuote)
        piece = chain(nfa,
                      interruption,
                      any(nfa, notGroupStr(nfa, ordinary)))
        pieces = any(nfa, piece)
        terminator = chainStr(nfa, quote * 3)
        ends = chain(nfa, plainRun, pieces, terminator)
        return NonGreedyDFA(*nfaToDfa(nfa, *ends))

    # Same construction order as before: ', ", ''' and finally """.
    singleDFA = oneLineEndDFA("'")
    doubleDFA = oneLineEndDFA('"')
    single3DFA = tripleEndDFA("'")
    double3DFA = tripleEndDFA('"')

    endDFAs = {
        "'": singleDFA,
        '"': doubleDFA,
        "r": None,
        "R": None,
        "u": None,
        "U": None,
        "b": None,
        "B": None
    }
    rawPrefixes = ("", "r", "R")
    for uniPrefix in ("", "u", "U", "b", "B"):
        for rawPrefix in rawPrefixes:
            opener = uniPrefix + rawPrefix
            endDFAs[opener + "'''"] = single3DFA
            endDFAs[opener + '"""'] = double3DFA
    return endDFAs
def makePyPseudoDFA():
    """Build the DFA that recognizes a single "pseudo token".

    A pseudo token is leading whitespace (makeWhitespace) followed by one
    of: nothing (EMPTY), a line continuation, a comment, a triple-quote
    opener, a number, an operator/bracket/special character, a one-line
    string, or a name.

    Returns DFA(dfaStates, dfaAccepts), where both arguments come from
    nfaToDfa() applied to the NFA accumulated in the local ``states`` list.

    NOTE(review): the combinators used below (chain, group, any, maybe,
    atleastonce, groupStr, notGroupStr, chainStr, newArcPair) are defined
    outside this section.  The comments read them by name (chain =
    sequence, group = alternatives, any = zero or more, maybe = optional,
    atleastonce = one or more) -- confirm against their definitions.
    Presumably each call appends to ``states``, so the order of the
    statements below should not be changed casually.
    """
    import string
    states = []

    # End of line: "\n", or "\r" with an optional "\n" after it.
    def makeEOL():
        return group(
            states, newArcPair(states, "\n"),
            chain(states, newArcPair(states, "\r"),
                  maybe(states, newArcPair(states, "\n"))))
    # ____________________________________________________________
    # Line continuation: a backslash followed by an end of line.
    def makeLineCont():
        return chain(states, newArcPair(states, "\\"), makeEOL())
    # ____________________________________________________________
    # Ignore stuff
    # Whitespace: spaces, form feeds and tabs.
    def makeWhitespace():
        return any(states, groupStr(states, " \f\t"))
    # ____________________________________________________________
    # Comment: "#" followed by anything except "\r" or "\n".
    def makeComment():
        return chain(states, newArcPair(states, "#"),
                     any(states, notGroupStr(states, "\r\n")))
    # ____________________________________________________________
    #ignore = chain(states,
    #               makeWhitespace(),
    #               any(states, chain(states,
    #                                 makeLineCont(),
    #                                 makeWhitespace())),
    #               maybe(states, makeComment()))
    # ____________________________________________________________
    # Names
    # First character from string.letters or "_"; digits allowed afterwards.
    name = chain(
        states, groupStr(states, string.letters + "_"),
        any(states, groupStr(states, string.letters + string.digits + "_")))
    # ____________________________________________________________
    # Digits
    # A function rather than a value: called again wherever digits are needed.
    def makeDigits():
        return groupStr(states, "0123456789")
    # ____________________________________________________________
    # Integer numbers
    # Every integer form ends with an optional "l"/"L" suffix.
    hexNumber = chain(
        states, newArcPair(states, "0"), groupStr(states, "xX"),
        atleastonce(states, groupStr(states, "0123456789abcdefABCDEF")),
        maybe(states, groupStr(states, "lL")))
    # "0", optionally "o"/"O" plus one octal digit, then more octal digits.
    octNumber = chain(
        states, newArcPair(states, "0"),
        maybe(
            states,
            chain(states, groupStr(states, "oO"),
                  groupStr(states, "01234567"))),
        any(states, groupStr(states, "01234567")),
        maybe(states, groupStr(states, "lL")))
    binNumber = chain(states, newArcPair(states, "0"),
                      groupStr(states, "bB"),
                      atleastonce(states, groupStr(states, "01")),
                      maybe(states, groupStr(states, "lL")))
    # Decimal literals start with a non-zero digit.
    decNumber = chain(states, groupStr(states, "123456789"),
                      any(states, makeDigits()),
                      maybe(states, groupStr(states, "lL")))
    intNumber = group(states, hexNumber, octNumber, binNumber, decNumber)
    # ____________________________________________________________
    # Exponents
    # "e"/"E", an optional sign, then digits.
    def makeExp():
        return chain(states, groupStr(states, "eE"),
                     maybe(states, groupStr(states, "+-")),
                     atleastonce(states, makeDigits()))
    # ____________________________________________________________
    # Floating point numbers
    def makeFloat():
        # Digits "." optional digits, or "." digits; exponent optional.
        pointFloat = chain(
            states,
            group(
                states,
                chain(states, atleastonce(states, makeDigits()),
                      newArcPair(states, "."), any(states, makeDigits())),
                chain(states, newArcPair(states, "."),
                      atleastonce(states, makeDigits()))),
            maybe(states, makeExp()))
        # Digits followed by a mandatory exponent.
        expFloat = chain(states, atleastonce(states, makeDigits()), makeExp())
        return group(states, pointFloat, expFloat)
    # ____________________________________________________________
    # Imaginary numbers
    # Digits or a float, followed by "j"/"J".
    imagNumber = group(
        states,
        chain(states, atleastonce(states, makeDigits()),
              groupStr(states, "jJ")),
        chain(states, makeFloat(), groupStr(states, "jJ")))
    # ____________________________________________________________
    # Any old number.
    number = group(states, imagNumber, makeFloat(), intNumber)
    # ____________________________________________________________
    # Funny
    # Operators; most forms accept an optional trailing "=".
    operator = group(
        states,
        chain(states, chainStr(states, "**"),
              maybe(states, newArcPair(states, "="))),
        chain(states, chainStr(states, ">>"),
              maybe(states, newArcPair(states, "="))),
        chain(states, chainStr(states, "<<"),
              maybe(states, newArcPair(states, "="))),
        chainStr(states, "<>"),
        chainStr(states, "!="),
        chain(states, chainStr(states, "//"),
              maybe(states, newArcPair(states, "="))),
        chain(states, groupStr(states, "+-*/%&|^=<>"),
              maybe(states, newArcPair(states, "="))),
        newArcPair(states, "~"))
    bracket = groupStr(states, "[](){}")
    # An end of line is grouped with the punctuation characters here.
    special = group(states, makeEOL(), groupStr(states, "@:;.,`"))
    funny = group(states, operator, bracket, special)
    # ____________________________________________________________
    # String prefix: optional "u"/"U"/"b"/"B", then optional "r"/"R".
    def makeStrPrefix():
        return chain(states, maybe(states, groupStr(states, "uUbB")),
                     maybe(states, groupStr(states, "rR")))
    # ____________________________________________________________
    # One-line strings, one alternative per quote character.  The body
    # excludes "\r", "\n", the quote and the backslash; a backslash consumes
    # the character after it.  The string ends at the closing quote or at a
    # line continuation.
    contStr = group(
        states,
        chain(
            states, makeStrPrefix(), newArcPair(states, "'"),
            any(states, notGroupStr(states, "\r\n'\\")),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, "\r\n'\\")))),
            group(states, newArcPair(states, "'"), makeLineCont())),
        chain(
            states, makeStrPrefix(), newArcPair(states, '"'),
            any(states, notGroupStr(states, '\r\n"\\')),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, '\r\n"\\')))),
            group(states, newArcPair(states, '"'), makeLineCont())))
    # Only the opener of a triple-quoted string: prefix plus three quotes.
    triple = chain(
        states, makeStrPrefix(),
        group(states, chainStr(states, "'''"), chainStr(states, '"""')))
    pseudoExtras = group(states, makeLineCont(), makeComment(), triple)
    # Leading whitespace, then any one alternative (EMPTY allows none).
    pseudoToken = chain(
        states, makeWhitespace(),
        group(states, newArcPair(states, EMPTY), pseudoExtras, number, funny,
              contStr, name))
    # pseudoToken is unpacked into the remaining nfaToDfa() arguments.
    dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
    return DFA(dfaStates, dfaAccepts)
def makePyEndDFAMap():
    """Build the "end of string" automata, keyed by opening quote token.

    Returns a dict with the keys ', ", ''' and three double quotes.  Each
    value is a ``(dfa, dfaStates)`` pair, where ``dfaStates`` is the state
    list returned by nfaToDfa() and handed to the DFA constructor.
    One-line strings use DFA; triple-quoted strings use NonGreedyDFA.

    NOTE(review): the combinators (chain, group, any, notGroupStr, ...)
    are defined outside this section and are read here by name.
    """
    def oneLineEnd(quote):
        # Every automaton is built on its own, initially empty, state list.
        nfa = []
        ordinary = quote + "\\"
        # Run of characters that are neither the quote nor a backslash.
        plainRun = any(nfa, notGroupStr(nfa, ordinary))
        # A backslash, any character, then another plain run.
        escapedPiece = chain(nfa,
                             newArcPair(nfa, "\\"),
                             newArcPair(nfa, DEFAULT),
                             any(nfa, notGroupStr(nfa, ordinary)))
        escapedRuns = any(nfa, escapedPiece)
        terminator = newArcPair(nfa, quote)
        ends = chain(nfa, plainRun, escapedRuns, terminator)
        dfaStates, accepts = nfaToDfa(nfa, *ends)
        # The very list given to DFA() is what gets returned next to it.
        return DFA(dfaStates, accepts), dfaStates

    def tripleEnd(quote):
        nfa = []
        ordinary = quote + "\\"
        plainRun = any(nfa, notGroupStr(nfa, ordinary))
        # Either a backslash pair ...
        backslashPair = chain(nfa,
                              newArcPair(nfa, "\\"),
                              newArcPair(nfa, DEFAULT))
        # ... or a quote that notChainStr says is not followed by two more.
        strayQuote = chain(nfa,
                           newArcPair(nfa, quote),
                           notChainStr(nfa, quote * 2))
        interruption = group(nfa, backslashPair, strayQuote)
        piece = chain(nfa,
                      interruption,
                      any(nfa, notGroupStr(nfa, ordinary)))
        pieces = any(nfa, piece)
        terminator = chainStr(nfa, quote * 3)
        ends = chain(nfa, plainRun, pieces, terminator)
        dfaStates, accepts = nfaToDfa(nfa, *ends)
        return NonGreedyDFA(dfaStates, accepts), dfaStates

    # Built in the original order: ', ", ''' and finally """.
    singleEntry = oneLineEnd("'")
    doubleEntry = oneLineEnd('"')
    single3Entry = tripleEnd("'")
    double3Entry = tripleEnd('"')
    return {
        "'": singleEntry,
        '"': doubleEntry,
        "'''": single3Entry,
        '"""': double3Entry
    }
def test_states(): states = [{"\x00": 1}, {"\x01": 0}] d = DFA(states[:], [False, True]) assert output('test', DFA, d, states) == """\
def makePyPseudoDFA():
    """Build the DFA that recognizes a single "pseudo token".

    A pseudo token is leading whitespace (makeWhitespace) followed by one
    of: nothing (EMPTY), a line continuation, a comment, a triple-quote
    opener, a number, an operator or end of line ("funny"), a one-line
    string, or a name.

    Returns a pair ``(DFA(dfaStates, dfaAccepts), dfaStates)``; both
    values come from nfaToDfa() applied to the NFA accumulated in the
    local ``states`` list.

    NOTE(review): the combinators used below (chain, group, any, maybe,
    atleastonce, groupStr, notGroupStr, chainStr, newArcPair) and the
    ``pytoken`` module are defined outside this section.  The comments
    read the combinators by name (chain = sequence, group = alternatives,
    any = zero or more, maybe = optional, atleastonce = one or more) --
    confirm against their definitions.  Presumably each call appends to
    ``states``, so the order of the statements below should not be
    changed casually.
    """
    import string
    states = []

    # End of line: "\n", or "\r" with an optional "\n" after it.
    def makeEOL():
        return group(
            states, newArcPair(states, "\n"),
            chain(states, newArcPair(states, "\r"),
                  maybe(states, newArcPair(states, "\n"))))
    # ____________________________________________________________
    # Line continuation: a backslash followed by an end of line.
    def makeLineCont():
        return chain(states, newArcPair(states, "\\"), makeEOL())
    # ____________________________________________________________
    # Ignore stuff
    # Whitespace: spaces, form feeds and tabs.
    def makeWhitespace():
        return any(states, groupStr(states, " \f\t"))
    # ____________________________________________________________
    # Comment: "#" followed by anything except "\r" or "\n".
    def makeComment():
        return chain(states, newArcPair(states, "#"),
                     any(states, notGroupStr(states, "\r\n")))
    # ____________________________________________________________
    #ignore = chain(states,
    #               makeWhitespace(),
    #               any(states, chain(states,
    #                                 makeLineCont(),
    #                                 makeWhitespace())),
    #               maybe(states, makeComment()))
    # ____________________________________________________________
    # Names
    # First character from string.letters or "_"; digits allowed afterwards.
    name = chain(
        states, groupStr(states, string.letters + "_"),
        any(states, groupStr(states, string.letters + string.digits + "_")))
    # ____________________________________________________________
    # Digits
    # A function rather than a value: called again wherever digits are needed.
    def makeDigits():
        return groupStr(states, "0123456789")
    # ____________________________________________________________
    # Integer numbers
    # Every integer form ends with an optional "l"/"L" suffix.
    hexNumber = chain(
        states, newArcPair(states, "0"), groupStr(states, "xX"),
        atleastonce(states, groupStr(states, "0123456789abcdefABCDEF")),
        maybe(states, groupStr(states, "lL")))
    # "0", optionally "o"/"O" plus one octal digit, then more octal digits.
    octNumber = chain(
        states, newArcPair(states, "0"),
        maybe(
            states,
            chain(states, groupStr(states, "oO"),
                  groupStr(states, "01234567"))),
        any(states, groupStr(states, "01234567")),
        maybe(states, groupStr(states, "lL")))
    binNumber = chain(states, newArcPair(states, "0"),
                      groupStr(states, "bB"),
                      atleastonce(states, groupStr(states, "01")),
                      maybe(states, groupStr(states, "lL")))
    # Decimal literals start with a non-zero digit.
    decNumber = chain(states, groupStr(states, "123456789"),
                      any(states, makeDigits()),
                      maybe(states, groupStr(states, "lL")))
    intNumber = group(states, hexNumber, octNumber, binNumber, decNumber)
    # ____________________________________________________________
    # Exponents
    # "e"/"E", an optional sign, then digits.
    def makeExp():
        return chain(states, groupStr(states, "eE"),
                     maybe(states, groupStr(states, "+-")),
                     atleastonce(states, makeDigits()))
    # ____________________________________________________________
    # Floating point numbers
    def makeFloat():
        # Digits "." optional digits, or "." digits; exponent optional.
        pointFloat = chain(
            states,
            group(
                states,
                chain(states, atleastonce(states, makeDigits()),
                      newArcPair(states, "."), any(states, makeDigits())),
                chain(states, newArcPair(states, "."),
                      atleastonce(states, makeDigits()))),
            maybe(states, makeExp()))
        # Digits followed by a mandatory exponent.
        expFloat = chain(states, atleastonce(states, makeDigits()), makeExp())
        return group(states, pointFloat, expFloat)
    # ____________________________________________________________
    # Imaginary numbers
    # Digits or a float, followed by "j"/"J".
    imagNumber = group(
        states,
        chain(states, atleastonce(states, makeDigits()),
              groupStr(states, "jJ")),
        chain(states, makeFloat(), groupStr(states, "jJ")))
    # ____________________________________________________________
    # Any old number.
    number = group(states, imagNumber, makeFloat(), intNumber)
    # ____________________________________________________________
    # Funny
    # generate from pytoken
    # One literal alternative per key of pytoken.python_opmap, visited in
    # sorted order; the "$NUM" key is skipped here.
    funny = []
    for op in sorted(pytoken.python_opmap):
        if op == "$NUM":
            continue
        funny.append(chainStr(states, op))
    # "$" followed by one or more digits, added once after the loop.
    # NOTE(review): presumably this is the pattern standing in for the
    # skipped "$NUM" key -- confirm against pytoken.
    revdb_metavar = chain(states, groupStr(states, "$"),
                          atleastonce(states, makeDigits()))
    funny.append(revdb_metavar)
    funny.append(makeEOL())
    # The list of alternatives is replaced by the single grouped fragment.
    funny = group(states, *funny)
    # ____________________________________________________________
    # String prefix: optional "u"/"U"/"b"/"B", then optional "r"/"R".
    def makeStrPrefix():
        return chain(states, maybe(states, groupStr(states, "uUbB")),
                     maybe(states, groupStr(states, "rR")))
    # ____________________________________________________________
    # One-line strings, one alternative per quote character.  The body
    # excludes "\r", "\n", the quote and the backslash; a backslash consumes
    # the character after it.  The string ends at the closing quote or at a
    # line continuation.
    contStr = group(
        states,
        chain(
            states, makeStrPrefix(), newArcPair(states, "'"),
            any(states, notGroupStr(states, "\r\n'\\")),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, "\r\n'\\")))),
            group(states, newArcPair(states, "'"), makeLineCont())),
        chain(
            states, makeStrPrefix(), newArcPair(states, '"'),
            any(states, notGroupStr(states, '\r\n"\\')),
            any(
                states,
                chain(states, newArcPair(states, "\\"),
                      newArcPair(states, DEFAULT),
                      any(states, notGroupStr(states, '\r\n"\\')))),
            group(states, newArcPair(states, '"'), makeLineCont())))
    # Only the opener of a triple-quoted string: prefix plus three quotes.
    triple = chain(
        states, makeStrPrefix(),
        group(states, chainStr(states, "'''"), chainStr(states, '"""')))
    pseudoExtras = group(states, makeLineCont(), makeComment(), triple)
    # Leading whitespace, then any one alternative (EMPTY allows none).
    pseudoToken = chain(
        states, makeWhitespace(),
        group(states, newArcPair(states, EMPTY), pseudoExtras, number, funny,
              contStr, name))
    # pseudoToken is unpacked into the remaining nfaToDfa() arguments.
    dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
    # The DFA state list is returned alongside the DFA object.
    return DFA(dfaStates, dfaAccepts), dfaStates
def test_recognize():
    """Check ``recognize`` on DFA and NonGreedyDFA with the same automata.

    Both classes are expected to return -1 for input that does not match.
    On matching input DFA is expected to return 5 and NonGreedyDFA 1.
    """
    expectations = (
        (DFA, 5),
        (NonGreedyDFA, 1),
    )
    for dfa_class, expected_end in expectations:
        # Explicit arcs only: "a" then "b", alternating.
        explicit = dfa_class([{"a": 1}, {"b": 0}], [False, True])
        assert explicit.recognize("ababab") == expected_end
        assert explicit.recognize("c") == -1

        # The second state accepts any character through DEFAULT.
        with_default = dfa_class([{"a": 1}, {DEFAULT: 0}], [False, True])
        assert with_default.recognize("a,a?ab") == expected_end
        assert with_default.recognize("c") == -1