def setUp(self):
    """Build a fresh lexer/parser pair and wire every rule handle."""
    self.lexer = Lexer(self.CalcTokens)
    self.eacc = Eacc(self.CalcGrammar)

    # Link the handles to the patterns.
    grammar = self.CalcGrammar
    links = (
        (grammar.r_plus, self.plus),
        (grammar.r_minus, self.minus),
        (grammar.r_div, self.div),
        (grammar.r_mul, self.mul),
        (grammar.r_paren, self.paren),
        (grammar.r_done, self.done),
    )
    for rule, handle in links:
        self.eacc.add_handle(rule, handle)
class TestTokVal(unittest.TestCase):
    """Exercise matching of literal token values through TokVal rules."""

    class Wordtokens(XSpec):
        t_word = LexTok(r'[a-zA-Z]+', Word)
        t_blank = LexTok(r' +', type=Blank, discard=True)
        root = [t_word, t_blank]

    class WordGrammar(Grammar):
        r_phrase0 = Rule(TokVal('alpha'), TokVal('beta'))
        r_phrase1 = Rule(TokVal('gamma'), TokVal('zeta'))
        r_phrase2 = Rule(TokVal('abc'), TokVal('def'))
        r_sof = Rule(Sof)
        r_eof = Rule(Eof)
        root = [r_phrase1, r_phrase0, r_phrase2, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.Wordtokens)
        self.eacc = Eacc(self.WordGrammar)

    def _parse(self, data):
        # Lex then parse; consuming the generator forces evaluation.
        tokens = self.lexer.feed(data)
        return list(self.eacc.build(tokens))

    def test0(self):
        self._parse('alpha beta gamma zeta')

    def test1(self):
        self._parse('gamma zeta abc def alpha beta ')

    def test2(self):
        self._parse('gamma zeta')

    def test3(self):
        self._parse('gamma zeta')

    def test4(self):
        # A trailing 'abc' with no following 'def' matches no rule.
        tokens = self.lexer.feed('gamma zeta abc')
        ptree = self.eacc.build(tokens)
        with self.assertRaises(EaccError):
            list(ptree)
def __init__(self):
    """Create the parser and link one handle per grammar rule."""
    super(RegexParser, self).__init__(RegexGrammar)
    self.hclass_lexer = Lexer(HClassTokens)
    self.include_set = IncludeSet()
    self.exclude_set = ExcludeSet()

    # self.add_handle(RegexGrammar.r_escape, self.escape)

    # Each rule r_<name> is handled by the method of the same name;
    # registration order is preserved from the original hand-written list.
    names = ['comment', 'group', 'ncapture', 'ngroup', 'dot', 'word',
             'nword', 'metab', 'metaB']
    names.extend('times%d' % index for index in range(14))
    names.extend(['include', 'exclude', 'cnext', 'ncnext', 'cback',
                  'ncback', 'gref', 'ngref', 'pipe', 'char', 'caret',
                  'dollar', 'done'])

    for name in names:
        self.add_handle(getattr(RegexGrammar, 'r_' + name),
                        getattr(self, name))
class TestOps0(unittest.TestCase):
    """Check that Except() accepts any middle token other than Three."""

    class ExprTokens(XSpec):
        t_one = LexTok(r'1', One)
        t_two = LexTok(r'2', Two)
        t_three = LexTok(r'3', Three)
        t_four = LexTok(r'4', Four)
        t_five = LexTok(r'5', Five)
        t_blank = LexTok(r' +', Blank, discard=True)
        root = [t_one, t_two, t_three, t_four, t_five, t_blank]

    class ExprGrammar(Grammar):
        # One, then any single token that is not Three, then One.
        r_num = Rule(One, Except(Three), One)
        r_sof = Rule(Sof)
        r_eof = Rule(Eof)
        root = [r_num, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.ExprTokens)
        self.eacc = Eacc(self.ExprGrammar)

    def test0(self):
        source = '121 141 141 141'
        ptree = self.eacc.build(self.lexer.feed(source))
        list(ptree)
def xmake(regstr):
    """Parse the regex string ``regstr`` and return its eacc pattern tree.

    Raises ``re.error`` when ``regstr`` is not a valid regular expression.
    """
    # Make sure the regex is valid before parsing it ourselves; the
    # compiled object itself is not needed (was stored in an unused local).
    re.compile(regstr)

    xlexer = Lexer(RegexTokens)
    xparser = RegexParser()

    tokens = xlexer.feed(regstr)
    tseq = list(xparser.build(tokens))

    # The last parsed element carries the full regex tree.
    return tseq[-1].val()
""" """ from eacc.lexer import Lexer, LexSeq, LexTok, SeqTok, XSpec from eacc.token import DoubleQuote, String, Blank class StringTokens(XSpec): t_dquote = LexSeq(SeqTok(r'\"', DoubleQuote), SeqTok(r'[^\"]+', String), SeqTok(r'\"', DoubleQuote)) t_blank = LexTok(r' +', type=Blank) root = [t_dquote, t_blank] lex = Lexer(StringTokens) print('Example 1!') data = '" This will" "rock!" "For sure!"' tokens = lex.feed(data) print('Consumed:', list(tokens))
""" """ from eacc.lexer import XSpec, Lexer, SeqTok, LexTok, LexSeq from eacc.token import Keyword, Identifier, RP, LP, Colon, Blank class KeywordTokens(XSpec): t_if = LexSeq(SeqTok(r'if', type=Keyword), SeqTok(r'\s+', type=Blank)) t_blank = LexTok(r' +', type=Blank) t_lparen = LexTok(r'\(', type=LP) t_rparen = LexTok(r'\)', type=RP) t_colon = LexTok(r'\:', type=Colon) # Match identifier only if it is not an if. t_identifier = LexTok(r'[a-zA-Z0-9]+', type=Identifier) root = [t_if, t_blank, t_lparen, t_rparen, t_colon, t_identifier] lex = Lexer(KeywordTokens) data = 'if ifnum: foobar()' tokens = lex.feed(data) print('Consumed:', list(tokens))
""" The example below tokenizes numbers whose number of digits is 3 <= n < 6. When the number of digits is not in that range then it raises an error. Thus the string below would give a lexical error. 12 31 445 """ from eacc.lexer import Lexer, SeqTok, LexSeq, LexTok, XSpec from eacc.token import Num, Blank class NumsTokens(XSpec): t_blank = LexTok(r' +', Blank) t_num = LexTok(r'[0-9]{3,6}', Num) root = [t_num, t_blank] print('Example 1') lex = Lexer(NumsTokens) data = '332 3445 11234' tokens = lex.feed(data) print('Consumed:', list(tokens))
a lexical error. In the below example it generates an error due to mixing up digits. """ from eacc.lexer import Lexer, LexTok, XSpec from eacc.token import Letter, Blank class LetterTokens(XSpec): t_blank = LexTok(r' +', Blank) t_letter = LexTok(r'[a-zA-Z]', Letter) root = [t_letter, t_blank] lex = Lexer(LetterTokens) print('Example 1') data = 'abc def uoc' tokens = lex.feed(data) print('Consumed:', list(tokens)) print('Example 2') data = 'abc def uoc 123' tokens = lex.feed(data) print('Consumed:', list(tokens))
from eacc.lexer import Lexer, LexTok, XSpec
from eacc.token import Char

class CharTokens(XSpec):
    # Every single character becomes one Char token.
    t_char = LexTok(r'.', Char)
    root = [t_char]

data = 'abc'
lexer = Lexer(CharTokens)

# Print each token together with its start/end offsets.
for token in lexer.feed(data):
    print('%s\nStart:%s\nEnd:%s\n' % (token, token.start, token.end))
""" from eacc.eacc import Eacc, Rule, Grammar, TokVal from eacc.lexer import XSpec, Lexer, LexTok from eacc.token import Blank, Word, Sof, Eof class WordTokens(XSpec): t_word = LexTok(r'[a-zA-Z]+', Word) t_blank = LexTok(r' +', type=Blank, discard=True) root = [t_word, t_blank] class WordGrammar(Grammar): r_phrase0 = Rule(TokVal('alpha'), TokVal('beta')) r_phrase1 = Rule(TokVal('gamma'), TokVal('zeta')) r_sof = Rule(Sof) r_eof = Rule(Eof) root = [r_phrase1, r_phrase0, r_sof, r_eof] if __name__ == '__main__': data = 'alpha beta gamma zeta' lexer = Lexer(WordTokens) eacc = Eacc(WordGrammar) tokens = lexer.feed(data) ptree = eacc.build(tokens) print(list(ptree))
root = [r_lparen, r_rparen, r_num, r_blank]

class TupleGrammar(Grammar):
    # It means to accumulate as many Num tokens as possible.
    g_num = Times(Num, min=1, type=Num)

    # Then we trigger such a pattern in this rule.
    r_paren = Rule(LP, g_num, RP, type=Num)
    r_done = Rule(Sof, Num, Eof)

    root = [r_paren, r_done]

def done(sof, expr, eof):
    print('Result:', expr)

if __name__ == '__main__':
    print('Example 1')
    data = '(1 (1 1) ((((1)))))'

    lexer = Lexer(TupleTokens)
    tokens = lexer.feed(data)

    eacc = Eacc(TupleGrammar)
    ptree = eacc.build(tokens)
    eacc.add_handle(TupleGrammar.r_done, done)
    ptree = list(ptree)
class TestRule(unittest.TestCase):
    """Full arithmetic grammar: precedence, parenthesis and error cases.

    Results are cross-checked against Python's own ``eval`` of the same
    expression string.
    """

    class CalcTokens(XSpec):
        t_plus = LexTok(r'\+', Plus)
        t_minus = LexTok(r'\-', Minus)
        t_lparen = LexTok(r'\(', LP)
        t_rparen = LexTok(r'\)', RP)
        t_mul = LexTok(r'\*', Mul)
        t_div = LexTok(r'\/', Div)
        t_num = LexTok(r'[0-9]+', Num, float)
        t_blank = LexTok(r' +', Blank, discard=True)

        root = [t_plus, t_minus, t_lparen, t_num,
                t_blank, t_rparen, t_mul, t_div]

    class CalcGrammar(Grammar):
        r_paren = Rule(LP, Num, RP, type=Num)
        r_div = Rule(Num, Div, Num, type=Num)
        r_mul = Rule(Num, Mul, Num, type=Num)
        o_div = Rule(Div)
        o_mul = Rule(Mul)

        # The up= lookahead rules give * and / precedence over + and -.
        r_plus = Rule(Num, Plus, Num, type=Num, up=(o_mul, o_div))
        r_minus = Rule(Num, Minus, Num, type=Num, up=(o_mul, o_div))
        r_done = Rule(Sof, Num, Eof)

        root = [r_paren, r_plus, r_minus, r_mul, r_div, r_done]

    def plus(self, expr, sign, term):
        return expr.val() + term.val()

    def minus(self, expr, sign, term):
        return expr.val() - term.val()

    def div(self, term, sign, factor):
        return term.val() / factor.val()

    def mul(self, term, sign, factor):
        return term.val() * factor.val()

    def paren(self, left, expression, right):
        return expression.val()

    def done(self, sof, num, eof):
        print('Result:', num.val())
        return num.val()

    def setUp(self):
        self.lexer = Lexer(self.CalcTokens)
        self.eacc = Eacc(self.CalcGrammar)

        # Link the handles to the patterns.
        self.eacc.add_handle(self.CalcGrammar.r_plus, self.plus)
        self.eacc.add_handle(self.CalcGrammar.r_minus, self.minus)
        self.eacc.add_handle(self.CalcGrammar.r_div, self.div)
        self.eacc.add_handle(self.CalcGrammar.r_mul, self.mul)
        self.eacc.add_handle(self.CalcGrammar.r_paren, self.paren)
        self.eacc.add_handle(self.CalcGrammar.r_done, self.done)

    def test0(self):
        data = '1+2/3*(3*2 - 1) /(1-1-2-3-1+2)*3/ (1 - 2)*10'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)

        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test1(self):
        data = '(1+2/3*(3*2 - 1)) + ((1 - 2)*10)'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)

        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test2(self):
        data = '((1+2/3*(3*2 - 1)) + ((1 - 2)*10))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)

        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test3(self):
        data = '(1/2) * (3/4) * (5/2/3/5/2*1)/((((((1))))))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)

        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test4(self):
        # Two adjacent numbers with no operator cannot be reduced.
        data = '(1/2) * (3/4) * (5 2)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)

        ptree = self.eacc.build(tokens)
        with self.assertRaises(EaccError):
            ptree = list(ptree)

    def test5(self):
        data = '1+2*2/2 - 2/2 - 2*2/2+1'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)

        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test6(self):
        # BUGFIX: this method was a second 'def test4', which shadowed
        # the first one so the unbalanced-parenthesis case never ran.
        data = '(1/2) * 3/4) * (512)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)

        ptree = self.eacc.build(tokens)
        with self.assertRaises(EaccError):
            ptree = list(ptree)
def setUp(self):
    """Create the parser and lexer used by every test."""
    self.eacc = Eacc(self.ExprGrammar)
    self.lexer = Lexer(self.ExprTokens)
def setUp(self):
    """Create the parser and lexer used by every test."""
    self.eacc = Eacc(self.WordGrammar)
    self.lexer = Lexer(self.Wordtokens)
class RegexParser(Eacc):
    """Parse a regular-expression string into eacc pattern objects.

    Each grammar rule ``r_<name>`` has a same-named handler method that
    builds the corresponding pattern node (Group, Repeat, Any, ...).
    """

    def __init__(self):
        super(RegexParser, self).__init__(RegexGrammar)
        self.hclass_lexer = Lexer(HClassTokens)
        self.include_set = IncludeSet()
        self.exclude_set = ExcludeSet()

        # self.add_handle(RegexGrammar.r_escape, self.escape)
        self.add_handle(RegexGrammar.r_comment, self.comment)
        self.add_handle(RegexGrammar.r_group, self.group)
        self.add_handle(RegexGrammar.r_ncapture, self.ncapture)
        self.add_handle(RegexGrammar.r_ngroup, self.ngroup)
        self.add_handle(RegexGrammar.r_dot, self.dot)
        self.add_handle(RegexGrammar.r_word, self.word)
        self.add_handle(RegexGrammar.r_nword, self.nword)
        self.add_handle(RegexGrammar.r_metab, self.metab)
        self.add_handle(RegexGrammar.r_metaB, self.metaB)
        self.add_handle(RegexGrammar.r_times0, self.times0)
        self.add_handle(RegexGrammar.r_times1, self.times1)
        self.add_handle(RegexGrammar.r_times2, self.times2)
        self.add_handle(RegexGrammar.r_times3, self.times3)
        self.add_handle(RegexGrammar.r_times4, self.times4)
        self.add_handle(RegexGrammar.r_times5, self.times5)
        self.add_handle(RegexGrammar.r_times6, self.times6)
        self.add_handle(RegexGrammar.r_times7, self.times7)
        self.add_handle(RegexGrammar.r_times8, self.times8)
        self.add_handle(RegexGrammar.r_times9, self.times9)
        self.add_handle(RegexGrammar.r_times10, self.times10)
        self.add_handle(RegexGrammar.r_times11, self.times11)
        self.add_handle(RegexGrammar.r_times12, self.times12)
        self.add_handle(RegexGrammar.r_times13, self.times13)
        self.add_handle(RegexGrammar.r_include, self.include)
        self.add_handle(RegexGrammar.r_exclude, self.exclude)
        self.add_handle(RegexGrammar.r_cnext, self.cnext)
        self.add_handle(RegexGrammar.r_ncnext, self.ncnext)
        self.add_handle(RegexGrammar.r_cback, self.cback)
        self.add_handle(RegexGrammar.r_ncback, self.ncback)
        self.add_handle(RegexGrammar.r_gref, self.gref)
        self.add_handle(RegexGrammar.r_ngref, self.ngref)
        self.add_handle(RegexGrammar.r_pipe, self.pipe)
        self.add_handle(RegexGrammar.r_char, self.char)
        self.add_handle(RegexGrammar.r_caret, self.caret)
        self.add_handle(RegexGrammar.r_dollar, self.dollar)
        self.add_handle(RegexGrammar.r_done, self.done)

    def build(self, tokens):
        """Parse the token stream and return the fully built ptree list."""
        tree = super(RegexParser, self).build(tokens)
        return list(tree)

    def word(self, escape, wsym):
        # \w
        return Word()

    def metab(self, escape, wsym):
        # \b
        return Metab()

    def metaB(self, escape, wsym):
        # \B
        return MetaB()

    def nword(self, escape, wsym):
        # \W
        return NotWord()

    def pipe(self, regex0, pipe, regex1):
        """Build an alternation (a|b), folding chained pipes into one Any."""
        data0 = [ind.val() for ind in regex0]
        data1 = [ind.val() for ind in regex1]

        pattern1 = data1[0]
        pattern0 = data0[0]

        if len(data1) > 1:
            pattern1 = Pattern(*data1)

        # If the left side is already an Any, append to it rather than
        # nesting a new alternation.
        if isinstance(data0[0], Any):
            pattern0.args.append(pattern1)
        else:
            pattern0 = Any(Pattern(*data0), pattern1)
        return pattern0

    def reduce_pipe(self, regex0, regex1):
        """Build a plain two-branch alternation from two regex sequences.

        BUGFIX: this previously referenced the undefined names ``data1``
        and ``pattern0`` and would raise NameError if called.
        """
        pattern0 = Pattern(*(ind.val() for ind in regex0))
        pattern1 = Pattern(*(ind.val() for ind in regex1))
        return Any(pattern0, pattern1)

    def group(self, lp, regex, rp):
        data = (ind.val() for ind in regex)
        return Group(*data)

    def ngroup(self, lp, question, gsym, lesser, gname, greater, regex, rp):
        # (?P<name>...)
        data = (ind.val() for ind in regex)
        return NamedGroup(gname.val(), *data)

    def ncapture(self, lp, question, colon, regex, rp):
        # (?:...)
        data = (ind.val() for ind in regex)
        return NonCapture(*data)

    def gref(self, escape, num):
        # \1, \2, ... back-reference by group number.
        return GLink(int(num.val()))

    def ngref(self, lp, question, gsym, equal, gname, rp):
        # (?P=name) back-reference by group name.
        return NGLink(gname.val())

    def escape(self, escape, char):
        return char.val()

    def include(self, lb, string, rb):
        # [...] character class; parsed by a dedicated lexer/parser pair.
        tokens = self.hclass_lexer.feed(string.val())
        tree = self.include_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def exclude(self, lb, caret, string, rb):
        # [^...] negated character class.
        tokens = self.hclass_lexer.feed(string.val())
        tree = self.exclude_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def cnext(self, lp, question, lexer, equal, regex0, rp, regex1):
        # (?<=...) style lookbehind-consume.
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1)

    def ncnext(self, lp, question, lexer, exlam, regex0, rp, regex1):
        # (?<!...) negated form.
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1, neg=True)

    def cback(self, regex0, lp, question, equal, regex1, rp):
        # (?=...) lookahead-consume.
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1)

    def ncback(self, regex0, lp, question, exlam, regex1, rp):
        # (?!...) negated form.
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1, neg=True)

    def dot(self, dot):
        return X()

    def caret(self, caret):
        return Caret()

    def dollar(self, caret):
        return Dollar()

    def times0(self, regex, lbr, min, comma, max, rbr):
        # {m,n} — min/max arrive as digit-token sequences.
        min = ''.join((ind.val() for ind in min))
        max = ''.join((ind.val() for ind in max))
        return Repeat(regex.val(), int(min), int(max))

    def times1(self, regex, lbr, num, rbr):
        # {n} — exactly n repetitions.
        num = ''.join((ind.val() for ind in num))
        num = int(num)
        return Repeat(regex.val(), num, num)

    def times2(self, regex, lbr, min, comma, rbr):
        # {m,} — at least m repetitions.
        # BUGFIX: removed an unreachable duplicated 'return repeat'.
        min = ''.join((ind.val() for ind in min))
        return Repeat(regex.val(), int(min))

    def times3(self, regex, lbr, comma, max, rbr):
        # {,n} — at most n repetitions.
        max = ''.join((ind.val() for ind in max))
        return Repeat(regex.val(), max=int(max))

    def times4(self, regex, mul):
        # *
        return ZeroOrMore(regex.val())

    def times5(self, regex, question):
        # ?
        return OneOrZero(regex.val())

    def times6(self, regex, question):
        # +
        return OneOrMore(regex.val())

    def times7(self, regex, plus, question):
        """+?

        Greedy operators should behave alike non greedy in the context.
        Although the serialization has to be different.
        """
        return OneOrMore(regex.val(), greedy=True)

    def times8(self, regex, ask, question):
        """*?"""
        return ZeroOrMore(regex.val(), greedy=True)

    def times9(self, regex, question0, question1):
        """??"""
        return OneOrZero(regex.val(), greedy=True)

    def times10(self, regex, lbr, min, comma, max, rbr, question):
        # {m,n}?
        return Repeat(regex.val(), int(min.val()),
                      int(max.val()), greedy=True)

    def times11(self, regex, lbr, min, comma, rbr, question):
        # {m,}?
        return Repeat(regex.val(), int(min.val()), greedy=True)

    def times12(self, regex, lbr, comma, max, rbr, question):
        # {,n}?
        return Repeat(regex.val(), max=int(max.val()), greedy=True)

    def times13(self, regex, lbr, num, rbr, question):
        # {n}?
        return Repeat(regex.val(), min=int(num.val()),
                      max=int(num.val()), greedy=True)

    def char(self, char):
        return RegexStr(char.val())

    def comment(self, lp, question, hash, comment, rp):
        # (?#...)
        return RegexComment(comment.val())

    def done(self, sof, regex, eof):
        """Wrap the finished regex sequence into a single pattern node."""
        data = [ind.val() for ind in regex]
        if len(data) > 1:
            return Pattern(*data)
        return data[0]
""" """ from eacc.lexer import Lexer, LexTok, XSpec from eacc.token import Plus, Minus, LP, RP, Mul, Div, Num, Blank class CalcTokens(XSpec): t_plus = LexTok(r'\+', Plus) t_minus = LexTok(r'\-', Minus) t_lparen = LexTok(r'\(', LP) t_rparen = LexTok(r'\)', RP) t_mul = LexTok(r'\*', Mul) t_div = LexTok(r'\/', Div) t_num = LexTok(r'[0-9]+', Num, float) t_blank = LexTok(r' +', Blank) root = [t_num, t_blank, t_plus, t_minus, t_lparen, t_rparen, t_mul, t_div] print('Example 1') lex = Lexer(CalcTokens) data = '1+1+(3*2+4)' tokens = lex.feed(data) tokens = list(tokens) print('Consumed:', tokens)
t_four = LexTok(r'4', Four)
t_five = LexTok(r'5', Five)
t_blank = LexTok(r' +', Blank, discard=True)
root = [t_one, t_two, t_three, t_four, t_five, t_blank]

class ExprGrammar(Grammar):
    # One, then any single token except Three, then One.
    r_one = Rule(One, Except(Three), One)
    r_sof = Rule(Sof)
    r_eof = Rule(Eof)

    root = [r_one, r_sof, r_eof]

if __name__ == '__main__':
    print('Example 1')
    lexer = Lexer(ExprTokens)
    eacc = Eacc(ExprGrammar)

    data = '121 141'
    ptree = list(eacc.build(lexer.feed(data)))
    print(ptree)

    print('\nExample 2')
    data = '1 2 1 1 3 1' # Will fail.
    tokens = lexer.feed(data)
    ptree = eacc.build(tokens)
    ptree = list(ptree)