class TestTokVal(unittest.TestCase):
    """Check that TokVal rules match tokens by their exact text value."""

    class Wordtokens(XSpec):
        t_word = LexTok(r'[a-zA-Z]+', Word)
        t_blank = LexTok(r' +', type=Blank, discard=True)

        root = [t_word, t_blank]

    class WordGrammar(Grammar):
        r_phrase0 = Rule(TokVal('alpha'), TokVal('beta'))
        r_phrase1 = Rule(TokVal('gamma'), TokVal('zeta'))
        r_phrase2 = Rule(TokVal('abc'), TokVal('def'))
        r_sof = Rule(Sof)
        r_eof = Rule(Eof)

        root = [r_phrase1, r_phrase0, r_phrase2, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.Wordtokens)
        self.eacc = Eacc(self.WordGrammar)

    def _parse(self, source):
        """Lex *source* and fully consume the resulting parse tree."""
        return list(self.eacc.build(self.lexer.feed(source)))

    def test0(self):
        self._parse('alpha beta gamma zeta')

    def test1(self):
        self._parse('gamma zeta abc def alpha beta ')

    def test2(self):
        self._parse('gamma zeta')

    def test3(self):
        self._parse('gamma zeta')

    def test4(self):
        # A trailing 'abc' has no matching phrase rule, so consuming
        # the parse tree must fail.
        tree = self.eacc.build(self.lexer.feed('gamma zeta abc'))
        with self.assertRaises(EaccError):
            list(tree)
class TestOps0(unittest.TestCase):
    """Check the Except operator inside a rule pattern."""

    class ExprTokens(XSpec):
        t_one = LexTok(r'1', One)
        t_two = LexTok(r'2', Two)
        t_three = LexTok(r'3', Three)
        t_four = LexTok(r'4', Four)
        t_five = LexTok(r'5', Five)
        t_blank = LexTok(r' +', Blank, discard=True)

        root = [t_one, t_two, t_three, t_four, t_five, t_blank]

    class ExprGrammar(Grammar):
        # One, then anything except Three, then One.
        r_num = Rule(One, Except(Three), One)
        r_sof = Rule(Sof)
        r_eof = Rule(Eof)

        root = [r_num, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.ExprTokens)
        self.eacc = Eacc(self.ExprGrammar)

    def test0(self):
        source = '121 141 141 141'
        tree = self.eacc.build(self.lexer.feed(source))
        list(tree)
def xmake(regstr):
    """Parse the regex string *regstr* and return its parse-tree root.

    The string is first validated with ``re.compile`` (which raises
    ``re.error`` on an invalid pattern), then lexed and parsed by the
    eacc-based regex parser.
    """
    # Make sure the regex is valid before parsing; the compiled object
    # itself is not needed, only the validation side effect.
    re.compile(regstr)

    xlexer = Lexer(RegexTokens)
    xparser = RegexParser()

    tokens = xlexer.feed(regstr)
    tseq = list(xparser.build(tokens))

    # The last token of the fully-consumed sequence carries the
    # complete regex tree as its value.
    return tseq[-1].val()
class RegexParser(Eacc):
    """Parse a regex token stream into the internal regex AST.

    Each grammar rule of RegexGrammar is bound to a handler method
    that builds the corresponding AST node (Group, Repeat, Any, ...).
    """

    def __init__(self):
        super(RegexParser, self).__init__(RegexGrammar)
        # Secondary lexer/parsers used for character-class contents.
        self.hclass_lexer = Lexer(HClassTokens)
        self.include_set = IncludeSet()
        self.exclude_set = ExcludeSet()

        # NOTE(review): the escape handle is deliberately left
        # unregistered; escapes reach the grammar through other rules.
        # self.add_handle(RegexGrammar.r_escape, self.escape)
        self.add_handle(RegexGrammar.r_comment, self.comment)
        self.add_handle(RegexGrammar.r_group, self.group)
        self.add_handle(RegexGrammar.r_ncapture, self.ncapture)
        self.add_handle(RegexGrammar.r_ngroup, self.ngroup)
        self.add_handle(RegexGrammar.r_dot, self.dot)
        self.add_handle(RegexGrammar.r_word, self.word)
        self.add_handle(RegexGrammar.r_nword, self.nword)
        self.add_handle(RegexGrammar.r_metab, self.metab)
        self.add_handle(RegexGrammar.r_metaB, self.metaB)
        self.add_handle(RegexGrammar.r_times0, self.times0)
        self.add_handle(RegexGrammar.r_times1, self.times1)
        self.add_handle(RegexGrammar.r_times2, self.times2)
        self.add_handle(RegexGrammar.r_times3, self.times3)
        self.add_handle(RegexGrammar.r_times4, self.times4)
        self.add_handle(RegexGrammar.r_times5, self.times5)
        self.add_handle(RegexGrammar.r_times6, self.times6)
        self.add_handle(RegexGrammar.r_times7, self.times7)
        self.add_handle(RegexGrammar.r_times8, self.times8)
        self.add_handle(RegexGrammar.r_times9, self.times9)
        self.add_handle(RegexGrammar.r_times10, self.times10)
        self.add_handle(RegexGrammar.r_times11, self.times11)
        self.add_handle(RegexGrammar.r_times12, self.times12)
        self.add_handle(RegexGrammar.r_times13, self.times13)
        self.add_handle(RegexGrammar.r_include, self.include)
        self.add_handle(RegexGrammar.r_exclude, self.exclude)
        self.add_handle(RegexGrammar.r_cnext, self.cnext)
        self.add_handle(RegexGrammar.r_ncnext, self.ncnext)
        self.add_handle(RegexGrammar.r_cback, self.cback)
        self.add_handle(RegexGrammar.r_ncback, self.ncback)
        self.add_handle(RegexGrammar.r_gref, self.gref)
        self.add_handle(RegexGrammar.r_ngref, self.ngref)
        self.add_handle(RegexGrammar.r_pipe, self.pipe)
        self.add_handle(RegexGrammar.r_char, self.char)
        self.add_handle(RegexGrammar.r_caret, self.caret)
        self.add_handle(RegexGrammar.r_dollar, self.dollar)
        self.add_handle(RegexGrammar.r_done, self.done)

    def build(self, tokens):
        """Build and fully consume the parse tree, returning a list."""
        tree = super(RegexParser, self).build(tokens)
        return list(tree)

    def word(self, escape, wsym):
        """Handle \\w."""
        return Word()

    def metab(self, escape, wsym):
        """Handle \\b."""
        return Metab()

    def metaB(self, escape, wsym):
        """Handle \\B."""
        return MetaB()

    def nword(self, escape, wsym):
        """Handle \\W."""
        return NotWord()

    def pipe(self, regex0, pipe, regex1):
        """Handle alternation: regex0 | regex1.

        When the left side is already an Any node the right side is
        appended to it, keeping a flat alternation instead of a
        nested one.
        """
        data0 = [ind.val() for ind in regex0]
        data1 = [ind.val() for ind in regex1]

        pattern1 = data1[0]
        pattern0 = data0[0]
        if len(data1) > 1:
            pattern1 = Pattern(*data1)

        if isinstance(data0[0], Any):
            pattern0.args.append(pattern1)
        else:
            pattern0 = Any(Pattern(*data0), pattern1)
        return pattern0

    def reduce_pipe(self, regex0, regex1):
        """Build an Any node from two token sequences.

        FIX: the original body referenced undefined names (data0,
        data1, pattern0) and raised NameError whenever called; it now
        derives both alternatives from its own arguments.
        """
        pattern0 = Pattern(*[ind.val() for ind in regex0])
        pattern1 = Pattern(*[ind.val() for ind in regex1])
        return Any(pattern0, pattern1)

    def group(self, lp, regex, rp):
        """Handle a capturing group: ( regex )."""
        data = (ind.val() for ind in regex)
        return Group(*data)

    def ngroup(self, lp, question, gsym, lesser, gname, greater, regex, rp):
        """Handle a named group: (?P<name> regex )."""
        data = (ind.val() for ind in regex)
        return NamedGroup(gname.val(), *data)

    def ncapture(self, lp, question, colon, regex, rp):
        """Handle a non-capturing group: (?: regex )."""
        data = (ind.val() for ind in regex)
        return NonCapture(*data)

    def gref(self, escape, num):
        """Handle a numeric back-reference: \\N."""
        return GLink(int(num.val()))

    def ngref(self, lp, question, gsym, equal, gname, rp):
        """Handle a named back-reference: (?P=name)."""
        return NGLink(gname.val())

    def escape(self, escape, char):
        """Handle an escaped literal character."""
        return char.val()

    def include(self, lb, string, rb):
        """Handle a character class: [...]; contents get re-lexed."""
        tokens = self.hclass_lexer.feed(string.val())
        tree = self.include_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def exclude(self, lb, caret, string, rb):
        """Handle a negated character class: [^...]."""
        tokens = self.hclass_lexer.feed(string.val())
        tree = self.exclude_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def cnext(self, lp, question, lexer, equal, regex0, rp, regex1):
        """Handle a lookbehind-style construct: (?<=regex0)regex1."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)

        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1)

    def ncnext(self, lp, question, lexer, exlam, regex0, rp, regex1):
        """Handle a negative lookbehind: (?<!regex0)regex1."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)

        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1, neg=True)

    def cback(self, regex0, lp, question, equal, regex1, rp):
        """Handle a lookahead: regex0(?=regex1)."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)

        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1)

    def ncback(self, regex0, lp, question, exlam, regex1, rp):
        """Handle a negative lookahead: regex0(?!regex1)."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)

        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1, neg=True)

    def dot(self, dot):
        """Handle the '.' wildcard."""
        return X()

    def caret(self, caret):
        """Handle the '^' anchor."""
        return Caret()

    def dollar(self, caret):
        """Handle the '$' anchor."""
        return Dollar()

    def times0(self, regex, lbr, min, comma, max, rbr):
        """Handle regex{min,max}; min/max arrive as digit sequences."""
        min = ''.join((ind.val() for ind in min))
        max = ''.join((ind.val() for ind in max))
        return Repeat(regex.val(), int(min), int(max))

    def times1(self, regex, lbr, num, rbr):
        """Handle regex{num}."""
        num = ''.join((ind.val() for ind in num))
        num = int(num)
        return Repeat(regex.val(), num, num)

    def times2(self, regex, lbr, min, comma, rbr):
        """Handle regex{min,}.

        FIX: the original had a duplicated, unreachable
        'return repeat' statement.
        """
        min = ''.join((ind.val() for ind in min))
        return Repeat(regex.val(), int(min))

    def times3(self, regex, lbr, comma, max, rbr):
        """Handle regex{,max}."""
        max = ''.join((ind.val() for ind in max))
        return Repeat(regex.val(), max=int(max))

    def times4(self, regex, mul):
        """Handle regex*."""
        return ZeroOrMore(regex.val())

    def times5(self, regex, question):
        """Handle regex?."""
        return OneOrZero(regex.val())

    def times6(self, regex, question):
        """Handle regex+."""
        return OneOrMore(regex.val())

    def times7(self, regex, plus, question):
        """Handle regex+? .

        Greedy operators should behave alike non greedy in the
        context, although the serialization has to be different.
        """
        return OneOrMore(regex.val(), greedy=True)

    def times8(self, regex, ask, question):
        """Handle regex*? ."""
        return ZeroOrMore(regex.val(), greedy=True)

    def times9(self, regex, question0, question1):
        """Handle regex?? ."""
        return OneOrZero(regex.val(), greedy=True)

    def times10(self, regex, lbr, min, comma, max, rbr, question):
        """Handle regex{min,max}? ."""
        return Repeat(regex.val(), int(min.val()),
                      int(max.val()), greedy=True)

    def times11(self, regex, lbr, min, comma, rbr, question):
        """Handle regex{min,}? ."""
        return Repeat(regex.val(), int(min.val()), greedy=True)

    def times12(self, regex, lbr, comma, max, rbr, question):
        """Handle regex{,max}? ."""
        return Repeat(regex.val(), max=int(max.val()), greedy=True)

    def times13(self, regex, lbr, num, rbr, question):
        """Handle regex{num}? ."""
        return Repeat(regex.val(), min=int(num.val()),
                      max=int(num.val()), greedy=True)

    def char(self, char):
        """Handle a plain literal character."""
        return RegexStr(char.val())

    def comment(self, lp, question, hash, comment, rp):
        """Handle a regex comment: (?#...)."""
        return RegexComment(comment.val())

    def done(self, sof, regex, eof):
        """Wrap the whole match in a Pattern when it has several parts."""
        data = [ind.val() for ind in regex]
        if len(data) > 1:
            return Pattern(*data)
        return data[0]
"""Demonstrate LexSeq: lex a double-quoted string as a token sequence."""

from eacc.lexer import Lexer, LexSeq, LexTok, SeqTok, XSpec
from eacc.token import DoubleQuote, String, Blank


class StringTokens(XSpec):
    # A quoted string yields three tokens in sequence:
    # opening quote, contents, closing quote.
    t_dquote = LexSeq(
        SeqTok(r'\"', DoubleQuote),
        SeqTok(r'[^\"]+', String),
        SeqTok(r'\"', DoubleQuote))

    t_blank = LexTok(r' +', type=Blank)

    root = [t_dquote, t_blank]


lexer = Lexer(StringTokens)
print('Example 1!')

source = '" This will" "rock!" "For sure!"'
print('Consumed:', list(lexer.feed(source)))
"""Print every token of the input along with its start/end offsets."""

from eacc.lexer import Lexer, LexTok, XSpec
from eacc.token import Char


class CharTokens(XSpec):
    # Any single character becomes a Char token.
    t_char = LexTok(r'.', Char)

    root = [t_char]


source = 'abc'
lexer = Lexer(CharTokens)

for token in lexer.feed(source):
    print('%s\nStart:%s\nEnd:%s\n' % (token, token.start, token.end))
class TestRule(unittest.TestCase):
    """Exercise the arithmetic grammar: precedence, parens, errors."""

    class CalcTokens(XSpec):
        t_plus = LexTok(r'\+', Plus)
        t_minus = LexTok(r'\-', Minus)
        t_lparen = LexTok(r'\(', LP)
        t_rparen = LexTok(r'\)', RP)
        t_mul = LexTok(r'\*', Mul)
        t_div = LexTok(r'\/', Div)
        t_num = LexTok(r'[0-9]+', Num, float)
        t_blank = LexTok(r' +', Blank, discard=True)

        root = [t_plus, t_minus, t_lparen, t_num,
                t_blank, t_rparen, t_mul, t_div]

    class CalcGrammar(Grammar):
        r_paren = Rule(LP, Num, RP, type=Num)
        r_div = Rule(Num, Div, Num, type=Num)
        r_mul = Rule(Num, Mul, Num, type=Num)
        o_div = Rule(Div)
        o_mul = Rule(Mul)

        # Plus/minus yield to mul/div lookahead (up=...), giving the
        # usual operator precedence.
        r_plus = Rule(Num, Plus, Num, type=Num, up=(o_mul, o_div))
        r_minus = Rule(Num, Minus, Num, type=Num, up=(o_mul, o_div))
        r_done = Rule(Sof, Num, Eof)

        root = [r_paren, r_plus, r_minus, r_mul, r_div, r_done]

    def plus(self, expr, sign, term):
        return expr.val() + term.val()

    def minus(self, expr, sign, term):
        return expr.val() - term.val()

    def div(self, term, sign, factor):
        return term.val() / factor.val()

    def mul(self, term, sign, factor):
        return term.val() * factor.val()

    def paren(self, left, expression, right):
        return expression.val()

    def done(self, sof, num, eof):
        print('Result:', num.val())
        return num.val()

    def setUp(self):
        self.lexer = Lexer(self.CalcTokens)
        self.eacc = Eacc(self.CalcGrammar)

        # Link the handles to the patterns.
        self.eacc.add_handle(self.CalcGrammar.r_plus, self.plus)
        self.eacc.add_handle(self.CalcGrammar.r_minus, self.minus)
        self.eacc.add_handle(self.CalcGrammar.r_div, self.div)
        self.eacc.add_handle(self.CalcGrammar.r_mul, self.mul)
        self.eacc.add_handle(self.CalcGrammar.r_paren, self.paren)
        self.eacc.add_handle(self.CalcGrammar.r_done, self.done)

    def test0(self):
        data = '1+2/3*(3*2 - 1) /(1-1-2-3-1+2)*3/ (1 - 2)*10'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test1(self):
        data = '(1+2/3*(3*2 - 1)) + ((1 - 2)*10)'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test2(self):
        data = '((1+2/3*(3*2 - 1)) + ((1 - 2)*10))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test3(self):
        data = '(1/2) * (3/4) * (5/2/3/5/2*1)/((((((1))))))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test4(self):
        data = '(1/2) * (3/4) * (5 2)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)
        ptree = self.eacc.build(tokens)
        with self.assertRaises(EaccError):
            ptree = list(ptree)

    def test6(self):
        # FIX: this was a second method named test4, which shadowed
        # the one above so it never ran as a separate test.
        data = '(1/2) * 3/4) * (512)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)
        ptree = self.eacc.build(tokens)
        with self.assertRaises(EaccError):
            ptree = list(ptree)

    def test5(self):
        data = '1+2*2/2 - 2/2 - 2*2/2+1'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))