def setUp(self):
    """Build a fresh calculator lexer/parser pair and bind rule handles."""
    self.lexer = Lexer(self.CalcTokens)
    self.eacc = Eacc(self.CalcGrammar)

    # Link the handles to the patterns, one semantic action per rule.
    grammar = self.CalcGrammar
    bindings = (
        (grammar.r_plus, self.plus),
        (grammar.r_minus, self.minus),
        (grammar.r_div, self.div),
        (grammar.r_mul, self.mul),
        (grammar.r_paren, self.paren),
        (grammar.r_done, self.done),
    )
    for rule, action in bindings:
        self.eacc.add_handle(rule, action)
def __init__(self):
    """Set up the regex parser: helper lexer, char-class sets and handles."""
    super(RegexParser, self).__init__(RegexGrammar)
    self.hclass_lexer = Lexer(HClassTokens)
    self.include_set = IncludeSet()
    self.exclude_set = ExcludeSet()

    # NOTE: RegexGrammar.r_escape / self.escape is intentionally left
    # unregistered (it was commented out in the original source).
    #
    # Every grammar rule r_<name> is bound to the method of the same
    # name; registration order is preserved from the original listing.
    names = ['comment', 'group', 'ncapture', 'ngroup', 'dot',
        'word', 'nword', 'metab', 'metaB']
    names.extend('times%d' % count for count in range(14))
    names.extend(['include', 'exclude', 'cnext', 'ncnext',
        'cback', 'ncback', 'gref', 'ngref', 'pipe',
        'char', 'caret', 'dollar', 'done'])

    for name in names:
        self.add_handle(getattr(RegexGrammar, 'r_%s' % name),
            getattr(self, name))
def xmake(regstr):
    """
    Parse the regex regstr and return the value carried by the last
    token of the built sequence (presumably the regex tree produced by
    RegexParser -- confirm against RegexParser.build).

    Raises re.error if regstr is not a valid regular expression.
    """
    # Validate the pattern up front: re.compile raises re.error on bad
    # input.  The compiled object itself is not needed, so it is not
    # bound to a name (the original kept an unused `regexc` local).
    re.compile(regstr)

    xlexer = Lexer(RegexTokens)
    xparser = RegexParser()

    tokens = xlexer.feed(regstr)
    tseq = list(xparser.build(tokens))

    return tseq[-1].val()
""" """ from eacc.lexer import Lexer, LexSeq, LexTok, SeqTok, XSpec from eacc.token import DoubleQuote, String, Blank class StringTokens(XSpec): t_dquote = LexSeq(SeqTok(r'\"', DoubleQuote), SeqTok(r'[^\"]+', String), SeqTok(r'\"', DoubleQuote)) t_blank = LexTok(r' +', type=Blank) root = [t_dquote, t_blank] lex = Lexer(StringTokens) print('Example 1!') data = '" This will" "rock!" "For sure!"' tokens = lex.feed(data) print('Consumed:', list(tokens))
""" """ from eacc.lexer import XSpec, Lexer, SeqTok, LexTok, LexSeq from eacc.token import Keyword, Identifier, RP, LP, Colon, Blank class KeywordTokens(XSpec): t_if = LexSeq(SeqTok(r'if', type=Keyword), SeqTok(r'\s+', type=Blank)) t_blank = LexTok(r' +', type=Blank) t_lparen = LexTok(r'\(', type=LP) t_rparen = LexTok(r'\)', type=RP) t_colon = LexTok(r'\:', type=Colon) # Match identifier only if it is not an if. t_identifier = LexTok(r'[a-zA-Z0-9]+', type=Identifier) root = [t_if, t_blank, t_lparen, t_rparen, t_colon, t_identifier] lex = Lexer(KeywordTokens) data = 'if ifnum: foobar()' tokens = lex.feed(data) print('Consumed:', list(tokens))
""" The example below tokenizes numbers whose number of digits is 3 <= n < 6. When the number of digits is not in that range then it raises an error. Thus the string below would give a lexical error. 12 31 445 """ from eacc.lexer import Lexer, SeqTok, LexSeq, LexTok, XSpec from eacc.token import Num, Blank class NumsTokens(XSpec): t_blank = LexTok(r' +', Blank) t_num = LexTok(r'[0-9]{3,6}', Num) root = [t_num, t_blank] print('Example 1') lex = Lexer(NumsTokens) data = '332 3445 11234' tokens = lex.feed(data) print('Consumed:', list(tokens))
a lexical error. In the below example it generates
an error due to mixing up digits.
"""

from eacc.lexer import Lexer, LexTok, XSpec
from eacc.token import Letter, Blank

class LetterTokens(XSpec):
    # One Letter token per alphabetic character; blanks matched apart.
    t_blank = LexTok(r' +', Blank)
    t_letter = LexTok(r'[a-zA-Z]', Letter)

    root = [t_letter, t_blank]

lex = Lexer(LetterTokens)

print('Example 1')
data = 'abc def uoc'
tokens = lex.feed(data)
print('Consumed:', list(tokens))

print('Example 2')
# '123' contains digits, which neither token pattern matches.
data = 'abc def uoc 123'
tokens = lex.feed(data)
print('Consumed:', list(tokens))
from eacc.lexer import Lexer, LexTok, XSpec
from eacc.token import Char

class CharTokens(XSpec):
    # Every single character becomes one Char token.
    t_char = LexTok(r'.', Char)

    root = [t_char]

source = 'abc'
char_lexer = Lexer(CharTokens)

# Print each token together with its start/end offsets.
for token in char_lexer.feed(source):
    print('%s\nStart:%s\nEnd:%s\n' % (token, token.start, token.end))
""" """ from eacc.lexer import Lexer, LexTok, XSpec from eacc.token import Plus, Minus, LP, RP, Mul, Div, Num, Blank class CalcTokens(XSpec): t_plus = LexTok(r'\+', Plus) t_minus = LexTok(r'\-', Minus) t_lparen = LexTok(r'\(', LP) t_rparen = LexTok(r'\)', RP) t_mul = LexTok(r'\*', Mul) t_div = LexTok(r'\/', Div) t_num = LexTok(r'[0-9]+', Num, float) t_blank = LexTok(r' +', Blank) root = [t_num, t_blank, t_plus, t_minus, t_lparen, t_rparen, t_mul, t_div] print('Example 1') lex = Lexer(CalcTokens) data = '1+1+(3*2+4)' tokens = lex.feed(data) tokens = list(tokens) print('Consumed:', tokens)
""" from eacc.eacc import Eacc, Rule, Grammar, TokVal from eacc.lexer import XSpec, Lexer, LexTok from eacc.token import Blank, Word, Sof, Eof class WordTokens(XSpec): t_word = LexTok(r'[a-zA-Z]+', Word) t_blank = LexTok(r' +', type=Blank, discard=True) root = [t_word, t_blank] class WordGrammar(Grammar): r_phrase0 = Rule(TokVal('alpha'), TokVal('beta')) r_phrase1 = Rule(TokVal('gamma'), TokVal('zeta')) r_sof = Rule(Sof) r_eof = Rule(Eof) root = [r_phrase1, r_phrase0, r_sof, r_eof] if __name__ == '__main__': data = 'alpha beta gamma zeta' lexer = Lexer(WordTokens) eacc = Eacc(WordGrammar) tokens = lexer.feed(data) ptree = eacc.build(tokens) print(list(ptree))
    root = [r_lparen, r_rparen, r_num, r_blank]

class TupleGrammar(Grammar):
    # It means to accumulate as many Num tokens as possible.
    g_num = Times(Num, min=1, type=Num)

    # Then we trigger such a pattern in this rule.
    r_paren = Rule(LP, g_num, RP, type=Num)
    r_done = Rule(Sof, Num, Eof)

    root = [r_paren, r_done]

def done(sof, expr, eof):
    # Handler for r_done: prints the fully reduced expression.
    print('Result:', expr)

if __name__ == '__main__':
    print('Example 1')
    data = '(1 (1 1) ((((1)))))'

    lexer = Lexer(TupleTokens)
    tokens = lexer.feed(data)

    eacc = Eacc(TupleGrammar)
    ptree = eacc.build(tokens)

    # NOTE(review): the handle is attached after build() but before the
    # parse tree is consumed -- presumably build is lazy; confirm.
    eacc.add_handle(TupleGrammar.r_done, done)

    ptree = list(ptree)
def setUp(self):
    """Construct a fresh lexer and parser for each expression test."""
    self.eacc = Eacc(self.ExprGrammar)
    self.lexer = Lexer(self.ExprTokens)
def setUp(self):
    """Construct a fresh lexer and parser for each word-grammar test."""
    self.eacc = Eacc(self.WordGrammar)
    self.lexer = Lexer(self.Wordtokens)
    t_four = LexTok(r'4', Four)
    t_five = LexTok(r'5', Five)
    t_blank = LexTok(r' +', Blank, discard=True)

    root = [t_one, t_two, t_three, t_four, t_five, t_blank]

class ExprGrammar(Grammar):
    # A One, then anything except a Three, then another One.
    r_one = Rule(One, Except(Three), One)

    r_sof = Rule(Sof)
    r_eof = Rule(Eof)

    root = [r_one, r_sof, r_eof]

if __name__ == '__main__':
    print('Example 1')

    lexer = Lexer(ExprTokens)
    eacc = Eacc(ExprGrammar)

    data = '121 141'
    tokens = lexer.feed(data)

    ptree = eacc.build(tokens)
    ptree = list(ptree)
    print(ptree)

    print('\nExample 2')
    # Contains a Three between Ones, which r_one excludes.
    data = '1 2 1 1 3 1' # Will fail.
    tokens = lexer.feed(data)

    ptree = eacc.build(tokens)
    ptree = list(ptree)