Example no. 1
0
class TestTokVal(unittest.TestCase):
    """Exercise rules built from TokVal literal-value matchers."""

    class Wordtokens(XSpec):
        # Alphabetic runs become Word tokens; blanks are lexed but dropped.
        t_word = LexTok(r'[a-zA-Z]+', Word)
        t_blank = LexTok(r' +', type=Blank, discard=True)

        root = [t_word, t_blank]

    class WordGrammar(Grammar):
        # Each rule matches one specific pair of literal token values.
        r_phrase0 = Rule(TokVal('alpha'), TokVal('beta'))
        r_phrase1 = Rule(TokVal('gamma'), TokVal('zeta'))
        r_phrase2 = Rule(TokVal('abc'), TokVal('def'))

        r_sof = Rule(Sof)
        r_eof = Rule(Eof)

        root = [r_phrase1, r_phrase0, r_phrase2, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.Wordtokens)
        self.eacc = Eacc(self.WordGrammar)

    def _parse(self, source):
        # Lex then parse; consuming the generator drives the matching.
        return list(self.eacc.build(self.lexer.feed(source)))

    def test0(self):
        self._parse('alpha beta gamma zeta')

    def test1(self):
        self._parse('gamma zeta     abc      def     alpha beta ')

    def test2(self):
        self._parse('gamma zeta')

    def test3(self):
        self._parse('gamma zeta')

    def test4(self):
        # A trailing 'abc' has no matching 'def', so parsing must fail.
        with self.assertRaises(EaccError):
            self._parse('gamma zeta abc')
Example no. 2
0
class TestOps0(unittest.TestCase):
    """Exercise the Except operator inside a rule."""

    class ExprTokens(XSpec):
        # One token class per digit; blanks are lexed but dropped.
        t_one = LexTok(r'1', One)
        t_two = LexTok(r'2', Two)

        t_three = LexTok(r'3', Three)
        t_four = LexTok(r'4', Four)
        t_five = LexTok(r'5', Five)
        t_blank = LexTok(r' +', Blank, discard=True)

        root = [t_one, t_two, t_three, t_four, t_five, t_blank]

    class ExprGrammar(Grammar):
        # A One, then any token other than Three, then another One.
        r_num = Rule(One, Except(Three), One)

        r_sof = Rule(Sof)
        r_eof = Rule(Eof)

        root = [r_num, r_sof, r_eof]

    def setUp(self):
        self.lexer = Lexer(self.ExprTokens)
        self.eacc = Eacc(self.ExprGrammar)

    def test0(self):
        source = '121 141 141 141'
        # Consuming the build generator performs the actual parse.
        ptree = list(self.eacc.build(self.lexer.feed(source)))
Example no. 3
0
def xmake(regstr):
    """
    Generate the eacc pattern tree for the regex string *regstr*.

    Returns the value of the last node of the parse sequence, which
    holds the full pattern tree.

    Raises re.error when *regstr* is not a valid regular expression.
    """
    # Validate the regex with the stdlib engine before parsing it ourselves;
    # the compiled object itself is not needed.
    re.compile(regstr)

    xlexer = Lexer(RegexTokens)
    xparser = RegexParser()

    tokens = xlexer.feed(regstr)
    # RegexParser.build already materializes the parse tree as a list.
    tseq = xparser.build(tokens)

    return tseq[-1].val()
Example no. 4
0
class RegexParser(Eacc):
    """Parse a regex token stream into eacc pattern objects.

    Each r_* rule of RegexGrammar is wired to a handler method that
    builds the corresponding node of the resulting pattern tree.
    Character classes ([...] / [^...]) are lexed separately with
    HClassTokens and parsed by IncludeSet / ExcludeSet sub-parsers.
    """

    def __init__(self):
        super(RegexParser, self).__init__(RegexGrammar)
        # Dedicated lexer/parsers for character-class contents.
        self.hclass_lexer = Lexer(HClassTokens)
        self.include_set = IncludeSet()
        self.exclude_set = ExcludeSet()

        # self.add_handle(RegexGrammar.r_escape, self.escape)
        self.add_handle(RegexGrammar.r_comment, self.comment)

        self.add_handle(RegexGrammar.r_group, self.group)
        self.add_handle(RegexGrammar.r_ncapture, self.ncapture)

        self.add_handle(RegexGrammar.r_ngroup, self.ngroup)

        self.add_handle(RegexGrammar.r_dot, self.dot)
        self.add_handle(RegexGrammar.r_word, self.word)
        self.add_handle(RegexGrammar.r_nword, self.nword)
        self.add_handle(RegexGrammar.r_metab, self.metab)
        self.add_handle(RegexGrammar.r_metaB, self.metaB)

        # One handler per repetition form ({m,n}, {n}, *, +, ?, greedy
        # variants, ...).
        self.add_handle(RegexGrammar.r_times0, self.times0)
        self.add_handle(RegexGrammar.r_times1, self.times1)
        self.add_handle(RegexGrammar.r_times2, self.times2)
        self.add_handle(RegexGrammar.r_times3, self.times3)
        self.add_handle(RegexGrammar.r_times4, self.times4)
        self.add_handle(RegexGrammar.r_times5, self.times5)
        self.add_handle(RegexGrammar.r_times6, self.times6)
        self.add_handle(RegexGrammar.r_times7, self.times7)
        self.add_handle(RegexGrammar.r_times8, self.times8)
        self.add_handle(RegexGrammar.r_times9, self.times9)
        self.add_handle(RegexGrammar.r_times10, self.times10)
        self.add_handle(RegexGrammar.r_times11, self.times11)
        self.add_handle(RegexGrammar.r_times12, self.times12)
        self.add_handle(RegexGrammar.r_times13, self.times13)

        self.add_handle(RegexGrammar.r_include, self.include)
        self.add_handle(RegexGrammar.r_exclude, self.exclude)
        self.add_handle(RegexGrammar.r_cnext, self.cnext)
        self.add_handle(RegexGrammar.r_ncnext, self.ncnext)

        self.add_handle(RegexGrammar.r_cback, self.cback)
        self.add_handle(RegexGrammar.r_ncback, self.ncback)
        self.add_handle(RegexGrammar.r_gref, self.gref)
        self.add_handle(RegexGrammar.r_ngref, self.ngref)

        self.add_handle(RegexGrammar.r_pipe, self.pipe)
        self.add_handle(RegexGrammar.r_char, self.char)
        self.add_handle(RegexGrammar.r_caret, self.caret)
        self.add_handle(RegexGrammar.r_dollar, self.dollar)

        self.add_handle(RegexGrammar.r_done, self.done)

    def build(self, tokens):
        """Build the parse tree and materialize it as a list."""
        tree = super(RegexParser, self).build(tokens)
        return list(tree)

    def word(self, escape, wsym):
        """Handler for the word metacharacter rule."""
        return Word()

    def metab(self, escape, wsym):
        """Handler for the \\b-style boundary metacharacter rule."""
        return Metab()

    def metaB(self, escape, wsym):
        """Handler for the \\B-style boundary metacharacter rule."""
        return MetaB()

    def nword(self, escape, wsym):
        """Handler for the non-word metacharacter rule."""
        return NotWord()

    def pipe(self, regex0, pipe, regex1):
        """Build an alternation node for regex0 | regex1."""
        data0 = [ind.val() for ind in regex0]
        data1 = [ind.val() for ind in regex1]

        pattern1 = data1[0]
        pattern0 = data0[0]

        if len(data1) > 1:
            pattern1 = Pattern(*data1)

        # If the left side is already an Any node, fold the right side
        # into it instead of nesting a new alternation.
        if isinstance(data0[0], Any):
            pattern0.args.append(pattern1)
        else:
            pattern0 = Any(Pattern(*data0), pattern1)
        return pattern0

    def reduce_pipe(self, regex0, regex1):
        """Combine two parsed sequences into a single alternation.

        NOTE(review): this method is not registered as a handler in
        __init__. The original body referenced undefined names
        (data1/pattern0) and would raise NameError; rebuilt here to
        mirror the structure of pipe().
        """
        data0 = [ind.val() for ind in regex0]
        data1 = [ind.val() for ind in regex1]
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return Any(pattern0, pattern1)

    def group(self, lp, regex, rp):
        """Handler for a capturing group: ( regex )."""
        data = (ind.val() for ind in regex)
        return Group(*data)

    def ngroup(self, lp, question, gsym, lesser, gname, greater, regex, rp):
        """Handler for a named capturing group."""
        data = (ind.val() for ind in regex)
        return NamedGroup(gname.val(), *data)

    def ncapture(self, lp, question, colon, regex, rp):
        """Handler for a non-capturing group: (?: regex )."""
        data = (ind.val() for ind in regex)
        return NonCapture(*data)

    def gref(self, escape, num):
        """Handler for a numeric group backreference."""
        return GLink(int(num.val()))

    def ngref(self, lp, question, gsym, equal, gname, rp):
        """Handler for a named group backreference."""
        return NGLink(gname.val())

    def escape(self, escape, char):
        """Handler for an escaped literal character.

        NOTE(review): its registration is commented out in __init__.
        """
        return char.val()

    def include(self, lb, string, rb):
        """Handler for an inclusive character class: [ ... ]."""
        # The class contents are lexed and parsed by a sub-parser.
        tokens = self.hclass_lexer.feed(string.val())
        tree = self.include_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def exclude(self, lb, caret, string, rb):
        """Handler for an exclusive character class: [^ ... ]."""
        tokens = self.hclass_lexer.feed(string.val())

        tree = self.exclude_set.build(tokens)
        tree = list(tree)[-1]
        return tree.val()

    def cnext(self, lp, question, lexer, equal, regex0, rp, regex1):
        """Handler for a lookbehind-style construct (ConsumeNext)."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1)

    def ncnext(self, lp, question, lexer, exlam, regex0, rp, regex1):
        """Negated variant of cnext."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeNext(pattern0, pattern1, neg=True)

    def cback(self, regex0, lp, question, equal, regex1, rp):
        """Handler for a lookahead-style construct (ConsumeBack)."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1)

    def ncback(self, regex0, lp, question, exlam, regex1, rp):
        """Negated variant of cback."""
        data0 = (ind.val() for ind in regex0)
        data1 = (ind.val() for ind in regex1)
        pattern0 = Pattern(*data0)
        pattern1 = Pattern(*data1)
        return ConsumeBack(pattern0, pattern1, neg=True)

    def dot(self, dot):
        """Handler for the '.' metacharacter."""
        return X()

    def caret(self, caret):
        """Handler for the '^' anchor."""
        return Caret()

    def dollar(self, caret):
        """Handler for the '$' anchor."""
        return Dollar()

    def times0(self, regex, lbr, min, comma, max, rbr):
        """Handler for regex{min,max}."""
        # min/max arrive as sequences of digit tokens; join them first.
        min = ''.join((ind.val() for ind in min))
        max = ''.join((ind.val() for ind in max))

        return Repeat(regex.val(), int(min), int(max))

    def times1(self, regex, lbr, num, rbr):
        """Handler for regex{num}: exactly num repetitions."""
        num = ''.join((ind.val() for ind in num))
        num = int(num)
        return Repeat(regex.val(), num, num)

    def times2(self, regex, lbr, min, comma, rbr):
        """Handler for regex{min,}: at least min repetitions."""
        min = ''.join((ind.val() for ind in min))

        return Repeat(regex.val(), int(min))

    def times3(self, regex, lbr, comma, max, rbr):
        """Handler for regex{,max}: at most max repetitions."""
        max = ''.join((ind.val() for ind in max))

        return Repeat(regex.val(), max=int(max))

    def times4(self, regex, mul):
        """Handler for regex*."""
        return ZeroOrMore(regex.val())

    def times5(self, regex, question):
        """Handler for regex?."""
        return OneOrZero(regex.val())

    def times6(self, regex, question):
        """Handler for regex+."""
        return OneOrMore(regex.val())

    def times7(self, regex, plus, question):
        """
        Greedy operators should behave alike non greedy in the context.
        Although the serialization has to be different.
        """
        return OneOrMore(regex.val(), greedy=True)

    def times8(self, regex, ask, question):
        """Greedy variant of times4 (regex*?)."""
        return ZeroOrMore(regex.val(), greedy=True)

    def times9(self, regex, question0, question1):
        """Greedy variant of times5 (regex??)."""
        return OneOrZero(regex.val(), greedy=True)

    def times10(self, regex, lbr, min, comma, max, rbr, question):
        """Greedy variant of times0 (regex{min,max}?)."""
        return Repeat(regex.val(), int(min.val()), int(max.val()), greedy=True)

    def times11(self, regex, lbr, min, comma, rbr, question):
        """Greedy variant of times2 (regex{min,}?)."""
        return Repeat(regex.val(), int(min.val()), greedy=True)

    def times12(self, regex, lbr, comma, max, rbr, question):
        """Greedy variant of times3 (regex{,max}?)."""
        return Repeat(regex.val(), max=int(max.val()), greedy=True)

    def times13(self, regex, lbr, num, rbr, question):
        """Greedy variant of times1 (regex{num}?)."""
        return Repeat(regex.val(), min=int(num.val()),
            max=int(num.val()), greedy=True)

    def char(self, char):
        """Handler for a literal character."""
        return RegexStr(char.val())

    def comment(self, lp, question, hash, comment, rp):
        """Handler for an inline comment: (?# ... )."""
        return RegexComment(comment.val())

    def done(self, sof, regex, eof):
        """Finish the parse: wrap the node sequence into one pattern."""
        data = [ind.val() for ind in regex]
        if len(data) > 1:
            return Pattern(*data)
        return data[0]
Example no. 5
0
"""
"""

from eacc.lexer import Lexer, LexSeq, LexTok, SeqTok, XSpec
from eacc.token import DoubleQuote, String, Blank


class StringTokens(XSpec):
    # LexSeq matches the whole quoted-string sequence in order: an
    # opening double quote, the non-quote content, a closing double quote.
    t_dquote = LexSeq(SeqTok(r'\"', DoubleQuote), SeqTok(r'[^\"]+', String),
                      SeqTok(r'\"', DoubleQuote))

    # Blanks between strings are tokenized (not discarded here).
    t_blank = LexTok(r' +', type=Blank)

    root = [t_dquote, t_blank]


# Build a lexer from the spec above and tokenize a sample input.
lex = Lexer(StringTokens)
print('Example 1!')
data = '" This will"       "rock!"     "For sure!"'
# feed() is lazy; listing the result drives the tokenization.
tokens = lex.feed(data)
print('Consumed:', list(tokens))
Example no. 6
0
from eacc.lexer import Lexer, LexTok, XSpec
from eacc.token import Char


class CharTokens(XSpec):
    # Every single character becomes one Char token.
    t_char = LexTok(r'.', Char)
    root = [t_char]


# Tokenize a short sample and show where each token matched.
data = 'abc'
lexer = Lexer(CharTokens)
tokens = lexer.feed(data)

# Each token records its start/end offsets in the input string.
for ind in tokens:
    print('%s\nStart:%s\nEnd:%s\n' % (ind, ind.start, ind.end))
Example no. 7
0
class TestRule(unittest.TestCase):
    """Integration tests for a small arithmetic calculator grammar.

    Results are cross-checked against Python's eval() of the same
    expression strings.
    """

    class CalcTokens(XSpec):
        t_plus = LexTok(r'\+', Plus)
        t_minus = LexTok(r'\-', Minus)

        t_lparen = LexTok(r'\(', LP)
        t_rparen = LexTok(r'\)', RP)
        t_mul = LexTok(r'\*', Mul)
        t_div = LexTok(r'\/', Div)

        # Number tokens are converted to float at lexing time.
        t_num = LexTok(r'[0-9]+', Num, float)
        t_blank = LexTok(r' +', Blank, discard=True)

        root = [
            t_plus, t_minus, t_lparen, t_num, t_blank, t_rparen, t_mul, t_div
        ]

    class CalcGrammar(Grammar):
        r_paren = Rule(LP, Num, RP, type=Num)
        r_div = Rule(Num, Div, Num, type=Num)
        r_mul = Rule(Num, Mul, Num, type=Num)
        o_div = Rule(Div)
        o_mul = Rule(Mul)

        # Addition/subtraction yield to a pending * or / on the right,
        # giving the usual operator precedence.
        r_plus = Rule(Num, Plus, Num, type=Num, up=(o_mul, o_div))
        r_minus = Rule(Num, Minus, Num, type=Num, up=(o_mul, o_div))

        r_done = Rule(Sof, Num, Eof)
        root = [r_paren, r_plus, r_minus, r_mul, r_div, r_done]

    def plus(self, expr, sign, term):
        """Evaluate expr + term."""
        return expr.val() + term.val()

    def minus(self, expr, sign, term):
        """Evaluate expr - term."""
        return expr.val() - term.val()

    def div(self, term, sign, factor):
        """Evaluate term / factor."""
        return term.val() / factor.val()

    def mul(self, term, sign, factor):
        """Evaluate term * factor."""
        return term.val() * factor.val()

    def paren(self, left, expression, right):
        """A parenthesized expression evaluates to its contents."""
        return expression.val()

    def done(self, sof, num, eof):
        """Whole input consumed; report and return the final value."""
        print('Result:', num.val())
        return num.val()

    def setUp(self):
        self.lexer = Lexer(self.CalcTokens)
        self.eacc = Eacc(self.CalcGrammar)

        # Link the handles to the patterns.
        self.eacc.add_handle(self.CalcGrammar.r_plus, self.plus)
        self.eacc.add_handle(self.CalcGrammar.r_minus, self.minus)
        self.eacc.add_handle(self.CalcGrammar.r_div, self.div)
        self.eacc.add_handle(self.CalcGrammar.r_mul, self.mul)
        self.eacc.add_handle(self.CalcGrammar.r_paren, self.paren)
        self.eacc.add_handle(self.CalcGrammar.r_done, self.done)

    def test0(self):
        data = '1+2/3*(3*2 - 1) /(1-1-2-3-1+2)*3/ (1 - 2)*10'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test1(self):
        data = '(1+2/3*(3*2 - 1)) + ((1 - 2)*10)'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test2(self):
        data = '((1+2/3*(3*2 - 1)) + ((1 - 2)*10))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test3(self):
        data = '(1/2) * (3/4) * (5/2/3/5/2*1)/((((((1))))))'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))

    def test4(self):
        # Two adjacent numbers with no operator: parsing must fail.
        data = '(1/2) * (3/4) * (5 2)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)
        ptree = self.eacc.build(tokens)

        with self.assertRaises(EaccError):
            ptree = list(ptree)

    def test6(self):
        # Was a second `test4` that shadowed the one above, so only one
        # of the two ever ran; renamed so both are collected.
        data = '(1/2) * 3/4) * (512)'
        tokens = self.lexer.feed(data)
        print('Expr:', data)
        ptree = self.eacc.build(tokens)

        with self.assertRaises(EaccError):
            ptree = list(ptree)

    def test5(self):
        data = '1+2*2/2 - 2/2 - 2*2/2+1'
        tokens = self.lexer.feed(data)
        ptree = self.eacc.build(tokens)
        ptree = list(ptree)
        print('Expr:', data)
        self.assertEqual(ptree[-1].val(), eval(data))