Ejemplo n.º 1
0
    def test_multiline_pattern(self):
        """Multi-line /.../ patterns must survive codegen with their embedded newlines."""
        # Pattern spread across several grammar lines: the literal newlines
        # become part of the regex source handed to self._pattern().
        grammar = r'''
            start =
            /(?x)
            foo
            bar
            / $ ;
        '''
        model = compile(grammar=trim(grammar))
        print(codegen(model.rules[0].exp.sequence[0]))
        self.assertEqual(
            codegen(model.rules[0].exp.sequence[0]),
            urepr("self._pattern('(?x)\nfoo\nbar\n')").strip('"\'')
        )

        # Mixed case: an escaped \n written in the pattern source plus a real
        # newline in the grammar text.
        grammar = r'''
            start =
            /(?x)foo\nbar
            blort/ $ ;
        '''
        model = compile(grammar=trim(grammar))
        print(codegen(model.rules[0].exp.sequence[0]))
        self.assertEqual(
            trim(codegen(model.rules[0].exp.sequence[0])),
            # NOTE(review): strip(r'"\.') strips quote, backslash, and dot
            # characters -- unlike strip('"\'') above; possibly unintended,
            # confirm against urepr()'s output format.
            urepr("self._pattern('(?x)foo\\nbar\nblort')").strip(r'"\.')
        )
Ejemplo n.º 2
0
    def test_right_join(self):
        """A right join (op)>{number}+ must produce a right-nested AST."""
        grammar = r'''
            start
                =
                (op)>{number}+ $
                ;


            op
                =
                '+' | '-'
                ;


            number
                =
                /\d+/
                ;
        '''
        source = '1 + 2 - 3 + 4'

        model = compile(grammar, "test")
        # Round-trip: rendering the model must reproduce the trimmed grammar.
        self.assertEqual(trim(grammar).strip(), str(model).strip())
        codegen(model)

        # Right associativity: nesting grows toward the right operand.
        result = model.parse(source)
        self.assertEqual(('+', '1', ('-', '2', ('+', '3', '4'))), result)
Ejemplo n.º 3
0
    def test_parseinfo_directive(self):
        """The @@parseinfo directive toggles parseinfo on parsed ASTs and in codegen."""
        grammar = '''
            @@parseinfo
            @@parseinfo :: True

            test = value:"test" $;
        '''
        model = tatsu.compile(grammar, "test")
        ast = model.parse("test")
        self.assertIsNotNone(ast.parseinfo)

        code = codegen(model)
        # assertIn reports the haystack on failure, unlike assertTrue(x in y).
        self.assertIn('parseinfo=True', code)
        compile(code, 'test.py', EXEC)

        grammar = '''
            @@parseinfo :: False

            test = value:"test" $;
        '''
        model = tatsu.compile(grammar, "test")
        ast = model.parse("test")
        self.assertIsNone(ast.parseinfo)

        code = codegen(model)
        self.assertIn('parseinfo=False', code)
        compile(code, 'test.py', EXEC)
Ejemplo n.º 4
0
 def test_empty_closure(self):
     """An empty closure {} in a sequence must contribute an empty list to the AST."""
     grammar = '''
         start = {'x'}+ {} 'y'$;
     '''
     parser_model = compile(grammar, "test")
     codegen(parser_model)
     result = parser_model.parse("xxxy", nameguard=False)
     self.assertEqual([['x', 'x', 'x'], [], 'y'], result)
Ejemplo n.º 5
0
    def test_normal_join(self):
        """The % join operator keeps separators in the AST and allows zero items."""
        grammar = '''
            start = ','%{'x' 'y'} 'z' ;
        '''

        model = compile(grammar, "test")
        codegen(model)

        # Two joined groups: the ',' separator appears between them.
        result = model.parse("x y, x y z", nameguard=False)
        self.assertEqual(([['x', 'y'], ',', ['x', 'y']], 'z'), result)

        # A single group: no separator in the result.
        result = model.parse("x y z", nameguard=False)
        self.assertEqual(([['x', 'y']], 'z'), result)

        # Zero repetitions are accepted by a non-positive join.
        result = model.parse("z", nameguard=False)
        self.assertEqual(([], 'z'), result)
Ejemplo n.º 6
0
    def test_check_keywords(self):
        """Names declared with @@keyword must be rejected by @name rules."""
        # NOTE(review): the stdlib 'parser' module was deprecated in 3.9 and
        # removed in Python 3.10 -- this test cannot run on newer interpreters.
        import parser

        # Raw string: keeps the /\w+/ regex free of invalid-escape warnings.
        grammar = r'''
            @@keyword :: A

            start = {id}+ $ ;

            @name
            id = /\w+/ ;
        '''
        model = compile(grammar, 'test')
        c = codegen(model)
        parser.suite(c)

        ast = model.parse('hello world')
        self.assertEqual(['hello', 'world'], ast)

        try:
            ast = model.parse("hello A world")
            self.assertEqual(['hello', 'A', 'world'], ast)
            self.fail('accepted keyword as name')
        except FailedParse as e:
            # assertIn reports both strings on failure.
            self.assertIn('"A" is a reserved word', str(e))
Ejemplo n.º 7
0
    def test_sparse_keywords(self):
        """@@keyword directives scattered through the grammar all take effect."""
        grammar = r'''
            @@keyword :: A

            @@ignorecase :: False

            start = {id}+ $ ;

            @@keyword :: B

            @name
            id = /\w+/ ;
        '''
        model = compile(grammar, 'test', trace=False, colorize=True)
        generated = codegen(model)
        parse(generated)

        result = model.parse('hello world')
        self.assertEqual(['hello', 'world'], result)

        # Every declared keyword must be refused as a rule @name.
        for keyword in ('A', 'B'):
            try:
                result = model.parse("hello %s world" % keyword)
                self.assertEqual(['hello', keyword, 'world'], result)
                self.fail('accepted keyword "%s" as name' % keyword)
            except FailedParse as e:
                self.assertTrue('"%s" is a reserved word' % keyword in str(e))
Ejemplo n.º 8
0
    def test_36_unichars(self):
        """Unicode rule parameters must survive parsing, semantics, and codegen."""
        grammar = '''
            start = { rule_positional | rule_keywords | rule_all }* $ ;

            rule_positional("ÄÖÜäöüß") = 'a' ;

            rule_keywords(k1='äöüÄÖÜß') = 'b' ;

            rule_all('ßÄÖÜäöü', k1="ßäöüÄÖÜ") = 'c' ;
        '''

        def _trydelete(pymodule):
            # Best-effort removal of generated module artifacts; a single
            # loop replaces the three copy-pasted try/except blocks.
            import os
            for suffix in (".py", ".pyc", ".pyo"):
                try:
                    os.unlink(pymodule + suffix)
                except EnvironmentError:
                    pass

        def assert_equal(target, value):
            # Bridge so the semantics class can use unittest assertions.
            self.assertEqual(target, value)

        class UnicharsSemantics(object):
            """Check all rule parameters for expected types and values"""
            def rule_positional(self, ast, p1):
                assert_equal("ÄÖÜäöüß", p1)
                return ast

            # NOTE(review): the grammar rule is named 'rule_keywords' but this
            # method is 'rule_keyword', so it is never invoked -- confirm.
            def rule_keyword(self, ast, k1):
                assert_equal("äöüÄÖÜß", k1)
                return ast

            def rule_all(self, ast, p1, k1):
                assert_equal("ßÄÖÜäöü", p1)
                assert_equal("ßäöüÄÖÜ", k1)
                return ast

        m = compile(grammar, "UnicodeRuleArguments")
        ast = m.parse("a b c")
        self.assertEqual(['a', 'b', 'c'], ast)

        semantics = UnicharsSemantics()
        ast = m.parse("a b c", semantics=semantics)
        self.assertEqual(['a', 'b', 'c'], ast)

        # Write the generated parser and import it to prove it is valid code.
        code = codegen(m)
        import codecs
        with codecs.open("tc36unicharstest.py", "w", "utf-8") as f:
            f.write(code)
        import tc36unicharstest  # noqa: F401 -- importing is the assertion
        _trydelete("tc36unicharstest")
Ejemplo n.º 9
0
    def test_define_keywords(self):
        """Keyword declarations must survive a grammar render/recompile round-trip."""
        grammar = '''
            @@keyword :: B C
            @@keyword :: 'A'

            start = ('a' 'b').{'x'}+ ;
        '''
        first_model = compile(grammar, "test")
        parse(codegen(first_model))

        regenerated = str(first_model)
        second_model = compile(regenerated, "test")
        parse(codegen(second_model))

        # The rendered grammar must be a fixed point.
        self.assertEqual(regenerated, str(second_model))
Ejemplo n.º 10
0
    def test_whitespace_directive(self):
        """An @@whitespace regex directive must yield compilable parser code."""
        grammar = '''
            @@whitespace :: /[\t ]+/

            test = "test" $;
        '''
        model = tatsu.compile(grammar, "test")
        code = codegen(model)
        # builtin compile() signature is (source, filename, mode); the original
        # call had source and filename swapped, so it compiled the literal
        # string 'test.py' instead of the generated code.
        compile(code, 'test.py', EXEC)
Ejemplo n.º 11
0
    def test_eol_comments_re_directive(self):
        """An @@eol_comments regex directive must yield compilable parser code."""
        grammar = '''
            @@eol_comments :: /#.*?$/

            test = "test" $;
        '''
        parser_model = tatsu.compile(grammar, "test")
        generated = codegen(parser_model)
        compile(generated, 'test.py', EXEC)
Ejemplo n.º 12
0
    def test_failed_ref(self):
        """A grammar with many cross-references must compile and parse cleanly."""
        # Raw string: the grammar contains /[^\s=()]+/ style regexes whose \s
        # would otherwise be an invalid escape sequence in a plain literal
        # (same runtime value, but warning-free).
        grammar = r"""
            final = object;
            type = /[^\s=()]+/;
            object = '('type')' '{' @:{pair} {',' @:{pair}}* [','] '}';
            pair = key '=' value;
            list = '('type')' '[' @:{object} {',' @:{object}}* [','] ']';
            key = /[^\s=]+/;
            value = @:(string|list|object|unset|boolean|number|null) [','];
            null = '('type')' @:{ 'null' };
            boolean = /(true|false)/;
            unset = '<unset>';
            string = '"' @:/[^"]*/ '"';
            number = /-?[0-9]+/;
        """

        model = compile(grammar, "final")
        codegen(model)
        model.parse('(sometype){boolean = true}')
Ejemplo n.º 13
0
    def test_group_join(self):
        """A group expression may serve as the separator of a % join."""
        grammar = '''
            start = ('a' 'b')%{'x'}+ ;
        '''
        parser_model = compile(grammar, "test")
        parse(codegen(parser_model))

        # The group separator is kept in the AST as a nested list.
        result = parser_model.parse("x a b x", nameguard=False)
        self.assertEqual(['x', ['a', 'b'], 'x'], result)
Ejemplo n.º 14
0
 def test_keyword_params(self):
     """Rules with keyword parameters must still generate a runnable parser."""
     grammar = '''
         start(k1=1, k2=2)
             =
             {'a'} $
             ;
     '''
     generator = GrammarGenerator('Keywords')
     parsed_model = generator.parse(grammar)
     rendered = codegen(parsed_model)
     # Generated modules start with the standard shebang line.
     self.assertEqual('#!/usr/bin/env python', rendered.splitlines()[0])
Ejemplo n.º 15
0
    def test_group_gather(self):
        """Gather (.) with a group separator drops the separator from the AST."""
        grammar = '''
            start = ('a' 'b').{'x'}+ ;
        '''
        parser_model = compile(grammar, "test")
        generated = codegen(parser_model)
        # NOTE(review): the stdlib 'parser' module was removed in Python 3.10.
        import parser
        parser.suite(generated)

        result = parser_model.parse("x a b x", nameguard=False)
        self.assertEqual(['x', 'x'], result)
Ejemplo n.º 16
0
    def test_left_recursion_directive(self):
        """@@left_recursion :: False must disable left recursion in the model."""
        grammar = '''
            @@left_recursion :: False

            test = "test" $;
        '''
        model = tatsu.compile(grammar, "test")
        self.assertFalse(model.directives.get('left_recursion'))
        self.assertFalse(model.config.left_recursion)

        code = codegen(model)
        # builtin compile() signature is (source, filename, mode); the original
        # call had source and filename swapped, so it compiled the literal
        # string 'test.py' instead of the generated code.
        compile(code, 'test.py', EXEC)
Ejemplo n.º 17
0
    def test_positive_gather(self):
        """Positive gather needs at least one item; alternatives allow emptiness."""
        grammar = '''
            start = ','.{'x' 'y'}+ ;
        '''

        grammar2 = '''
            start = (','.{'x'}+|{}) ;
        '''

        grammar3 = '''
            start = [','.{'x'}+] ;
        '''

        model = compile(grammar, "test")
        codegen(model)
        ast = model.parse("x y, x y", nameguard=False)
        self.assertEqual([['x', 'y'], ['x', 'y']], ast)
        # Without the separator only the first group is gathered.
        ast = model.parse("x y x y", nameguard=False)
        self.assertEqual([['x', 'y']], ast)
        # assertRaises replaces the try/fail/except idiom and reports the
        # unexpected-success case with the given message.
        with self.assertRaises(FailedParse, msg='closure not positive'):
            model.parse("y x", nameguard=False)

        # Alternative with an empty closure accepts zero matches.
        model = compile(grammar2, "test")
        ast = model.parse("y x", nameguard=False)
        self.assertEqual([], ast)
        ast = model.parse("x", nameguard=False)
        self.assertEqual(['x'], ast)
        ast = model.parse("x,x", nameguard=False)
        self.assertEqual(['x', 'x'], ast)

        # Optional gather yields None when it matches nothing.
        model = compile(grammar3, "test")
        ast = model.parse("y x", nameguard=False)
        self.assertIsNone(ast)
Ejemplo n.º 18
0
    def test_grammar_directive(self):
        """@@grammar sets the model name and the generated parser class name."""
        grammar = '''
            @@grammar :: Test

            start = test $;
            test = "test";
        '''
        model = tatsu.compile(grammar=grammar)
        self.assertEqual('Test', model.directives.get('grammar'))
        self.assertEqual('Test', model.name)

        code = codegen(model)
        module = compile(code, 'test.py', EXEC)

        # assertIn is not stripped under `python -O` (a bare assert is) and
        # reports the searched names on failure.
        self.assertIn('TestParser', module.co_names)
Ejemplo n.º 19
0
Archivo: test.py Proyecto: kkoci/TatSu
def main():
    """Translate a regex into a grammar, exercise it, and save the generated parser."""
    grammar = regex_parser.translate('(a|b)*')
    compiled = tatsu.compile(grammar, 'Regexp')

    # Positive and negative parses against the 'S0' start rule.
    compiled.parse('aaabbaba', 'S0')
    try:
        compiled.parse('aaaCbbaba', 'S0')
        raise Exception('Should not have parsed!')
    except tatsu.exceptions.FailedParse:
        pass

    print('Grammar:', file=sys.stderr)
    print(grammar)
    sys.stdout.flush()

    generated = codegen(compiled)
    with open(PARSER_FILENAME, 'w') as f:
        f.write(generated)
    print('Generated parser saved as:', PARSER_FILENAME, file=sys.stderr)
    print(file=sys.stderr)
Ejemplo n.º 20
0
    def test_36_param_combinations(self):
        """Positional, keyword, and mixed rule parameters must round-trip and
        reach semantics methods with the expected types and values."""
        def assert_equal(target, value):
            # Bridge so the semantics class can use unittest assertions.
            self.assertEqual(target, value)

        class TC36Semantics:

            """Check all rule parameters for expected types and values"""

            def rule_positional(self, ast, p1, p2, p3, p4):
                # Numeric literals arrive as ints, strings as str.
                assert_equal("ABC", p1)
                assert_equal(123, p2)
                assert_equal('=', p3)
                assert_equal("+", p4)
                return ast

            # NOTE(review): the grammar rule is 'rule_keywords' but this
            # method is 'rule_keyword', so it appears never to be invoked --
            # confirm against the semantics dispatch rules.
            def rule_keyword(self, ast, k1, k2, k3, k4):
                assert_equal("ABC", k1)
                assert_equal(123, k2)
                assert_equal('=', k3)
                assert_equal('+', k4)
                return ast

            def rule_all(self, ast, p1, p2, p3, p4, k1, k2, k3, k4):
                assert_equal("DEF", p1)
                assert_equal(456, p2)
                assert_equal('=', p3)
                assert_equal("+", p4)
                assert_equal("HIJ", k1)
                assert_equal(789, k2)
                assert_equal('=', k3)
                assert_equal('+', k4)
                return ast

        grammar = '''
            @@ignorecase::False
            @@nameguard

            start
                = {rule_positional | rule_keywords | rule_all} $ ;
            rule_positional('ABC', 123, '=', '+')
                = 'a' ;
            rule_keywords(k1=ABC, k3='=', k4='+', k2=123)
                = 'b' ;
            rule_all('DEF', 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789)
                = 'c' ;
        '''

        # Expected pretty-printed rendering of the grammar above; note the
        # normalization (quotes dropped from simple names, directives spaced).
        pretty = '''
            @@ignorecase :: False
            @@nameguard :: True

            start
                =
                {rule_positional | rule_keywords | rule_all} $
                ;


            rule_positional(ABC, 123, '=', '+')
                =
                'a'
                ;


            rule_keywords(k1=ABC, k3='=', k4='+', k2=123)
                =
                'b'
                ;


            rule_all(DEF, 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789)
                =
                'c'
                ;
        '''

        model = compile(grammar, 'RuleArguments')
        self.assertEqual(trim(pretty), str(model))
        # The pretty form must itself be a valid grammar.
        model = compile(pretty, 'RuleArguments')

        ast = model.parse("a b c")
        self.assertEqual(['a', 'b', 'c'], ast)
        semantics = TC36Semantics()
        ast = model.parse("a b c", semantics=semantics)
        self.assertEqual(['a', 'b', 'c'], ast)
        codegen(model)
Ejemplo n.º 21
0
    def test_bootstrap(self):
        """End-to-end bootstrap: parse the TatSu grammar with itself through
        successive phases (parse, regenerate, re-parse, codegen, pickle, walk),
        asserting each regeneration is a fixed point.  Artifacts go to ./tmp.
        """
        print()

        # Fresh ./tmp working directory (detected via a phase-00 artifact).
        if os.path.isfile('./tmp/00.ast'):
            shutil.rmtree('./tmp')
        if not os.path.isdir('./tmp'):
            os.mkdir('./tmp')
        print('-' * 20, 'phase 00 - parse using the bootstrap grammar')
        with open('grammar/tatsu.ebnf') as f:
            text = str(f.read())
        g = EBNFParser('EBNFBootstrap')
        grammar0 = g.parse(text)
        ast0 = json.dumps(asjson(grammar0), indent=2)
        with open('./tmp/00.ast', 'w') as f:
            f.write(ast0)

        print('-' * 20, 'phase 01 - parse with parser generator')
        with open('grammar/tatsu.ebnf') as f:
            text = str(f.read())
        g = GrammarGenerator('EBNFBootstrap')
        g.parse(text)

        generated_grammar1 = str(g.ast['start'])
        with open('./tmp/01.ebnf', 'w') as f:
            f.write(generated_grammar1)

        print('-' * 20, 'phase 02 - parse previous output with the parser generator')
        with open('./tmp/01.ebnf') as f:
            text = str(f.read())
        g = GrammarGenerator('EBNFBootstrap')
        g.parse(text)
        generated_grammar2 = str(g.ast['start'])
        with open('./tmp/02.ebnf', 'w') as f:
            f.write(generated_grammar2)
        # Regeneration must be a fixed point.
        self.assertEqual(generated_grammar2, generated_grammar1)

        print('-' * 20, 'phase 03 - repeat')
        with open('./tmp/02.ebnf') as f:
            text = f.read()
        g = EBNFParser('EBNFBootstrap')
        ast3 = g.parse(text)
        with open('./tmp/03.ast', 'w') as f:
            f.write(json.dumps(asjson(ast3), indent=2))

        print('-' * 20, 'phase 04 - repeat')
        with open('./tmp/02.ebnf') as f:
            text = f.read()
        g = GrammarGenerator('EBNFBootstrap')
        g.parse(text)
        parser = g.ast['start']
    #    pprint(parser.first_sets, indent=2, depth=3)
        generated_grammar4 = str(parser)
        with open('./tmp/04.ebnf', 'w') as f:
            f.write(generated_grammar4)
        self.assertEqual(generated_grammar4, generated_grammar2)

        print('-' * 20, 'phase 05 - parse using the grammar model')
        with open('./tmp/04.ebnf') as f:
            text = f.read()
        ast5 = parser.parse(text)
        with open('./tmp/05.ast', 'w') as f:
            f.write(json.dumps(asjson(ast5), indent=2))

        print('-' * 20, 'phase 06 - generate parser code')
        gencode6 = codegen(parser)
        with open('./tmp/g06.py', 'w') as f:
            f.write(gencode6)

        print('-' * 20, 'phase 07 - import generated code')
        # Compile only; importing is left disabled below.
        py_compile.compile('./tmp/g06.py', doraise=True)
        # g06 = __import__('g06')
        # GenParser = g06.EBNFBootstrapParser

        # print('-' * 20, 'phase 08 - compile using generated code')
        # parser = GenParser(trace=False)
        # result = parser.parse(
        #     text,
        #     'start',
        #     comments_re=COMMENTS_RE,
        #     eol_comments_re=EOL_COMMENTS_RE
        # )
        # self.assertEqual(result, parser.ast['start'])
        # ast8 = parser.ast['start']
        # json8 = json.dumps(asjson(ast8), indent=2)
        # open('./tmp/08.ast', 'w').write(json8)
        # self.assertEqual(ast5, ast8)

        print('-' * 20, 'phase 09 - Generate parser with semantics')
        with open('grammar/tatsu.ebnf') as f:
            text = f.read()
        parser = GrammarGenerator('EBNFBootstrap')
        g9 = parser.parse(text)
        generated_grammar9 = str(g9)
        with open('./tmp/09.ebnf', 'w') as f:
            f.write(generated_grammar9)
        self.assertEqual(generated_grammar9, generated_grammar1)

        print('-' * 20, 'phase 10 - Parse with a model using a semantics')
        g10 = g9.parse(
            text,
            start_rule='start',
            semantics=EBNFGrammarSemantics('EBNFBootstrap')
        )
        generated_grammar10 = str(g10)
        with open('./tmp/10.ebnf', 'w') as f:
            f.write(generated_grammar10)
        gencode10 = codegen(g10)
        with open('./tmp/g10.py', 'w') as f:
            f.write(gencode10)

        print('-' * 20, 'phase 11 - Pickle the model and try again.')
        # The model must survive pickling and still parse with semantics.
        with open('./tmp/11.tatsu', 'wb') as f:
            pickle.dump(g10, f, protocol=2)
        with open('./tmp/11.tatsu', 'rb') as f:
            g11 = pickle.load(f)
        r11 = g11.parse(
            text,
            start_rule='start',
            semantics=EBNFGrammarSemantics('EBNFBootstrap')
        )
        with open('./tmp/11.ebnf', 'w') as f:
            f.write(str(g11))
        gencode11 = codegen(r11)
        with open('./tmp/g11.py', 'w') as f:
            f.write(gencode11)

        print('-' * 20, 'phase 12 - Walker')

        class PrintNameWalker(DepthFirstWalker):
            # Records the class name of every node visited, in walk order.
            def __init__(self):
                self.walked = []

            def walk_default(self, o, children):
                self.walked.append(o.__class__.__name__)

        v = PrintNameWalker()
        v.walk(g11)
        with open('./tmp/12.txt', 'w') as f:
            f.write('\n'.join(v.walked))

        # note: pygraphviz not yet updated
        if sys.version_info >= (3, 7):
            return

        print('-' * 20, 'phase 13 - Graphics')
        try:
            from tatsu.diagrams import draw
        except ImportError:
            print('PyGraphViz not found!')
        else:
            if not util.PY37:
                draw('./tmp/13.png', g11)