Example #1
    def testNegativeLookahead(self):
        tokenize = lexer.LexicalGrammar('a b')
        rules = {
            'goal': [
                [LookaheadRule(frozenset({'a'}), False), 'abs'],
            ],
            'abs': [
                ['a'],
                ['b'],
                ['abs', 'a'],
                ['abs', 'b'],
            ],
        }

        parse = gen.compile(Grammar(rules))
        self.assertRaisesRegex(SyntaxError,
                               r"expected 'b', got 'a'",
                               lambda: parse(tokenize, "a b"))
        self.assertEqual(
            parse(tokenize, 'b a'),
            ('goal', ('abs 2', 'b', 'a'))
        )

        # In simple cases like this, the lookahead restriction can even
        # disambiguate a grammar that would otherwise be ambiguous.
        rules['goal'].append(prod(['a'], 'goal_a'))
        parse = gen.compile(Grammar(rules))
        self.assertEqual(
            parse(tokenize, 'a'),
            ('goal_a', 'a')
        )
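A note on the conventions visible in these assertions (inferred from the output, not from separate documentation): each parse result is a tuple whose first element names the nonterminal and the index of the production that matched, so ('abs 2', 'b', 'a') is production 2 of 'abs', i.e. ['abs', 'a']; a nonterminal with only one production, like 'goal' before the append, is tagged with its bare name. The prod(['a'], 'goal_a') helper appears to attach an explicit name to the new production, which is why the final assertion yields ('goal_a', 'a') rather than an indexed tag.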
Example #2
    def disabledNegativeLookaheadDisambiguation(self):
        tokenize = lexer.LexicalGrammar(
            '( ) { } ; function =',
            IDENT=r'[A-Za-z_][A-Za-z_0-9]*')
        grammar = Grammar({
            'stmts': [
                ['stmt'],
                ['stmts', 'stmt'],
            ],
            'stmt': [
                [LookaheadRule(set=frozenset({'function'}), positive=False),
                 'expr', ';'],
                ['fndecl'],
            ],
            'fndecl': [
                ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'],
            ],
            'expr': [
                ['term'],
                ['IDENT', '=', 'expr'],
            ],
            'term': [
                ['(', 'expr', ')'],
                ['fndecl'],
                ['term', '(', 'expr', ')'],
            ],
        })
        parse = gen.compile(grammar)

        # Test that without the lookahead restriction, we reject this grammar
        # as ambiguous: a statement beginning with `function` could be either
        # a function declaration or the start of an expression statement.
        del grammar['stmt'][0][0]
        self.assertRaisesRegex(ValueError,
                               'banana',
                               lambda: gen.compile(grammar))

        self.assertEqual(
            parse(tokenize, 'function f() { x = function y() {}; }'),
            ('stmt', 1,
                ('fndecl',
                    'function', 'f', '(', ')', '{',
                    ('stmt', 0,
                        ('expr', 1,
                            'x',
                            '=',
                            ('expr', 0,
                                ('term', 1,
                                    ('fndecl',
                                        'function', 'y', '(', ')',
                                        '{', None, '}')))),
                        ';'))))

        self.assertEqual(
            parse(tokenize, '(function g(){});'),
            ('stmts', 0,
                ('stmt', 0,
                    ('term', 1,
                        ('fndecl',
                            'function', 'g', '(', ')', '{', None, '}')),
                    ';')))
Example #3
 def testList(self):
     list_grammar = Grammar({
         'prelist': [
             ['word', 'list']
         ],
         'list': [
             ['word'],
             ['list', 'word'],
         ],
         'word': [
             ['SYMBOL']
         ],
     })
     parse = gen.compile(list_grammar)
     self.assertEqual(
         parse(LispTokenizer,
               "the quick brown fox jumped over the lazy dog"),
         ('prelist',
             'the',
             ('list 1',
                 ('list 1',
                     ('list 1',
                         ('list 1',
                             ('list 1',
                                 ('list 1',
                                     ('list 1',
                                         'quick',
                                         'brown'),
                                     'fox'),
                                 'jumped'),
                             'over'),
                         'the'),
                     'lazy'),
                 'dog')))
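The shape of this tree falls out of the left recursion in 'list' ('list' -> 'list' 'word'): each reduction wraps the list parsed so far as the left child and appends one word on the right, so the earliest words end up deepest in the nesting and the final word, 'dog', sits at the top.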
Example #4
    def testDeepRecursion(self):
        grammar = Grammar({
            'expr': [
                ['SYMBOL'],
                ['(', ')'],
                ['(', 'exprs', ')'],
            ],
            'exprs': [
                ['expr'],
                ['exprs', 'expr'],
            ],
        })
        parse = gen.compile(grammar)

        N = 3000
        s = "x"
        t = ('expr 0', 'x')
        for i in range(N):
            s = "(" + s + ")"
            t = ('expr 2', '(', t, ')')

        result = parse(LispTokenizer, s)

        # Comparing result == t directly raises RecursionError, and comparing
        # repr(result) == repr(t) has the same problem. So unroll the check
        # (an iterative alternative is sketched after this example):
        for i in range(N):
            self.assertIsInstance(result, tuple)
            self.assertEqual(len(result), 4)
            self.assertEqual(result[0], 'expr 2')
            self.assertEqual(result[1], '(')
            self.assertEqual(result[3], ')')
            result = result[2]
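Since the nesting here runs strictly along result[2], the hand-unrolled loop above is sufficient. A more general alternative, sketched below, is an explicit-stack comparison helper; this is not part of the original test, but it avoids RecursionError for any nesting shape:

    def assertDeepTupleEqual(self, actual, expected):
        # Hypothetical helper: compare nested tuples iteratively rather than
        # recursively, so deeply nested parse trees don't exhaust the stack.
        stack = [(actual, expected)]
        while stack:
            a, b = stack.pop()
            if isinstance(a, tuple) and isinstance(b, tuple):
                self.assertEqual(len(a), len(b))
                stack.extend(zip(a, b))
            else:
                self.assertEqual(a, b)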
Example #5
    def testSimple(self):
        grammar = Grammar({
            'expr': [
                ['SYMBOL'],
                ['(', 'tail'],
            ],
            'tail': [
                [')'],
                ['expr', 'tail'],
            ],
        })
        parse = gen.compile(grammar)

        parsed = parse(LispTokenizer, "(lambda (x) (* x x))")
        self.assertEqual(
            parsed,
            ('expr 1',
                '(',
                ('tail 1',
                    'lambda',
                    ('tail 1',
                        ('expr 1', '(', ('tail 1', 'x', ')')),
                        ('tail 1',
                            ('expr 1',
                                '(',
                                ('tail 1',
                                    '*',
                                    ('tail 1',
                                        'x',
                                        ('tail 1', 'x', ')')))),
                            ')')))))
Example #6
    def testLeftFactorMultiLevel(self):
        """Test left-factoring again on a nonterminal introduced by
        left-factoring."""
        tokenize = lexer.LexicalGrammar("FOR IN TO BY ( ) = ;",
                                        VAR=r'[A-Za-z]+')

        # The first left-factoring pass on `stmt` will left-factor `FOR ( VAR`.
        # A second pass is needed to left-factor `= expr TO expr`. (A sketch
        # of the factored result follows this example.)
        grammar = Grammar({
            'stmt': [
                ['expr', ';'],
                ['FOR', '(', 'VAR', 'IN', 'expr', ')', 'stmt'],
                ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', ')', 'stmt'],
                ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr',
                 'BY', 'expr', ')', 'stmt'],
                ['IF', '(', 'expr', ')', 'stmt'],
            ],
            'expr': [
                ['VAR'],
            ],
        })
        parse = gen.compile(grammar)
        self.assertEqual(
            parse(tokenize, "FOR (x IN y) z;"),
            ('stmt 1', 'FOR', '(', 'x', 'IN', 'y', ')',
             ('stmt 0', 'z', ';')))
        self.assertEqual(
            parse(tokenize, "FOR (x = y TO z) x;"),
            ('stmt 2', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', ')',
             ('stmt 0', 'x', ';')))
        self.assertEqual(
            parse(tokenize, "FOR (x = y TO z BY w) x;"),
            ('stmt 3', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', 'BY', 'w', ')',
             ('stmt 0', 'x', ';')))
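To make the two passes concrete, the factored grammar looks roughly like the sketch below (hypothetical nonterminal names; the generator's internal names will differ):

    stmt      -> expr ';'
               | 'FOR' '(' 'VAR' for_tail
               | 'IF' '(' expr ')' stmt
    for_tail  -> 'IN' expr ')' stmt
               | '=' expr 'TO' expr for_tail2
    for_tail2 -> ')' stmt
               | 'BY' expr ')' stmt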
Example #7
 def testTrailingLookahead(self):
     """Lookahead at the end of a production is banned."""
     grammar = gen.Grammar({
         'stmt': [
             ['OTHER', ';'],
             ['IF', '(', 'X', ')', 'stmt',
              LookaheadRule(frozenset({'ELSE'}), False)],
             ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'],
         ],
     })
     self.assertRaisesRegex(
         ValueError,
         r"invalid grammar: lookahead restriction at end of production",
         lambda: gen.compile(grammar))
Example #8
 def testLeftFactorMulti(self):
     """Test left-factoring with common prefix of length >1."""
     tokenize = lexer.LexicalGrammar("A B C D E")
     grammar = Grammar({
         'goal': [
             ['A', 'B', 'C', 'D'],
             ['A', 'B', 'C', 'E'],
         ],
     })
     parse = gen.compile(grammar)
     self.assertEqual(
         parse(tokenize, "A B C D"),
         ('goal 0', 'A', 'B', 'C', 'D'))
     self.assertEqual(
         parse(tokenize, "A B C E"),
         ('goal 1', 'A', 'B', 'C', 'E'))
Example #9
    def compile_as_js(
        self,
        grammar_source: str,
        goals: typing.Optional[typing.Iterable[str]] = None,
        verbose: bool = False,
    ) -> None:
        """Like self.compile(), but generate a parser from ESGrammar,
        with ASI support, using the JS lexer.
        """
        from js_parser.lexer import JSLexer
        from js_parser import load_es_grammar
        from js_parser import generate_js_parser_tables

        grammar = parse_esgrammar(
            grammar_source,
            filename="es-simplified.esgrammar",
            extensions=[],
            goals=goals,
            synthetic_terminals=load_es_grammar.ECMASCRIPT_SYNTHETIC_TERMINALS,
            terminal_names=load_es_grammar.TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR
        )
        grammar = generate_js_parser_tables.hack_grammar(grammar)
        base_parser_class = gen.compile(grammar, verbose=verbose)

        # "type: ignore" because poor mypy can't cope with the runtime codegen
        # we're doing here.
        class JSParser(base_parser_class):  # type: ignore
            def __init__(self, goal='Script', builder=None):
                super().__init__(goal, builder)
                self._goal = goal
                # self.debug = True

            def clone(self):
                return JSParser(self._goal, self.methods)

            def on_recover(self, error_code, lexer, stv):
                """Check that ASI error recovery is really acceptable."""
                if error_code == 'asi':
                    if (not self.closed and stv.term != '}'
                            and not lexer.saw_line_terminator()):
                        lexer.throw("missing semicolon")
                else:
                    assert error_code == 'do_while_asi'

        self.tokenize = JSLexer
        self.parser_class = JSParser
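A rough sketch of how a test might drive the classes produced by compile_as_js (the grammar constant ES_GRAMMAR_FRAGMENT, the goal, and the lexer's write/close calls are assumptions for illustration, not taken from this file):

    def testAsiSketch(self):
        # Hypothetical usage; ES_GRAMMAR_FRAGMENT and the lexer method names
        # are assumptions, not part of the real test harness.
        self.compile_as_js(ES_GRAMMAR_FRAGMENT, goals=['Script'])
        parser = self.parser_class(goal='Script')
        lexer_obj = self.tokenize(parser)
        lexer_obj.write('x = 1\ny = 2')  # ASI should terminate each statement
        result = lexer_obj.close()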
Example #10
    def testArithmetic(self):
        tokenize = lexer.LexicalGrammar(
            "+ - * / ( )",
            NUM=r'[0-9]\w*',
            VAR=r'[A-Za-z]\w*')
        arith_grammar = Grammar({
            'expr': [
                ['term'],
                ['expr', '+', 'term'],
                ['expr', '-', 'term'],
            ],
            'term': [
                ['prim'],
                ['term', '*', 'prim'],
                ['term', '/', 'prim'],
            ],
            'prim': [
                ['NUM'],
                ['VAR'],
                ['(', 'expr', ')'],
            ],
        })
        parse = gen.compile(arith_grammar)

        self.assertEqual(
            parse(tokenize, '2 * 3 + 4 * (5 + 7)'),
            ('expr 1',
                ('term 1', '2', '*', '3'),
                '+',
                ('term 1',
                    '4',
                    '*',
                    ('prim 2',
                        '(',
                        ('expr 1', '5', '+', '7'),
                        ')'))))

        self.assertRaisesRegex(
            SyntaxError,
            r"unexpected end of input",
            lambda: parse(tokenize, "("))
        self.assertRaisesRegex(
            SyntaxError,
            r"expected one of \['\(', 'NUM', 'VAR'], got '\)'",
            lambda: parse(tokenize, ")"))
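The expr/term/prim layering is what encodes operator precedence: '*' and '/' are introduced one level below '+' and '-', so they bind tighter, and parentheses climb back up to 'expr' via 'prim 2'. By analogy with the result above (a sketch, not a verified expected value), an input like "1 + 2 * 3" would reduce to roughly:

    ('expr 1', '1', '+', ('term 1', '2', '*', '3'))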
Example #11
 def testOptionalEmpty(self):
     tokenize = lexer.LexicalGrammar("X Y")
     grammar = Grammar({
         'a': [
             [Optional('b'), Optional('c')],
         ],
         'b': [
             prod(['X'], 'b'),
         ],
         'c': [
             prod(['Y'], 'c'),
         ]
     })
     parse = gen.compile(grammar)
     self.assertEqual(parse(tokenize, ""), ('a', None, None))
     self.assertEqual(parse(tokenize, "X"), ('a', ('b', 'X'), None))
     self.assertEqual(parse(tokenize, "Y"), ('a', None, ('c', 'Y')))
     self.assertEqual(parse(tokenize, "X Y"), ('a', ('b', 'X'), ('c', 'Y')))
Example #12
 def testOptional(self):
     tokenize = lexer.LexicalGrammar('[ ] , X')
     grammar = Grammar({
         'array': [
             ['[', Optional('elision'), ']'],
             ['[', 'elements', ']'],
             ['[', 'elements', ',', Optional('elision'), ']']
         ],
         'elements': [
             [Optional('elision'), 'X'],
             ['elements', ',', Optional('elision'), 'X']
         ],
         'elision': [
             [','],
             ['elision', ',']
         ]
     })
     parse = gen.compile(grammar)
     self.assertEqual(parse(tokenize, "[]"),
                      ('array 0', '[', None, ']'))
     self.assertEqual(parse(tokenize, "[,]"),
                      ('array 0', '[', ',', ']'))
     self.assertEqual(
         parse(tokenize, "[,,X,,X,]"),
         ('array 2',
             '[',
             ('elements 1',
                 ('elements 0',
                     ('elision 1',
                         ',',
                         ','),
                     'X'),
                 ',',
                 ',',
                 'X'),
             ',',
             None,
             ']'))
Example #13
 def compile(self, tokenize, grammar):
     """Compile a grammar. Use this when you expect compilation to
     succeed."""
     self.tokenize = tokenize
     self.parse = gen.compile(grammar)
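For context, a test using this helper might look like the sketch below (the grammar, input, and assertion are invented for illustration; only self.tokenize and self.parse come from the helper):

 def testViaCompileHelper(self):
     # Hypothetical test; by analogy with Example #1, a single-production
     # 'goal' should yield a tuple tagged with the bare nonterminal name.
     self.compile(lexer.LexicalGrammar("A B"),
                  Grammar({'goal': [['A', 'B']]}))
     result = self.parse(self.tokenize, "A B")
     self.assertEqual(result[0], 'goal')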
Example #14
    # prose not wrapped in square brackets
    # To avoid conflict with the `>` token, this is recognized only after a space.
    PROSE=r'(?<= )>[^\n]*',

    # prose wrapped in square brackets
    WPROSE=r'\[>[^]]*\]',

    # expression denoting a matched terminal or nonterminal
    MATCH_REF=r'\$(?:0|[1-9][0-9]*)',

    # Rust-style line comments
    RUSTCOMMENT=r'//.*\n',
)

ESGrammarParser = gen.compile(
    parse_pgen.load_grammar(
        os.path.join(os.path.dirname(__file__), "esgrammar.pgen")))

SIGIL_FALSE = '~'
SIGIL_TRUE = '+'

# Abbreviations for single-character terminals, used in the lexical grammar.
ECMASCRIPT_CODE_POINTS = {
    # From <https://tc39.es/ecma262/#table-31>
    '<ZWNJ>': grammar.Literal('\u200c'),
    '<ZWJ>': grammar.Literal('\u200d'),
    '<ZWNBSP>': grammar.Literal('\ufeff'),

    # From <https://tc39.es/ecma262/#table-32>
    '<TAB>': grammar.Literal('\t'),
    '<VT>': grammar.Literal('\u000b'),
Example #15
    # nonterminals wrapped in vertical bars for no apparent reason
    NTALT=r'\|[A-Z]\w+\|',

    # the spec also gives names to a few productions
    PRODID=r'#[A-Za-z]\w*',

    # prose to the end of the line
    PROSE=r'>.*',

    # prose wrapped in square brackets
    WPROSE=r'\[>[^]]*\]',
)

parse_esgrammar_generic = gen.compile(
    parse_pgen.load_grammar(
        os.path.join(os.path.dirname(__file__), "esgrammar.pgen")))

SIGIL_FALSE = '~'
SIGIL_TRUE = '+'

# Productions like
#
#     Expression : AssignmentExpression
#     PrimaryExpression : ArrayLiteral
#     Statement : IfStatement
#
# should not cause an extra method call; the action for each of these
# productions should be `$0`, i.e. just return the right-hand side unchanged.
# Then type inference will make sure that the two nonterminals (Statement and
# IfStatement, for example) are given the same type.