    def testNegativeLookahead(self):
        tokenize = lexer.LexicalGrammar('a b')
        rules = {
            'goal': [
                [LookaheadRule(frozenset({'a'}), False), 'abs'],
            ],
            'abs': [
                ['a'],
                ['b'],
                ['abs', 'a'],
                ['abs', 'b'],
            ],
        }

        parse = gen.compile(Grammar(rules))
        self.assertRaisesRegex(SyntaxError,
                               r"expected 'b', got 'a'",
                               lambda: parse(tokenize, "a b"))
        self.assertEqual(
            parse(tokenize, 'b a'),
            ('goal', ('abs 2', 'b', 'a')))

        # In simple cases like this, the lookahead restriction can even
        # disambiguate a grammar that would otherwise be ambiguous.
        rules['goal'].append(prod(['a'], 'goal_a'))
        parse = gen.compile(Grammar(rules))
        self.assertEqual(
            parse(tokenize, 'a'),
            ('goal_a', 'a'))
    def disabledNegativeLookaheadDisambiguation(self):
        tokenize = lexer.LexicalGrammar(
            '( ) { } ; function =',
            IDENT=r'[A-Za-z_][A-Za-z_0-9]*')
        grammar = Grammar({
            'stmts': [
                ['stmt'],
                ['stmts', 'stmt'],
            ],
            'stmt': [
                [LookaheadRule(set=frozenset({'function'}), positive=False),
                 'expr', ';'],
                ['fndecl'],
            ],
            'fndecl': [
                ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'],
            ],
            'expr': [
                ['term'],
                ['IDENT', '=', 'expr'],
            ],
            'term': [
                ['(', 'expr', ')'],
                ['fndecl'],
                ['term', '(', 'expr', ')'],
            ],
        })
        parse = gen.compile(grammar)

        # Test that without the lookahead restriction, we reject this grammar
        # (it's ambiguous):
        del grammar['stmt'][0][0]
        self.assertRaisesRegex(ValueError,
                               'banana',
                               lambda: gen.compile(grammar))

        self.assertEqual(
            parse(tokenize, 'function f() { x = function y() {}; }'),
            ('stmt', 1,
             ('fndecl', 'function', 'f', '(', ')', '{',
              ('stmt', 0,
               ('expr', 1, 'x', '=',
                ('expr', 0,
                 ('term', 1,
                  ('fndecl', 'function', 'y', '(', ')', '{', None, '}')))),
               ';'))))

        self.assertEqual(
            parse(tokenize, '(function g(){});'),
            ('stmts', 0,
             ('stmt', 0,
              ('term', 1,
               ('fndecl', 'function', 'g', '(', ')', '{', None, '}')),
              ';')))
    def testList(self):
        list_grammar = Grammar({
            'prelist': [
                ['word', 'list'],
            ],
            'list': [
                ['word'],
                ['list', 'word'],
            ],
            'word': [
                ['SYMBOL'],
            ],
        })
        parse = gen.compile(list_grammar)
        self.assertEqual(
            parse(LispTokenizer, "the quick brown fox jumped over the lazy dog"),
            ('prelist', 'the',
             ('list 1',
              ('list 1',
               ('list 1',
                ('list 1',
                 ('list 1',
                  ('list 1',
                   ('list 1', 'quick', 'brown'),
                   'fox'),
                  'jumped'),
                 'over'),
                'the'),
               'lazy'),
              'dog')))
    def testDeepRecursion(self):
        grammar = Grammar({
            'expr': [
                ['SYMBOL'],
                ['(', ')'],
                ['(', 'exprs', ')'],
            ],
            'exprs': [
                ['expr'],
                ['exprs', 'expr'],
            ],
        })
        parse = gen.compile(grammar)

        N = 3000
        s = "x"
        t = ('expr 0', 'x')
        for i in range(N):
            s = "(" + s + ")"
            t = ('expr 2', '(', t, ')')

        result = parse(LispTokenizer, s)

        # Python can't check that result == t; it causes a RecursionError.
        # Testing that repr(result) == repr(t), same deal. So:
        for i in range(N):
            self.assertIsInstance(result, tuple)
            self.assertEqual(len(result), 4)
            self.assertEqual(result[0], 'expr 2')
            self.assertEqual(result[1], '(')
            self.assertEqual(result[3], ')')
            result = result[2]
    def testSimple(self):
        grammar = Grammar({
            'expr': [
                ['SYMBOL'],
                ['(', 'tail'],
            ],
            'tail': [
                [')'],
                ['expr', 'tail'],
            ],
        })
        parse = gen.compile(grammar)

        parsed = parse(LispTokenizer, "(lambda (x) (* x x))")
        self.assertEqual(
            parsed,
            ('expr 1', '(',
             ('tail 1', 'lambda',
              ('tail 1',
               ('expr 1', '(', ('tail 1', 'x', ')')),
               ('tail 1',
                ('expr 1', '(',
                 ('tail 1', '*',
                  ('tail 1', 'x',
                   ('tail 1', 'x', ')')))),
                ')')))))
    def testLeftFactorMultiLevel(self):
        """Test left-factoring again on a nonterminal introduced by left-factoring."""
        tokenize = lexer.LexicalGrammar("FOR IN TO BY ( ) = ;",
                                        VAR=r'[A-Za-z]+')

        # The first left-factoring pass on `stmt` will left-factor `FOR ( VAR`.
        # A second pass is needed to left-factor `= expr TO expr`.
        grammar = Grammar({
            'stmt': [
                ['expr', ';'],
                ['FOR', '(', 'VAR', 'IN', 'expr', ')', 'stmt'],
                ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', ')', 'stmt'],
                ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr',
                 'BY', 'expr', ')', 'stmt'],
                ['IF', '(', 'expr', ')', 'stmt'],
            ],
            'expr': [
                ['VAR'],
            ],
        })
        parse = gen.compile(grammar)

        self.assertEqual(
            parse(tokenize, "FOR (x IN y) z;"),
            ('stmt 1', 'FOR', '(', 'x', 'IN', 'y', ')',
             ('stmt 0', 'z', ';')))
        self.assertEqual(
            parse(tokenize, "FOR (x = y TO z) x;"),
            ('stmt 2', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', ')',
             ('stmt 0', 'x', ';')))
        self.assertEqual(
            parse(tokenize, "FOR (x = y TO z BY w) x;"),
            ('stmt 3', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', 'BY', 'w', ')',
             ('stmt 0', 'x', ';')))
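    # For reference, a rough sketch of what two rounds of left-factoring could
    # produce for the FOR productions above. This is illustrative only; the
    # generator's actual internal nonterminal names and ordering will differ.
    #
    #   stmt     ::= expr ';'
    #              | 'FOR' '(' 'VAR' for_tail
    #              | 'IF' '(' expr ')' stmt
    #   for_tail ::= 'IN' expr ')' stmt            # factored out: FOR ( VAR
    #              | '=' expr 'TO' expr to_tail
    #   to_tail  ::= ')' stmt                      # factored out: = expr TO expr
    #              | 'BY' expr ')' stmt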
    def testTrailingLookahead(self):
        """Lookahead at the end of a production is banned."""
        grammar = gen.Grammar({
            'stmt': [
                ['OTHER', ';'],
                ['IF', '(', 'X', ')', 'stmt',
                 LookaheadRule(frozenset({'ELSE'}), False)],
                ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'],
            ],
        })
        self.assertRaisesRegex(
            ValueError,
            r"invalid grammar: lookahead restriction at end of production",
            lambda: gen.compile(grammar))
    def testLeftFactorMulti(self):
        """Test left-factoring with common prefix of length >1."""
        tokenize = lexer.LexicalGrammar("A B C D E")
        grammar = Grammar({
            'goal': [
                ['A', 'B', 'C', 'D'],
                ['A', 'B', 'C', 'E'],
            ],
        })
        parse = gen.compile(grammar)
        self.assertEqual(
            parse(tokenize, "A B C D"),
            ('goal 0', 'A', 'B', 'C', 'D'))
        self.assertEqual(
            parse(tokenize, "A B C E"),
            ('goal 1', 'A', 'B', 'C', 'E'))
    def compile_as_js(
            self,
            grammar_source: str,
            goals: typing.Optional[typing.Iterable[str]] = None,
            verbose: bool = False,
    ) -> None:
        """Like self.compile(), but generate a parser from ESGrammar,
        with ASI support, using the JS lexer.
        """
        from js_parser.lexer import JSLexer
        from js_parser import load_es_grammar
        from js_parser import generate_js_parser_tables

        grammar = parse_esgrammar(
            grammar_source,
            filename="es-simplified.esgrammar",
            extensions=[],
            goals=goals,
            synthetic_terminals=load_es_grammar.ECMASCRIPT_SYNTHETIC_TERMINALS,
            terminal_names=load_es_grammar.TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR)
        grammar = generate_js_parser_tables.hack_grammar(grammar)
        base_parser_class = gen.compile(grammar, verbose=verbose)

        # "type: ignore" because poor mypy can't cope with the runtime codegen
        # we're doing here.
        class JSParser(base_parser_class):  # type: ignore
            def __init__(self, goal='Script', builder=None):
                super().__init__(goal, builder)
                self._goal = goal
                # self.debug = True

            def clone(self):
                return JSParser(self._goal, self.methods)

            def on_recover(self, error_code, lexer, stv):
                """Check that ASI error recovery is really acceptable."""
                if error_code == 'asi':
                    if (not self.closed and stv.term != '}'
                            and not lexer.saw_line_terminator()):
                        lexer.throw("missing semicolon")
                else:
                    assert error_code == 'do_while_asi'

        self.tokenize = JSLexer
        self.parser_class = JSParser
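    # Illustrative example of the ASI check in on_recover() above; the JS
    # snippets are hypothetical test inputs, not part of this file.
    #
    #     x = f()        // a line terminator precedes the next token, so the
    #     g()            // 'asi' recovery is allowed: parses as two statements.
    #
    #     x = f() g()    // no line terminator, and the next token is not '}',
    #                    // so the lexer throws "missing semicolon" instead.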
    def testArithmetic(self):
        tokenize = lexer.LexicalGrammar(
            "+ - * / ( )",
            NUM=r'[0-9]\w*',
            VAR=r'[A-Za-z]\w*')
        arith_grammar = Grammar({
            'expr': [
                ['term'],
                ['expr', '+', 'term'],
                ['expr', '-', 'term'],
            ],
            'term': [
                ['prim'],
                ['term', '*', 'prim'],
                ['term', '/', 'prim'],
            ],
            'prim': [
                ['NUM'],
                ['VAR'],
                ['(', 'expr', ')'],
            ],
        })
        parse = gen.compile(arith_grammar)

        self.assertEqual(
            parse(tokenize, '2 * 3 + 4 * (5 + 7)'),
            ('expr 1',
             ('term 1', '2', '*', '3'),
             '+',
             ('term 1', '4', '*',
              ('prim 2', '(', ('expr 1', '5', '+', '7'), ')'))))

        self.assertRaisesRegex(
            SyntaxError,
            r"unexpected end of input",
            lambda: parse(tokenize, "("))
        self.assertRaisesRegex(
            SyntaxError,
            r"expected one of \['\(', 'NUM', 'VAR'], got '\)'",
            lambda: parse(tokenize, ")"))
    def testOptionalEmpty(self):
        tokenize = lexer.LexicalGrammar("X Y")
        grammar = Grammar({
            'a': [
                [Optional('b'), Optional('c')],
            ],
            'b': [
                prod(['X'], 'b'),
            ],
            'c': [
                prod(['Y'], 'c'),
            ],
        })
        parse = gen.compile(grammar)
        self.assertEqual(parse(tokenize, ""), ('a', None, None))
        self.assertEqual(parse(tokenize, "X"), ('a', ('b', 'X'), None))
        self.assertEqual(parse(tokenize, "Y"), ('a', None, ('c', 'Y')))
        self.assertEqual(parse(tokenize, "X Y"), ('a', ('b', 'X'), ('c', 'Y')))
    def testOptional(self):
        tokenize = lexer.LexicalGrammar('[ ] , X')
        grammar = Grammar({
            'array': [
                ['[', Optional('elision'), ']'],
                ['[', 'elements', ']'],
                ['[', 'elements', ',', Optional('elision'), ']'],
            ],
            'elements': [
                [Optional('elision'), 'X'],
                ['elements', ',', Optional('elision'), 'X'],
            ],
            'elision': [
                [','],
                ['elision', ','],
            ],
        })
        parse = gen.compile(grammar)
        self.assertEqual(parse(tokenize, "[]"),
                         ('array 0', '[', None, ']'))
        self.assertEqual(parse(tokenize, "[,]"),
                         ('array 0', '[', ',', ']'))
        self.assertEqual(
            parse(tokenize, "[,,X,,X,]"),
            ('array 2',
             '[',
             ('elements 1',
              ('elements 0',
               ('elision 1', ',', ','),
               'X'),
              ',',
              ',',
              'X'),
             ',',
             None,
             ']'))
    def compile(self, tokenize, grammar):
        """Compile a grammar. Use this when you expect compilation to succeed."""
        self.tokenize = tokenize
        self.parse = gen.compile(grammar)
    # prose not wrapped in square brackets
    # To avoid conflict with the `>` token, this is recognized only after a space.
    PROSE=r'(?<= )>[^\n]*',

    # prose wrapped in square brackets
    WPROSE=r'\[>[^]]*\]',

    # expression denoting a matched terminal or nonterminal
    MATCH_REF=r'\$(?:0|[1-9][0-9]*)',

    # Rust-style line comments
    RUSTCOMMENT=r'//.*\n',
)

ESGrammarParser = gen.compile(
    parse_pgen.load_grammar(
        os.path.join(os.path.dirname(__file__), "esgrammar.pgen")))

SIGIL_FALSE = '~'
SIGIL_TRUE = '+'

# Abbreviations for single-character terminals, used in the lexical grammar.
ECMASCRIPT_CODE_POINTS = {
    # From <https://tc39.es/ecma262/#table-31>
    '<ZWNJ>': grammar.Literal('\u200c'),
    '<ZWJ>': grammar.Literal('\u200d'),
    '<ZWNBSP>': grammar.Literal('\ufeff'),

    # From <https://tc39.es/ecma262/#table-32>
    '<TAB>': grammar.Literal('\t'),
    '<VT>': grammar.Literal('\u000b'),
    # nonterminals wrapped in vertical bars for no apparent reason
    NTALT=r'\|[A-Z]\w+\|',

    # the spec also gives a few productions names
    PRODID=r'#[A-Za-z]\w*',

    # prose to the end of the line
    PROSE=r'>.*',

    # prose wrapped in square brackets
    WPROSE=r'\[>[^]]*\]',
)

parse_esgrammar_generic = gen.compile(
    parse_pgen.load_grammar(
        os.path.join(os.path.dirname(__file__), "esgrammar.pgen")))

SIGIL_FALSE = '~'
SIGIL_TRUE = '+'

# Productions like
#
#     Expression : AssignmentExpression
#     PrimaryExpression : ArrayLiteral
#     Statement : IfStatement
#
# should not cause an extra method call; the action for each of these
# productions should be `$0`, i.e. just return the right-hand side unchanged.
# Then type inference will make sure that the two nonterminals (Statement and
# IfStatement, for example) are given the same type.
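# Worked example of the rule above (illustrative grammar notation, not output
# from this module): in a chain such as
#
#     Statement : IfStatement
#     IfStatement : `if` `(` Expression `)` Statement
#
# only the second production triggers a builder method call. The first gets
# the pass-through action `$0`, so the IfStatement node produced below it is
# reused unchanged in Statement position, and type inference assigns Statement
# and IfStatement the same type.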