def disabledNegativeLookaheadDisambiguation(self):
    # NOTE(review): the name does not start with "test", so the unittest
    # runner skips this method. The 'banana' regex below is clearly a
    # placeholder for the real error message; confirm the expected trees
    # before enabling.
    tokenize = lexer.LexicalGrammar(
        '( ) { } ; function =',
        IDENT=r'[A-Za-z_][A-Za-z_0-9]*')
    grammar = Grammar({
        'stmts': [
            ['stmt'],
            ['stmts', 'stmt'],
        ],
        'stmt': [
            # Negative lookahead: an expression statement must not start
            # with `function`; that token starts a declaration instead.
            [LookaheadRule(set=frozenset({'function'}), positive=False),
             'expr', ';'],
            ['fndecl'],
        ],
        'fndecl': [
            ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'],
        ],
        'expr': [
            ['term'],
            ['IDENT', '=', 'expr'],
        ],
        'term': [
            ['(', 'expr', ')'],
            ['fndecl'],
            ['term', '(', 'expr', ')'],
        ],
    })
    parse = gen.compile(grammar)

    # Test that without the lookahead restriction, we reject this grammar
    # (it's ambiguous):
    del grammar['stmt'][0][0]
    self.assertRaisesRegex(ValueError,
                           'banana',
                           lambda: gen.compile(grammar))

    self.assertEqual(
        parse(tokenize, 'function f() { x = function y() {}; }'),
        # NOTE(review): the expected outer 'fndecl' tuple appears to be
        # missing a trailing '}' element (compare the inner 'fndecl',
        # which ends with '}') — confirm before enabling this test.
        ('stmt', 1,
         ('fndecl', 'function', 'f', '(', ')', '{',
          ('stmt', 0,
           ('expr', 1, 'x', '=',
            ('expr', 0,
             ('term', 1,
              ('fndecl', 'function', 'y', '(', ')', '{', None, '}')))),
           ';'))))

    self.assertEqual(
        parse(tokenize, '(function g(){});'),
        ('stmts', 0,
         ('stmt', 0,
          ('term', 1,
           ('fndecl', 'function', 'g', '(', ')', '{', None, '}')),
          ';')))
def testLookaheadWithCanonicalLR(self):
    """Only a lookahead assertion makes this grammar unambiguous."""
    lex = lexer.LexicalGrammar("async => { } ;", Identifier=r'\w+')
    productions = {
        "script": [["Expression", ";"]],
        "Expression": [
            ["PrimaryExpression"],
            ["async", "Identifier", "=>", "AsyncConciseBody"],
        ],
        # Without the negative lookahead, a `{` after `=>` could be read
        # either as the start of a PrimaryExpression or as a block body.
        "AsyncConciseBody": [
            [LookaheadRule(set=frozenset(["{"]), positive=False),
             "Expression"],
            ["{", "}"],
        ],
        "PrimaryExpression": [["{", "}"]],
    }
    self.compile(lex, Grammar(productions))
    for text in ("{};", "async x => {};", "async x => async y => {};"):
        self.assertParse(text)
def testLookaheadBeforeOptional(self):
    """A positive lookahead constrains the next token even when the
    symbol immediately after it is Optional."""
    lex = lexer.LexicalGrammar(
        '= : _',
        PUBLIC=r'public\b',
        IDENT=r'[a-z]+\b',
        NUM=r'[0-9]\b')
    decl_production = [
        LookaheadRule(frozenset({'IDENT'}), True),
        Optional('attrs'),
        'pat', '=', 'NUM',
    ]
    g = Grammar({
        'decl': [decl_production],
        'attrs': [['attr'], ['attrs', 'attr']],
        'attr': [['PUBLIC', ':'], ['IDENT', ':']],
        'pat': [['IDENT'], ['_']],
    })
    self.compile(lex, g)
    self.assertEqual(self.parse("x = 0"), ("decl", None, "x", "=", "0"))
    self.assertParse("thread: x = 0")
    # The first token must be an IDENT, so PUBLIC and `_` can't lead...
    self.assertNoParse("public: x = 0",
                       message="expected 'IDENT', got 'PUBLIC'")
    self.assertNoParse("_ = 0", message="expected 'IDENT', got '_'")
    # ...unless an IDENT-led attribute comes first.
    self.assertParse("funny: public: x = 0")
    self.assertParse("funny: _ = 0")
def testNegativeLookahead(self):
    """A negative lookahead rejects inputs whose first token matches."""
    tokenize = lexer.LexicalGrammar('a b')
    rules = {
        'goal': [[LookaheadRule(frozenset({'a'}), False), 'abs']],
        'abs': [['a'], ['b'], ['abs', 'a'], ['abs', 'b']],
    }
    parse = gen.compile(Grammar(rules))
    # Input starting with `a` violates the [lookahead not in {a}] rule.
    self.assertRaisesRegex(SyntaxError,
                           r"expected 'b', got 'a'",
                           lambda: parse(tokenize, "a b"))
    self.assertEqual(parse(tokenize, 'b a'),
                     ('goal', ('abs 2', 'b', 'a')))

    # In simple cases like this, the lookahead restriction can even
    # disambiguate a grammar that would otherwise be ambiguous.
    rules['goal'].append(prod(['a'], 'goal_a'))
    parse = gen.compile(Grammar(rules))
    self.assertEqual(parse(tokenize, 'a'), ('goal_a', 'a'))
def testLookaheadDisambiguation(self):
    """A lookahead restriction should be able to rule out certain
    nonterminals entirely."""
    productions = {
        'Script': [['Statement'], ['Statement', 'Statement']],
        'Statement': [
            # An expression statement may not begin with `function`.
            [LookaheadRule(frozenset({'function'}), False),
             'Expression', ';'],
            ['Function'],
        ],
        'Function': [['function', 'x', '(', ')', '{', '}']],
        'Expression': [['Primary'], ['++', 'Primary'], ['Primary', '++']],
        'Primary': [['Function'], ['x']],
    }
    self.compile(lexer.LexicalGrammar("function x ( ) { } ++ ;"),
                 Grammar(productions))
    self.assertParse("function x() {}")
    self.assertParse("++function x() {};")
    self.assertNoParse("++function x() {}", message="unexpected end")
    self.assertNoParse("function x() {}++;", message="got ';'")
    self.assertParse("function x() {} ++x;")
def testTrailingLookahead(self):
    """A lookahead restriction at the end of a production resolves the
    dangling-else ambiguity: ELSE binds to the nearest IF.

    (The docstring previously said trailing lookahead "is banned", but
    this test compiles the grammar and asserts successful parses.)
    """
    tokenize = lexer.LexicalGrammar('IF ( X ) ELSE OTHER ;')
    grammar = gen.Grammar({
        'goal': [['stmt']],
        'stmt': [
            ['OTHER', ';'],
            # The else-less IF is only allowed when ELSE is not the next
            # token, so an ELSE always attaches to the innermost IF.
            ['IF', '(', 'X', ')', 'stmt',
             LookaheadRule(frozenset({'ELSE'}), False)],
            ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'],
        ],
    })

    # Shorthands for building the expected parse trees.
    def stmt_0():
        return ('stmt_0', 'OTHER', ';')

    def stmt_1(t):
        return ('stmt_1', 'IF', '(', 'X', ')', t)

    def stmt_2(t, e):
        return ('stmt_2', 'IF', '(', 'X', ')', t, 'ELSE', e)

    self.compile(tokenize, grammar)
    self.assertParse('IF(X) OTHER;', stmt_1(stmt_0()))
    self.assertParse('IF(X) OTHER; ELSE OTHER;',
                     stmt_2(stmt_0(), stmt_0()))
    self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER; ELSE OTHER;',
                     stmt_2(stmt_2(stmt_0(), stmt_0()), stmt_0()))
    self.assertParse('IF(X) OTHER; ELSE IF(X) OTHER; ELSE OTHER;',
                     stmt_2(stmt_0(), stmt_2(stmt_0(), stmt_0())))
    # Dangling else: the single ELSE belongs to the inner IF, so the
    # outer IF uses the else-less production (stmt_1).
    self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER;',
                     stmt_1(stmt_2(stmt_0(), stmt_0())))
def testNegativeLookahead(self):
    """A negative lookahead rejects inputs whose first token matches."""
    tokenize = lexer.LexicalGrammar('a b')
    rules = {
        'goal': [[LookaheadRule(frozenset({'a'}), False), 'abs']],
        'abs': [['a'], ['b'], ['abs', 'a'], ['abs', 'b']],
    }
    self.compile(tokenize, Grammar(rules))
    # Input starting with `a` violates the [lookahead not in {a}] rule.
    self.assertNoParse("a b", message="expected 'b', got 'a'")
    self.assertParse('b a', ('goal', ('abs_2', 'b', 'a')))

    # In simple cases like this, the lookahead restriction can even
    # disambiguate a grammar that would otherwise be ambiguous.
    rules['goal'].append(prod(['a'], 'goal_a'))
    self.compile(tokenize, Grammar(rules))
    self.assertParse('a', ('goal_a', 'a'))
def testTrailingLookahead(self):
    """Lookahead at the end of a production is banned."""
    bad_grammar = gen.Grammar({
        'stmt': [
            ['OTHER', ';'],
            # Ends with a lookahead restriction — not allowed.
            ['IF', '(', 'X', ')', 'stmt',
             LookaheadRule(frozenset({'ELSE'}), False)],
            ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'],
        ],
    })
    # Compiling must fail with a clear diagnostic.
    self.assertRaisesRegex(
        ValueError,
        r"invalid grammar: lookahead restriction at end of production",
        lambda: gen.compile(bad_grammar))
def testForLookahead(self):
    """A lookahead restriction in the middle of a production: the token
    after `for (` must not be `let`."""
    productions = {
        'Stmt': [[';'], ['ForStmt']],
        'ForStmt': [
            ["for", "(", LookaheadRule(frozenset({"let"}), False),
             "Expr", ";", ";", ")", "Stmt"],
        ],
        'Expr': [["0"], ["let"]],
    }
    self.compile(lexer.LexicalGrammar("for ( let ; ) 0"),
                 Grammar(productions))
    self.assertParse("for (0;;) ;")
    # `let` is a valid Expr, but the lookahead bans it in this position.
    self.assertNoParse("for (let;;) ;", message="expected '0', got 'let'")
def testPositiveLookahead(self):
    """A positive lookahead restricts which tokens may start `expr`."""
    lex = lexer.LexicalGrammar('A B + ( )')
    g = Grammar({
        'goal': [[LookaheadRule(frozenset({'A', 'B'}), True), 'expr']],
        'expr': [['term'], ['expr', '+', 'term']],
        'term': [['A'], ['B'], ['(', 'expr', ')']],
    })
    self.compile(lex, g)
    # '(' can start a term, but the lookahead at `goal` rules it out.
    self.assertNoParse("(A)", message="expected one of ['A', 'B'], got '('")
    self.assertParse("A + B")
def testLookaheadDisambiguation(self):
    """A lookahead restriction should be able to rule out certain
    nonterminals entirely."""
    grammar = Grammar({
        'Script': [
            ['Statement'],
            ['Statement', 'Statement'],
        ],
        'Statement': [
            # An expression statement may not begin with `function`.
            [LookaheadRule(frozenset({'function'}), False),
             'Expression', ';'],
            ['Function'],
        ],
        'Function': [
            ['function', 'x', '(', ')', '{', '}'],
        ],
        'Expression': [
            ['Primary'],
            ['++', 'Primary'],
            ['Primary', '++'],
        ],
        'Primary': [
            ['Function'],
            ['x'],
        ],
    })
    self.compile(lexer.LexicalGrammar("function x ( ) { } ++ ;"), grammar)
    self.assertParse("function x() {}")
    self.assertParse("++function x() {};")
    self.assertNoParse("++function x() {}", message="unexpected end")
    # TODO: The parser generator fails to handle this case because it does
    # not forward the restriction from producing a Function to the
    # Primary rule. Therefore, `Function [lookahead: ;]` is incorrectly
    # reduced to a `Primary [lookahead: ;]`.
    # self.assertNoParse("function x() {}++;", message="got ';'")
    self.assertParse("function x() {} ++x;")