def testExpandOptional(self):
    grammar = Grammar({'goal': [[]]})
    empties = {}
    # Unit test for gen.expand_optional_symbols_in_rhs
    self.assertEqual(
        list(gen.expand_optional_symbols_in_rhs(
            ['ONE', 'TWO', '3'], grammar, empties)),
        [(['ONE', 'TWO', '3'], {})])
    self.assertEqual(
        list(gen.expand_optional_symbols_in_rhs(
            ['a', 'b', Optional('c')], grammar, empties)),
        [(['a', 'b'], {2: None}),
         (['a', 'b', 'c'], {})])
    self.assertEqual(
        list(gen.expand_optional_symbols_in_rhs(
            [Optional('a'), Optional('b')], grammar, empties)),
        [([], {0: None, 1: None}),
         (['a'], {1: None}),
         (['b'], {0: None}),
         (['a', 'b'], {})])
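# A sketch of the pattern in the expected values above (inferred from the
# assertions, not documented behavior): an RHS containing k Optional elements
# expands into 2**k variants, each paired with a dict mapping the original
# RHS index of every omitted element to None. For example, assuming the same
# `grammar` and `empties` as above:
#
#     list(gen.expand_optional_symbols_in_rhs(
#         ['x', Optional('y')], grammar, empties))
#     # => [(['x'], {1: None}), (['x', 'y'], {})]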
def testAmbiguousEmpty(self):
    """Reject grammars that are ambiguous due to empty productions.

    (Empty productions are ones that match the empty string.)"""

    def check(rules):
        grammar = Grammar(rules, goal_nts=['goal'])
        out = io.StringIO()
        self.assertRaisesRegex(
            ValueError,
            r"ambiguous grammar|reduce-reduce conflict",
            lambda: gen.generate_parser(out, grammar))

    check({'goal': [[], []]})
    check({'goal': [[Optional('X')], []]})
    check({'goal': [[Optional('X')], [Optional('Y')]]})
    check({'goal': [[Optional('X'), Optional('Y')], [Optional('Z')]]})

    # Issue #3: This also has an ambiguity; the empty string matches either
    # `goal ::= [empty]` or `goal ::= phrase, phrase ::= [empty]`.
    check({
        'goal': [[Optional('phrase')]],
        'phrase': [[Optional('X')]],
    })

    # The input "X" is ambiguous: it could parse as
    # ('goal', ('a', None), ('a', 'X')), or the other 'a' could be the
    # one that's missing.
    check({
        'goal': [['a', 'a']],
        'a': [[Optional('X')]],
    })
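# All of the grammars rejected above share one trait: the empty string has
# more than one derivation. With `goal ::= [empty] | X?`, for instance, an
# empty input can be reduced either by the first alternative or by the second
# with X omitted; that is exactly the reduce-reduce conflict named in the
# error regex.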
def testMultiGoal(self):
    tokenize = lexer.LexicalGrammar("WHILE DEF FN { } ( ) -> ;", ID=r'\w+')
    grammar = Grammar({
        "stmt": [
            ["expr", ";"],
            ["{", "stmts", "}"],
            ["WHILE", "(", "expr", ")", "stmt"],
            ["DEF", "ID", "(", "ID", ")", "{", Optional("stmts"), "}"],
        ],
        "stmts": [
            ["stmt"],
            ["stmts", "stmt"],
        ],
        "expr": [
            ["FN", "ID", "->", "expr"],
            ["call_expr"],
        ],
        "call_expr": [
            ["ID"],
            ["call_expr", "(", "expr", ")"],
            ["(", "expr", ")"],
        ],
    }, goal_nts=["stmts", "expr"])
    self.compile_multi(tokenize, grammar)
    self.assertParse("WHILE ( x ) { decx ( x ) ; }", goal="stmts")
    self.assertNoParse(
        "WHILE ( x ) { decx ( x ) ; }", goal="expr",
        message="expected one of ['(', 'FN', 'ID'], got 'WHILE'")
    self.assertParse("f(x);", goal="stmts")
    self.assertNoParse("f(x);", goal="expr",
                       message="expected 'end of input', got ';'")
    self.assertParse("(FN x -> f ( x ))(x)", goal="expr")
    self.assertNoParse("(FN x -> f ( x ))(x)", goal="stmts",
                       message="unexpected end of input")
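# The next method is deliberately not named test*, so unittest does not
# collect it. The 'banana' regex inside it is presumably a placeholder for
# the real error message this check would need before the test could be
# enabled.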
def disabledNegativeLookaheadDisambiguation(self):
    tokenize = lexer.LexicalGrammar(
        '( ) { } ; function =',
        IDENT=r'[A-Za-z_][A-Za-z_0-9]*')
    grammar = Grammar({
        'stmts': [
            ['stmt'],
            ['stmts', 'stmt'],
        ],
        'stmt': [
            [LookaheadRule(set=frozenset({'function'}), positive=False),
             'expr', ';'],
            ['fndecl'],
        ],
        'fndecl': [
            ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'],
        ],
        'expr': [
            ['term'],
            ['IDENT', '=', 'expr'],
        ],
        'term': [
            ['(', 'expr', ')'],
            ['fndecl'],
            ['term', '(', 'expr', ')'],
        ],
    })
    parse = gen.compile(grammar)

    # Test that without the lookahead restriction, we reject this grammar
    # (it's ambiguous):
    del grammar['stmt'][0][0]
    self.assertRaisesRegex(ValueError,
                           'banana',
                           lambda: gen.compile(grammar))

    self.assertEqual(
        parse(tokenize, 'function f() { x = function y() {}; }'),
        ('stmt', 1,
         ('fndecl',
          'function', 'f', '(', ')', '{',
          ('stmt', 0,
           ('expr', 1,
            'x',
            '=',
            ('expr', 0,
             ('term', 1,
              ('fndecl',
               'function', 'y', '(', ')', '{', None, '}')))),
           ';'),
          '}')))

    self.assertEqual(
        parse(tokenize, '(function g(){});'),
        ('stmts', 0,
         ('stmt', 0,
          ('term', 1,
           ('fndecl',
            'function', 'g', '(', ')', '{', None, '}')),
          ';')))
def testLookaheadBeforeOptional(self):
    self.compile(
        lexer.LexicalGrammar(
            '= : _',
            PUBLIC=r'public\b',
            IDENT=r'[a-z]+\b',
            NUM=r'[0-9]\b'),
        Grammar({
            'decl': [
                [LookaheadRule(frozenset({'IDENT'}), True),
                 Optional('attrs'), 'pat', '=', 'NUM'],
            ],
            'attrs': [
                ['attr'],
                ['attrs', 'attr'],
            ],
            'attr': [
                ['PUBLIC', ':'],
                ['IDENT', ':'],
            ],
            'pat': [
                ['IDENT'],
                ['_'],
            ],
        }))
    self.assertEqual(self.parse("x = 0"),
                     ("decl", None, "x", "=", "0"))
    self.assertParse("thread: x = 0")
    self.assertNoParse("public: x = 0",
                       message="expected 'IDENT', got 'PUBLIC'")
    self.assertNoParse("_ = 0", message="expected 'IDENT', got '_'")
    self.assertParse("funny: public: x = 0")
    self.assertParse("funny: _ = 0")
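# The LookaheadRule above constrains only the first token of `decl`: it must
# be an IDENT. That is why "public: x = 0" is rejected (PUBLIC is a distinct
# token) while "funny: public: x = 0" parses: once an IDENT attr comes first,
# later attrs are free to use PUBLIC.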
def testOptionalEmpty(self):
    tokenize = lexer.LexicalGrammar("X Y")
    grammar = Grammar({
        'a': [
            [Optional('b'), Optional('c')],
        ],
        'b': [
            prod(['X'], 'b'),
        ],
        'c': [
            prod(['Y'], 'c'),
        ],
    })
    parse = gen.compile(grammar)
    self.assertEqual(parse(tokenize, ""), ('a', None, None))
    self.assertEqual(parse(tokenize, "X"), ('a', ('b', 'X'), None))
    self.assertEqual(parse(tokenize, "Y"), ('a', None, ('c', 'Y')))
    self.assertEqual(parse(tokenize, "X Y"), ('a', ('b', 'X'), ('c', 'Y')))
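# As the assertions above show, an omitted Optional slot still occupies a
# position in the parse tree (as None), so every 'a' node has the same shape
# no matter which of its optional children actually appeared.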
def testOptional(self):
    tokenize = lexer.LexicalGrammar('[ ] , X')
    grammar = Grammar({
        'array': [
            ['[', Optional('elision'), ']'],
            ['[', 'elements', ']'],
            ['[', 'elements', ',', Optional('elision'), ']'],
        ],
        'elements': [
            [Optional('elision'), 'X'],
            ['elements', ',', Optional('elision'), 'X'],
        ],
        'elision': [
            [','],
            ['elision', ','],
        ],
    })
    self.compile(tokenize, grammar)
    self.assertParse("[]", ('array 0', '[', None, ']'))
    self.assertParse("[,]", ('array 0', '[', ',', ']'))
    self.assertParse(
        "[,,X,,X,]",
        ('array 2',
         '[',
         ('elements 1',
          ('elements 0', ('elision 1', ',', ','), 'X'),
          ',',
          ',',
          'X'),
         ',',
         None,
         ']'))
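# This grammar appears to be modeled on the ECMAScript ArrayLiteral,
# ElementList, and Elision productions. Tags like 'array 0' and 'elements 1'
# in the expected trees name the nonterminal together with the index of the
# production that matched.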
def testCheckCycleFree(self):
    tokenize = lexer.LexicalGrammar("!")
    grammar = Grammar({
        "problem": [
            ["one", "two"],
        ],
        "one": [
            ["!"],
        ],
        "two": [
            [Optional("problem")],
        ],
    })
    self.compile(tokenize, grammar)
    self.assertParse("! ! ! ! !")
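# The test below exercises the generated parser on a realistic grammar: a
# grammar for the notation the ECMAScript specification itself uses to write
# grammars (nonterminal definitions, `one of` token lists, [~Param] guards,
# lookahead assertions, and so on).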
def testHugeExample(self):
    grammar = Grammar(
        {
            'grammar': [['nt_def_or_blank_line'],
                        ['grammar', 'nt_def_or_blank_line']],
            'arg': [['sigil', 'NT']],
            'args': [['arg'], ['args', ',', 'arg']],
            'definite_sigil': [['~'], ['+']],
            'exclusion': [['terminal'],
                          ['nonterminal'],
                          ['CHR', 'through', 'CHR']],
            'exclusion_list': [['exclusion'],
                               ['exclusion_list', 'or', 'exclusion']],
            'ifdef': [['[', 'definite_sigil', 'NT', ']']],
            'line_terminator': [['NT'], ['NTALT']],
            'lookahead_assertion': [
                ['==', 'terminal'],
                ['!=', 'terminal'],
                ['<!', 'NT'],
                ['<!', '{', 'lookahead_exclusions', '}']],
            'lookahead_exclusion': [['lookahead_exclusion_element'],
                                    ['lookahead_exclusion',
                                     'lookahead_exclusion_element']],
            'lookahead_exclusion_element': [['terminal'],
                                            ['no_line_terminator_here']],
            'lookahead_exclusions': [['lookahead_exclusion'],
                                     ['lookahead_exclusions', ',',
                                      'lookahead_exclusion']],
            'no_line_terminator_here': [
                ['[', 'no', 'line_terminator', 'here', ']']],
            'nonterminal': [['NT'], ['NTCALL', '[', 'args', ']']],
            'nt_def': [['nt_lhs', 'EQ', 'NL', 'rhs_lines', 'NL'],
                       ['nt_lhs', 'EQ', 'one', 'of', 'NL',
                        't_list_lines', 'NL']],
            'nt_def_or_blank_line': [['NL'], ['nt_def']],
            'nt_lhs': [['NT'], ['NTCALL', '[', 'params', ']']],
            'param': [['NT']],
            'params': [['param'], ['params', ',', 'param']],
            'rhs': [['symbols'], ['[', 'empty', ']']],
            'rhs_line': [[Optional(inner='ifdef'), 'rhs',
                          Optional(inner='PRODID'), 'NL'],
                         ['PROSE', 'NL']],
            'rhs_lines': [['rhs_line'], ['rhs_lines', 'rhs_line']],
            'sigil': [['definite_sigil'], ['?']],
            'symbol': [['terminal'],
                       ['nonterminal'],
                       ['nonterminal', '?'],
                       ['nonterminal', 'but', 'not', 'exclusion'],
                       ['nonterminal', 'but', 'not', 'one', 'of',
                        'exclusion_list'],
                       ['[', 'lookahead', 'lookahead_assertion', ']'],
                       ['no_line_terminator_here'],
                       ['WPROSE']],
            'symbols': [['symbol'], ['symbols', 'symbol']],
            't_list_line': [['terminal_seq', 'NL']],
            't_list_lines': [['t_list_line'],
                             ['t_list_lines', 't_list_line']],
            'terminal': [['T'], ['CHR']],
            'terminal_seq': [['terminal'], ['terminal_seq', 'terminal']],
        },
        variable_terminals='EQ T CHR NTCALL NT NTALT '
                           'PRODID PROSE WPROSE'.split())

    # Note: This lexical grammar is not suitable for use with incremental
    # parsing.
    emu_grammar_lexer = lexer.LexicalGrammar(
        # the operators and keywords:
        "[ ] { } , ~ + ? <! == != "
        "but empty here lookahead no not of one or through",
        NL="\n",
        # any number of colons together
        EQ=r':+',
        # terminals of the ES grammar, quoted with backticks
        T=r'`[^` \n]+`|```',
        # also terminals, denoting control characters
        CHR=r'<[A-Z]+>|U\+[0-9A-Fa-f]{4}',
        # nonterminals that will be followed by boolean parameters
        NTCALL=r'(?:uri|[A-Z])\w*(?=\[)',
        # nonterminals (also, boolean parameters)
        NT=r'(?:uri|[A-Z])\w*',
        # nonterminals wrapped in vertical bars for no apparent reason
        NTALT=r'\|[A-Z]\w+\|',
        # the spec also gives a few productions names
        PRODID=r'#[A-Za-z]\w*',
        # prose to the end of the line
        PROSE=r'>.*',
        # prose wrapped in square brackets
        WPROSE=r'\[>[^]]*\]',
    )

    self.compile(emu_grammar_lexer, grammar)

    source = """\
    IdentifierReference[Yield, Await] :
      Identifier
      [~Yield] `yield`
      [~Await] `await`

    """
    self.assertParse(source)