def test_redefine_non_terminal(self):
    """ Declaring a terminal that reuses a production name must fail.

    'b' is first introduced as the left-hand side of a production, so a
    later attempt to register 'b' as a terminal is expected to raise
    ParserGenerationException.
    """
    g = Grammar()
    g.add_terminal('a')
    self.assertTrue(g.is_terminal('a'))
    g.add_production('b', ['a', 'a'])
    # 'b' is already taken by the production above:
    with self.assertRaises(ParserGenerationException):
        g.add_terminal('b')
def test_undefined_terminal(self):
    """ Parser generation must fail when a symbol is never defined.

    The symbol 'c' appears on the right-hand side of a production but is
    neither declared as a terminal nor given a production of its own.
    """
    grammar = Grammar()
    grammar.add_terminals(['b'])
    rules = (
        ('goal', ['a']),
        ('a', ['b']),
        ('a', ['c']),  # 'c' is not defined anywhere
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'goal'
    builder = LrParserBuilder(grammar)
    with self.assertRaises(ParserGenerationException):
        builder.generate_parser()
def test_print(self, mock_stdout):
    """ Printing and dumping a grammar must produce some output.

    NOTE(review): mock_stdout is presumably injected by a patch decorator
    that replaces sys.stdout; the decorator is not visible here.
    """
    g = Grammar()
    g.add_terminal('a')
    g.add_production('b', ['a', 'a'])
    print_grammar(g)
    g.dump()
    captured = mock_stdout.getvalue()
    self.assertTrue(captured)
def test_eps_sequence(self):
    """ Test an epsilon production used to build a sequence.

    'aas' is either empty or 'aas' followed by 'a'; both a three-token
    input and an empty input are fed to the generated parser.
    """
    grammar = Grammar()
    grammar.add_terminals(['a'])
    grammar.add_production('aas', [])
    grammar.add_production('aas', ['aas', 'a'])
    grammar.start_symbol = 'aas'
    parser = LrParserBuilder(grammar).generate_parser()
    # First a non-empty sequence, then the empty one:
    for symbols in (['a', 'a', 'a'], []):
        parser.parse(gen_tokens(symbols))
def test_dump_parse(self, mock_stdout):
    """ Run a parse with the debug dump switched on.

    Only checks that parsing with debug_dump=True completes; the captured
    output in mock_stdout is not inspected.
    """
    grammar = Grammar()
    grammar.add_terminals(['a', 'b', 'c'])
    grammar.add_production('goal', ['a', 'c', 'b'])
    grammar.start_symbol = 'goal'
    parser = EarleyParser(grammar)
    token_stream = gen_tokens(['a', 'c', 'b'])
    parser.parse(token_stream, debug_dump=True)
def test_invalid_parse(self):
    """ Check that parsing an unexpected token raises CompilerError.

    The grammar only accepts the sequence 'a', 'c', 'b'; the input holds
    a single 'd' token, which is not among the declared terminals.
    """
    grammar = Grammar()
    grammar.add_terminals(['a', 'b', 'c'])
    grammar.add_production('goal', ['a', 'c', 'b'])
    grammar.start_symbol = 'goal'
    parser = EarleyParser(grammar)
    bad_input = gen_tokens(['d'])
    with self.assertRaises(CompilerError):
        parser.parse(bad_input)
def test_empty(self):
    """ An empty token stream must be rejected.

    The grammar requires exactly one ',' token, so parsing no tokens at
    all is expected to raise ParserException.
    """
    grammar = Grammar()
    grammar.add_terminals([','])
    grammar.add_production('input', [','])
    grammar.start_symbol = 'input'
    parser = LrParserBuilder(grammar).generate_parser()
    no_tokens = gen_tokens([])
    with self.assertRaises(ParserException):
        parser.parse(no_tokens)
def test_cb(self):
    """ Test the callback of one rule and the order of its parameters.

    The rule is 'a' 'c' 'b', so the callback must receive the matched
    items in that same order.
    """
    self.cb_called = False

    def on_goal(tok_a, tok_c, tok_b):
        # Record the invocation, then verify each argument landed in
        # the position matching the right-hand side of the rule.
        self.cb_called = True
        self.assertEqual(tok_a.val, 'a')
        self.assertEqual(tok_b.val, 'b')
        self.assertEqual(tok_c.val, 'c')

    grammar = Grammar()
    grammar.add_terminals(['a', 'b', 'c'])
    grammar.add_production('goal', ['a', 'c', 'b'], on_goal)
    grammar.start_symbol = 'goal'
    parser = EarleyParser(grammar)
    parser.parse(gen_tokens(['a', 'c', 'b']))
    self.assertTrue(self.cb_called)
def test_ambiguous_grammar(self):
    """ Test that an ambiguous grammar is resolved through priorities.

    Two productions share the same right-hand side but carry different
    priorities and different actions; the expected result (3) is the one
    produced by the action attached to the priority=2 production.
    """
    # TODO: check that ambiguous grammars have different priorities!

    def plain_sum(left, _, right):
        return left.val + right.val

    def sum_plus_one(left, _, right):
        return left.val + right.val + 1

    def take_operand(_, operand):
        return operand

    g = Grammar()
    g.add_terminals(['mov', 'num', '+'])
    g.add_production('expr', ['num', '+', 'num'], plain_sum, priority=3)
    g.add_production('expr', ['num', '+', 'num'], sum_plus_one, priority=2)
    g.add_production('ins', ['mov', 'expr'], take_operand, priority=2)
    g.start_symbol = 'ins'
    earley = EarleyParser(g)
    token_stream = gen_tokens(['mov', ('num', 1), '+', ('num', 1)])
    outcome = earley.parse(token_stream)
    self.assertEqual(3, outcome)
def test_expression_grammar(self):
    """ Evaluate an arithmetic expression through production actions.

    The actions attached to the productions compute the value while
    parsing; the input 7 * 11 + 3 must evaluate to 80.
    """

    def keep(value):
        return value

    def add(left, _op, right):
        return left + right

    def multiply(left, _op, right):
        return left * right

    def inner(_open, value, _close):
        return value

    def number(token):
        return token.val

    g = Grammar()
    g.add_terminals(['EOF', 'identifier', '(', ')', '+', '*', 'num'])
    g.add_production('input', ['expression'], keep)
    g.add_production('expression', ['term'], keep)
    g.add_production('expression', ['expression', '+', 'term'], add)
    g.add_production('term', ['factor'], keep)
    g.add_production('term', ['term', '*', 'factor'], multiply)
    g.add_production('factor', ['(', 'expression', ')'], inner)
    # No action is attached to the identifier alternative:
    g.add_production('factor', ['identifier'])
    g.add_production('factor', ['num'], number)
    g.start_symbol = 'input'
    earley = EarleyParser(g)
    token_stream = gen_tokens(
        [('num', 7), '*', ('num', 11), '+', ('num', 3)])
    outcome = earley.parse(token_stream)
    self.assertEqual(80, outcome)
def test_redefine_terminal(self):
    """ Test that a terminal cannot be redefined as a production.

    'b' is declared as a terminal, so adding a production named 'b' must
    raise ParserGenerationException. The grammar is then completed with a
    valid production and a parser is generated from it.
    """
    grammar = Grammar()
    grammar.add_terminals([EOF, 'b', 'c'])
    grammar.add_production('goal', ['a'])
    with self.assertRaises(ParserGenerationException):
        # Not allowed: 'b' is already a terminal
        grammar.add_production('b', ['c'])
    grammar.add_production('a', ['c'])
    grammar.start_symbol = 'goal'
    builder = LrParserBuilder(grammar)
    builder.generate_parser()
def test_shift_reduce_conflict(self):
    """ A shift-reduce conflict must be handled automatically (by shift).

    Uses the if/then/else grammar with an optional else branch and
    checks that a parser can be generated and a nested-if program parsed.
    """
    grammar = Grammar()
    grammar.add_terminals([EOF, 'if', 'then', 'else', 'ass'])
    # Ambiguous grammar:
    rules = (
        ('if_stmt', ['if', 'then', 'stmt']),
        ('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt']),
        ('stmt', ['if_stmt']),
        ('stmt', ['ass']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'stmt'
    parser = LrParserBuilder(grammar).generate_parser()
    # Ambiguous program:
    program = ['if', 'then', 'if', 'then', 'ass', 'else', 'ass']
    parser.parse(gen_tokens(program))
def test_rewrite_epsilons(self):
    """ Test grammar rewriting, i.e. the removal of epsilon rules.

    Before rewriting, the grammar is not normal and 'Y' has a single
    production; afterwards 'Y' has two productions and the grammar
    reports itself as normal.
    """
    g = Grammar()
    g.add_terminals(['a', 'b', 'c'])
    g.add_production('X', [])
    g.add_production('X', ['c'])
    g.add_production('Y', ['X', 'a'])

    # State before the rewrite:
    self.assertFalse(g.is_normal)
    self.assertEqual(1, len(g.productions_for_name('Y')))

    g.rewrite_eps_productions()

    # State after the rewrite:
    self.assertEqual(2, len(g.productions_for_name('Y')))
    self.assertTrue(g.is_normal)
def test_eps2(self):
    """ Parse an input with and without its optional leading label.

    The optional part is expressed with two 'input' alternatives rather
    than with an empty production.
    """
    grammar = Grammar()
    grammar.add_terminals(['id', ':'])
    rules = (
        ('input', ['opt_lab', 'ins', 'op1']),
        ('input', ['ins', 'op1']),
        ('opt_lab', ['id', ':']),
        ('ins', ['id']),
        ('op1', ['id']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'input'
    parser = LrParserBuilder(grammar).generate_parser()
    inputs = (
        ['id', ':', 'id', 'id'],  # i.e. "lab_0: inc rax"
        ['id', 'id'],  # i.e. "inc rax"
    )
    for symbols in inputs:
        parser.parse(gen_tokens(symbols))
def test_eps(self):
    """ Test an epsilon (empty) production.

    'optional_a' may match 'a' or nothing, so an input consisting of just
    'b' must be parsed without error.
    """
    grammar = Grammar()
    grammar.add_terminals(['a', 'b'])
    rules = (
        ('input', ['optional_a', 'b']),
        ('optional_a', ['a']),
        ('optional_a', []),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'input'
    parser = LrParserBuilder(grammar).generate_parser()
    parser.parse(gen_tokens(['b']))
def test_simple_grammar(self):
    """ Build an LR parser for a small expression grammar and run it. """
    # 1. define a simple grammar:
    grammar = Grammar()
    grammar.add_terminals(['identifier', '(', ')', '+', '*'])
    rules = (
        ('input', ['expression']),
        ('expression', ['term']),
        ('expression', ['expression', '+', 'term']),
        ('term', ['factor']),
        ('term', ['term', '*', 'factor']),
        ('factor', ['(', 'expression', ')']),
        ('factor', ['identifier']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'input'
    # 2. define input:
    token_stream = gen_tokens(
        ['identifier', '+', 'identifier', '+', 'identifier'])
    # 3. build parser:
    parser = LrParserBuilder(grammar).generate_parser()
    # 4. feed input:
    parser.parse(token_stream)
def test_reduce_reduce_conflict(self):
    """ Check that a reduce-reduce conflict is detected.

    Both 'b' and 'c' derive the single terminal 'id', and 'a' can be
    either of them, so generating a parser must raise
    ParserGenerationException.
    """
    # Define a grammar with an obvious reduce-reduce conflict:
    grammar = Grammar()
    grammar.add_terminals(['id'])
    rules = (
        ('goal', ['a']),
        ('a', ['b']),
        ('a', ['c']),
        ('b', ['id']),
        ('c', ['id']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'goal'
    builder = LrParserBuilder(grammar)
    with self.assertRaises(ParserGenerationException):
        builder.generate_parser()
def setUp(self):
    """ Create the expression grammar shared by the tests as self.g. """
    grammar = Grammar()
    grammar.add_terminals(
        ['EOF', 'identifier', '(', ')', '+', '*', 'num'])
    rules = (
        ('input', ['expression']),
        ('expression', ['term']),
        ('expression', ['expression', '+', 'term']),
        ('term', ['factor']),
        ('term', ['term', '*', 'factor']),
        ('factor', ['(', 'expression', ')']),
        ('factor', ['identifier']),
        ('factor', ['num']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'input'
    self.g = grammar
def setUp(self):
    """ Create the nested-parentheses grammar used by the tests as self.g.

    A 'list' is one or more 'pair's; a 'pair' is '(' ')' optionally
    wrapping another 'pair'.
    """
    grammar = Grammar()
    grammar.add_terminals(['(', ')'])
    rules = (
        ('goal', ['list']),
        ('list', ['list', 'pair']),
        ('list', ['pair']),
        ('pair', ['(', 'pair', ')']),
        ('pair', ['(', ')']),
    )
    for name, symbols in rules:
        grammar.add_production(name, symbols)
    grammar.start_symbol = 'goal'
    self.g = grammar
def __init__(self, kws, sigfilter, opt_timescale):
    """ Build the grammar and LR parser and store the given options.

    :param kws: list of extra terminal names, appended to the fixed
        terminals ('ID', 'NUMBER', 'STRING', 'TIMEUNIT', EPS, EOF).
    :param sigfilter: stored unchanged as self.sigfilter.
    :param opt_timescale: stored unchanged as self.timescale.

    The generated parser is kept in self.p and self.sigprefix starts out
    as an empty list.
    """

    def passthrough(item):
        # Action for single-symbol rules: hand the matched item back.
        return item

    grammar = Grammar()
    grammar.add_terminals(
        ['ID', 'NUMBER', 'STRING', 'TIMEUNIT', EPS, EOF] + kws)

    # Top level: one or more 'exp' entries.
    grammar.add_production('input', ['exp_list'])
    grammar.add_one_or_more('exp', 'exp_list')

    # Scope opening and the kinds of scope it may name.
    grammar.add_production(
        'exp', ['$SCOPE', 'scopetype', 'ID', '$END'],
        self.handle_start_module)
    for scope_kind in ('MODULE', 'TASK', 'BEGIN'):
        grammar.add_production('scopetype', [scope_kind], passthrough)

    # Scope closing, timescale and variable declarations.
    grammar.add_production(
        'exp', ['$UPSCOPE', '$END'], self.handle_end_module)
    grammar.add_production(
        'exp', ['$TIMESCALE', 'TIMEUNIT', '$END'], self.handle_timescale)
    grammar.add_production(
        'exp', ['$VAR', 'type', 'NUMBER', 'code', 'name', '$END'],
        self.handle_vardecl)

    # Pieces of a variable declaration.
    grammar.add_production('code', ['ID'], passthrough)
    grammar.add_production('code', ['NUMBER'], self.handle_number)
    grammar.add_production('name', ['ID'], passthrough)
    grammar.add_production('name', ['ID', 'ID'], self.handle_index)
    for var_kind in ('WIRE', 'REG', 'INTEGER', 'TRIREG'):
        grammar.add_production('type', [var_kind], passthrough)

    grammar.start_symbol = 'input'
    self.p = LrParserBuilder(grammar).generate_parser()
    self.sigprefix = []
    self.sigfilter = sigfilter
    self.timescale = opt_timescale