def test_nested_left_recursion(self, trace=False):
    """Left recursion nested through intermediate rules parses left-associatively."""
    mul_grammar = '''
        @@left_recursion :: True
        s = e $ ;
        e = [e '+'] t ;
        t = [t '*'] a ;
        a = ?/[0-9]/? ;
    '''
    paren_grammar = '''
        @@left_recursion :: True
        s = e $ ;
        e = [e '+'] a ;
        a = n | p ;
        n = ?/[0-9]/? ;
        p = '(' @:e ')' ;
    '''
    mul_model = compile(mul_grammar, "test")
    paren_model = compile(paren_grammar, "test")

    # '+' binds looser than '*'
    ast = mul_model.parse("1*2+3*4", trace=trace, colorize=True)
    self.assertEqual([['1', '*', '2'], '+', ['3', '*', '4']], ast)

    ast = paren_model.parse("(1+2)+(3+4)", trace=trace, colorize=True)
    self.assertEqual([['1', '+', '2'], '+', ['3', '+', '4']], ast)

    # chains of the same operator associate to the left
    ast = mul_model.parse("1*2*3", trace=trace, colorize=True)
    self.assertEqual([['1', '*', '2'], '*', '3'], ast)

    # redundant parentheses collapse through the @:e override
    ast = paren_model.parse("(((1+2)))", trace=trace, colorize=True)
    self.assertEqual(['1', '+', '2'], ast)
def test_multiline_pattern(self):
    # A /(?x).../ pattern spanning physical lines must keep its embedded
    # newlines verbatim through compilation and code generation.
    # NOTE(review): the pattern's layout relative to the trim() margin is
    # part of the expected value below -- confirm against the original file.
    grammar = r'''
start = /(?x)
foo
bar
/ $ ;
'''
    model = compile(grammar=trim(grammar))
    print(codegen(model.rules[0].exp.sequence[0]))
    self.assertEqual(
        codegen(model.rules[0].exp.sequence[0]),
        urepr("self._pattern('(?x)\nfoo\nbar\n')").strip('"\'')
    )

    # A raw-string '\n' inside the pattern is two characters (backslash, n)
    # and must stay escaped, while a real newline remains a line break.
    grammar = r'''
start = /(?x)foo\nbar
blort/ $ ;
'''
    model = compile(grammar=trim(grammar))
    print(codegen(model.rules[0].exp.sequence[0]))
    self.assertEqual(
        trim(codegen(model.rules[0].exp.sequence[0])),
        urepr("self._pattern('(?x)foo\\nbar\nblort')").strip(r'"\.')
    )
def test_skip_to(self, trace=False):
    """The ->e (skip-to) operator discards input until e can match."""
    grammar = '''
        start = 'x' ab $ ;
        ab = | 'a' 'b' | ->'a' 'b' ;
    '''
    parser = compile(grammar, trace=trace)
    self.assertEqual(['x', ['a', 'b']], parser.parse('x xx yyy a b'))

    # same idea, but skipping to a lookahead so 'a' is matched explicitly
    grammar = '''
        start = 'x' ab $ ;
        ab = | 'a' 'b' | ->&'a' 'a' 'b' ;
    '''
    parser = compile(grammar, trace=trace)
    self.assertEqual(['x', ['a', 'b']], parser.parse('x xx yyy a b'))
def test_builder_semantics(self):
    """ModelBuilderSemantics resolves rule result types both from builtins
    (``int``, ``sum``) and from caller-supplied constructors.

    Fix: the grammar strings are now raw so ``\\d`` reaches TatSu verbatim --
    an un-raw ``'\\d'`` is an invalid escape sequence and raises a
    SyntaxWarning on modern Python.
    """
    grammar = r'''
        start::sum = {number}+ $ ;
        number::int = /\d+/ ;
    '''
    text = '5 4 3 2 1'
    semantics = ModelBuilderSemantics()
    model = compile(grammar, 'test')
    ast = model.parse(text, semantics=semantics)
    self.assertEqual(15, ast)

    import functools
    # a custom "type": joins the parsed numbers with dots
    dotted = functools.partial(type('').join, '.')
    dotted.__name__ = 'dotted'  # ModelBuilderSemantics looks types up by name

    grammar = r'''
        start::dotted = {number}+ $ ;
        number = /\d+/ ;
    '''
    semantics = ModelBuilderSemantics(types=[dotted])
    model = compile(grammar, 'test')
    ast = model.parse(text, semantics=semantics)
    self.assertEqual('5.4.3.2.1', ast)
def test_empty_match_token(self):
    # The grammar string is NOT raw, so "\n" below is a real newline inside
    # the token quotes; compiling such a token must be rejected with
    # FailedParse instead of being silently accepted.
    grammar = """
        table = { row }+ ;
        row = (cell1:cell "|" cell2:cell) "\n";
        cell = /[a-z]+/ ;
    """
    try:
        compile(grammar, "model")
        self.fail('allowed empty token')
    except FailedParse:
        pass
def test_optional_sequence(self):
    """An optional sequence contributes its elements to the AST when present."""
    model = compile('''
        start = '1' ['2' '3'] '4' $ ;
    ''', "test")
    self.assertEqual(
        ['1', '2', '3', '4'],
        model.parse("1234", nameguard=False),
    )

    # a *named* optional sequence lands under its name in the AST
    model = compile('''
        start = '1' foo:['2' '3'] '4' $ ;
    ''', "test")
    ast = model.parse("1234", nameguard=False)
    self.assertEqual(['2', '3'], ast.foo)
def test_builder_subclassing(self):
    """start::A::B::C synthesizes A < B < C, all rooted at Node."""
    from tatsu import synth
    from tatsu.model import Node

    # getattr avoids class-private name mangling of the dunder-ish attribute
    registry = getattr(synth, "__REGISTRY")

    grammar = '''
        @@grammar :: Test
        start::A::B::C = $ ;
    '''
    model = compile(grammar, asmodel=True)
    model.parse("")

    cls_a, cls_b, cls_c = (registry[name] for name in "ABC")

    # the declared chain becomes an inheritance chain...
    self.assertTrue(issubclass(cls_a, cls_b))
    self.assertTrue(issubclass(cls_b, cls_c))
    # ...and every synthesized class is a _Synthetic Node
    for cls in (cls_a, cls_b, cls_c):
        self.assertTrue(issubclass(cls, synth._Synthetic))
        self.assertTrue(issubclass(cls, Node))
def test_any(self):
    """/./ matches any single character, including whitespace between tokens."""
    model = compile('''
        start = /./ 'xx' /./ /./ 'yy' $;
    ''', "start")
    self.assertEqual(
        ('1', 'xx', ' ', '2', 'yy'),
        model.parse("1xx 2 yy"),
    )
def test_indirect_left_recursion(self, trace=False):
    """Recursion through x -> expr -> x is detected, and memoization is
    disabled for every rule on the cycle."""
    grammar = '''
        @@left_recursion :: True
        start = x $ ;
        x = expr ;
        expr = x '-' num | num;
        num = ?/[0-9]+/? ;
    '''
    model = compile(grammar, "test")
    x, expr, num = (model.rulemap[name] for name in ('x', 'expr', 'num'))

    # only the head of the cycle is flagged as left-recursive...
    assert x.is_leftrec
    assert not expr.is_leftrec
    assert not num.is_leftrec

    # ...but both rules on the cycle lose memoization; 'num' keeps it
    assert not x.is_memoizable
    assert not expr.is_memoizable
    assert num.is_memoizable

    print('x', x.lookahead())
    print('expr', expr.lookahead())
    assert ref('x') in x.lookahead()
    assert ref('expr') in expr.lookahead()
def test_no_left_recursion(self, trace=False):
    """Passing left_recursion=False at parse time must make a left-recursive
    grammar fail, even though the grammar itself enables the feature."""
    grammar = '''
        @@left_recursion :: True
        start = expre $ ;
        expre = expre '+' number | expre '*' number | number ;
        number = ?/[0-9]+/? ;
    '''
    model = compile(grammar, "test")

    # sanity: the grammar works with left recursion enabled
    model.parse("1*2+3*5", trace=trace, colorize=True)

    with self.assertRaises(FailedParse):
        model.parse(
            "1*2+3*5",
            left_recursion=False,
            trace=trace,
            colorize=True,
        )
def test_pattern_concatenation(self):
    # '+' between patterns concatenates them into a single regex.  The
    # pretty-printed grammar normalizes the ?"..."/?'...' pattern syntax to
    # /.../ form, so the text below must match pretty()'s exact layout.
    # NOTE(review): layout reconstructed -- confirm blank lines between rules
    # against model.pretty() output.
    grammar = '''
        start = {letters_digits}+ ;

        letters_digits = ?"[a-z]+" + ?'[0-9]+' ;
    '''
    pretty = '''
        start = {letters_digits}+ ;

        letters_digits = /[a-z]+/ + /[0-9]+/ ;
    '''
    model = compile(grammar=grammar)
    ast = model.parse('abc123 def456')
    self.assertEqual(['abc123', 'def456'], ast)
    print(model.pretty())
    self.assertEqual(trim(pretty), model.pretty())
def test_dropped_input_bug(self, trace=False):
    """Regression: a failed left-recursive alternative must not drop input
    that was consumed before the failure."""
    grammar = r'''
        @@left_recursion :: True
        start = expr;
        expr =
            | expr ',' expr
            | identifier
            ;
        identifier = /\w+/ ;
    '''
    model = compile(grammar)

    cases = (
        ('foo', 'foo'),
        ('foo bar', 'foo'),  # trailing junk is simply left unconsumed
        ('foo, bar', ['foo', ',', 'bar']),
    )
    for text, expected in cases:
        ast = model.parse(text, trace=trace, colorize=True)
        self.assertEqual(expected, ast)
def test_right_join(self):
    # (op)>{e}+ is the right-join operator: results nest to the right.
    # The grammar source is also compared with str(model), so its layout
    # must match the canonical rendering.
    # NOTE(review): layout reconstructed -- confirm blank lines between rules
    # against str(model).
    grammar = r'''
        start = (op)>{number}+ $ ;

        op = '+' | '-' ;

        number = /\d+/ ;
    '''
    text = '1 + 2 - 3 + 4'
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar).strip(), str(model).strip())
    codegen(model)  # generation must not raise
    ast = model.parse(text)
    # right-nested: 1 + (2 - (3 + 4)) shape-wise
    self.assertEqual(('+', '1', ('-', '2', ('+', '3', '4'))), ast)
def test_sparse_keywords(self):
    """@@keyword directives accumulate across the grammar, and every keyword
    is rejected wherever an @name rule would match it as an identifier."""
    grammar = r'''
        @@keyword :: A
        @@ignorecase :: False
        start = {id}+ $ ;
        @@keyword :: B
        @name
        id = /\w+/ ;
    '''
    model = compile(grammar, 'test', trace=False, colorize=True)
    generated = codegen(model)
    parse(generated)  # presumably validates the generated source -- verify

    self.assertEqual(['hello', 'world'], model.parse('hello world'))

    for keyword in ('A', 'B'):
        try:
            ast = model.parse('hello %s world' % keyword)
            self.assertEqual(['hello', keyword, 'world'], ast)
            self.fail('accepted keyword "%s" as name' % keyword)
        except FailedParse as e:
            self.assertTrue('"%s" is a reserved word' % keyword in str(e))
def test_ignorecase_keywords(self):
    """With @@ignorecase, keywords are reserved in any capitalization."""
    grammar = r'''
        @@ignorecase :: True
        @@keyword :: if
        start = rule ;
        @name
        rule = @:word if_exp $ ;
        if_exp = 'if' digit ;
        word = /\w+/ ;
        digit = /\d/ ;
    '''
    model = compile(grammar, 'test')

    # a name that merely *contains* the keyword is fine
    model.parse('nonIF if 1', trace=False)

    # a bare keyword -- in any case -- cannot be used as a name
    for text in ('i rf if 1', 'IF if 1'):
        with self.assertRaises(FailedParse):
            model.parse(text, trace=False)
def test_nested_class_synth_model():
    """Synthesized model classes must survive a pickle round trip."""
    grammar = '''
        start::ASeq = seqs:aseq $ ;
        aseq::Seq = values:{'a'}+ ;
    '''
    parser = compile(grammar, 'ASeq')
    model = parser.parse('a a a', semantics=ModelBuilderSemantics())
    assert type(model).__name__ == 'ASeq'

    restored = pickle.loads(pickle.dumps(model))
    assert type(restored).__name__ == 'ASeq'

    # The unpickled object graph lives at new memory locations, so compare a
    # stable serialized view of the ASTs instead of the objects themselves.
    assert asjson(model.ast) == asjson(restored.ast)
def test_group_ast(self):
    """A parenthesized group is flattened into the enclosing sequence's AST."""
    model = compile('''
        start = '1' ('2' '3') '4' $ ;
    ''', "test")
    self.assertEqual(
        ['1', '2', '3', '4'],
        model.parse("1234", nameguard=False),
    )
def test_numbers_and_unicode(self):
    # Rule parameters may be ints, negatives, floats, scientific notation,
    # and unicode identifiers; the grammar must round-trip exactly through
    # ustr(model), so layout matters.
    # NOTE(review): layout reconstructed -- confirm against ustr(model).
    grammar = '''
        rúle(1, -23, 4.56, 7.89e-11, Añez) = 'a' ;
    '''
    rule2 = '''
        rulé::Añez = '\\xf1' ;
    '''
    rule3 = '''
        rúlé::Añez = 'ñ' ;
    '''
    if PY3:
        grammar += rule3
    else:
        # Python 2 cannot hold 'ñ' in a plain str literal; use the escape form
        grammar += rule2
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), ustr(model))
def test_name_in_option():
    """Named elements inside optionals populate the AST, with None entries
    for the optional parts that did not match.

    Fix: the grammar is now a raw string so ``\\d`` reaches TatSu verbatim --
    an un-raw ``'\\d'`` is an invalid escape sequence and a SyntaxWarning on
    modern Python.
    """
    grammar = r'''
        start = expr_range ;
        expr_range =
            | [from: expr] '..' [to: expr]
            | expr
            ;
        expr = /[\d]+/ ;
    '''
    model = compile(grammar)

    ast = model.parse('1 .. 10')
    assert ast == {'from': '1', 'to': '10'}

    # the plain-expr alternative yields a bare value, not a dict
    ast = model.parse('10')
    assert ast == '10'

    # unmatched optionals surface as None under their names
    ast = model.parse(' .. 10')
    assert ast == {'from': None, 'to': '10'}

    ast = model.parse('1 .. ')
    assert ast == {'from': '1', 'to': None}

    ast = model.parse(' .. ')
    assert ast == {'from': None, 'to': None}

    # code generation must also succeed for this grammar
    code = gencode(grammar=grammar)
    print(code)
def test_change_start_rule(self, trace=False):
    """parse(start=...) overrides the grammar's default start rule.

    Starting directly at 'mul' must fail: as the entry rule it is
    left-recursive without a non-recursive alternative to seed the parse.

    Fix: corrected the typo "cotain" -> "contain" in the failure message.
    """
    grammar = r'''
        start = expr ;
        expr = mul | identifier ;
        mul = expr '*' identifier ;
        identifier = /\w+/ ;
    '''
    model = compile(grammar)
    ast = model.parse('a * b', start='expr', trace=trace, colorize=True)
    self.assertEqual(['a', '*', 'b'], ast)
    try:
        model.parse('a * b', start='mul', trace=trace, colorize=True)
        self.fail(
            'failure expected as first recursive rule does not contain a choice'
        )
    except FailedParse:
        pass
def test_builder_basetype_codegen(self):
    """Generated model code must subclass the supplied base_type and expose
    the attributes declared in the grammar.

    Fix: renamed the local ``globals`` dict to ``namespace`` -- it shadowed
    the ``globals`` builtin.
    """
    grammar = '''
        @@grammar :: Test
        start::A::B::C = a:() b:() $ ;
        second::D::A = ();
        third = ();
    '''
    from tatsu.tool import to_python_model
    src = to_python_model(grammar, base_type=MyNode)

    # execute the generated module source in a scratch namespace
    namespace = {}
    exec(src, namespace)  # trusted input: our own generated code

    semantics = namespace["TestModelBuilderSemantics"]()
    A = namespace["A"]
    B = namespace["B"]
    C = namespace["C"]
    D = namespace["D"]
    model = compile(grammar, semantics=semantics)
    ast = model.parse("", semantics=semantics)

    self.assertIsInstance(ast, MyNode)
    self.assertIsInstance(ast, (A, B, C))
    self.assertTrue(hasattr(ast, "a"))
    self.assertTrue(hasattr(ast, "b"))
    # D declared as second::D::A must sit below the whole A::B::C chain
    self.assertTrue(issubclass(D, (A, B, C)))
def test_left_recursion_bug(self, trace=False):
    """Regression: left recursion mixed with parenthesized sub-expressions
    used to break the parser."""
    grammar = '''\
        @@grammar :: Minus
        @@left_recursion :: True
        start = expression $ ;
        expression =
            | minus_expression
            | paren_expression
            | value
            ;
        paren_expression = '(' expression ')' ;
        minus_expression = expression '-' expression ;
        value = /[0-9]+/ ;
    '''
    model = compile(grammar=grammar)

    # each of these inputs must simply parse without raising
    for text in (
        '3',
        '3 - 2',
        '(3 - 2)',
        '(3 - 2) - 1',
        '3 - 2 - 1',
        '3 - (2 - 1)',
    ):
        model.parse(text, trace=trace, colorize=True)
def test_check_keywords(self):
    """@@keyword names are rejected where @name rules expect identifiers.

    Fixes: the stdlib ``parser`` module (``parser.suite``) was removed in
    Python 3.10 -- use ``ast.parse`` to syntax-check the generated code; the
    grammar string is now raw so ``\\w`` is not an invalid escape; dropped a
    redundant trailing ``pass``.
    """
    import ast as python_ast  # aliased: the local name `ast` is reused below

    grammar = r'''
        @@keyword :: A
        start = {id}+ $ ;
        @name
        id = /\w+/ ;
    '''
    model = compile(grammar, 'test')
    code = codegen(model)
    # the generated parser must at least be syntactically valid Python
    python_ast.parse(code)

    ast = model.parse('hello world')
    self.assertEqual(['hello', 'world'], ast)

    try:
        ast = model.parse("hello A world")
        self.assertEqual(['hello', 'A', 'world'], ast)
        self.fail('accepted keyword as name')
    except FailedParse as e:
        self.assertTrue('"A" is a reserved word' in str(e))
def test_partial_input_bug(self, trace=False):
    """Regression: a successful prefix parse must not be discarded because of
    unconsumed trailing input."""
    grammar = r'''
        start = expre ;
        expre =
            | '{' expre '}'
            | expre '->' identifier
            | identifier
            ;
        identifier = /\w+/ ;
    '''
    # 'test' trails the braced expression and should be left unconsumed
    text = '''
        {
            size
        } test
    '''
    model = compile(grammar)
    ast = model.parse(text, trace=trace, colorize=True)
    assert ['{', 'size', '}'] == ast
def test_ast_assignment(self):
    # @: overrides the AST with the element's value, while @+: always
    # appends, producing a list of lists.  Each rule name below encodes the
    # combination it tests (n = @:, f = @+:).
    grammar = '''
        n = @: {"a"}* $ ;
        f = @+: {"a"}* $ ;
        nn = @: {"a"}* @: {"b"}* $ ;
        nf = @: {"a"}* @+: {"b"}* $ ;
        fn = @+: {"a"}* @: {"b"}* $ ;
        ff = @+: {"a"}* @+: {"b"}* $ ;
    '''
    model = compile(grammar, "test")

    def p(input, rule):
        # no whitespace skipping, so '' really is empty input
        return model.parse(input, start=rule, whitespace='')

    e = self.assertEqual

    # a single @: yields the closure's list directly
    e([], p('', 'n'))
    e(['a'], p('a', 'n'))
    e(['a', 'a'], p('aa', 'n'))

    # a single @+: wraps the closure's list in an enclosing list
    e([[]], p('', 'f'))
    e([['a']], p('a', 'f'))
    e([['a', 'a']], p('aa', 'f'))

    # with two assignments, every @:/@+: combination collects both lists
    for r in ('nn', 'nf', 'fn', 'ff'):
        e([[], []], p('', r))
        e([['a'], []], p('a', r))
        e([[], ['b']], p('b', r))
        e([['a', 'a'], []], p('aa', r))
        e([[], ['b', 'b']], p('bb', r))
        e([['a', 'a'], ['b']], p('aab', r))
def test_direct_left_recursion(self, trace=False):
    """Directly left-recursive alternatives associate to the left."""
    grammar = '''
        @@left_recursion :: True
        start = expre $ ;
        expre = expre '+' number | expre '*' number | number ;
        number = ?/[0-9]+/? ;
    '''
    model = compile(grammar, "test")
    ast = model.parse("1*2+3*5", trace=trace, colorize=True)
    # strictly left-associative, no operator precedence: ((1*2)+3)*5
    self.assertEqual([[['1', '*', '2'], '+', '3'], '*', '5'], ast)
def test_include_and_override(self):
    """#include pulls in another grammar and @override replaces its rule --
    both for an inlined grammar and for an actual include directive."""
    included_name = 'included_grammar'
    included_source = "plu = 'aaaa';"
    override_template = "%s@override\nplu = 'plu';"

    including_grammar = override_template % (
        '#include :: %s.ebnf\n' % included_name
    )
    whole_grammar = override_template % included_source

    class FakeIncludesBuffer(EBNFBuffer):
        # serve the included grammar from memory instead of the filesystem
        def get_include(self, source, filename):
            return included_source, source + '/' + filename

    compile(FakeIncludesBuffer(whole_grammar), "test")
    compile(FakeIncludesBuffer(including_grammar), "test")
def test_36_unichars(self):
    # Unicode values in positional and keyword rule arguments must survive
    # parsing, semantics dispatch, and a full codegen round trip through an
    # importable module written to disk.
    grammar = '''
        start = { rule_positional | rule_keywords | rule_all }* $ ;
        rule_positional("ÄÖÜäöüß") = 'a' ;
        rule_keywords(k1='äöüÄÖÜß') = 'b' ;
        rule_all('ßÄÖÜäöü', k1="ßäöüÄÖÜ") = 'c' ;
    '''

    def _trydelete(pymodule):
        # best-effort cleanup of the generated module and its bytecode
        import os
        try:
            os.unlink(pymodule + ".py")
        except EnvironmentError:
            pass
        try:
            os.unlink(pymodule + ".pyc")
        except EnvironmentError:
            pass
        try:
            os.unlink(pymodule + ".pyo")
        except EnvironmentError:
            pass

    def assert_equal(target, value):
        self.assertEqual(target, value)

    class UnicharsSemantics(object):
        """Check all rule parameters for expected types and values"""

        def rule_positional(self, ast, p1):
            assert_equal("ÄÖÜäöüß", p1)
            return ast

        # NOTE(review): named `rule_keyword` while the grammar rule is
        # `rule_keywords` -- this method is probably never dispatched;
        # confirm and rename if so.
        def rule_keyword(self, ast, k1):
            assert_equal("äöüÄÖÜß", k1)
            return ast

        def rule_all(self, ast, p1, k1):
            assert_equal("ßÄÖÜäöü", p1)
            assert_equal("ßäöüÄÖÜ", k1)
            return ast

    m = compile(grammar, "UnicodeRuleArguments")
    ast = m.parse("a b c")
    self.assertEqual(['a', 'b', 'c'], ast)

    semantics = UnicharsSemantics()
    ast = m.parse("a b c", semantics=semantics)
    self.assertEqual(['a', 'b', 'c'], ast)

    # round-trip: write the generated parser to disk and import it
    code = codegen(m)
    import codecs
    with codecs.open("tc36unicharstest.py", "w", "utf-8") as f:
        f.write(code)
    import tc36unicharstest
    tc36unicharstest  # bare reference: the import succeeding is the test
    _trydelete("tc36unicharstest")
def test_empty_closure(self):
    """An empty closure {} matches nothing and yields an empty list."""
    grammar = '''
        start = {'x'}+ {} 'y'$;
    '''
    model = compile(grammar, "test")
    codegen(model)  # the empty closure must also survive code generation
    self.assertEqual(
        [['x', 'x', 'x'], [], 'y'],
        model.parse("xxxy", nameguard=False),
    )
def test_update_ast(self):
    """Repeated assignments to one name accumulate their values in a list."""
    grammar = '''
        foo = name:"1" [ name: bar ] ;
        bar = { "2" } * ;
    '''
    model = compile(grammar, 'Keywords')
    ast = model.parse('1 2')
    self.assertEqual(['1', ['2']], ast.name)

    grammar = '''
        start = items: { item } * $ ;
        item = @:{ subitem } * "0" ;
        subitem = ?/1+/? ;
    '''
    model = compile(grammar, 'Update')
    ast = model.parse("1101110100", nameguard=False)
    # accessed as `items_` -- presumably because plain `items` would clash
    # with the AST mapping API; confirm against tatsu.ast.AST
    self.assertEqual([['11'], ['111'], ['1'], []], ast.items_)