def test_direct_left_recursion_raises(self):
    """Parsing with ``detect_left_recursion=True`` raises ``LeftRecursion``.

    The ``foo`` rule is ``'a' | foo + 'x'``: its second alternative starts
    with ``foo`` itself.
    """
    builder = GrammarBuilder()
    builder.foo = 'a' | builder.foo + 'x'
    recursive_grammar = builder(start=builder.foo)
    self.assertRaises(
        LeftRecursion,
        recursive_grammar.parse,
        'xx',
        detect_left_recursion=True,
    )
def setUp(self):
    """Create the fixture grammar stored on ``self.grammar``.

    ``xy`` and ``yx`` are both ``Terminal('x') | Terminal('y')``; the start
    rule is ``flatten(Lookahead('x') + xy) | yx``.
    """
    super().setUp()
    rules = GrammarBuilder()
    rules.xy = Terminal('x') | Terminal('y')
    rules.yx = Terminal('x') | Terminal('y')
    rules.start = flatten(Lookahead('x') + rules.xy) | rules.yx
    self.grammar = rules(start=rules.start)
def test_double_quoted_strings(self):
    """Run ``assert_lexeme`` on double-quoted samples, with and without escapes.

    The grammar's start rule is ``builtins.double_quoted_string``.
    """
    builder = GrammarBuilder()
    builder.string = builtins.double_quoted_string
    grammar = builder(start=builder.string)

    samples = (
        '"foo"',
        '"say:\\"foo\\""',
        '"say\\\":\\"foo\\""',
    )
    for sample in samples:
        self.assert_lexeme(grammar, sample)
def test_single_quoted_strings(self):
    """Run ``assert_lexeme`` on single-quoted samples, with and without escapes.

    The grammar's start rule is ``builtins.single_quoted_string``.
    """
    builder = GrammarBuilder()
    builder.string = builtins.single_quoted_string
    grammar = builder(start=builder.string)

    samples = (
        "'foo'",
        "'say:\\'foo\\''",
        "'say\\\\':\\'foo\\''",
        # NOTE(review): identical to the previous sample; kept so the test
        # performs exactly the same checks - possibly a copy/paste slip.
        "'say\\\\':\\'foo\\''",
    )
    for sample in samples:
        self.assert_lexeme(grammar, sample)
def setUp(self):
    """Build the JSON fixture grammar whose rules are combined with callables.

    Each rule is paired with a Python callable through ``>=`` (``float``,
    ``list``, ``tuple``, ``dict``, ...). ``expr`` is the start rule, the
    whitespace rule is handed to ``ignore`` in ``tokenize`` and the grammar
    is created with ``drop_terminals=True``.
    """
    super().setUp()

    # Optional minus, integer part, optional fraction, optional exponent.
    number_pattern = r'-?(?:[1-9]\d*|0)(?:\.\d*)?(?:[eE][+-]?\d+)?'
    string_pattern = r'"(?:[^"]|\\(?:["\\nbfrt]|u[0-9a-fA-F]{4}))*"'

    def strip_quotes(s):
        # Remove the first and last character (the surrounding quotes).
        return s[1:-1]

    def is_true(s):
        return s == 'true'

    rules = GrammarBuilder()
    rules.number_literal = Regexp(number_pattern) >= float
    rules.string_literal = Regexp(string_pattern) >= strip_quotes
    # ``>=`` binds more loosely than ``+`` and ``|``; the parentheses below
    # only make the existing grouping explicit.
    rules.array = ('[' + flatten(repeat(rules.expr, separator=',')) + ']') >= list
    rules.object_item = (rules.string_literal + ':' + rules.expr) >= tuple
    rules.object_ = (
        '{' + flatten(repeat(rules.object_item, separator=',')) + '}'
    ) >= dict
    rules.boolean = (keep('true') | keep('false')) >= is_true
    rules.null = keep('null') >= const(None)
    rules.expr = flatten(
        rules.number_literal
        | rules.string_literal
        | rules.array
        | rules.object_
        | rules.boolean
        | rules.null
    )
    rules.whitespace = Regexp('\\s+')

    self.grammar = rules(
        start=rules.expr,
        tokenize=[ignore(rules.whitespace)],
        drop_terminals=True,
    )
# Checks that a default GrammarFormatter() renders the grammar (rules `string`,
# `sequence` and `start`) as the expected "name ::= ..." text; the expected
# value is the raw triple-quoted literal after textwrap.dedent(...).strip().
# NOTE(review): this definition appears collapsed onto one line here, so the
# line breaks inside the expected r"""...""" literal are not visible; the code
# is left untouched rather than guessing at them.
# NOTE(review): the local name `format` shadows the builtin inside this test.
def test_default_format(self): g = GrammarBuilder() g.string = builtins.double_quoted_string g.sequence = builtins.py_integer + Terminal('x') g.start = g.string | g.sequence | epsilon grammar = g(start=g.start) format = GrammarFormatter() self.assertEqual(format(grammar), textwrap.dedent(r""" string ::= r'"(?:\\\\"|[^"])*"' sequence ::= r'[1-9]\\d*' / '0' / r'0[xX][0-9a-fA-F]+' / r'0[oO][0-7]+' / r'0[0-7]+' / r'0[bB][01]+', 'x' start ::= string / sequence / ɛ """).strip())
# Checks that a default GrammarFormatter() renders the grammar (rules `string`,
# `sequence` and `start`) as the expected "name ::= ..." text; the expected
# value is the raw triple-quoted literal after textwrap.dedent(...).strip().
# NOTE(review): this definition appears collapsed onto one line here, so the
# line breaks inside the expected r"""...""" literal are not visible; the code
# is left untouched rather than guessing at them.
# NOTE(review): the local name `format` shadows the builtin inside this test.
def test_default_format(self): g = GrammarBuilder() g.string = builtins.double_quoted_string g.sequence = builtins.py_integer + Terminal('x') g.start = g.string | g.sequence | epsilon grammar = g(start=g.start) format = GrammarFormatter() self.assertEqual( format(grammar), textwrap.dedent(r""" string ::= r'"(?:\\\\"|[^"])*"' sequence ::= r'[1-9]\\d*' / '0' / r'0[xX][0-9a-fA-F]+' / r'0[oO][0-7]+' / r'0[0-7]+' / r'0[bB][01]+', 'x' start ::= string / sequence / ɛ """).strip())
def setUp(self):
    """Build the fixture grammar ``start = ab + ab`` with ``ab = a | b``.

    ``a``, ``b`` and ``c`` are the terminals 'A', 'B' and 'C'; ``c`` is
    defined on the builder but not referenced by any other rule here. The
    whitespace rule is handed to ``ignore`` in ``tokenize``. The result is
    stored on ``self.grammar``.
    """
    # Chain to the parent fixture so that set-up work done by base classes
    # or mixins is not silently skipped.
    super().setUp()

    rules = GrammarBuilder()
    rules.a = Terminal('A')
    rules.b = Terminal('B')
    rules.c = Terminal('C')
    rules.ab = rules.a | rules.b
    rules.start = rules.ab + rules.ab
    rules.whitespace = Regexp(r'\s+')
    self.grammar = rules(start=rules.start, tokenize=[ignore(rules.whitespace)])
def test_alias(self):
    """Rules assigned from other rules appear under their own names.

    ``foo2``/``bar2`` are assigned from ``foo``/``bar``. Parsing "foo" from
    ``start`` is expected to yield a ``foo`` node, while parsing it from
    ``start2`` is expected to yield a ``foo2`` node.
    """
    builder = GrammarBuilder()
    builder.foo = "foo"
    builder.bar = "bar"
    builder.foo2 = builder.foo
    builder.bar2 = builder.bar
    builder.whitespace = builtins.whitespace
    builder.start = builder.foo | builder.bar
    builder.start2 = builder.foo2 | builder.bar2

    # Grammar rooted at the original rules.
    grammar = builder(start=builder.start, tokenize=[ignore(builder.whitespace)])
    tree = grammar.parse("foo").tuple_tree()
    self.assertEqual(tree, ('start', [('foo', 'foo')]))

    # Grammar rooted at the rules built from the aliases.
    grammar = builder(start=builder.start2, tokenize=[ignore(builder.whitespace)])
    tree = grammar.parse("foo").tuple_tree()
    self.assertEqual(tree, ('start2', [('foo2', 'foo')]))
def setUp(self):
    """Build the arithmetic fixture grammar stored on ``self.grammar``.

    Rules (each paired with a callable through ``>=``):

    * ``number``: ``Regexp(r'\\d+')`` with ``float``
    * ``atom``: a number, or ``'(' + expr + ')'`` with ``itemgetter(0)``
    * ``signed``: ``repeat`` of kept '+'/'-' followed by an atom
    * ``product_expr``: ``+repeat(signed, separator='*')`` with ``product``
    * ``expr`` (start): ``+repeat(product_expr, separator='+')`` with ``sum``

    Horizontal whitespace is handed to ``ignore`` in ``tokenize`` and the
    grammar is created with ``drop_terminals=True``.
    """
    # Chain to the parent fixture so that set-up work done by base classes
    # or mixins is not silently skipped.
    super().setUp()

    def apply_signs(ops, x):
        # One -1 factor per '-' in ops (assuming ``product`` multiplies the
        # items it is given - TODO confirm against its definition).
        return x * product(-1 for op in ops if op == '-')

    signed = uncurry(apply_signs)

    rules = GrammarBuilder()
    rules.number = Regexp(r'\d+') >= float
    # ``>=`` binds more loosely than ``+``, ``|`` and unary ``+``; the
    # parentheses below only make the existing grouping explicit.
    rules.atom = rules.number | flatten(('(' + rules.expr + ')') >= itemgetter(0))
    rules.signed = (repeat(keep('+') | keep('-')) + rules.atom) >= signed
    rules.product_expr = (+repeat(rules.signed, separator='*')) >= product
    rules.expr = (+repeat(rules.product_expr, separator='+')) >= sum
    rules.whitespace = builtins.horizontal_whitespace

    self.grammar = rules(
        start=rules.expr,
        tokenize=[ignore(rules.whitespace)],
        drop_terminals=True,
    )
def setUp(self):
    """Create ``self.grammar``, a small arithmetic grammar with callables.

    ``expr`` (the start rule) is ``+repeat(product_expr, separator='+')``
    paired with ``sum``; ``product_expr`` is
    ``+repeat(signed, separator='*')`` paired with ``product``; ``signed``
    is a ``repeat`` of kept '+'/'-' followed by an ``atom``; an ``atom`` is
    a ``number`` (``\\d+`` paired with ``float``) or a parenthesised
    ``expr``. Horizontal whitespace is handed to ``ignore`` in ``tokenize``
    and ``drop_terminals=True`` is passed when creating the grammar.
    """
    # Run the inherited fixture first; without this call, set-up performed by
    # a base class or mixin would be skipped.
    super().setUp()

    # Combines the collected sign tokens with the operand: the operand is
    # multiplied by ``product`` over one -1 per '-' token.
    signed = uncurry(
        lambda ops, x: x * product(-1 for s in ops if s == '-')
    )

    builder = GrammarBuilder()
    builder.number = Regexp(r'\d+') >= float
    parenthesised = flatten('(' + builder.expr + ')' >= itemgetter(0))
    builder.atom = builder.number | parenthesised
    builder.signed = repeat(keep('+') | keep('-')) + builder.atom >= signed
    builder.product_expr = +repeat(builder.signed, separator='*') >= product
    builder.expr = +repeat(builder.product_expr, separator='+') >= sum
    builder.whitespace = builtins.horizontal_whitespace

    self.grammar = builder(
        start=builder.expr,
        tokenize=[ignore(builder.whitespace)],
        drop_terminals=True,
    )
def test_alias(self):
    """Check the tuple trees produced when parsing through aliased rules.

    ``foo2`` and ``bar2`` are assigned from ``foo`` and ``bar``. The tree
    for "foo" is expected to name ``foo`` when parsed from ``start`` and
    ``foo2`` when parsed from ``start2``.
    """
    rules = GrammarBuilder()
    rules.foo = "foo"
    rules.bar = "bar"
    rules.foo2 = rules.foo
    rules.bar2 = rules.bar
    rules.whitespace = builtins.whitespace
    rules.start = rules.foo | rules.bar
    rules.start2 = rules.foo2 | rules.bar2

    def tree_of_foo(start_rule):
        # Build a grammar rooted at start_rule and parse the fixed input.
        grammar = rules(start=start_rule, tokenize=[ignore(rules.whitespace)])
        return grammar.parse("foo").tuple_tree()

    self.assertEqual(tree_of_foo(rules.start), ('start', [('foo', 'foo')]))
    self.assertEqual(tree_of_foo(rules.start2), ('start2', [('foo2', 'foo')]))
def setUp(self):
    """Build the plain JSON fixture grammar (no callables attached).

    Rules: number and string literals (regexps), ``array``, ``object_`` and
    the start rule ``expr``. The whitespace rule is handed to ``ignore`` in
    ``tokenize`` and the grammar is created with ``drop_terminals=True``.
    """
    super().setUp()

    # Optional minus, integer part, optional fraction, optional exponent.
    number_pattern = r'-?(?:[1-9]\d*|0)(?:\.\d*)?(?:[eE][+-]?\d+)?'
    string_pattern = r'"(?:[^"]|\\(?:["\\nbfrt]|u[0-9a-fA-F]{4}))*"'

    rules = GrammarBuilder()
    rules.number_literal = Regexp(number_pattern)
    rules.string_literal = Regexp(string_pattern)
    rules.array = '[' + flatten(repeat(rules.expr, separator=drop(','))) + ']'
    rules.object_ = (
        '{'
        + flatten(
            repeat(
                flatten(rules.string_literal + ':' + rules.expr),
                separator=',',
            )
        )
        + '}'
    )
    rules.expr = flatten(
        rules.number_literal | rules.string_literal | rules.array | rules.object_
    )
    rules.whitespace = Regexp('\\s+')

    self.grammar = rules(
        start=rules.expr,
        tokenize=[ignore(rules.whitespace)],
        drop_terminals=True,
    )
# Tokenizes two dedented sample sources with a grammar whose `tokenize` list is
# [indent('(', ')'), ignore(g.whitespace)] and compares, for every token,
# `t.lexeme or t.symbol.name` against the expected list of lexemes and
# INDENT/DEDENT markers.
# NOTE(review): this definition appears collapsed onto one line here, so the
# indentation and line breaks inside the two textwrap.dedent("""...""") sample
# sources (which are the actual test input) are not visible; the code is left
# untouched rather than guessing at them.
# NOTE(review): `format = GrammarFormatter()` is assigned but never used in the
# visible body, and the name shadows the builtin `format`.
def test_indention_tokenizer(self): g = GrammarBuilder() g.whitespace = Regexp(r'\s+') g.block = INDENT + repeat(g.expr) + DEDENT g.label = Regexp(r'\w+') g.expr = g.label + Optional(g.block) | g.label + '(' + g.expr + ')' grammar = g( start=g.expr, tokenize=[ indent('(', ')'), ignore(g.whitespace), ] ) format = GrammarFormatter() source = textwrap.dedent(""" foo bar baz boo x y """).strip() self.assertEqual( [t.lexeme or t.symbol.name for t in grammar.tokenize(source)], ['foo', 'INDENT', 'bar', 'baz', 'INDENT', 'boo', 'DEDENT', 'x', 'y', 'DEDENT'] ) source = textwrap.dedent(""" foo foo baz(arg) boo baz( arg ) boo baz(aaa bbb) xxx """).strip() self.assertEqual( [t.lexeme or t.symbol.name for t in grammar.tokenize(source)], [ 'foo', 'foo', 'INDENT', 'baz', '(', 'arg', ')', 'boo', 'INDENT', 'baz', '(', 'arg', ')', 'DEDENT', 'boo', 'INDENT', 'baz', '(', 'aaa', 'bbb', ')', 'xxx', 'DEDENT', 'DEDENT' ] )
def setUp(self):
    """Create ``self.grammar``: a JSON-shaped grammar without callables.

    ``expr`` (the start rule) is a flattened choice of number literal,
    string literal, ``array`` and ``object_``. An array separates its items
    with ``drop(',')``; an object separates flattened
    ``string_literal ':' expr`` items with ``','``. The whitespace rule is
    handed to ``ignore`` in ``tokenize`` and ``drop_terminals=True`` is
    passed.
    """
    super().setUp()
    builder = GrammarBuilder()

    builder.number_literal = Regexp(
        r'-?(?:[1-9]\d*|0)(?:\.\d*)?(?:[eE][+-]?\d+)?'
    )
    builder.string_literal = Regexp(
        r'"(?:[^"]|\\(?:["\\nbfrt]|u[0-9a-fA-F]{4}))*"'
    )

    array_items = flatten(repeat(builder.expr, separator=drop(',')))
    builder.array = '[' + array_items + ']'

    object_member = flatten(builder.string_literal + ':' + builder.expr)
    object_members = flatten(repeat(object_member, separator=','))
    builder.object_ = '{' + object_members + '}'

    builder.expr = flatten(
        builder.number_literal
        | builder.string_literal
        | builder.array
        | builder.object_
    )
    builder.whitespace = Regexp('\\s+')

    self.grammar = builder(
        start=builder.expr,
        tokenize=[ignore(builder.whitespace)],
        drop_terminals=True,
    )
def setUp(self):
    """Create ``self.grammar`` with the single rule ``seq``.

    ``seq`` is ``repeat('A', separator=',', leading=True)`` and is also the
    start rule.
    """
    super().setUp()
    rules = GrammarBuilder()
    rules.seq = repeat('A', separator=',', leading=True)
    self.grammar = rules(start=rules.seq)
def setUp(self):
    """Create ``self.grammar``: ``seq = repeat(ab)`` as the start rule.

    ``ab`` is ``flatten(Terminal('A') | Terminal('B'))``.
    """
    super().setUp()
    rules = GrammarBuilder()
    rules.ab = flatten(Terminal('A') | Terminal('B'))
    rules.seq = repeat(rules.ab)
    self.grammar = rules(start=rules.seq)
def setUp(self):
    """Create ``self.grammar`` whose start rule is ``'A' + Optional('A')``."""
    super().setUp()
    rules = GrammarBuilder()
    rules.start = 'A' + Optional('A')
    self.grammar = rules(start=rules.start)
def setUp(self):
    """Create ``self.grammar``: a ``repeat`` of ``ab`` as the start rule.

    ``ab`` is ``Terminal('A') | Terminal('B')``; ``seq`` is
    ``repeat(ab, separator=',', trailing=True)``.
    """
    super().setUp()
    rules = GrammarBuilder()
    rules.ab = Terminal('A') | Terminal('B')
    rules.seq = repeat(rules.ab, separator=',', trailing=True)
    self.grammar = rules(start=rules.seq)
def setUp(self):
    """Create ``self.grammar`` whose start rule is ``ab + ab``.

    ``ab`` is ``Terminal('A') | Terminal('B')``.
    """
    super().setUp()
    rules = GrammarBuilder()
    rules.ab = Terminal('A') | Terminal('B')
    rules.start = rules.ab + rules.ab
    self.grammar = rules(start=rules.start)
def setUp(self):
    """Create ``self.grammar``: a JSON-shaped grammar with callables attached.

    Every rule is combined with a Python callable through ``>=``: numbers
    with ``float``, strings with a function that removes the first and last
    character, arrays with ``list``, object items with ``tuple``, objects
    with ``dict``, booleans with a comparison against ``'true'`` and null
    with ``const(None)``. ``expr`` is the start rule; the whitespace rule is
    handed to ``ignore`` in ``tokenize`` and ``drop_terminals=True`` is
    passed.
    """
    super().setUp()
    builder = GrammarBuilder()

    number = Regexp(r'-?(?:[1-9]\d*|0)(?:\.\d*)?(?:[eE][+-]?\d+)?')
    builder.number_literal = number >= float

    quoted = Regexp(r'"(?:[^"]|\\(?:["\\nbfrt]|u[0-9a-fA-F]{4}))*"')
    builder.string_literal = quoted >= (lambda s: s[1:-1])

    # ``>=`` binds more loosely than ``+`` and ``|``, so each callable applies
    # to the whole expression on its left.
    array_items = flatten(repeat(builder.expr, separator=','))
    builder.array = '[' + array_items + ']' >= list

    builder.object_item = builder.string_literal + ':' + builder.expr >= tuple
    object_items = flatten(repeat(builder.object_item, separator=','))
    builder.object_ = '{' + object_items + '}' >= dict

    builder.boolean = keep('true') | keep('false') >= (lambda s: s == 'true')
    builder.null = keep('null') >= const(None)
    builder.expr = flatten(
        builder.number_literal
        | builder.string_literal
        | builder.array
        | builder.object_
        | builder.boolean
        | builder.null
    )
    builder.whitespace = Regexp('\\s+')

    self.grammar = builder(
        start=builder.expr,
        tokenize=[ignore(builder.whitespace)],
        drop_terminals=True,
    )