def test_earley_scanless(self):
    # Smoke test: there is no assertion on the tree, the input only has to
    # parse without raising.
    parser = Lark("""start: A "b" c
                     A: "a"+
                     c: "abc"
                  """, parser="earley", lexer=LEXER)
    parser.parse('aaaababc')
def test_propagate_positions(self):
    # With propagate_positions=True the subtree built for rule `a` is
    # expected to expose the line it was matched on.
    grammar = """start: a
                 a: "a"
              """
    tree = Lark(grammar, propagate_positions=True).parse('a')
    first_child = tree.children[0]
    self.assertEqual(first_child.line, 1)
def test_earley_scanless4(self):
    # The whole input must come back as one A token; the optional second A
    # is expected to stay unmatched.
    grammar = """
    start: A A?
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    tree = parser.parse("aaa")
    self.assertEqual(tree.children, ['aaa'])
def test_anon_in_scanless(self):
    # An Earley implementation fails this test unless it either handles
    # empty rules specially or re-processes rules that already completed.
    parser = Lark(r"""start: B
                      B: ("ab"|/[^b]/)*
                   """, lexer=LEXER)
    tree = parser.parse('abc')
    self.assertEqual(tree.children[0], 'abc')
def test_earley_scanless3(self):
    """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)"""
    grammar = """
    start: A A
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    tree = parser.parse("aaa")
    # Of the possible splits of "aaa" into two A tokens, this one is expected.
    expected_tokens = ['aa', 'a']
    self.assertEqual(tree.children, expected_tokens)
def test_infinite_recurse(self):
    # Rule `a` can expand directly to itself. Building an LALR parser for it
    # must raise GrammarError; an Earley parser is constructed successfully
    # but must raise ParseError when asked to parse.
    grammar = """start: a
                 a: a | "a"
              """

    with self.assertRaises(GrammarError):
        Lark(grammar, parser='lalr')

    # Same expectation for both lexer settings exercised by this test.
    for lexer_choice in (None, 'dynamic'):
        earley = Lark(grammar, parser='earley', lexer=lexer_choice)
        with self.assertRaises(ParseError):
            earley.parse('a')
def test_earley_scanless2(self):
    # Smoke test: the program only has to parse without raising while
    # single spaces are ignored by the grammar.
    grammar = """
    start: statement+

    statement: "r"
             | "c" /[a-z]/+

    %ignore " "
    """
    source_text = """c b r"""

    parser = Lark(grammar, parser='earley', lexer=LEXER)
    parser.parse(source_text)
def test_earley_repeating_empty(self):
    # Regression test for a subtle bug: the same empty rule appears twice in
    # a row and both occurrences must show up in the result.
    grammar = """
    !start: "a" empty empty "b"
    empty: empty2
    empty2:
    """
    tree = Lark(grammar, parser='earley', lexer=LEXER).parse('ab')

    # Each `empty` expands to an `empty2` node that has no children.
    empty_node = Tree('empty', [Tree('empty2', [])])
    expected_children = ['a', empty_node, empty_node, 'b']
    self.assertSequenceEqual(tree.children, expected_children)
def test_ambiguity1(self):
    # "cde" can be read as c, d, e or as cd, e. With ambiguity='explicit'
    # the root is expected to be an `_ambig` node with exactly two
    # alternatives.
    grammar = """
    start: cd+ "e"

    !cd: "c"
       | "d"
       | "cd"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse('cde')

    # unittest assertions are used instead of bare `assert` so the checks
    # still run under `python -O`; the tree is passed as the failure message.
    self.assertEqual(tree.data, '_ambig', tree)
    self.assertEqual(len(tree.children), 2)
def test_earley_explicit_ambiguity(self):
    # Regression test for a subtle bug: "ab" matches both `a b` and `ab`,
    # so the explicit-ambiguity result must be an `_ambig` node holding
    # two alternatives.
    grammar = """
    start: a b | ab
    a: "a"
    b: "b"
    ab: "ab"
    """
    ambiguous_parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
    tree = ambiguous_parser.parse('ab')

    self.assertEqual(tree.data, '_ambig')
    alternative_count = len(tree.children)
    self.assertEqual(alternative_count, 2)
def test_fruitflies_ambig(self):
    # "fruit flies like bananas" has two readings under this grammar; with
    # ambiguity='explicit' both must be reported under one `_ambig` root.
    grammar = """
    start: noun verb noun        -> simple
            | noun verb "like" noun -> comparative

    noun: adj? NOUN
    verb: VERB
    adj: ADJ

    NOUN: "flies" | "bananas" | "fruit"
    VERB: "like" | "flies"
    ADJ: "fruit"

    %import common.WS
    %ignore WS
    """
    tree = Lark(grammar, ambiguity='explicit', lexer=LEXER).parse('fruit flies like bananas')

    # Reading 1: "fruit" is the noun and "flies" is the verb.
    comparative_reading = Tree('comparative', [
        Tree('noun', ['fruit']),
        Tree('verb', ['flies']),
        Tree('noun', ['bananas']),
    ])
    # Reading 2: "fruit flies" is the noun phrase and "like" is the verb.
    simple_reading = Tree('simple', [
        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
        Tree('verb', ['like']),
        Tree('noun', ['bananas']),
    ])
    expected = Tree('_ambig', [comparative_reading, simple_reading])

    self.assertEqual(tree, expected)
def test_same_ast(self):
    """Tests that Earley and LALR parsers produce equal trees"""
    text = '(a,b,c,*x)'

    # First tree: explicit LALR parser, NAME defined as one regexp.
    lalr_parser = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                           name_list: NAME | name_list "," NAME
                           NAME: /\w+/ """, parser='lalr')
    lalr_tree = lalr_parser.parse(text)

    # Second tree: parser left at Lark's default, NAME defined as a
    # repeated single-character regexp.
    default_parser = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                              name_list: NAME | name_list "," NAME
                              NAME: /\w/+ """)
    default_tree = default_parser.parse(text)

    # unittest assertion instead of a bare `assert`, so the comparison is
    # not stripped under `python -O`; the failure message is unchanged.
    self.assertEqual(
        lalr_tree,
        default_tree,
        '%s != %s' % (lalr_tree.pretty(), default_tree.pretty()),
    )
def _Lark(grammar, **kwargs):
    """Build a Lark parser for *grammar* with the suite's lexer/parser pair.

    LEXER and PARSER are taken from the surrounding scope (defined outside
    this block); any extra keyword arguments are forwarded to Lark as-is.
    """
    configured_parser = Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
    return configured_parser
def test_embedded_transformer(self):
    # A transformer passed to Lark(transformer=...) must give the same
    # result as running that transformer on the finished parse tree.
    class T(Transformer):
        def a(self, children):
            return "<a>"

        def b(self, children):
            return "<b>"

        def c(self, children):
            return "<c>"

    def check_both_modes(grammar, text, expected_children):
        # Transformer applied after parsing.
        plain_parser = Lark(grammar, parser='lalr')
        transformed = T().transform(plain_parser.parse(text))
        self.assertEqual(transformed.children, expected_children)

        # Transformer embedded in the parser.
        embedded_parser = Lark(grammar, parser='lalr', transformer=T())
        tree = embedded_parser.parse(text)
        self.assertEqual(tree.children, expected_children)

    # Test regular
    check_both_modes("""start: a
                        a : "x"
                     """, "x", ["<a>"])

    # Test Expand1
    check_both_modes("""start: a
                        ?a : b
                        b : "x"
                     """, "x", ["<b>"])

    # Test Expand1 -> Alias
    check_both_modes("""start: a
                        ?a : b b -> c
                        b : "x"
                     """, "xx", ["<c>"])
def test_expand1(self):
    # Checks which node ends up as the first child of `start` when rule `a`
    # is declared with the `?` prefix, with and without an alias.

    # Single child, no alias: the `b` subtree is expected in place of `a`.
    g = Lark("""start: a
                ?a: b
                b: "x"
             """)
    r = g.parse('x')
    self.assertEqual(r.children[0].data, "b")

    # Single rule child with an alias: the node is expected to be named `c`.
    g = Lark("""start: a
                ?a: b -> c
                b: "x"
             """)
    r = g.parse('x')
    self.assertEqual(r.children[0].data, "c")

    # Single terminal child with an alias. The original never parsed with
    # this grammar, so its assertion silently re-checked the previous
    # case's tree; parse here so the assertion covers this grammar.
    g = Lark("""start: a
                ?a: B -> c
                B: "x"
             """)
    r = g.parse('x')
    self.assertEqual(r.children[0].data, "c")

    # Two children with an alias: the node is expected to be named `c`.
    g = Lark("""start: a
                ?a: b b -> c
                b: "x"
             """)
    r = g.parse('xx')
    self.assertEqual(r.children[0].data, "c")
def test_earley_prioritization_sum(self):
    """Tests effect of priority on result"""

    def top_level_rules(tree):
        # Concatenate the rule names of the direct children of the root.
        return ''.join(child.data for child in tree.children)

    # Case 1: only bb_ carries a priority.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_: "a"
    b_: "b"
    ab_: "ab"
    bb_.1: "bb"
    """
    # This case goes through _Lark, the others construct Lark directly.
    parser = _Lark(grammar, ambiguity='resolve__antiscore_sum')
    tree = parser.parse('abba')
    self.assertEqual(top_level_rules(tree), 'ab_b_a_')

    # Case 2: only ab_ carries a priority.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_: "a"
    b_: "b"
    ab_.1: "ab"
    bb_: "bb"
    """
    parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    tree = parser.parse('abba')
    self.assertEqual(top_level_rules(tree), 'indirection')

    # Case 3: every terminal rule carries a priority (a_=2, b_=1, ab_=3, bb_=3).
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_.2: "a"
    b_.1: "b"
    ab_.3: "ab"
    bb_.3: "bb"
    """
    parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    tree = parser.parse('abba')
    self.assertEqual(top_level_rules(tree), 'ab_b_a_')

    # Case 4: every terminal rule carries a priority (a_=1, b_=1, ab_=4, bb_=3).
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_.1: "a"
    b_.1: "b"
    ab_.4: "ab"
    bb_.3: "bb"
    """
    parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    tree = parser.parse('abba')
    self.assertEqual(top_level_rules(tree), 'indirection')
_NL: /(\r?\n[\t ]*)+/ _INDENT: "<INDENT>" _DEDENT: "<DEDENT>" """ class TreeIndenter(Indenter): NL_type = '_NL' OPEN_PAREN_types = [] CLOSE_PAREN_types = [] INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 8 parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter()) test_tree = """ a b c d e f g """ def test(): print(parser.parse(test_tree).pretty())