def test_repr():
    """Test repr of ``Node``."""
    s = u'hai ö'
    boogie = u'böogie'
    tree = Node(boogie, s, 0, 3,
                children=[Node('', s, 3, 4), Node('', s, 4, 5)])
    expected = (
        "s = {hai_o}\n"
        "Node({boogie}, s, 0, 3, children=[Node('', s, 3, 4), "
        "Node('', s, 4, 5)])"
    ).format(hai_o=repr(s), boogie=repr(boogie))
    eq_(repr(tree), expected)
def test_all_of(self):
    """``AllOf`` should return its own node, wrapping the last child."""
    expr = AllOf(Literal('a', name='lit_a'),
                 Regex('A', ignore_case=True, name='reg_a'),
                 name='all_of')
    text = 'a'
    expected = Node('all_of', text, 0, 1,
                    children=[Node('reg_a', text, 0, 1)])
    eq_(expr.match(text), expected)
def test_lazy_custom_rules(self):
    """Make sure LazyReferences manually shoved into custom rules are
    resolved.

    Incidentally test passing full-on Expressions as custom rules and
    having a custom rule as the default one.

    """
    grammar = Grammar("""
        four = '4'
        five = '5'""",
        forty_five=Sequence(LazyReference('four'),
                            LazyReference('five'),
                            name='forty_five')).default('forty_five')
    s = '45'
    expected = Node(grammar['forty_five'], s, 0, 2, children=[
        Node(grammar['four'], s, 0, 1),
        Node(grammar['five'], s, 1, 2)])
    eq_(grammar.parse(s), expected)
def test_sequence_nodes(self):
    """Assert that ``Sequence`` produces nodes with the right children."""
    seq = Sequence(Literal('heigh', name='greeting1'),
                   Literal('ho', name='greeting2'),
                   name='dwarf')
    text = 'heighho'
    expected = Node(seq, text, 0, 7, children=[
        Node(seq.members[0], text, 0, 5),
        Node(seq.members[1], text, 5, 7)])
    self.assertEqual(seq.match(text), expected)
def test_complex_custom_rules(self):
    """Run 5-arg custom rules through their paces.

    Incidentally tests returning an actual Node from the custom rule.

    """
    # In this particular implementation of the digit rule, no node is
    # generated for `digit`; it falls right through to `real_digit`.
    # I'm not sure if this could lead to problems; I can't think of
    # any, but it's probably not a great idea.
    def digit(text, pos, cache, error, grammar):
        return grammar['real_digit'].match_core(text, pos, cache, error)

    grammar = Grammar("""
        bracketed_digit = start digit end
        start = '['
        end = ']'
        real_digit = '6'""",
        digit=digit)
    s = '[6]'
    expected = Node(grammar['bracketed_digit'], s, 0, 3, children=[
        Node(grammar['start'], s, 0, 1),
        Node(grammar['real_digit'], s, 1, 2),
        Node(grammar['end'], s, 2, 3)])
    eq_(grammar.parse(s), expected)
def test_lookahead(self):
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')
    s = 'arp'
    expected = Node('starts_with_a', s, 0, 3, children=[
        Node('', s, 0, 0),
        Node('', s, 0, 3)])
    eq_(grammar.parse(s), expected)
def _match(self, text, pos, cache, error):
    """Match the wrapped member; on success, return a zero-width node.

    The child match is consulted via ``self.evaluate``; only its
    success/failure matters — like a lookahead, no text is consumed.
    """
    child = self.members[0]._match(text, pos, cache, error)
    if child is None or not self.evaluate(child):
        return None
    result = Node(self.name, text, pos, pos)  # zero-width, like lookahead
    result.expression = self
    return result
def test_lookahead(self):
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    self.assertRaises(ParseError, grammar.parse, 'burp')
    s = 'arp'
    expected = Node(grammar['starts_with_a'], s, 0, 3, children=[
        Node(Lookahead(Literal('a')), s, 0, 0),
        Node(Regex(r'[a-z]+'), s, 0, 3)])
    self.assertEqual(grammar.parse(s), expected)
def test_spaceless_literal(self):
    rule = rule_grammar['spaceless_literal']
    # Both a plain quoted literal and an r-prefixed one with an escaped
    # quote should parse end to end.
    for text, end in [('"anything but quotes#$*&^"', 26),
                      (r'r"\""', 5)]:
        expected = Node('spaceless_literal', text, 0, end,
                        children=[Node('', text, 0, end)])
        eq_(rule.parse(text), expected)
def test_spaceless_literal(self):
    rule = rule_grammar['spaceless_literal']
    # Both a plain quoted literal and an r-prefixed one with an escaped
    # quote should parse end to end.
    for text, end in [('"anything but quotes#$*&^"', 26),
                      (r'r"\""', 5)]:
        expected = Node(rule, text, 0, end,
                        children=[Node(rule.members[0], text, 0, end)])
        self.assertEqual(rule.parse(text), expected)
def test_optional(self):
    tree = rule_grammar.parse('boy = "howdy"?\n')
    rules, default_rule = RuleVisitor().visit(tree)
    text = 'howdy'
    # One node should come from the Optional and another from the
    # Literal inside it.
    expected = Node(default_rule, text, 0, 5,
                    children=[Node(Literal("howdy"), text, 0, 5)])
    self.assertEqual(default_rule.parse(text), expected)
def test_quantifier(self):
    """Each of the three quantifier symbols should parse as a
    ``quantifier`` node.

    The three cases are identical except for the symbol, so drive the
    assertion from a loop instead of repeating it verbatim.
    """
    for text in ('*', '?', '+'):
        eq_(rule_grammar['quantifier'].parse(text),
            Node('quantifier', text, 0, 1))
def test_quantifier(self):
    """Each quantifier symbol should parse to a ``quantifier`` node
    wrapping the symbol plus a zero-width trailing-whitespace node.

    All three symbols produce structurally identical trees, so loop
    instead of repeating the assertion three times.
    """
    quantifier = rule_grammar['quantifier']
    for text in ('*', '?', '+'):
        eq_(quantifier.parse(text),
            Node(quantifier, text, 0, 1, children=[
                Node(quantifier.members[0], text, 0, 1),
                Node(rule_grammar['_'], text, 1, 1)]))
def test_quantifier(self):
    """Each quantifier symbol should parse to a ``quantifier`` node
    wrapping the regex match plus a zero-width whitespace node.

    All three symbols produce structurally identical trees, so loop
    instead of repeating the assertion three times.
    """
    for text in ('*', '?', '+'):
        eq_(rule_grammar['quantifier'].parse(text),
            Node('quantifier', text, 0, 1, children=[
                Node('__Regex__', text, 0, 1),
                Node('_', text, 1, 1)]))
def test_parse_success(self):
    """Token literals should work."""
    tokens = [Token('token1'), Token('token2')]
    grammar = TokenGrammar("""
        foo = token1 "token2"
        token1 = "token1"
        """)
    expected = Node(grammar['foo'], tokens, 0, 2, children=[
        Node(grammar['token1'], tokens, 0, 1),
        Node(TokenMatcher('token2'), tokens, 1, 2)])
    self.assertEqual(grammar.parse(tokens), expected)
def test_parse_success(self):
    """Token literals should work."""
    tokens = [Token('token1'), Token('token2')]
    grammar = TokenGrammar("""
        foo = token1 "token2"
        token1 = "token1"
        """)
    expected = Node('foo', tokens, 0, 2, children=[
        Node('token1', tokens, 0, 1),
        Node('', tokens, 1, 2)])
    eq_(grammar.parse(tokens), expected)
def test_one_or_more_one(self):
    """Test the 1 case of ``OneOrMore``; it should return a node with a
    child."""
    expr = OneOrMore(Literal('a', name='lit'), name='one')
    text = 'a'
    child = Node(expr.members[0], text, 0, 1)
    eq_(expr.match(text), Node(expr, text, 0, 1, children=[child]))
def test_parse_success(self):
    """Make sure ``parse()`` returns the tree on success.

    There's not much more than that to test that we haven't already
    vetted above.

    """
    expr = OneOrMore(Literal('a', name='lit'), name='more')
    text = 'aa'
    lit = expr.members[0]
    expected = Node(expr, text, 0, 2,
                    children=[Node(lit, text, 0, 1),
                              Node(lit, text, 1, 2)])
    self.assertEqual(expr.parse(text), expected)
def test_optional(self):
    """``Optional`` should return its own node wrapping the succeeded
    child."""
    expr = Optional(Literal('a', name='lit'), name='opt')
    eq_(expr.match('a'),
        Node('opt', 'a', 0, 1, children=[Node('lit', 'a', 0, 1)]))
    # Test failure of the Literal inside the Optional; the
    # LengthTests.test_optional is ambiguous for that.
    eq_(expr.match(''), Node('opt', '', 0, 0))
def test_expressions_from_rules(self):
    """Test the ``Grammar`` base class's ability to compile an
    expression tree from rules.

    That the correct ``Expression`` tree is built is already tested in
    ``RuleGrammarTests``. This tests only that the ``Grammar`` base
    class's ``_expressions_from_rules`` works.

    """
    grammar = Grammar('greeting = "hi" / "howdy"')
    tree = grammar.parse('hi')
    expected = Node(grammar['greeting'], 'hi', 0, 2,
                    children=[Node(Literal('hi'), 'hi', 0, 2)])
    self.assertEqual(tree, expected)
def test_regex(self):
    text = '~"[a-zA-Z_][a-zA-Z_0-9]*"LI'
    expected = Node('regex', text, 0, len(text), children=[
        Node('', text, 0, 1),
        Node('literal', text, 1, 25),
        Node('', text, 25, 27)])
    eq_(rule_grammar['regex'].parse(text), expected)
def test_match(self):
    """Make sure partial-matching (with pos) works."""
    grammar = Grammar(r"""
        bold_text = bold_open text bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "(("
        bold_close = "))"
        """)
    s = ' ((boo))yah'
    expected = Node(grammar['bold_text'], s, 1, 8, children=[
        Node(grammar['bold_open'], s, 1, 3),
        Node(grammar['text'], s, 3, 6),
        Node(grammar['bold_close'], s, 6, 8)])
    self.assertEqual(grammar.match(s, pos=1), expected)
def test_repr():
    """Test repr of ``Node``."""
    s = u'hai ö'
    boogie = u'böogie'
    tree = Node(Literal(boogie), s, 0, 3, children=[
        Node(Literal(' '), s, 3, 4),
        Node(Literal(u'ö'), s, 4, 5)])
    expected = str(
        "s = {hai_o}\n"
        "Node({boogie}, s, 0, 3, children=[Node({space}, s, 3, 4), "
        "Node({o}, s, 4, 5)])"
    ).format(hai_o=repr(s),
             boogie=repr(Literal(boogie)),
             space=repr(Literal(" ")),
             o=repr(Literal(u"ö")))
    eq_(repr(tree), expected)
def test_simple_custom_rules(self):
    """Run 2-arg custom-coded rules through their paces."""
    def digit(text, pos):
        return (pos + 1) if text[pos].isdigit() else None

    grammar = Grammar("""
        bracketed_digit = start digit end
        start = '['
        end = ']'""",
        digit=digit)
    s = '[6]'
    expected = Node(grammar['bracketed_digit'], s, 0, 3, children=[
        Node(grammar['start'], s, 0, 1),
        Node(grammar['digit'], s, 1, 2),
        Node(grammar['end'], s, 2, 3)])
    self.assertEqual(grammar.parse(s), expected)
def test_optional(self):
    tree = rule_grammar.parse('boy = "howdy"?\n')
    _, default_rule = RuleVisitor().visit(tree)
    text = 'howdy'
    # One node should come from the Optional and another from the
    # Literal inside it.
    expected = Node('boy', text, 0, 5,
                    children=[Node('__Literal__', text, 0, 5)])
    eq_(default_rule.parse(text), expected)
def add_action(self, node: Node) -> None:
    """
    For each node, we accumulate the rules that generated its children
    in a list.
    """
    if node.expr.name and node.expr.name not in ['ws', 'wsp']:
        nonterminal = f'{node.expr.name} -> '
        if isinstance(node.expr, Literal):
            right_hand_side = f'["{node.text}"]'
        else:
            child_strings = []
            for child in node.__iter__():
                if child.expr.name in ['ws', 'wsp']:
                    continue
                if child.expr.name != '':
                    child_strings.append(child.expr.name)
                else:
                    child_right_side_string = child.expr._as_rhs().lstrip(
                        "(").rstrip(")")  # pylint: disable=protected-access
                    # BUG FIX: the previous pattern ended with a trailing
                    # "|", i.e. an empty alternative.  On Python >= 3.7,
                    # re.split() splits on zero-width matches, so the
                    # empty branch shattered the RHS into single
                    # characters instead of whitespace-separated tokens.
                    child_right_side_list = [
                        tok for tok in re.split(
                            " wsp |wsp | wsp| ws |ws | ws",
                            child_right_side_string)
                        if tok
                    ]
                    child_right_side_list = [
                        tok.upper() if tok.upper() in
                        self.keywords_to_uppercase else tok
                        for tok in child_right_side_list
                    ]
                    child_strings.extend(child_right_side_list)
            right_hand_side = "[" + ", ".join(child_strings) + "]"
        rule = nonterminal + right_hand_side
        # Actions are accumulated front-to-back: each rule is prepended.
        self.action_sequence = [rule] + self.action_sequence
def add_action(self, node: Node) -> None:
    """
    For each node, we accumulate the rules that generated its children
    in a list.
    """
    name = node.expr.name
    if not name or name in ['ws', 'wsp']:
        return
    if isinstance(node.expr, Literal):
        rhs = f'["{node.text}"]'
    else:
        parts = []
        for child in node:
            child_name = child.expr.name
            if child_name in ['ws', 'wsp']:
                continue
            if child_name != '':
                parts.append(child_name)
                continue
            # Anonymous child: derive tokens from its right-hand side,
            # stripping the wrapping parens and whitespace rules.
            raw = child.expr._as_rhs().lstrip("(").rstrip(")")  # pylint: disable=protected-access
            tokens = [tok for tok in WHITESPACE_REGEX.split(raw) if tok]
            parts.extend(
                tok.upper() if tok.upper() in self.keywords_to_uppercase else tok
                for tok in tokens)
        rhs = "[" + ", ".join(parts) + "]"
    # Rules accumulate front-to-back: each new rule is prepended.
    self.action_sequence = [f'{name} -> {rhs}'] + self.action_sequence
def test_visitor():
    """Assert a tree gets visited correctly."""
    grammar = Grammar(r'''
        bold_text = bold_open text bold_close
        text = ~'[a-zA-Z 0-9]*'
        bold_open = '(('
        bold_close = '))'
        ''')
    text = '((o hai))'
    tree = Node(grammar['bold_text'], text, 0, 9,
                [Node(grammar['bold_open'], text, 0, 2),
                 Node(grammar['text'], text, 2, 7),
                 Node(grammar['bold_close'], text, 7, 9)])
    eq_(grammar.parse(text), tree)
    eq_(HtmlFormatter().visit(tree), '<b>o hai</b>')
def _uncached_match(self, text, pos=0, cache=dummy_cache):
    """Match the first member at ``pos``; wrap a success in a Node.

    Returns ``None`` when the member fails (or when there are no
    members).  The original body carried a redundant
    ``if node is not None`` test immediately after an unconditional
    ``return None`` on the opposite branch — the check could never be
    false, so it is removed; behavior is unchanged.
    """
    for member in self.members:
        node = member.match(text, pos, cache)
        if node is None:
            return None
        # Wrap the succeeding child so the result is attributed to
        # this expression.
        return Node(self.name, text, pos, node.end, children=[node])
def _uncached_match(self, text, pos, cache, error):
    """Greedily match the single member zero or more times.

    Stops (and returns the accumulated node) as soon as the member
    fails or produces a zero-length match — a 0-length success would
    otherwise loop forever.
    """
    children = []
    cursor = pos
    while True:
        node = self.members[0].match_core(text, cursor, cache, error)
        if node is None or node.end == node.start:
            return Node(self, text, pos, cursor, children)
        children.append(node)
        cursor += node.end - node.start
def _uncached_match(self, text, pos, cache, error):
    """Try each member in order; wrap the first success in a Node.

    Records which alternative matched via ``rule_idx``.  Falls off the
    end (returning ``None``) when no member matches.
    """
    for idx, member in enumerate(self.members):
        node = member.match_core(text, pos, cache, error)
        if node is not None:
            # Wrap the succeeding child in a node representing the OneOf:
            return Node(self, text, pos, node.end,
                        children=[node], rule_idx=idx)