def _expressions_from_rules(self, rule_syntax, custom_rules):
    """Return the rules for parsing the grammar definition syntax.

    Return a 2-tuple: a dict of rule names pointing to their expressions,
    and then the top-level expression for the first rule.

    """
    # Hard-code enough of the rules to parse the grammar that describes the
    # grammar description language, to bootstrap:
    comment = Regex(r'#[^\r\n]*', name='comment')
    meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
    _ = ZeroOrMore(meaninglessness, name='_')
    equals = Sequence(Literal('='), _, name='equals')
    label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label')
    reference = Sequence(label, Not(equals), name='reference')
    quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier')
    # This pattern supports empty literals. TODO: A problem?
    spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                              ignore_case=True,
                              dot_all=True,
                              name='spaceless_literal')
    literal = Sequence(spaceless_literal, _, name='literal')
    regex = Sequence(Literal('~'),
                     literal,
                     Regex('[ilmsuxa]*', ignore_case=True),
                     _,
                     name='regex')
    atom = OneOf(reference, literal, regex, name='atom')
    quantified = Sequence(atom, quantifier, name='quantified')

    term = OneOf(quantified, atom, name='term')
    not_term = Sequence(Literal('!'), term, _, name='not_term')
    term.members = (not_term,) + term.members

    sequence = Sequence(term, OneOrMore(term), name='sequence')
    or_term = Sequence(Literal('/'), _, term, name='or_term')
    ored = Sequence(term, OneOrMore(or_term), name='ored')
    expression = OneOf(ored, sequence, term, name='expression')
    rule = Sequence(label, equals, expression, name='rule')
    rules = Sequence(_, OneOrMore(rule), name='rules')

    # Use those hard-coded rules to parse the (more extensive) rule syntax.
    # (For example, unless I start using parentheses in the rule language
    # definition itself, I should never have to hard-code expressions for
    # those above.)
    rule_tree = rules.parse(rule_syntax)

    # Turn the parse tree into a map of expressions:
    return RuleVisitor().visit(rule_tree)
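For context, a minimal usage sketch (hypothetical rule names, assuming the public parsimonious Grammar API) of the kind of rule syntax the bootstrap above parses, including a "!"-prefixed not_term:

from parsimonious.grammar import Grammar

# Hypothetical grammar written in the rule syntax parsed above.
# "!stop" is a not_term: word matches only when "goodbye" is not
# at the current position.
grammar = Grammar(r'''
    greeting = word _ word
    word     = !stop ~"[a-z]+"
    stop     = "goodbye"
    _        = ~"\s+"
''')

tree = grammar.parse('hello world')   # succeeds and returns the parse tree
# grammar.parse('goodbye world')      # would raise ParseError at "goodbye"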
def visit_not_term(self, node, not_term):
    exclamation, term, _ = not_term
    return Not(term)
def test_not(self):
    len_eq(Not(Regex('.')).match(''), 0)  # match
    assert_raises(ParseError, Not(Regex('.')).match, 'Hi')  # don't
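As a companion to that test, a minimal standalone sketch (hypothetical names, assuming the parsimonious expressions API and the ParseError-raising match shown above) of Not composed into a Sequence; Not consumes no input when it succeeds:

from parsimonious.exceptions import ParseError
from parsimonious.expressions import Literal, Not, Regex, Sequence

# A word that must not be the keyword "end" (hypothetical example).
keyword = Literal('end')
word = Sequence(Not(keyword), Regex(r'[a-z]+'), name='word')

node = word.match('hello')   # Not consumes nothing; the regex matches "hello"
assert node.text == 'hello'

try:
    word.match('ending')     # "end" matches here, so Not and the whole word fail
except ParseError:
    pass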
""" return expression def visit_quantifier(self, quantifier, (symbol, _)): """Turn a quantifier into just its symbol-matching node.""" return symbol def visit_quantified(self, quantified, (atom, quantifier)): return self.quantifier_classes[quantifier.text](atom) def visit_lookahead_term(self, lookahead_term, (ampersand, term, _)): return Lookahead(term) def visit_not_term(self, not_term, (exclamation, term, _)): return Not(term) def visit_rule(self, rule, (label, equals, expression)): """Assign a name to the Expression and return it.""" expression.name = label # Assign a name to the expr. return expression def visit_sequence(self, sequence, (term, other_terms)): """A parsed Sequence looks like [term node, OneOrMore node of ``another_term``s]. Flatten it out.""" return Sequence(term, *other_terms) def visit_ored(self, ored, (first_term, other_terms)): return OneOf(first_term, *other_terms) def visit_or_term(self, or_term, (slash, _, term)):
def visit_not_term(self, not_term, xxx_todo_changeme4):
    (exclamation, term, _) = xxx_todo_changeme4
    return Not(term)
def visit_not_term(self, not_term, children):
    return Not(children[1])
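The children-based signature above replaces the Python 2 tuple-parameter unpacking shown earlier. A minimal sketch (hypothetical grammar and visitor, assuming parsimonious's Grammar and NodeVisitor APIs) of the same indexing style in a custom visitor:

from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor

grammar = Grammar(r'''
    words = word+
    word  = ~"[a-z]+" ~"\s*"
''')


class WordCollector(NodeVisitor):
    # Hypothetical visitor: pull the matched text out of each word node.
    def visit_word(self, node, children):
        return node.children[0].text  # the regex child; trailing whitespace ignored

    def generic_visit(self, node, children):
        return children or node


tree = grammar.parse('hello world')
print(WordCollector().visit(tree))  # ['hello', 'world']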
def test_not(self):
    len_eq(Not(Regex('.')).match(''), 0)  # match
    len_eq(Not(Regex('.')).match('Hi'), None)  # don't