Beispiel #1
0
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Bootstrap the expressions that parse grammar-definition syntax.

        A minimal version of the rule language is assembled by hand from
        expression objects, ``rule_syntax`` is parsed with it, and the
        resulting tree is handed to ``RuleVisitor``.

        Returns the visitor's result, documented as a 2-tuple: a dict of
        rule names pointing to their expressions, followed by the top-level
        expression for the first rule.

        ``custom_rules`` is not referenced in this method's body.

        """
        # --- Filler: comments and whitespace that carry no meaning. ---
        comment_expr = Regex(r'#[^\r\n]*', name='comment')
        filler = OneOf(Regex(r'\s+'), comment_expr, name='meaninglessness')
        skip = ZeroOrMore(filler, name='_')

        # --- Tokens, each swallowing the filler that trails it. ---
        equals_expr = Sequence(Literal('='), skip, name='equals')
        label_expr = Sequence(
            Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), skip, name='label')
        # A reference is a label that is not the left side of a rule.
        reference_expr = Sequence(
            label_expr, Not(equals_expr), name='reference')
        quantifier_expr = Sequence(Regex(r'[*+?]'), skip, name='quantifier')

        # Note that this pattern also accepts empty literals ("").
        bare_literal = Regex(
            r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
            ignore_case=True,
            dot_all=True,
            name='spaceless_literal',
        )
        literal_expr = Sequence(bare_literal, skip, name='literal')
        regex_flags = Regex('[ilmsuxa]*', ignore_case=True)
        regex_expr = Sequence(
            Literal('~'), literal_expr, regex_flags, skip, name='regex')

        # --- Atoms and terms. ---
        atom_expr = OneOf(
            reference_expr, literal_expr, regex_expr, name='atom')
        quantified_expr = Sequence(
            atom_expr, quantifier_expr, name='quantified')

        term_expr = OneOf(quantified_expr, atom_expr, name='term')
        # not_term refers back to term, so term is created first and the
        # negated alternative is prepended to its members afterwards.
        not_term_expr = Sequence(
            Literal('!'), term_expr, skip, name='not_term')
        term_expr.members = (not_term_expr,) + term_expr.members

        # --- Compound expressions and whole rules. ---
        sequence_expr = Sequence(
            term_expr, OneOrMore(term_expr), name='sequence')
        or_term_expr = Sequence(
            Literal('/'), skip, term_expr, name='or_term')
        ored_expr = Sequence(
            term_expr, OneOrMore(or_term_expr), name='ored')
        expression_expr = OneOf(
            ored_expr, sequence_expr, term_expr, name='expression')
        rule_expr = Sequence(
            label_expr, equals_expr, expression_expr, name='rule')
        rules_expr = Sequence(skip, OneOrMore(rule_expr), name='rules')

        # Parse the (more extensive) rule syntax with the hand-built rules,
        # then turn the parse tree into a map of expressions.
        parsed_rules = rules_expr.parse(rule_syntax)
        return RuleVisitor().visit(parsed_rules)
Beispiel #2
0
 def visit_not_term(self, node, not_term):
     """Wrap the term of a ``!term`` construct in a ``Not`` expression.

     ``not_term`` must unpack into exactly three items; only the middle
     one (the term) is used.
     """
     _bang, negated, _trailing = not_term
     return Not(negated)
Beispiel #3
0
 def test_not(self):
     """``Not`` matches with length 0 on '' and raises ParseError on 'Hi'."""
     # Nothing for '.' to match at the start of '', so the negation matches.
     accepts_empty = Not(Regex('.'))
     len_eq(accepts_empty.match(''), 0)

     # '.' matches 'H', so the negation has to fail.
     rejects_text = Not(Regex('.'))
     assert_raises(ParseError, rejects_text.match, 'Hi')
Beispiel #4
0
        """
        return expression

    def visit_quantifier(self, quantifier, children):
        """Turn a quantifier into just its symbol-matching node.

        ``children`` is the 2-item sequence of visited children: the
        quantifier symbol, then a trailing child that is discarded.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        symbol, _ = children
        return symbol

    def visit_quantified(self, quantified, children):
        """Wrap an atom in the expression class chosen by its quantifier.

        ``children`` is the 2-item sequence ``(atom, quantifier)``. The
        quantifier's ``text`` is the key into ``self.quantifier_classes``;
        the value found there is called with the atom and its result is
        returned.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        atom, quantifier = children
        return self.quantifier_classes[quantifier.text](atom)

    def visit_lookahead_term(self, lookahead_term, children):
        """Wrap the term of a lookahead construct in ``Lookahead``.

        ``children`` is the 3-item sequence ``(ampersand, term, _)``; only
        the term is used.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        ampersand, term, _ = children
        return Lookahead(term)

    def visit_not_term(self, not_term, children):
        """Wrap the term of a negated construct in ``Not``.

        ``children`` is the 3-item sequence ``(exclamation, term, _)``;
        only the term is used.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        exclamation, term, _ = children
        return Not(term)

    def visit_rule(self, rule, children):
        """Assign a name to the Expression and return it.

        ``children`` is the 3-item sequence ``(label, equals, expression)``.
        The expression's ``name`` attribute is set to ``label`` in place,
        and that same expression object is returned.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        label, equals, expression = children
        expression.name = label  # Assign a name to the expr.
        return expression

    def visit_sequence(self, sequence, children):
        """Flatten a parsed sequence into a single ``Sequence`` expression.

        A parsed Sequence looks like [term node, OneOrMore node of
        ``another_term``s]. ``children`` is that 2-item sequence
        ``(term, other_terms)``; the first term and every item of
        ``other_terms`` become members of one ``Sequence``.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        term, other_terms = children
        return Sequence(term, *other_terms)

    def visit_ored(self, ored, children):
        """Combine alternatives into a single ``OneOf`` expression.

        ``children`` is the 2-item sequence ``(first_term, other_terms)``;
        the first term and every item of ``other_terms`` become the
        alternatives of one ``OneOf``, in that order.

        The sequence is unpacked in the body rather than in the signature
        because tuple parameters are a syntax error on Python 3.
        """
        first_term, other_terms = children
        return OneOf(first_term, *other_terms)

    def visit_or_term(self, or_term, (slash, _, term)):
Beispiel #5
0
 def visit_not_term(self, not_term, xxx_todo_changeme4):
     """Wrap the term of a negated construct in a ``Not`` expression.

     ``xxx_todo_changeme4`` must unpack into exactly three items; only the
     middle one (the term) is used.
     """
     _bang, negated, _trailing = xxx_todo_changeme4
     return Not(negated)
Beispiel #6
0
 def visit_not_term(self, not_term, children):
     """Wrap the second item of ``children`` in a ``Not`` expression."""
     negated = children[1]
     return Not(negated)
Beispiel #7
0
 def test_not(self):
     """``Not`` matches with length 0 on ''; on 'Hi' the result is None."""
     # Nothing for '.' to match at the start of '', so the negation matches.
     accepts_empty = Not(Regex('.'))
     len_eq(accepts_empty.match(''), 0)

     # '.' matches 'H', so the negation does not match.
     rejects_text = Not(Regex('.'))
     len_eq(rejects_text.match('Hi'), None)