Example #1
0
 def test_one_or_more(self):
     """Exercise ``OneOrMore`` with the default and a custom minimum."""
     # Default minimum, exactly one repetition in the text.
     single = OneOrMore(Literal('b'))
     len_eq(single.match('b'), 1)

     # Default minimum, several repetitions in the text.
     several = OneOrMore(Literal('b'))
     len_eq(several.match('bbb'), 3)

     # Custom minimum that the text satisfies.
     satisfied = OneOrMore(Literal('b'), min=3)
     len_eq(satisfied.match('bbb'), 3)

     # Custom minimum that the text falls short of.
     unsatisfied = OneOrMore(Literal('b'), min=3)
     assert_raises(ParseError, unsatisfied.match, 'bb')

     # A zero-width member is an attempt to provoke an infinite loop.
     zero_width = OneOrMore(Regex('^'))
     len_eq(zero_width.match('bb'), 0)
    def test_parse_success(self):
        """Check that ``parse()`` hands back the tree when it succeeds.

        Little else is left to test here that has not already been vetted
        above.

        """
        text = "aa"
        expr = OneOrMore(Literal("a", name="lit"), name="more")
        actual = expr.parse(text)

        # One "lit" child per matched character, under a single "more" parent.
        first = Node("lit", text, 0, 1)
        second = Node("lit", text, 1, 2)
        expected = Node("more", text, 0, 2, children=[first, second])
        eq_(actual, expected)
Example #3
0
 def test_one_or_more_one(self):
     """Match a single repetition; expect a parent node with one child."""
     text = 'a'
     expr = OneOrMore(Literal('a', name='lit'), name='one')
     actual = expr.match(text)

     # Nodes are built from the expressions themselves here, not their names.
     only_child = Node(expr.members[0], text, 0, 1)
     expected = Node(expr, text, 0, 1, children=[only_child])
     eq_(actual, expected)
Example #4
0
    def test_parse_success(self):
        """Check that ``parse()`` hands back the tree when it succeeds.

        Little else is left to test here that has not already been vetted
        above.

        """
        text = 'aa'
        expr = OneOrMore(Literal('a', name='lit'), name='more')
        actual = expr.parse(text)

        # Both children come from the same member expression.
        member = expr.members[0]
        expected = Node(expr, text, 0, 2, children=[
            Node(member, text, 0, 1),
            Node(member, text, 1, 2),
        ])
        self.assertEqual(actual, expected)
    def test_parse_success(self):
        """Check that ``parse()`` hands back the tree when it succeeds.

        Little else is left to test here that has not already been vetted
        above.

        """
        text = 'aa'
        expr = OneOrMore(Literal('a', name='lit'), name='more')
        actual = expr.parse(text)

        # One 'lit' child per matched character, under a single 'more' parent.
        children = [Node('lit', text, 0, 1), Node('lit', text, 1, 2)]
        expected = Node('more', text, 0, 2, children=children)
        eq_(actual, expected)
Example #6
0
    def _expressions_from_rules(self, rule_syntax):
        """Parse ``rule_syntax`` with a hand-built bootstrap grammar.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Bootstrap: wire up by hand just enough expressions to parse the
        # grammar that describes the grammar description language.

        # Lexical pieces.
        whitespace = Regex(r'\s+', name='ws')
        spaces = Regex(r'[ \t]+', name='_')
        label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label')
        quantifier = Regex(r'[*+?]', name='quantifier')
        # This pattern also accepts empty literals. TODO: A problem?
        literal = Regex(
            r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
            ignore_case=True,
            dot_all=True,
            name='literal')
        regex = Sequence(
            Literal('~'),
            literal,
            Regex('[ilmsux]*', ignore_case=True),
            name='regex')

        # Terms: a bare atom, or an atom followed by a quantifier.
        atom = OneOf(label, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')
        term = OneOf(quantified, atom, name='term')
        another_term = Sequence(spaces, term, name='another_term')

        # Multi-term right-hand sides: plain sequences, '/' and '&' forms.
        sequence = Sequence(term, OneOrMore(another_term), name='sequence')
        or_term = Sequence(
            spaces, Literal('/'), another_term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        and_term = Sequence(
            spaces, Literal('&'), another_term, name='and_term')
        anded = Sequence(term, OneOrMore(and_term), name='anded')
        poly_term = OneOf(anded, ored, sequence, name='poly_term')
        rhs = OneOf(poly_term, term, name='rhs')

        # A rule is "label = rhs", terminated by the ``eol`` pattern.
        eol = Regex(r'[\r\n$]', name='eol')  # TODO: Support $.
        rule = Sequence(
            Optional(whitespace),
            label,
            Optional(spaces), Literal('='), Optional(spaces),
            rhs,
            Optional(spaces),
            eol,
            name='rule')
        grammar = Sequence(OneOrMore(rule), Optional(whitespace), name='rules')

        # Parse the (possibly more extensive) rule syntax with the hard-coded
        # expressions. (Unless parentheses get used in the rule language
        # definition itself, expressions for them never need hard-coding
        # above.)
        tree = grammar.parse(rule_syntax)

        # Turn the parse tree into a map of expressions.
        visitor = RuleVisitor()
        return visitor.visit(tree)
Example #7
0
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Parse ``rule_syntax`` with a hand-built bootstrap grammar.

        ``custom_rules`` is accepted but not consulted anywhere in this body.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Bootstrap: wire up by hand just enough expressions to parse the
        # grammar that describes the grammar description language.

        # Filler allowed between tokens: whitespace runs and comments.
        comment = Regex(r'#[^\r\n]*', name='comment')
        meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
        filler = ZeroOrMore(meaninglessness, name='_')

        # Tokens; each one also consumes the filler that trails it.
        equals = Sequence(Literal('='), filler, name='equals')
        label = Sequence(
            Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), filler, name='label')
        # A reference is a label that is not followed by ``equals``
        # (label + equals is how a rule begins).
        reference = Sequence(label, Not(equals), name='reference')
        quantifier = Sequence(Regex(r'[*+?]'), filler, name='quantifier')
        # This pattern also accepts empty literals. TODO: A problem?
        spaceless_literal = Regex(
            r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
            ignore_case=True,
            dot_all=True,
            name='spaceless_literal')
        literal = Sequence(spaceless_literal, filler, name='literal')
        regex = Sequence(
            Literal('~'),
            literal,
            Regex('[ilmsuxa]*', ignore_case=True),
            filler,
            name='regex')
        atom = OneOf(reference, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')

        # ``not_term`` contains ``term``, so ``term`` has to exist first; the
        # new alternative is then put at the front of its members.
        term = OneOf(quantified, atom, name='term')
        not_term = Sequence(Literal('!'), term, filler, name='not_term')
        term.members = (not_term,) + term.members

        # Right-hand sides and whole rules.
        sequence = Sequence(term, OneOrMore(term), name='sequence')
        or_term = Sequence(Literal('/'), filler, term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        expression = OneOf(ored, sequence, term, name='expression')
        rule = Sequence(label, equals, expression, name='rule')
        grammar = Sequence(filler, OneOrMore(rule), name='rules')

        # Parse the (more extensive) rule syntax with the hard-coded
        # expressions. (Unless parentheses get used in the rule language
        # definition itself, expressions for them never need hard-coding
        # above.)
        tree = grammar.parse(rule_syntax)

        # Turn the parse tree into a map of expressions.
        visitor = RuleVisitor()
        return visitor.visit(tree)
Example #8
0
    def _expressions_from_rules(self, rule_syntax):
        """Parse ``rule_syntax`` with a hand-built bootstrap grammar.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Bootstrap: wire up by hand just enough expressions to parse the
        # grammar that describes the grammar description language.

        # Lexical pieces.
        whitespace = Regex(r'\s+', name='ws')
        comment = Regex(r'#[^\r\n]*', name='comment')
        spaces = Regex(r'[ \t]+', name='_')
        label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label')
        quantifier = Regex(r'[*+?]', name='quantifier')
        # This pattern also accepts empty literals. TODO: A problem?
        literal = Regex(
            r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
            ignore_case=True,
            dot_all=True,
            name='literal')
        regex = Sequence(
            Literal('~'),
            literal,
            Regex('[ilmsux]*', ignore_case=True),
            name='regex')

        # Terms: a bare atom, or an atom followed by a quantifier.
        atom = OneOf(label, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')
        term = OneOf(quantified, atom, name='term')
        another_term = Sequence(spaces, term, name='another_term')

        # Right-hand sides: '/'-separated alternatives, sequences, or a term.
        sequence = Sequence(term, OneOrMore(another_term), name='sequence')
        or_term = Sequence(
            spaces, Literal('/'), another_term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        expression = OneOf(ored, sequence, term, name='expression')

        # A rule is "label = expression", optionally followed by a comment,
        # and terminated by the ``eol`` pattern.
        eol = Regex(r'[\r\n$]', name='eol')  # TODO: Support $.
        rule = Sequence(
            label,
            Optional(spaces), Literal('='), Optional(spaces),
            expression,
            Optional(spaces), Optional(comment),
            eol,
            name='rule')

        # Top level: any mix of rules, whitespace runs and comments.
        rule_or_rubbish = OneOf(
            rule, whitespace, comment, name='rule_or_rubbish')
        grammar = OneOrMore(rule_or_rubbish, name='rules')

        # Parse the (more extensive) rule syntax with the hard-coded
        # expressions. (Unless parentheses get used in the rule language
        # definition itself, expressions for them never need hard-coding
        # above.)
        tree = grammar.parse(rule_syntax)

        # Turn the parse tree into a map of expressions.
        visitor = RuleVisitor()
        return visitor.visit(tree)
 def test_one_or_more_one(self):
     """Match a single repetition; expect a parent node with one child."""
     text = 'a'
     expr = OneOrMore(Literal('a', name='lit'), name='one')
     actual = expr.match(text)

     # Expected shape: a 'one' node wrapping a single 'lit' node.
     only_child = Node('lit', text, 0, 1)
     expected = Node('one', text, 0, 1, children=[only_child])
     eq_(actual, expected)
 def test_parse_failure(self):
     """Expect ``parse()`` to give ``None`` when it cannot recognize the text
     all the way to the end."""
     # The trailing "b" is not something the expression can match.
     text = "aab"
     expr = OneOrMore(Literal("a", name="lit"), name="more")
     outcome = expr.parse(text)
     eq_(outcome, None)
 def test_one_or_more_one(self):
     """Match a single repetition; expect a parent node with one child."""
     text = "a"
     expr = OneOrMore(Literal("a", name="lit"), name="one")
     actual = expr.match(text)

     # Expected shape: a "one" node wrapping a single "lit" node.
     expected = Node(
         "one", text, 0, 1,
         children=[Node("lit", text, 0, 1)],
     )
     eq_(actual, expected)
 def test_parse_failure(self):
     """Expect ``parse()`` to give ``None`` when the text is not recognized
     all the way to the end."""
     # The trailing 'b' is not something the expression can match.
     text = 'aab'
     expr = OneOrMore(Literal('a', name='lit'), name='more')
     outcome = expr.parse(text)
     eq_(outcome, None)
Example #13
0
 def test_one_or_more(self):
     """Exercise ``OneOrMore`` with the default and a custom minimum."""
     # Default minimum, exactly one repetition in the text.
     single = OneOrMore(Literal('b'))
     len_eq(single.match('b'), 1)

     # Default minimum, several repetitions in the text.
     several = OneOrMore(Literal('b'))
     len_eq(several.match('bbb'), 3)

     # Custom minimum that the text satisfies.
     satisfied = OneOrMore(Literal('b'), min=3)
     len_eq(satisfied.match('bbb'), 3)

     # Custom minimum that the text falls short of; ``None`` is expected.
     unsatisfied = OneOrMore(Literal('b'), min=3)
     len_eq(unsatisfied.match('bb'), None)

     # A zero-width member is an attempt to provoke an infinite loop.
     zero_width = OneOrMore(Regex('^'))
     len_eq(zero_width.match('bb'), 0)
Example #14
0
 def test_parse_failure(self):
     """Expect ``parse()`` to give ``None`` when it cannot recognize the text
     all the way to the end."""
     # Two matchable characters followed by one that is not.
     unparseable = 'aab'
     expr = OneOrMore(Literal('a', name='lit'), name='more')
     result = expr.parse(unparseable)
     eq_(result, None)