Esempio n. 1
0
 def test_sequence(self):
     """Exercise ``Sequence`` on a match, a non-match, and a non-zero start."""
     def greeting():
         # A fresh three-member sequence for every check, as each assertion
         # builds its own expression.
         return Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))

     # succeed: 12 characters are matched, the trailing '1234' is not.
     matched = greeting().match('hiiiilobingo1234')
     len_eq(matched, 12)

     # don't: the text stops short of the final 'o'; expected result is None.
     unmatched = greeting().match('hiiiilobing')
     len_eq(unmatched, None)

     # non-0 pos: start matching at index 1, skipping the leading '>'.
     offset_match = Sequence(Regex('hi*')).match('>hiiii', 1)
     len_eq(offset_match, 5)
Esempio n. 2
0
 def test_sequence(self):
     """Check ``Sequence``: a match, a ``ParseError``, and a non-zero start."""
     def build_greeting():
         # Each check gets its own freshly constructed expression.
         return Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))

     # succeed
     len_eq(build_greeting().match('hiiiilobingo1234'), 12)

     # don't: the text lacks the final 'o', so matching must raise.
     failing_match = build_greeting().match
     assert_raises(ParseError, failing_match, 'hiiiilobing')

     # non-0 pos
     single_member = Sequence(Regex('hi*'))
     len_eq(single_member.match('>hiiii', 1), 5)
Esempio n. 3
0
    def test_zero_or_more(self):
        """Check ``ZeroOrMore`` on empty input, repeats, and an empty member."""
        # zero, then more
        for text, expected_length in (('', 0), ('bbb', 3)):
            len_eq(ZeroOrMore(Literal('b')).match(text), expected_length)

        # Validate the next test: '^' on its own matches with length 0.
        anchor_match = Regex('^').match('')
        len_eq(anchor_match, 0)

        # Try to make it loop infinitely using a zero-length contained
        # expression:
        looping = ZeroOrMore(Regex('^'))
        len_eq(looping.match(''), 0)
Esempio n. 4
0
 def test_all_of(self):
     """``AllOf`` should return its own node, wrapping the last child."""
     text = 'a'
     lit_a = Literal('a', name='lit_a')
     reg_a = Regex('A', ignore_case=True, name='reg_a')
     expr = AllOf(lit_a, reg_a, name='all_of')

     actual = expr.match(text)

     # Only the last member's node ('reg_a') is expected as a child.
     last_child = Node('reg_a', text, 0, 1)
     expected = Node('all_of', text, 0, 1, children=[last_child])
     eq_(actual, expected)
Esempio n. 5
0
    def test_lookahead(self):
        """Check that ``&"a"`` gates the rule without consuming any text."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')

        # Text that does not start with 'a' must be rejected.
        with self.assertRaises(ParseError):
            grammar.parse('burp')

        s = 'arp'
        tree = grammar.parse(s)

        # The lookahead node spans 0-0; the regex node covers the whole word.
        lookahead_node = Node(Lookahead(Literal('a')), s, 0, 0)
        word_node = Node(Regex(r'[a-z]+'), s, 0, 3)
        expected = Node(grammar['starts_with_a'], s, 0, 3,
                        children=[lookahead_node, word_node])
        self.assertEqual(tree, expected)
Esempio n. 6
0
 def test_one_or_more(self):
     """Check ``OneOrMore``: default and custom minimum, plus an empty member."""
     # one, then more
     for text, expected_length in (('b', 1), ('bbb', 3)):
         len_eq(OneOrMore(Literal('b')).match(text), expected_length)

     # with custom min; success
     at_least_three = OneOrMore(Literal('b'), min=3)
     len_eq(at_least_three.match('bbb'), 3)

     # with custom min; failure
     too_few = OneOrMore(Literal('b'), min=3).match
     assert_raises(ParseError, too_few, 'bb')

     # attempt infinite loop: the member matches with zero length.
     zero_width = OneOrMore(Regex('^'))
     len_eq(zero_width.match('bb'), 0)
Esempio n. 7
0
 def visit_regex(self, node, regex):
     """Build a ``Regex`` expression from the four items in ``regex``.

     Only the second item (its ``.literal`` is the pattern) and the third
     item (its ``.text`` holds the flag letters) are used.

     """
     _tilde, literal_expr, flag_node, _ = regex
     flag_letters = flag_node.text.upper()
     # Pull the string back out of the Literal object.
     pattern = literal_expr.literal
     options = {
         'ignore_case': 'I' in flag_letters,
         'locale': 'L' in flag_letters,
         'multiline': 'M' in flag_letters,
         'dot_all': 'S' in flag_letters,
         'unicode': 'U' in flag_letters,
         'verbose': 'X' in flag_letters,
     }
     return Regex(pattern, **options)
Esempio n. 8
0
def test_use_regex_library():
    """Parse Cyrillic text with a grammar built with ``use_regex_library``."""
    rule_syntax = r'''
    unicode_word = ~"[\p{L}]*"
    '''
    grammar = Grammar(rule_syntax, use_regex_library=True)
    text = 'Тест'

    # The expected node spans all four characters of the text.
    word_expr = Regex(pattern=r'[\p{L}]*', use_regex_library=True)
    expected = RegexNode(expr=word_expr, full_text=text, start=0, end=4)

    result = grammar.parse(text=text)
    eq_(result, expected)
Esempio n. 9
0
 def visit_regex(self, regex, children):
     """Build a ``Regex`` expression from the four visited ``children``.

     The second child supplies the pattern (via ``.literal``) and the third
     supplies the flag letters (via ``.text``); the others are ignored.

     """
     _tilde, literal_expr, flag_node, _trailing = children
     flag_letters = flag_node.text.upper()
     # Pull the string back out of the Literal object.
     pattern = literal_expr.literal

     def has(letter):
         # True when the (upper-cased) flag text contains ``letter``.
         return letter in flag_letters

     # NOTE(review): the ``str=`` keyword looks like an automated rename of
     # ``unicode=`` -- confirm that ``Regex`` really accepts it.
     return Regex(pattern,
                  ignore_case=has('I'),
                  locale=has('L'),
                  multiline=has('M'),
                  dot_all=has('S'),
                  str=has('U'),
                  verbose=has('X'))
Esempio n. 10
0
 def visit_regex(self, regex, xxx_todo_changeme11):
     """Build a ``Regex`` expression from the four visited children.

     ``xxx_todo_changeme11`` is unpacked into four items; the second one
     supplies the pattern (via ``.literal``) and the third the flag letters
     (via ``.text``).

     """
     _tilde, literal_expr, flag_node, _ = xxx_todo_changeme11
     flag_letters = flag_node.text.upper()
     # Pull the string back out of the Literal object.
     pattern = literal_expr.literal

     # Keyword argument of ``Regex`` paired with the flag letter enabling it.
     # NOTE(review): ``str`` looks like an automated rename of ``unicode``
     # -- confirm that ``Regex`` really accepts it.
     keyword_for_letter = (
         ('ignore_case', 'I'),
         ('locale', 'L'),
         ('multiline', 'M'),
         ('dot_all', 'S'),
         ('str', 'U'),
         ('verbose', 'X'),
     )
     options = dict((keyword, letter in flag_letters)
                    for keyword, letter in keyword_for_letter)
     return Regex(pattern, **options)
Esempio n. 11
0
    def _expressions_from_rules(self, rule_syntax):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        ``rule_syntax`` is the text handed to the hard-coded ``rules``
        expression below. The return value is whatever
        ``RuleVisitor().visit`` produces for the resulting parse tree; the
        2-tuple shape described above is not checked in this method.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:

        # Whitespace: ``ws`` may include newlines, ``_`` is spaces/tabs only.
        ws = Regex(r'\s+', name='ws')
        _ = Regex(r'[ \t]+', name='_')
        label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label')
        quantifier = Regex(r'[*+?]', name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                        ignore_case=True,
                        dot_all=True,
                        name='literal')
        # A regex is a '~', then a literal, then optional flag letters.
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsux]*', ignore_case=True),
                         name='regex')
        atom = OneOf(label, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')
        # ``quantified`` is listed before the bare ``atom``.
        term = OneOf(quantified, atom, name='term')
        another_term = Sequence(_, term, name='another_term')
        sequence = Sequence(term, OneOrMore(another_term), name='sequence')
        or_term = Sequence(_, Literal('/'), another_term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        and_term = Sequence(_, Literal('&'), another_term, name='and_term')
        anded = Sequence(term, OneOrMore(and_term), name='anded')
        poly_term = OneOf(anded, ored, sequence, name='poly_term')
        rhs = OneOf(poly_term, term, name='rhs')
        # NOTE(review): inside a character class ``$`` is a literal dollar
        # sign in Python's ``re`` syntax, not end-of-input -- presumably what
        # the TODO below refers to; confirm against ``Regex``.
        eol = Regex(r'[\r\n$]', name='eol')  # TODO: Support $.
        # A rule is ``label = rhs`` with optional spacing, ended by ``eol``.
        rule = Sequence(Optional(ws),
                        label,
                        Optional(_),
                        Literal('='),
                        Optional(_),
                        rhs,
                        Optional(_),
                        eol,
                        name='rule')
        rules = Sequence(OneOrMore(rule), Optional(ws), name='rules')

        # Use those hard-coded rules to parse the (possibly more extensive)
        # rule syntax. (For example, unless I start using parentheses in the
        # rule language definition itself, I should never have to hard-code
        # expressions for those above.)
        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
Esempio n. 12
0
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        ``rule_syntax`` is the text handed to the hard-coded ``rules``
        expression below. ``custom_rules`` is accepted but not read anywhere
        in this method body. The return value is whatever
        ``RuleVisitor().visit`` produces for the resulting parse tree; the
        2-tuple shape described above is not checked in this method.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:

        # ``_`` is any run (possibly empty) of whitespace and '#' comments.
        comment = Regex(r'#[^\r\n]*', name='comment')
        meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
        _ = ZeroOrMore(meaninglessness, name='_')
        # The tokens below each include their own trailing ``_``.
        equals = Sequence(Literal('='), _, name='equals')
        label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label')
        # A reference is a label NOT followed by '='; a label followed by '='
        # begins a rule (see ``rule`` below).
        reference = Sequence(label, Not(equals), name='reference')
        quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                                  ignore_case=True,
                                  dot_all=True,
                                  name='spaceless_literal')
        literal = Sequence(spaceless_literal, _, name='literal')
        # A regex is a '~', then a literal, then optional flag letters.
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsuxa]*', ignore_case=True),
                         _,
                         name='regex')
        atom = OneOf(reference, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')

        # ``not_term`` contains ``term`` and ``term`` must offer ``not_term``
        # as an alternative, so ``term`` is built first and ``not_term`` is
        # prepended to its members afterwards to close the cycle.
        term = OneOf(quantified, atom, name='term')
        not_term = Sequence(Literal('!'), term, _, name='not_term')
        term.members = (not_term, ) + term.members

        sequence = Sequence(term, OneOrMore(term), name='sequence')
        or_term = Sequence(Literal('/'), _, term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        # Alternatives are listed as ``ored``, ``sequence``, then a lone
        # ``term``.
        expression = OneOf(ored, sequence, term, name='expression')
        rule = Sequence(label, equals, expression, name='rule')
        rules = Sequence(_, OneOrMore(rule), name='rules')

        # Use those hard-coded rules to parse the (more extensive) rule syntax.
        # (For example, unless I start using parentheses in the rule language
        # definition itself, I should never have to hard-code expressions for
        # those above.)

        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
Esempio n. 13
0
 def test_all_of(self):
     """Check ``AllOf`` when every member matches and when one does not."""
     # (pattern of the second member, expected result) -- match, then don't.
     cases = (('..', 2), ('.2', None))
     for second_pattern, expected_length in cases:
         conjunction = AllOf(Literal('0'), Regex(second_pattern))
         len_eq(conjunction.match('01'), expected_length)
Esempio n. 14
0
 def test_not(self):
     """Check ``Not`` on text its member cannot match and on text it can."""
     # (text, expected result) -- match, then don't.
     for text, expected_length in (('', 0), ('Hi', None)):
         negation = Not(Regex('.'))
         len_eq(negation.match(text), expected_length)
Esempio n. 15
0
 def test_not(self):
     """Check ``Not``: zero-length success, ``ParseError`` otherwise."""
     # match: '.' cannot match empty text, so the negation succeeds.
     empty_match = Not(Regex('.')).match('')
     len_eq(empty_match, 0)

     # don't: '.' matches 'H', so the negation must raise.
     negation = Not(Regex('.'))
     assert_raises(ParseError, negation.match, 'Hi')
Esempio n. 16
0
 def test_one_or_more(self):
     """Check ``OneOrMore``: default and custom minimum, plus an empty member."""
     # one
     single = OneOrMore(Literal('b')).match('b')
     len_eq(single, 1)

     # more
     several = OneOrMore(Literal('b')).match('bbb')
     len_eq(several, 3)

     # with custom min; success
     enough = OneOrMore(Literal('b'), min=3).match('bbb')
     len_eq(enough, 3)

     # with custom min; failure
     too_few = OneOrMore(Literal('b'), min=3).match('bb')
     len_eq(too_few, None)

     # attempt infinite loop: the member matches with zero length.
     zero_width = OneOrMore(Regex('^')).match('bb')
     len_eq(zero_width, 0)
Esempio n. 17
0
 def test_regex(self):
     """Check ``Literal`` and ``Regex`` matching, including ``ParseError``."""
     # simple: the literal is found starting at index 1.
     literal_match = Literal('hello').match('ehello', 1)
     len_eq(literal_match, 5)

     # *
     starred = Regex('hello*')
     len_eq(starred.match('hellooo'), 7)

     # no match
     assert_raises(ParseError, Regex('hello*').match, 'goodbye')

     # Case-insensitive matching via ``ignore_case``.
     insensitive = Regex('hello', ignore_case=True)
     len_eq(insensitive.match('HELLO'), 5)
Esempio n. 18
0
        """Stick a :class:`LazyReference` in the tree as a placeholder.

        We resolve them all later.

        """
        return LazyReference(label)

    def visit_regex(self, regex, visited_children):
        """Return a ``Regex`` expression.

        ``visited_children`` must unpack into exactly four items. The second
        supplies the pattern (via ``.literal``) and the third supplies the
        flag letters (via ``.text``); the other two are ignored.

        """
        # Unpack in the body rather than in the signature: tuple parameters
        # are a SyntaxError on Python 3 (PEP 3113). Positional callers are
        # unaffected.
        _, literal, flags, _ = visited_children
        flags = flags.text.upper()
        # Pull the string back out of the Literal object.
        pattern = literal.literal
        return Regex(pattern,
                     ignore_case='I' in flags,
                     locale='L' in flags,
                     multiline='M' in flags,
                     dot_all='S' in flags,
                     unicode='U' in flags,
                     verbose='X' in flags)

    def visit_spaceless_literal(self, spaceless_literal, visited_children):
        """Build a ``Literal`` from the quoted string in the node's text."""
        source_text = spaceless_literal.text
        # Let Python's own literal syntax do the unquoting, which gives us
        # backslash escapes such as \n and \t for free.
        # string.decode('string_escape') would have been a lower-level
        # alternative.
        string_value = ast.literal_eval(source_text)
        return Literal(string_value)

    def visit_literal(self, literal, visited_children):
        """Pick just the literal out of a literal-and-junk combo.

        ``visited_children`` must unpack into exactly two items; the first is
        returned and the second is discarded.

        """
        # Unpack in the body rather than in the signature: tuple parameters
        # are a SyntaxError on Python 3 (PEP 3113). Positional callers are
        # unaffected.
        spaceless_literal, _ = visited_children
        return spaceless_literal
Esempio n. 19
0
 def test_regex(self):
     """Check ``Literal`` and ``Regex`` matching, with ``None`` on failure."""
     # simple: the literal is found starting at index 1.
     literal_match = Literal('hello').match('ehello', 1)
     len_eq(literal_match, 5)

     # (pattern text, expected result) -- '*' repetition, then no match.
     for text, expected_length in (('hellooo', 7), ('goodbye', None)):
         len_eq(Regex('hello*').match(text), expected_length)

     # Case-insensitive matching via ``ignore_case``.
     insensitive = Regex('hello', ignore_case=True)
     len_eq(insensitive.match('HELLO'), 5)