Ejemplo n.º 1
0
 def test_no_flatten_or(self):
     # Clear the configuration, enable only flattening, then check both the
     # printed structure of the compiled matcher and the parse result.
     grammar = Or('a', Join(Or('b', 'c')))
     grammar.config.clear().flatten()
     parser = grammar.get_parse()
     description = str(parser.matcher)
     assert description == "Or(Literal, Transform)", description
     parsed = parser('abcd')
     assert parsed == ['a'], parsed
Ejemplo n.º 2
0
def SingleLineString(quote='"', escape='\\', exclude='\n'):
    '''
    A quoted string, as `String`, whose content may not include any
    character in `exclude` (a newline by default), so a match cannot
    span several lines.

    The surrounding quotes are dropped.  If `escape` is non-empty then a
    quote preceded by `escape` is accepted as content, with the escape
    itself dropped.  Content is repeated with `add_=True`.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(plain, And(Drop(escape), delimiter))
    else:
        char = plain
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
Ejemplo n.º 3
0
def SkipString(quote='"', escape='\\', ignore='\n', empty='', join=__add__):
    '''
    A quoted string, as `String`, that may span several lines; any
    character in `ignore` (a newline by default) is matched but dropped
    from the result.

    The surrounding quotes are dropped.  If `escape` is non-empty then a
    quote preceded by `escape` is accepted as content, with the escape
    itself dropped.  Results are combined by `Repeat` using
    `reduce=(empty, join)`.
    '''
    delimiter = Literal(quote)
    skipped = Any(ignore)
    char = AnyBut(Or(delimiter, skipped))
    if escape:
        char = Or(char, And(Drop(escape), delimiter))
    # a separate Any(ignore) instance is used for the dropped alternative
    char_or_skip = Or(char, Drop(Any(ignore)))
    body = Repeat(char_or_skip, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Ejemplo n.º 4
0
def SingleLineString(quote='"',
                     escape='\\',
                     exclude='\n',
                     empty='',
                     join=__add__):
    '''
    A quoted string, as `String`, whose content may not include any
    character in `exclude` (a newline by default), so a match cannot
    span several lines.

    The surrounding quotes are dropped.  If `escape` is non-empty then a
    quote preceded by `escape` is accepted as content, with the escape
    itself dropped.  Results are combined by `Repeat` using
    `reduce=(empty, join)`.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(plain, And(Drop(escape), delimiter))
    else:
        char = plain
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Ejemplo n.º 5
0
    def test_bline(self):
        '''
        Test a simple example: letters introduce numbers in an indented block.
        '''
        #basicConfig(level=DEBUG)

        number = Token(Digit())
        letter = Token(Letter())

        # the simplest whitespace grammar i can think of - lines are either
        # numbers (which are single, simple statements) or letters (which
        # mark the start of a new, indented block).
        block = Delayed()
        line = Or(BLine(number), BLine(letter) & block) > list
        # and a block is simply a collection of lines, as above
        block += Block(line[1:])

        program = Trace(line[1:])

        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        program.config.default_line_aware(block_policy=1)
        parser = program.get_parse_string()
        result = parser(text)
        assert result == [['1'], ['2'],
                          ['a', ['3'], ['b', ['4'], ['5']], ['6']]], result
Ejemplo n.º 6
0
    def test_continued_explicit(self):
        number = Token(Digit())
        letter = Token(Letter())
        
        block = Delayed()
        bline = ContinuedLineFactory(r'x')
        line = Or(bline(number), 
                  bline(letter) & block) > list
        block += Block(line[1:])
        
        program = Trace(line[1:])
        
        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        program.config.lines(block_policy=explicit)
        parser = program.get_parse_string()
        result = parser(text)
        assert result == [['1'], 
                          ['2'], 
                          ['a', ['3'], 
                                ['b', ['4'], 
                                      ['5']], 
                                ['6']]], result
Ejemplo n.º 7
0
    def test_explicit(self):
        #basicConfig(level=DEBUG)
        number = Token(Digit())
        letter = Token(Letter())
        
        block = Delayed()
        line = Or(Line(number), 
                  Line(letter) & block) > list
        block += Block(line[1:])
        
        program = Trace(line[1:])
        
        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        program.config.lines(block_policy=explicit)
        parser = program.get_parse_string()
        result = parser(text)
        assert result == [['1'], 
                          ['2'], 
                          ['a', ['3'], 
                                ['b', ['4'], 
                                      ['5']], 
                                ['6']]], result
Ejemplo n.º 8
0
def UnsignedEFloat(decimal='.', exponent='eE'):
    '''
    Like `UnsignedEReal`, except that a decimal point or an exponent is
    required, so plain integers are not matched.

    Two alternatives are tried in order: an `UnsignedReal` followed by an
    exponent character and a `SignedInteger`, then an `UnsignedFloat`.
    '''
    with_exponent = Join(UnsignedReal(decimal), Any(exponent), SignedInteger())
    without_exponent = UnsignedFloat(decimal)
    return Or(with_exponent, without_exponent)
Ejemplo n.º 9
0
    def __init__(self):
        '''
        Initialise the alphabet to span `chr(0)` to `chr(maxunicode)`,
        supplying matchers for escaped characters and for the `s` / `S`
        character ranges (built from `_WHITESPACE`).
        '''
        from lepl.matchers.core import Any
        from lepl.matchers.combine import Or

        def mkhex(char, n):
            # `char` (dropped) followed by hex digits repeated with [n, ...];
            # each result is parsed as base 16 and converted with chr().
            from lepl.matchers.derived import Drop
            prefix = Drop(Any(char))
            digits = Any('0123456789abcdefABCDEF')[n, ...]
            return (prefix + digits) >> (lambda x: chr(int(x, 16)))

        def mkchr(char, chars, invert=False):
            # Map the literal `char` to a Character built from one
            # single-point interval per entry in `chars`.
            from lepl.matchers.core import Literal
            from lepl.matchers.derived import Map
            from lepl.regexp.core import Character
            intervals = lmap(lambda x: (x, x), chars)
            if not invert:
                func = lambda _: Character(intervals, self)
            else:
                # wrapping in a lambda postpones the call to self.invert
                # until after self has been fully created
                func = lambda _: Character(self.invert(intervals), self)
            return Map(Literal(char), func)

        whitespace = mkchr('s', _WHITESPACE)
        non_whitespace = mkchr('S', _WHITESPACE, invert=True)
        range_matcher = Or(whitespace, non_whitespace)
        escaped = Any(ILLEGAL) | mkhex('x', 2) | mkhex('u', 4) | mkhex('U', 8)
        upper = chr(maxunicode)
        super(UnicodeAlphabet, self).__init__(chr(0),
                                              upper,
                                              escaped=escaped,
                                              range=range_matcher)
Ejemplo n.º 10
0
def UnsignedReal(decimal='.'):
    '''
    Match digits with an optional decimal point, so both integer and
    float values are accepted.

    Two alternatives are tried in order: optional digits, the decimal
    point and digits; then digits with an optional trailing decimal point.
    '''
    with_fraction = Join(Optional(UnsignedInteger()),
                         Any(decimal),
                         UnsignedInteger())
    without_fraction = Join(UnsignedInteger(), Optional(Any(decimal)))
    return Or(with_fraction, without_fraction)
Ejemplo n.º 11
0
 def and_(a, b):
     '''
     Join `a` and `b`, inserting the separator only when both sides
     consume input.  All four combinations of `Consumer(x)` and
     `Consumer(x, False)` are offered as alternatives, and only the first
     includes the separator.
     '''
     alternatives = [
         And(Consumer(a), separator, Consumer(b)),
         And(Consumer(a), Consumer(b, False)),
         And(Consumer(a, False), Consumer(b)),
         And(Consumer(a, False), Consumer(b, False)),
     ]
     return Or(*alternatives)
Ejemplo n.º 12
0
def UnsignedFloat(decimal='.'):
    '''
    Match digits that include a mandatory decimal point, i.e. real
    values that are not integers.

    Two alternatives are tried in order: optional digits, the decimal
    point and digits; then digits followed by the decimal point.
    '''
    leading_optional = Join(Optional(UnsignedInteger()),
                            Any(decimal),
                            UnsignedInteger())
    trailing_point = Join(UnsignedInteger(), Any(decimal))
    return Or(leading_optional, trailing_point)
Ejemplo n.º 13
0
def Literals(*matchers):
    '''
    Build an `Or` over one `Literal` per argument.
    '''
    # Design note: extending Literal() itself was considered, but that
    # would have pushed "Or-like" behaviour into Literal; keeping the base
    # matchers reasonably orthogonal seemed the better choice.
    literals = [Literal(text) for text in matchers]
    return Or(*literals)
Ejemplo n.º 14
0
 def test_all(self):
     # Notes recorded on how each configuration affects this test:
     #   config.default()              - wrong order
     #   config.compile_to_dfa()       - gives 1.e3 only
     #   config.compile_to_nfa()       - wrong order
     #   config.no_compile_to_regexp() - ok
     #   config.clear()                - ok
     with_exponent = Join(UnsignedFloat(), Any('eE'), SignedInteger())
     plain = UnsignedFloat()
     either = Or(with_exponent, plain)
     either.config.default()
     self.assert_direct('1.e3', either, [['1.e3'], ['1.']])
Ejemplo n.º 15
0
def String(quote='"', escape='\\'):
    '''
    A quoted string whose quote character may be escaped.  Newlines are
    allowed in the content (use `SingleLineString` to forbid them).

    The surrounding quotes are dropped.  If `escape` is non-empty then an
    escaped quote is tried before ordinary content, with the escape itself
    dropped.  Content is repeated with `add_=True`.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
Ejemplo n.º 16
0
def SignedEFloat(decimal='.', exponent='eE'):
    '''
    Like `SignedEReal`, except that a decimal point or an exponent is
    required, so plain integers are not matched.

    With the default arguments a single `NfaRegexp` is returned;
    otherwise the matcher is composed from `SignedReal`, `SignedInteger`
    and `SignedFloat`.
    '''
    if not (decimal == '.' and exponent == 'eE'):
        with_exponent = Join(SignedReal(decimal), Any(exponent),
                             SignedInteger())
        return Or(with_exponent, SignedFloat(decimal))
    # hack: for now the default case uses a faster, direct implementation
    return NfaRegexp(
        r'[\+\-]?(?:[0-9]*\.[0-9]+(?:[eE][\+\-]?[0-9]+)?|[0-9]+\.(?:[eE][\+\-]?[0-9]+)?|[0-9]+[eE][\+\-]?[0-9]+)'
    )
Ejemplo n.º 17
0
    def __init__(self, clean_html=True):
        '''
        Store the `clean_html` flag, the punctuation string and the
        left/right context exception sets, and build the LEPL matchers
        used for real numbers (including comma-grouped thousands).
        '''
        self.clean_html = clean_html

        self._punctuation = '!"#&\'()*+,.;<=>?@[\\]^_`{|}~'
        self._lctx_1_exceptions = set('/ :'.split())
        self._lctx_2_exceptions = set('discount redeem voucher'.split())
        self._rctx_1_exceptions = set('/ : th am pm hour hours %'.split())
        self._rctx_2_exceptions = set('discount redeem voucher'.split())

        # LEPL real-number matchers, with support for thousands groups:
        # a dropped comma followed by three digits, repeated.
        grouped_digits = Join(Drop(','), Add(Digit()[3]))[:]
        grouped_with_fraction = Join(grouped_digits, Any('.'),
                                     UnsignedInteger())
        grouped_plain = Join(grouped_digits, Optional(Any('.')))
        grouped_tail = Or(grouped_with_fraction, grouped_plain)
        number = Or(Join(UnsignedInteger(), grouped_tail),
                    UnsignedReal()) >> float
        filler = Join(Star(AnyBut(number)))
        self._real_partition_matcher = Star(And(filler, number, filler))
        # numbers separated by (dropped) whitespace, commas or hyphens
        separator = Drop(Star(Or(Whitespace(), Any(',-'))))
        self._real_simple_matcher = number[:, separator]
Ejemplo n.º 18
0
def String(quote='"', escape='\\', empty='', join=__add__):
    '''
    A quoted string whose quote character may be escaped.  Newlines are
    allowed in the content (use `SingleLineString` to forbid them).

    Seen more generally, a string groups a series of results; choosing
    suitable `empty` and `join` values lets this matcher work with a
    variety of types.  They are passed to `Repeat` as
    `reduce=(empty, join)`.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Ejemplo n.º 19
0
 def simple_grammar(self):
     '''
     Build and return a small grammar in which a letter opens a nested,
     indented block of numbers (configured with `block_policy=1`).
     '''
     # to debug, enable logging with: basicConfig(level=DEBUG)

     digit = Token(Digit())
     alpha = Token(Letter())

     # A minimal whitespace-sensitive grammar: a line is either a number
     # (a single, simple statement) or a letter, which introduces a new,
     # indented block.
     block = Delayed()
     statement = Line(digit)
     header = Line(alpha) & block
     line = Or(statement, header) > list
     # A block is just one or more lines of the kind defined above.
     block += Block(line[1:])

     grammar = Trace(line[1:])
     grammar.config.lines(block_policy=1)
     return grammar
Ejemplo n.º 20
0
        def and_(matcher_a, matcher_b):
            '''
            Join two matchers with the separator, requiring the separator
            only next to a part that is actually present when either side
            is optional.
            '''
            # presumably non_optional_copy returns (required form,
            # was-optional flag) - confirm against its definition
            (requireda, optionala) = non_optional_copy(matcher_a)
            (requiredb, optionalb) = non_optional_copy(matcher_b)

            if not (optionala or optionalb):
                return And(matcher_a, separator, matcher_b)

            alternatives = []
            if optionala:
                alternatives.append(
                    And(Optional(And(requireda, separator)), requiredb))
            if optionalb:
                alternatives.append(
                    And(requireda, Optional(And(separator, requiredb))))
            combined = Or(*alternatives)
            if optionala and optionalb:
                # an explicit Optional lets a later call detect this case
                # when "ands" are chained in a tree
                combined = Optional(combined)
            return combined
Ejemplo n.º 21
0
 def test_simple(self):
     # Three alternatives are offered for the input 'a'; two results,
     # each ['a'], are expected.
     alternatives = Or(Any('x'), Any('a'), Any())
     expected = [['a'], ['a']]
     self.assert_direct('a', alternatives, expected)
Ejemplo n.º 22
0
def UnsignedFloat(decimal='.'):
    '''Match digits that may contain a decimal point.'''
    with_fraction = Join(Optional(UnsignedInteger()),
                         Any(decimal),
                         UnsignedInteger())
    without_fraction = Join(UnsignedInteger(), Optional(Any(decimal)))
    return Or(with_fraction, without_fraction)
Ejemplo n.º 23
0
def Newline():
    '''Match a Unix newline or a Windows carriage return plus newline.'''
    unix = Literal('\n')
    windows = Literal('\r\n')
    return Or(unix, windows)
Ejemplo n.º 24
0
 def test_nfa(self):
     # Build the float-with-optional-exponent alternatives, then clear the
     # configuration and compile to an NFA before requesting the parser.
     with_exponent = Join(UnsignedFloat(), Any('eE'), SignedInteger())
     plain = UnsignedFloat()
     either = Or(with_exponent, plain)
     either.config.clear().compile_to_nfa()
     parser = either.get_parse()
Ejemplo n.º 25
0
 def test_nfa(self):
     # Build the float-with-optional-exponent alternatives, then clear the
     # configuration and compile to an NFA before requesting the parser.
     exponent_form = Join(UnsignedFloat(), Any('eE'), SignedInteger())
     simple_form = UnsignedFloat()
     combined = Or(exponent_form, simple_form)
     combined.config.clear().compile_to_nfa()
     parser = combined.get_parse()