Ejemplo n.º 1
0
 def __init__(self, state, parent):
     '''
     Set up an empty character set that will be built for `parent`.

     `state` is passed on to the base-class constructor and also provides
     the alphabet used by the (initially empty) `Character` instance.
     '''
     super(CharacterBuilder, self).__init__(state)
     self._parent = parent
     empty_set = Character([], alphabet=state.alphabet)
     self._charset = empty_set
     # Remaining bookkeeping fields start out unset / inactive.
     self._invert, self._queue, self._range = None, None, False
Ejemplo n.º 2
0
 def append_character(self, character, escaped=False):
     '''
     Consume one character of the pattern and return the builder that
     should receive the next character.

     Unescaped backslash, '{', '(' and '[' hand over to a new nested
     builder; an unescaped '+', '?' or '*' following an existing node wraps
     that node in a repeat builder.  Unescaped '.', '^', '$' and '|' are
     handled in place.  Any other character is added to the current
     sequence when it is escaped or when the parser state reports it as
     significant; in all in-place cases this builder itself is returned.
     '''
     state = self._state

     if not escaped:
         # Constructs that are parsed by a dedicated nested builder.
         if character == '\\':
             return ComplexEscapeBuilder(state, self)
         if character == '{':
             return CountBuilder(state, self)
         if character == '(':
             return GroupEscapeBuilder(state, self)
         if character == '[':
             return CharacterBuilder(state, self)

         # Single-character constructs added directly to the sequence.
         if character == '.':
             self._sequence.append(Dot(state.flags & ParserState.DOTALL))
             return self
         if character == '^':
             self._sequence.append(
                 StartOfLine(state.flags & ParserState.MULTILINE))
             return self
         if character == '$':
             self._sequence.append(
                 EndOfLine(state.flags & ParserState.MULTILINE))
             return self
         if character == '|':
             self.__start_new_alternative()
             return self

         # A repeat operator applies to the most recent node, if any.
         if character and self._sequence and character in '+?*':
             return RepeatBuilder(state, self, self._sequence.pop(),
                                  character)

     if character and (escaped or state.significant(character)):
         is_pair, value = state.alphabet.unpack(character, state.flags)
         if is_pair:
             # Two alternatives: each becomes a single-character interval.
             intervals = [(value[0], value[0]), (value[1], value[1])]
             node = Character(intervals, state.alphabet)
         else:
             node = String(value)
         self._sequence.append(node)

     return self
Ejemplo n.º 3
0
 def test_contains(self):
     '''
     Check membership of 'a', 'b' and 'c' in character sets built from
     several interval lists over the Ascii alphabet.
     '''
     # Each row: (intervals, expected membership of 'a', 'b', 'c').
     table = (
         ((('b', 'b'),), (False, True, False)),
         ((('a', 'b'),), (True, True, False)),
         ((('a', 'c'),), (True, True, True)),
         ((('a', 'b'), ('b', 'c')), (True, True, True)),
         ((('a', 'a'), ('c', 'c')), (True, False, True)),
     )
     for intervals, expected in table:
         for probe, member in zip('abc', expected):
             # Build a fresh set (from a fresh list) for every check.
             charset = Character(list(intervals), Ascii())
             assert (probe in charset) == member, (probe, intervals)
Ejemplo n.º 4
0
 def do_test_str(self, intervals, target):
     '''
     Assert that a Character built from `intervals` over the Ascii
     alphabet has the string form `target`; the actual string is reported
     on failure.
     '''
     charset = Character(intervals, Ascii())
     rendered = str(charset)
     assert rendered == target, rendered
Ejemplo n.º 5
0
class CharacterBuilder(Builder):
    '''
    Parse a character range - expressions of the form [...].
    These can include character classes (\\s for example), which we handle
    in the alphabet as functions rather than character code ranges, so the
    final graph node can be quite complex.

    Characters are supplied one at a time through `append_character`; when
    the closing ']' arrives the finished set is appended to the parent's
    sequence and control returns to the parent builder.
    '''

    def __init__(self, state, parent):
        '''
        `state` is passed to the base class and supplies the alphabet;
        `parent` is the builder that receives the finished character set.
        '''
        super(CharacterBuilder, self).__init__(state)
        self._parent = parent
        self._charset = Character([], alphabet=state.alphabet)
        # None until the first character has been examined, then True if
        # the set started with an unescaped '^' and False otherwise.
        self._invert = None
        # A character that has been read but not yet added to the set (it
        # may still turn out to be the lower bound of a range).
        self._queue = None
        # True after an unescaped '-' while the upper bound is awaited.
        self._range = False

    def append_character(self, character, escaped=False):
        '''
        Consume one character of the set and return the builder that
        should receive the next character.

        Returns a `SimpleEscapeBuilder` after an unescaped backslash, the
        parent builder after the closing ']', and this builder otherwise.
        `escaped` is True when `character` was preceded by a backslash.

        Raises `RxpyException` for a range with no lower bound, or when
        the character matches none of the handled cases.
        '''

        def unpack(character):
            # alphabet.unpack returns a flag and a value; when the flag is
            # set the value is used as a pair, otherwise the character
            # itself serves as both members of the pair.
            (is_charset, value) = self._state.alphabet.unpack(character,
                                                              self._state.flags)
            if not is_charset:
                value = (character, character)
            return value

        def append(character=character):
            if self._range:
                if self._queue is None:
                    raise RxpyException('Incomplete range')
                else:
                    # close the range queue..character, once for each
                    # member of the unpacked pairs
                    (alo, ahi) = unpack(self._queue)
                    (blo, bhi) = unpack(character)
                    self._charset.append_interval((alo, blo))
                    self._charset.append_interval((ahi, bhi))
                    self._queue = None
                    self._range = False
            else:
                # the queued character was not the start of a range, so
                # add it as single-character intervals and queue the new one
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        # only an unescaped leading ^ inverts the set; an escaped one (which
        # arrives while _invert is still None, because the backslash branch
        # returns early) is an ordinary literal
        if self._invert is None and not escaped and character == '^':
            self._invert = True
        elif not escaped and character == '\\':
            return SimpleEscapeBuilder(self._state, self)
        elif escaped and character in 'dD':
            self._charset.append_class(self._state.alphabet.digit,
                                       character, character=='D')
        elif escaped and character in 'wW':
            self._charset.append_class(self._state.alphabet.word,
                                       character, character=='W')
        elif escaped and character in 'sS':
            self._charset.append_class(self._state.alphabet.space,
                                       character, character=='S')
        # not charset allows first character to be unescaped - or ]
        elif character and \
                ((not self._charset and not self._queue)
                 or escaped or character not in "-]"):
            append()
        elif character == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif character == ']':
            if self._range:
                # convert open range to '-'; this must happen even when
                # nothing is queued (a '-' directly after a completed range
                # or a character class), otherwise that '-' would be lost
                self._range = False
                append('-')
            if self._queue:
                # flush the last pending character into the set
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyException('Syntax error in character set')

        # after first character this must be known
        if self._invert is None:
            self._invert = False

        return self
Ejemplo n.º 6
0
 def test_contains(self):
     '''
     Check membership of the integers 0, 1 and 2 in character sets built
     from several interval lists over the Digits alphabet.
     '''
     # Each row: (intervals, expected membership of 0, 1, 2).
     table = (
         ((('1', '1'),), (False, True, False)),
         ((('0', '1'),), (True, True, False)),
         ((('0', '2'),), (True, True, True)),
         ((('0', '1'), ('1', '2')), (True, True, True)),
         ((('0', '0'), ('2', '2')), (True, False, True)),
     )
     for intervals, expected in table:
         for probe, member in zip((0, 1, 2), expected):
             # Build a fresh set (from a fresh list) for every check.
             charset = Character(list(intervals), Digits())
             assert (probe in charset) == member, (probe, intervals)
Ejemplo n.º 7
0
 def do_test_str(self, intervals, target):
     '''
     Assert that a Character built from `intervals` over the Digits
     alphabet has the string form `target`; the actual string is reported
     on failure.
     '''
     charset = Character(intervals, alphabet=Digits())
     rendered = str(charset)
     assert rendered == target, rendered