Ejemplo n.º 1
0
    def append_character(self, character, escaped=False):
        '''
        Consume one character following `(?P`.

        The first character selects the mode: `<` starts a named group
        definition and `=` starts a reference to an existing group.  Later
        characters accumulate in the name until an unescaped `>` (definition)
        or an unescaped `)` (reference) closes it.

        Returns the builder that should receive the next character.  Raises
        `RxpyException` for an unknown qualifier, an empty name in a group
        definition, or input that ends early (a false `character`).
        '''
        if self._create is None:
            if character == '<':
                self._create = True
            elif character == '=':
                self._create = False
            elif not character:
                # end of input arrives as None; concatenating it into the
                # message below would raise TypeError instead
                raise RxpyException('Incomplete named group')
            else:
                raise RxpyException(
                    'Unexpected qualifier after (?P - ' + character)

        else:
            if self._create and not escaped and character == '>':
                if not self._name:
                    raise RxpyException('Empty name for group')
                return GroupBuilder(self._state, self._parent, True, self._name)
            elif not self._create and not escaped and character == ')':
                self._parent._sequence.append(
                    GroupReference(self._state.index_for_name_or_count(self._name)))
                return self._parent
            elif not escaped and character == '\\':
                # this is just for the name
                return SimpleEscapeBuilder(self._state, self)
            elif character:
                self._name += character
            else:
                raise RxpyException('Incomplete named group')

        return self
Ejemplo n.º 2
0
 def append_character(self, character):
     '''
     Accumulate one hexadecimal digit of a character code escape.

     Digits are buffered until the remaining count reaches zero.  The
     buffer is then parsed as base 16, converted to a character by the
     alphabet, and passed to the parent as an escaped character.

     Returns this builder while digits remain, otherwise whatever the
     parent returns.  Raises `RxpyException` if the input ends early or if
     the conversion / hand-off to the parent fails.
     '''
     if not character:
         raise RxpyException('Incomplete unicode escape')
     self.__buffer += character
     self.__remaining -= 1
     if self.__remaining:
         return self
     try:
         code = int(self.__buffer, 16)
         return self.__parent.append_character(
                 self._state.alphabet.code_to_char(code),
                 escaped=True)
     except Exception:
         # not a bare except: KeyboardInterrupt and SystemExit must not be
         # reported as a malformed escape
         raise RxpyException('Bad unicode escape: ' + self.__buffer)
Ejemplo n.º 3
0
    def __init__(self,
                 flags=0,
                 alphabet=None,
                 hint_alphabet=None,
                 require=0,
                 refuse=0):
        '''
        Create the parser state, reconciling the flags with the alphabet.

        `flags` - initial flags set by user (bits as int)

        `alphabet` - optional alphabet (if given, checked against flags; if not
        given inferred from flags and hint)

        `hint_alphabet` - used to help auto-detect ASCII and Unicode in 2.6

        `require` - flags required by the alphabet

        `refuse` - flags refused by the alphabet

        Raises `RxpyException` if an alphabet that is neither `Ascii` nor
        `Unicode` is combined with the ASCII or UNICODE flag, or if both
        flags end up set together.
        '''

        # the arguments are stored unmodified, before any reconciliation
        # (presumably so the state can be rebuilt later - confirm in callers)
        self.__new_flags = 0
        self.__initial_alphabet = alphabet
        self.__hint_alphabet = hint_alphabet
        self.__require = require
        self.__refuse = refuse

        # required flags are always switched on
        flags = flags | require
        # default, if nothing specified, is the hint (when given) or unicode
        if alphabet is None and not (
                flags & (ParserState.ASCII | ParserState.UNICODE)):
            alphabet = hint_alphabet if hint_alphabet else Unicode()
        # else, if alphabet given, set flag
        elif alphabet:
            if isinstance(alphabet, Ascii): flags |= ParserState.ASCII
            elif isinstance(alphabet, Unicode): flags |= ParserState.UNICODE
            # any other alphabet cannot be combined with either flag
            elif flags & (ParserState.ASCII | ParserState.UNICODE):
                raise RxpyException(
                    'The alphabet is inconsistent with the parser flags')
        # if alphabet missing, set from flag
        else:
            if flags & ParserState.ASCII: alphabet = Ascii()
            if flags & ParserState.UNICODE: alphabet = Unicode()
        # check contradictions
        if (flags & ParserState.ASCII) and (flags & ParserState.UNICODE):
            raise RxpyException('Cannot specify Unicode and ASCII together')
        # receives only the flags that are both set and refused
        # (presumably raises when that value is non-zero - confirm)
        refuse_flags(flags & refuse)

        self.__alphabet = alphabet
        self.__flags = flags
        self.groups = GroupState()
        self.__comment = False  # used to track comments with extended syntax
        self.__unwind_credit = 10
Ejemplo n.º 4
0
 def append_character(self, character):
     '''
     Handle the character that follows a backslash.

     A non-zero digit starts a group reference.  `A`, `b`/`B`, `d`/`D`,
     `w`/`W`, `s`/`S` and `Z` each append one node to the parent's sequence
     and hand control back to the parent.  Anything else is delegated to
     the superclass.

     Raises `RxpyException` if the input ends here (false `character`).
     '''
     if not character:
         raise RxpyException('Incomplete character escape')
     if character != '0' and character in digits:
         return GroupReferenceBuilder(self._state, self._parent, character)

     # pick the node for this escape; for the paired letters the upper
     # case form passes True to the node constructor
     if character == 'A':
         node = StartOfLine(False)
     elif character in 'bB':
         node = WordBoundary(character == 'B')
     elif character in 'dD':
         node = Digit(character == 'D')
     elif character in 'wW':
         node = Word(character == 'W')
     elif character in 'sS':
         node = Space(character == 'S')
     elif character == 'Z':
         node = EndOfLine(False)
     else:
         return super(ComplexEscapeBuilder, self).append_character(character)

     self._parent._sequence.append(node)
     return self._parent
Ejemplo n.º 5
0
 def index_for_name_or_count(self, name):
     '''
     Translate a group name or a group index given as text into the group
     index (an int).

     The text is tried as an integer first; only if that conversion fails
     is it looked up as a name.  Raises `RxpyException` when the index or
     the name is not known.
     '''
     try:
         index = int(name)
     except ValueError:
         # not a number, so it must be a registered name
         if name in self.__name_to_index:
             return self.__name_to_index[name]
         raise RxpyException('Unknown name ' + str(name))
     if index in self.__index_to_name:
         return index
     raise RxpyException('Unknown index ' + str(name))
Ejemplo n.º 6
0
 def append_character(self, character):
     '''
     Consume one character of a `{...}` repeat count.

     Before the closing brace: `,` stores the value read so far, `}`
     stores it and closes the count, any other character is accumulated.
     An immediately closed `{}` is sent to the parent as two escaped
     characters instead.  After the closing brace a single `?` marks the
     repeat lazy; any other character builds the repeat and is passed on
     to the parent.

     Raises `RxpyException` if the input ends inside the braces.
     '''
     if self._closed:
         if character == '?' and not self._lazy:
             self._lazy = True
             return self
         self.__build()
         return self._parent.append_character(character)

     if character == '}':
         if self._begin is None and not self._acc:
             # nothing between the braces: treat them as literal text
             for literal in '{}':
                 self._parent.append_character(literal, escaped=True)
             return self._parent
         self.__store_value()
         self._closed = True
     elif character == ',':
         self.__store_value()
     elif character:
         self._acc += character
     else:
         raise RxpyException('Incomplete count specification')
     return self
Ejemplo n.º 7
0
 def unescape(self, code):
     '''
     Convert a numeric escape code to a character.

     Codes below 512 are reduced modulo 256 before conversion (kept for
     compatibility with python); anything larger raises `RxpyException`.
     '''
     if code >= 512:
         raise RxpyException('Unexpected character code for ASCII: ' +
                             str(code))
     return self.code_to_char(code % 256)
Ejemplo n.º 8
0
 def append_character(self, character, escaped=False):
     '''
     Consume one character of a conditional branch.

     An unescaped terminal character is handed to the conditional's
     callback; every other character goes to the superclass.  Raises
     `RxpyException` if the input ends (`character` is None).
     '''
     if character is None:
         raise RxpyException('Incomplete conditional match')
     if escaped or character not in self.__terminals:
         return super(YesNoBuilder, self).append_character(character, escaped)
     return self.__conditional.callback(self, character)
Ejemplo n.º 9
0
 def append_character(self, character):
     '''
     Handle the character after a backslash: a non-zero digit starts a
     group reference, everything else is left to the superclass.

     Raises `RxpyException` if the input ends here (false `character`).
     '''
     if not character:
         raise RxpyException('Incomplete character escape')
     if character != '0' and character in digits:
         return GroupReferenceBuilder(self._state, self._parent, character)
     return super(IntermediateEscapeBuilder, self).append_character(character)
Ejemplo n.º 10
0
 def append_character(self, character):
     '''
     Decide what kind of group follows an opening parenthesis.

     The first character received is either `?`, which keeps this builder
     waiting for the qualifier, or the first character of an ordinary
     group, which is forwarded to a new `GroupBuilder`.  The second
     character (the qualifier after `(?`) selects the specialised builder.

     Returns the builder for the next character.  Raises `RxpyException`
     for an unknown qualifier or if the input ends right after `(?`.
     '''
     self._count += 1
     if self._count == 1:
         if character == '?':
             return self
         else:
             builder = GroupBuilder(self._state, self._parent)
             return builder.append_character(character)
     else:
         if not character:
             # end of input arrives as None, which would otherwise fail with
             # TypeError in the membership test / message concatenation below
             raise RxpyException('Incomplete group qualifier after (?')
         elif character == ':':
             return GroupBuilder(self._state, self._parent,
                                 binding=False)
         elif character in ParserStateBuilder.INITIAL:
             return ParserStateBuilder(self._state, self._parent).append_character(character)
         elif character == 'P':
             return NamedGroupBuilder(self._state, self._parent)
         elif character == '#':
             return CommentGroupBuilder(self._state, self._parent)
         elif character == '=':
             return LookaheadBuilder(
                         self._state, self._parent, True, True)
         elif character == '!':
             return LookaheadBuilder(
                         self._state, self._parent, False, True)
         elif character == '<':
             return LookbackBuilder(self._state, self._parent)
         elif character == '(':
             return ConditionalBuilder(self._state, self._parent)
         else:
             raise RxpyException(
                 'Unexpected qualifier after (? - ' + character)
Ejemplo n.º 11
0
 def append_character(self, character):
     '''
     Select the builder for the character that follows `(?<`.

     `=` and `!` both create a `LookaheadBuilder` whose last argument is
     False; they differ in the third argument (True for `=`, False for
     `!`).  NOTE(review): presumably positive / negative lookback - confirm
     against `LookaheadBuilder`.

     Raises `RxpyException` for any other character or if the input ends.
     '''
     if character == '=':
         return LookaheadBuilder(self._state, self._parent, True, False)
     elif character == '!':
         return LookaheadBuilder(self._state, self._parent, False, False)
     elif not character:
         # end of input arrives as None; concatenating it into the message
         # below would raise TypeError instead
         raise RxpyException('Incomplete lookback')
     else:
         raise RxpyException(
             'Unexpected qualifier after (?< - ' + character)
Ejemplo n.º 12
0
 def parse(self, text):
     '''
     Feed `text` through the chain of builders, one character at a time.

     Each call to `append_character` returns the builder that receives the
     next character; a final `None` is sent after the text.  If control has
     not come back to this builder by then `RxpyException` is raised,
     otherwise the accumulated sequence is joined to a `Match` node.
     '''
     current = self
     for character in text:
         current = current.append_character(character)
     # signal the end of the input
     current = current.append_character(None)
     if self != current:
         raise RxpyException('Incomplete expression')
     final = Match()
     return self._sequence.join(final, self._state)
Ejemplo n.º 13
0
 def unpack(self, char, flags):
     '''
     Return either (True, CharSet) or (False, char).

     This implementation only produces the `(False, char)` form, after
     coercing and joining `char`; it raises `RxpyException` when the
     IGNORECASE flag is set.
     '''
     from rxpy.parser.support import ParserState
     if not (flags & ParserState.IGNORECASE):
         coerced = self.coerce(char)
         return (False, self.join(coerced))
     raise RxpyException('Default alphabet does not handle case')
Ejemplo n.º 14
0
 def append_character(self, character):
     '''
     Handle the character after a backslash: `g` starts a replacement
     group reference, everything else is left to the superclass.

     Raises `RxpyException` if the input ends here (false `character`).
     '''
     if not character:
         raise RxpyException('Incomplete character escape')
     if character == 'g':
         return ReplacementGroupReferenceBuilder(self._state, self._parent)
     parent_class = super(ReplacementEscapeBuilder, self)
     return parent_class.append_character(character)
Ejemplo n.º 15
0
 def append_character(self, character, escaped=False):
     '''
     Skip over the contents of a comment.

     An unescaped `)` returns control to the parent and an unescaped
     backslash starts an escape whose result comes back here.  All other
     characters are discarded.  Raises `RxpyException` if the input ends
     before the comment is closed.
     '''
     if not escaped:
         if character == ')':
             return self._parent
         if character == '\\':
             return SimpleEscapeBuilder(self._state, self)
     if not character:
         raise RxpyException('Incomplete comment')
     return self
Ejemplo n.º 16
0
def post_process(graph, actions):
    '''
    Apply per-type actions to every node of `graph` and return the graph.

    `actions` is an iterable of `(type, function)` pairs.  Each node found
    by `node_iterator` is passed to the function registered for its exact
    type, if there is one.  Raises `RxpyException` if a type is registered
    more than once.
    '''
    handlers = {}
    for (type_, function) in actions:
        if type_ in handlers:
            raise RxpyException('Conflicting actions for ' + str(type_))
        handlers[type_] = function
    for node in node_iterator(graph):
        node_type = type(node)
        if node_type in handlers:
            handlers[node_type](node)
    return graph
Ejemplo n.º 17
0
 def append_character(self, character):
     '''
     Consume one character of an inline flag group (the text after `(?`).

     A plain flag character found in the table sets the matching flag.  An
     underscore introduces a two-character flag: the following character
     must be one of `lceug` and is looked up with a leading `_`.  `)`
     closes the group and returns the parent.

     Raises `RxpyException` for `L` (locale is unsupported), for any
     unexpected character, or if the input ends inside the group.
     '''
     if not character:
         # end of input arrives as None, which would otherwise fail with
         # TypeError when tested against / concatenated with strings below
         raise RxpyException('Incomplete flag group')
     if not self.__escape and character == '_':
         self.__escape = True
         return self
     elif self.__escape and character in 'lceug':
         self._state.new_flag(self.__table['_' + character])
         self.__escape = False
         return self
     elif not self.__escape and character == 'L':
         raise RxpyException('Locale based classes unsupported')
     elif not self.__escape and character in self.__table:
         self._state.new_flag(self.__table[character])
         return self
     elif not self.__escape and character == ')':
         return self.__parent
     elif self.__escape:
         raise RxpyException('Unexpected characters after (? - _' + character)
     else:
         raise RxpyException('Unexpected character after (? - ' + character)
Ejemplo n.º 18
0
 def append_character(self, character):
     '''
     Consume one character of a `<...>` group reference.

     The opening `<` is stored first.  After that, digits are accepted
     while the reference is numeric and alphanumerics while it is a name,
     until `>` decodes the buffer into a node appended to the parent's
     sequence.

     Raises `RxpyException` for any other character or if the input ends.
     '''
     # the order of these checks is significant: each error case is
     # distinguished by which earlier test it failed
     if not self.__buffer and character == '<':
         self.__buffer += character
         return self
     if len(self.__buffer) > 1 and character == '>':
         self.__parent._sequence.append(self.__decode())
         return self.__parent
     if not character:
         raise RxpyException('Incomplete group escape')
     if ((self.__numeric and character in digits) or
             (self.__name and character in ALPHANUMERIC)):
         self.__buffer += character
         return self
     raise RxpyException('Unexpected character in group escape: ' +
                         character)
Ejemplo n.º 19
0
 def append_character(self, character):
     '''
     Complete a `?`, `+` or `*` repeat once the following character is
     known.

     A following `?` makes the repeat lazy and is consumed here; any other
     character is passed on to the parent after the repeat is built.

     Raises `RxpyException` if the following character is `+` or `*`
     (compound repeat) or if the initial character is not a repeat.
     '''
     if character and character in '+*':
         raise RxpyException('Compound repeat: ' +
                              self._initial_character + character)

     lazy = (character == '?')
     kind = self._initial_character
     if kind == '?':
         self.build_optional(self._parent, self._latest, lazy)
     elif kind == '+':
         self.build_plus(self._parent, self._latest, lazy, self._state)
     elif kind == '*':
         self.build_star(self._parent, self._latest, lazy, self._state)
     else:
         raise RxpyException('Bad initial character for RepeatBuilder')

     if not lazy:
         return self._parent.append_character(character)
     return self._parent
Ejemplo n.º 20
0
 def clone(self):
     '''
     Return a new instance of this node's class, constructed from the
     keyword arguments supplied by `_kargs()`.

     (Needed, for example, when a numbered repeat is replaced by explicit,
     repeated instances.)  A `TypeError` during construction is reported
     as `RxpyException`.
     '''
     node_class = self.__class__
     try:
         kargs = self._kargs()
         return node_class(**kargs)
     except TypeError as e:
         message = 'Error cloning {0}: {1}'.format(node_class.__name__, e)
         raise RxpyException(message)
Ejemplo n.º 21
0
def parse_groups(texts, engine, flags=0, alphabet=None):
    '''
    Parse several expressions, each as a separate group, into one graph
    (used to define groups for `Scanner`).

    The parser state is built from `flags` and `alphabet` together with the
    engine's `REQUIRE` and `REFUSE` values.  Returns `(state, graph)`.
    Raises `RxpyException` if parsing leaves new flags on the state.
    '''
    state = ParserState(flags=flags, alphabet=alphabet,
                        require=engine.REQUIRE, refuse=engine.REFUSE)
    builder = SequenceBuilder(state)
    for text in texts:
        builder.parse_group(text)
    if state.has_new_flags:
        raise RxpyException('Inconsistent flags')
    graph = builder.to_sequence().join(Match(), state)
    return (state, graph)
Ejemplo n.º 22
0
 def parse_group(self, text):
     '''
     Parse one expression as a group (used to build groups for `Scanner`).

     A `GroupBuilder` with this builder as parent receives every character
     of `text` followed by a closing `)`.  If this builder already holds a
     sequence, a new alternative is started first.

     Raises `RxpyException` if the closing `)` fails or does not return
     control to this builder.
     '''
     builder = GroupBuilder(self._state, self)
     if self._sequence:
         self.__start_new_alternative()
     for character in text:
         builder = builder.append_character(character)
     try:
         builder = builder.append_character(')')
     except Exception:
         raise RxpyException('Incomplete group')
     # an explicit check, not an assert: asserts are removed under -O,
     # which would let unbalanced text through silently
     if not builder == self:
         raise RxpyException('Incomplete group')
Ejemplo n.º 23
0
 def __store_value(self):
     '''
     Move the accumulated digits into the lower or upper repeat limit.

     The first call sets the lower limit, which must be present.  The
     second call marks the count as a range and, if digits were given,
     sets the upper limit.  The accumulator is cleared afterwards.

     Raises `RxpyException` for a missing or non-integer limit, for a
     third value, or for an upper limit below the lower one.
     '''
     text = self._acc
     if self._begin is None:
         if not text:
             raise RxpyException('Missing lower limit for repeat')
         try:
             self._begin = int(text)
         except ValueError:
             raise RxpyException(
                     'Bad lower limit for repeat: ' + text)
     elif self._range:
         raise RxpyException('Too many values in repeat')
     else:
         self._range = True
         # an empty upper limit leaves the end unset
         if text:
             try:
                 self._end = int(text)
             except ValueError:
                 raise RxpyException(
                         'Bad upper limit for repeat: ' + text)
             if self._begin > self._end:
                 raise RxpyException('Inconsistent repeat range')
     self._acc = ''
Ejemplo n.º 24
0
 def append_character(self, character):
     '''
     Handle the character that follows a backslash.

     `x`, `u` and `U` start a character code, a digit starts an octal
     escape, and a standard escape is replaced by its mapped value.  Any
     other character that is not an ASCII letter is passed to the parent
     as an escaped literal; remaining letters are reported through
     `_unexpected_character`.

     Raises `RxpyException` if the input ends here (false `character`).
     '''
     if not character:
         raise RxpyException('Incomplete character escape')
     if character in 'xuU':
         return CharacterCodeBuilder(self._state, self._parent, character)
     if character in digits:
         return OctalEscapeBuilder(self._state, self._parent, character)
     if character in self.__std_escapes:
         literal = self.__std_escapes[character]
         return self._parent.append_character(literal, escaped=True)
     if character in ascii_letters:
         return self._unexpected_character(character)
     # everything that is not a letter is taken literally (matches re.escape)
     return self._parent.append_character(character, escaped=True)
Ejemplo n.º 25
0
 def parse(self, text):
     '''
     Parse a regular expression.

     Each character of `text` goes to the current builder, which returns
     the builder for the next character; a final `None` is sent after the
     text.  A `ParseException` is updated with the text and the index of
     the last character read, then re-raised.

     Raises `RxpyException` if control has not come back to this builder
     once the input is finished.
     '''
     builder = self
     # pre-set so the handler below has a position even when `text` is
     # empty and the failure comes from the end-of-input call
     index = 0
     try:
         for (index, character) in enumerate(text):
             builder = builder.append_character(character)
         builder = builder.append_character(None)
     except ParseException as e:
         e.update(text, index)
         raise
     if self != builder:
         raise RxpyException('Incomplete expression')
     return self.to_sequence().join(Match(), self._state)
Ejemplo n.º 26
0
 def append(character=character):
     '''
     Add `character` to the character set being built.

     Outside a range the previously queued character (if any) is written
     to the set and `character` takes its place in the queue.  Inside a
     range the queued character and `character` become the two ends of
     the intervals added, and the range is closed.

     Raises `RxpyException` if a range is open but nothing is queued.
     '''
     if not self._range:
         if self._queue:
             (lo, hi) = unpack(self._queue)
             self._charset.append_interval((lo, lo))
             self._charset.append_interval((hi, hi))
         self._queue = character
         return
     if self._queue is None:
         raise RxpyException('Incomplete range')
     (start_lo, start_hi) = unpack(self._queue)
     (end_lo, end_hi) = unpack(character)
     self._charset.append_interval((start_lo, end_lo))
     self._charset.append_interval((start_hi, end_hi))
     self._queue = None
     self._range = False
Ejemplo n.º 27
0
def parse(text, state, class_, mutable_flags=True):
    '''
    Parse `text` with a builder of type `class_` and return
    `(state, graph)`.

    When `mutable_flags` is set and the first parse leaves new flags on the
    state, the text is parsed a second time with a state cloned to include
    them.  New flags still present at the end raise `RxpyException`.
    '''
    try:
        graph = class_(state).parse(text)
    except RxpyException:
        # a failure is tolerated only when the second parse below will run
        if not mutable_flags or not state.has_new_flags:
            raise
    if mutable_flags and state.has_new_flags:
        state = state.clone_with_new_flags()
        graph = class_(state).parse(text)
    actions = resolve_group_names(state)
    graph = post_process(graph, actions)
    if state.has_new_flags:
        raise RxpyException('Inconsistent flags')
    return (state, graph)
Ejemplo n.º 28
0
 def __build(self):
     '''
     Apply the parsed count to the last node of the parent's sequence.

     The node is removed from the sequence.  If the loop-unroll flag is
     set and the state's `unwind` accepts the size (the upper limit, or
     the lower limit when there is no upper one), the repeat is written
     out as explicit clones; otherwise `build_count` is used.

     Raises `RxpyException` if the parent's sequence is empty.
     '''
     parent = self._parent
     if not parent._sequence:
         raise RxpyException('Nothing to repeat')
     latest = parent._sequence.pop()

     unroll = False
     if self._state.flags & ParserState._LOOP_UNROLL:
         size = self._begin if self._end is None else self._end
         unroll = self._state.unwind(size)

     if not unroll:
         upper = self._end if self._range else self._begin
         self.build_count(parent, latest, self._begin, upper,
                          self._lazy, self._state)
         return

     # the mandatory part: one clone per required repetition
     for _i in range(self._begin):
         parent._sequence.append(latest.clone())
     if self._range:
         if self._end is None:
             # open-ended range
             RepeatBuilder.build_star(
                     parent, latest.clone(), self._lazy, self._state)
         else:
             # one optional clone per repetition above the lower limit
             for _i in range(self._end - self._begin):
                 RepeatBuilder.build_optional(
                         parent, latest.clone(), self._lazy)
Ejemplo n.º 29
0
def require_engine(engine):
    '''
    Check that an engine was supplied; raise `RxpyException` if `engine`
    is false.
    '''
    if engine:
        return
    raise RxpyException('Engine must be given for RXPY '
                        '(use an engine-specific re module).')
Ejemplo n.º 30
0
 def decode(buffer, alphabet):
     '''
     Convert the text of an octal escape into a character.

     `buffer` is parsed as a base 8 integer and the result is passed to
     `alphabet.unescape`.  Any ordinary failure in either step is reported
     as `RxpyException`.
     '''
     try:
         return alphabet.unescape(int(buffer, 8))
     except Exception:
         # not a bare except: KeyboardInterrupt and SystemExit must
         # propagate rather than be reported as a bad escape
         raise RxpyException('Bad octal escape: ' + buffer)