Example #1
0
 def __init__(self, intervals, alphabet, classes=None,
              inverted=False, complete=False):
     '''
     Initialise the node.

     - `intervals` and `alphabet` are used to build the underlying `CharSet`.

     - `classes` is an optional collection of character classes; it is
       copied, and defaults to an empty list.

     - `inverted` and `complete` are stored as given.
     '''
     super(Character, self).__init__(consumes=True, size=1)
     self.__simple = CharSet(intervals, alphabet)
     self.alphabet = alphabet
     # Copy the caller's collection so that later in-place additions to
     # `self.classes` do not alter the caller's object.  (An empty or missing
     # value was already replaced by a fresh list.)
     self.classes = list(classes) if classes else []
     self.inverted = inverted
     self.complete = complete
Example #2
0
class Character(BaseNode, SimpleCompilableMixin):
    '''
    Match a single character.  Currently the `__contains__` method should be
    used for testing; that will call the `BaseAlphabet` as required.

    How can this be improved?

    - `intervals` define simple character ranges (eg. 0-9).

    - `alphabet` is the alphabet used.

    - `classes` is a list of `(class_, label, invert)` triplets, where:
      - `class_` is a method on `alphabet` (eg. `.digit`)
      - `label` is used for display
      - `invert` is true if `class_` should fail

    - `inverted` is a global boolean that inverts the entire result (if `True`
      the test should fail).

    - `complete` is True if the test (without `inverted`) will always succeed.
    '''

    def __init__(self, intervals, alphabet, classes=None,
                 inverted=False, complete=False):
        '''Create the node; see the class documentation for the arguments.'''
        super(Character, self).__init__(consumes=True, size=1)
        self.__simple = CharSet(intervals, alphabet)
        self.alphabet = alphabet
        # Copy the caller's list: `append_class` extends `self.classes` in
        # place, and without a copy a non-empty list passed in here would be
        # modified behind the caller's back.  (An empty or missing value was
        # already replaced by a fresh list.)
        self.classes = list(classes) if classes else []
        self.inverted = inverted
        self.complete = complete

    def _kargs(self):
        '''
        Return the superclass's kargs with `intervals` added, taken from the
        internal `CharSet`.
        '''
        # NOTE(review): presumably needed because the intervals live on the
        # private CharSet rather than on an `intervals` attribute - confirm
        # against `BaseNode._kargs`.
        kargs = super(Character, self)._kargs()
        kargs['intervals'] = self.__simple.intervals
        return kargs

    def _compile_args(self):
        '''Return the compile arguments: a list containing only this node.'''
        return [self]

    def append_interval(self, interval):
        '''Add an additional interval.'''
        self.__simple.append(interval, self.alphabet)

    def append_class(self, class_, label, inverted=False):
        '''
        Add a character class (see class docs).

        If `class_` is already present it is not added again.  When the
        existing entry has the opposite `inverted` flag, `complete` is set.
        '''
        for (known_class, _, known_inverted) in self.classes:
            if class_ == known_class:
                # The class is already registered.  With the same `inverted`
                # flag there is nothing to add; with the opposite flag the
                # class and its complement are both present, so every
                # character matches and the set is complete.
                self.complete = self.complete or inverted != known_inverted
                return
        self.classes.append((class_, label, inverted))

    def invert(self):
        '''Invert the selection.'''
        self.inverted = not self.inverted

    def __contains__(self, character):
        '''
        Test `character` against this node.

        The character matches if the node is `complete`, if any class test
        gives a result different from its `invert` flag, or if it is in the
        simple intervals.  The outcome is negated when `inverted` is set.
        '''
        # `or` and `any` short-circuit, so the checks run in the same order
        # (complete, classes, intervals) and stop at the first success.
        matched = (self.complete
                   or any(class_(character) != invert
                          for (class_, _, invert) in self.classes)
                   or character in self.__simple)
        return not matched if self.inverted else matched

    def __str__(self):
        '''This returns (the illegal) [^] for all and [] for none.'''
        if self.complete:
            return '[]' if self.inverted else '[^]'
        # Classes are shown as backslash-prefixed labels, ahead of the
        # simple intervals.
        contents = ''.join('\\' + label for (_, label, _) in self.classes)
        contents += self.__simple.to_str(self.alphabet)
        return '[' + ('^' if self.inverted else '') + contents + ']'

    def __hash__(self):
        '''Hash on the string representation.'''
        return hash(str(self))

    def __bool__(self):
        '''True if any class or simple interval has been defined.'''
        # NOTE(review): `complete` and `inverted` are not consulted here -
        # confirm that callers only use this to detect an empty definition.
        return bool(self.classes or self.__simple)

    def __nonzero__(self):
        '''Python 2 spelling of `__bool__`.'''
        return self.__bool__()

    def simplify(self):
        '''
        Reduce to a simpler opcode if possible.

        A complete set becomes `NoMatch()` when inverted and `Dot(True)`
        otherwise.  A node with classes, or one that is inverted, is returned
        unchanged; anything else is delegated to the underlying `CharSet`.
        '''
        if self.complete:
            return NoMatch() if self.inverted else Dot(True)
        if self.classes or self.inverted:
            return self
        return self.__simple.simplify(self.alphabet, self)