def __init__(self, intervals, alphabet, classes=None, inverted=False,
             complete=False):
    '''
    Set up a single-character matcher.

    - `intervals` and `alphabet` are handed to `CharSet` to build the
      simple range-based part of the test.
    - `classes` is stored as given; a fresh empty list is used when it
      is omitted (or otherwise false).
    - `inverted` and `complete` are stored unchanged.

    The base initialiser is called with `consumes=True` and `size=1`.
    '''
    super(Character, self).__init__(consumes=True, size=1)
    self.__simple = CharSet(intervals, alphabet)
    self.alphabet = alphabet
    # `or` yields a new list whenever no (or an empty) list was supplied
    self.classes = classes or []
    self.inverted = inverted
    self.complete = complete
class Character(BaseNode, SimpleCompilableMixin):
    '''
    Match a single character.

    Membership is tested with `in` (ie. `__contains__`), which combines
    the simple intervals with the character classes described below.

    - `intervals` define simple character ranges (eg. 0-9); they are
      stored in a `CharSet`.
    - `alphabet` is the alphabet used.
    - `classes` is a list of `(class_, label, invert)` triplets, where:
      - `class_` is a method on `alphabet` (eg. `.digit`) that is called
        with the character being tested
      - `label` is used for display
      - `invert` is true if the sense of `class_` is reversed
    - `inverted` is a global boolean that inverts the entire result.
    - `complete` is true if the test (before `inverted` is applied)
      always succeeds.
    '''

    def __init__(self, intervals, alphabet, classes=None, inverted=False,
                 complete=False):
        '''Store the configuration and build the underlying `CharSet`.'''
        super(Character, self).__init__(consumes=True, size=1)
        self.__simple = CharSet(intervals, alphabet)
        self.alphabet = alphabet
        # `or` yields a new list whenever no (or an empty) list was supplied
        self.classes = classes or []
        self.inverted = inverted
        self.complete = complete

    def _kargs(self):
        '''Extend the inherited keyword arguments with our intervals.'''
        arguments = super(Character, self)._kargs()
        arguments['intervals'] = self.__simple.intervals
        return arguments

    def _compile_args(self):
        '''The only compile argument is this node itself.'''
        return [self]

    def append_interval(self, interval):
        '''Add an additional interval.'''
        self.__simple.append(interval, self.alphabet)

    def append_class(self, class_, label, inverted=False):
        '''
        Add a character class (see class docs).

        If `class_` is already present nothing is appended; instead, when
        the new entry has the opposite `inverted` flag to the stored one,
        the node is marked as `complete`.
        '''
        for (class2, _, inverted2) in self.classes:
            if class_ == class2:
                # same class, opposite sense: together they accept every
                # character; same sense: a duplicate, so nothing changes
                self.complete = self.complete or (inverted != inverted2)
                return
        self.classes.append((class_, label, inverted))

    def invert(self):
        '''Invert the selection.'''
        self.inverted = not self.inverted

    def __contains__(self, character):
        '''
        Test `character` against this node.

        The (pre-inversion) test succeeds if the node is `complete`, if
        any class accepts the character (allowing for that class's own
        `invert` flag), or if the character is in the simple intervals.
        The outcome is then flipped when `inverted` is set.
        '''
        matched = self.complete
        if not matched:
            matched = any(class_(character) != invert
                          for (class_, _, invert) in self.classes)
        if not matched:
            matched = character in self.__simple
        return (not matched) if self.inverted else matched

    def __str__(self):
        '''This returns (the illegal) [^] for all and [] for none.'''
        if self.complete:
            if self.inverted:
                return '[]'
            return '[^]'
        pieces = ['[']
        if self.inverted:
            pieces.append('^')
        # each class is displayed as a backslash followed by its label
        pieces.extend('\\' + label for (_, label, _) in self.classes)
        pieces.append(self.__simple.to_str(self.alphabet))
        pieces.append(']')
        return ''.join(pieces)

    def __hash__(self):
        '''Hash on the string form.'''
        return hash(str(self))

    def __bool__(self):
        '''True if there is at least one class or a non-empty `CharSet`.'''
        if self.classes:
            return True
        return bool(self.__simple)

    def __nonzero__(self):
        '''Python 2 spelling of `__bool__`.'''
        return self.__bool__()

    def simplify(self):
        '''Reduce to a simpler opcode if possible.'''
        if self.complete:
            # matches everything, or (when inverted) nothing at all
            return NoMatch() if self.inverted else Dot(True)
        if self.classes or self.inverted:
            # classes or inversion are present, so keep this node as it is
            return self
        return self.__simple.simplify(self.alphabet, self)