def _state0(self, char, tokenList):
    """
    Normal state: handle one character found outside of any escape
    sequence, character class or exponent.

    Operators and literals append a token to *tokenList* and return None.
    An opening bracket starts buffering a character class and returns 2,
    an opening brace returns 9 and a backslash returns 1 (presumably the
    number of the next state; the dispatcher is not visible here).

    :param char: The character to handle (str, or int for bytes input)
    :param tokenList: List receiving the tokens produced
    :raises TokenizeError: on a stray closing bracket or brace
    """
    def emit(kind, value):
        tokenList.append(self.Token(kind, value, self._pos))

    if char in chars('*'):
        emit(self.TOK_EXPONENT, ExponentToken(0, None))
        return None
    if char in chars('+'):
        emit(self.TOK_EXPONENT, ExponentToken(1, None))
        return None
    if char in chars('.'):
        emit(self.TOK_CLASS, AnyCharacterClass())
        return None
    if char in chars('('):
        emit(self.TOK_LPAREN, char)
        return None
    if char in chars(')'):
        emit(self.TOK_RPAREN, char)
        return None
    if char in chars('|'):
        emit(self.TOK_UNION, char)
        return None

    # A character class is buffered in a text or binary stream depending
    # on whether the opening bracket came in as a str or as a byte value.
    if char == '[':
        buffer = io.StringIO()
        buffer.write(char)
        self._currentClass = buffer
        return 2
    if char == b'['[0]:
        buffer = io.BytesIO()
        buffer.write(bytes([char]))
        self._currentClass = buffer
        return 2

    if char in chars('{'):
        return 9
    if char in chars(']') + chars('}'):
        raise TokenizeError('Unexpected token "%s"' % str(char))
    if char in chars('\\'):
        return 1

    # Anything else stands for itself.
    emit(self.TOK_CLASS, LitteralCharacterClass(char))
    return None
def ignore(char):
    """
    Tell whether a character of the input stream must be skipped.
    Override this to change the set of skipped characters; by default
    it is made of the space and the tab.

    :param char: The character to test
    :return: True if *char* should be ignored
    """
    skipped = chars(' ') + chars('\t')
    return char in skipped
def _state2(self, char, tokenList):
    """
    In character class: buffer *char* until the closing bracket.

    A backslash returns 3 without buffering anything. The closing bracket
    is buffered too, then the whole buffer is turned into a
    RegexCharacterClass token appended to *tokenList* and 0 is returned.
    Any other character is buffered and None is returned.
    """
    if char in chars('\\'):
        return 3

    closing = char in chars(']')

    # Bytes input is iterated as ints; convert back to a 1-byte string
    # before writing to the buffer.
    chunk = bytes([char]) if isinstance(char, int) else char
    self._currentClass.write(chunk)

    if not closing:
        return None

    spec = self._currentClass.getvalue()
    tokenList.append(self.Token(self.TOK_CLASS, RegexCharacterClass(spec), self._pos))
    self._currentClass = None
    return 0
def _state10(self, char, tokenList):
    """
    In exponent, computing start value.

    A dash stores the value accumulated so far as the range start and
    returns 11. A closing brace appends an exponent token whose two bounds
    are both the accumulated value and returns 0. Any other character must
    be a digit, which is appended to the accumulated value (None returned).

    :raises InvalidExponentError: if *char* is not accepted by intValue
    """
    if char in chars('-'):
        self._startExponent = self._exponentValue
        return 11

    if char in chars('}'):
        count = self._exponentValue
        tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(count, count), self._pos))
        return 0

    try:
        digit = intValue(char)
    except ValueError:
        raise InvalidExponentError('Invalid character "%s"' % char)

    # Shift the accumulated value one decimal place and add the new digit.
    self._exponentValue = self._exponentValue * 10 + digit
    return None
def _parse(self, string, pos):
    """
    Process *string* one character at a time, starting at index *pos*.

    While no consumer is installed, ignored characters are skipped and
    everything else is handed to _findMatch, which returns the index to
    resume from. While a consumer is installed, each character is fed to
    it instead.

    :param string: The input being processed
    :param pos: Index of the first character to process
    :return: The index reached once the loop is over
    """
    while pos < len(string):
        char = string[pos]
        try:
            if self.consumer() is None:
                if self.ignore(char):
                    pos += 1
                    # The 'finally' clause below still runs on this path.
                    continue
                pos = self._findMatch(string, pos)
            else:
                try:
                    tok = self.consumer().feed(char)
                except SkipToken:
                    # The consumer is dropped and no token is emitted.
                    self.setConsumer(None)
                else:
                    # A non-None result means the consumer is done; a token
                    # is emitted only if its first item is not None.
                    if tok is not None:
                        self.setConsumer(None)
                        if tok[0] is not None:
                            self.newToken(self.Token(*tok, self.position()))
                pos += 1
        finally:
            # Position tracking is updated for 'char' whatever happened
            # above, including when an exception propagates.
            # NOTE(review): this records a single step even when _findMatch
            # moved 'pos' by more than one; presumably _findMatch accounts
            # for the rest -- confirm.
            if char in chars('\n'):
                self.advanceLine()
            else:
                self.advanceColumn()
    return pos
def _state11(self, char, tokenList): # pylint: disable=W0613
    """
    In exponent, expecting second term of interval.

    The character must be the first digit of the range end: it becomes
    the new accumulated value and 12 is returned.

    :raises InvalidExponentError: on a closing brace (no range end) or on
        a character rejected by intValue
    """
    if char in chars('}'):
        raise InvalidExponentError('Missing range end')

    try:
        firstDigit = intValue(char)
    except ValueError:
        raise InvalidExponentError('Invalid character "%s"' % char)
    else:
        self._exponentValue = firstDigit

    return 12
def _state12(self, char, tokenList):
    """
    In exponent, computing end value.

    Digits are appended to the accumulated value (None returned). The
    closing brace checks that the range is not reversed, appends the
    exponent token to *tokenList* and returns 0.

    :raises InvalidExponentError: on a character rejected by intValue, or
        when the range start is greater than the range end
    """
    if char not in chars('}'):
        try:
            digit = intValue(char)
        except ValueError:
            raise InvalidExponentError('Invalid character "%s"' % char)
        # Shift the accumulated value one decimal place and add the digit.
        self._exponentValue = self._exponentValue * 10 + digit
        return None

    low, high = self._startExponent, self._exponentValue
    if low > high:
        raise InvalidExponentError('Invalid exponent range %d-%d' % (low, high))

    tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(low, high), self._pos))
    return 0
def feed(self, char):
    """
    Process one character of input. Once the whole input has been fed,
    call this one last time with EOF.

    The character is queued along with the position it was seen at, the
    current position is advanced, then the queue is drained: every token
    produced by _feed is passed to newToken.
    """
    seenAt = self.position()
    self._input.append((char, seenAt))

    advance = self.advanceLine if char in chars('\n') else self.advanceColumn
    advance()

    # The emptiness check is repeated on every pass, so entries added to
    # the queue while draining are processed as well.
    while self._input:
        pending, pendingPos = self._input.pop(0)
        for token in self._feed(pending, pendingPos):
            self.newToken(token)
def test_bytes(self):
    # Iterating over a bytes object yields ints; each of them must be
    # reported as a member of chars('*').
    star = chars('*')
    for code in b'*':
        self.assertTrue(code in star)
def test_str(self):
    # A one-character str must be reported as a member of chars('*').
    found = '*' in chars('*')
    self.assertTrue(found)
async def _asyncFeed(self, char, charPos): # pylint: disable=R0912,R0915
    # Unfortunately this is copy/pasted from ProgressiveLexer._feed to add the async stuff...
    #
    # Handles one input character (or EOF). Tokens are handed out through
    # 'await yield_(...)'; presumably yield_ comes from an async generator
    # helper imported elsewhere in the file -- confirm.
    #
    # char:    the character to process, or EOF
    # charPos: position recorded for this character in the current match;
    #          when None, self.position() is used instead
    #
    # LexerError raised while matching restarts the lexer before
    # propagating.

    # The position is advanced before anything else.
    if char in chars('\n'):
        self.advanceLine()
    else:
        self.advanceColumn()

    # With a consumer installed the character goes to it and regular
    # matching is bypassed, whatever the consumer answers.
    if self.consumer() is not None:
        tok = await self.consumer().feed(char)
        if tok is not None:
            # The consumer is done; a token is emitted only if its first
            # item is not None.
            self.setConsumer(None)
            if tok[0] is not None:
                await yield_(self.Token(*tok, self.position()))
        return

    try:
        if char is EOF:
            if self._state == 0:
                # EOF while in state 0: restart and forward EOF only.
                self.restartLexer()
                await yield_(EOF)
                return
            # EOF in another state: take the largest position recorded
            # across the remaining candidates and keep only the matches
            # that reach it.
            self._maxPos = max(self._maxPos, max(pos[0] for regex, callback, defaultType, pos in self._currentState))
            if self._maxPos == 0 and self._currentMatch:
                raise LexerError(self._currentMatch[0][0], *self._currentMatch[0][1])
            self._matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self._currentState if pos[0] == self._maxPos])
            self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
        else:
            # Ignored characters are dropped only while in state 0.
            if self._state == 0 and self.ignore(char):
                return
            self._state = 1

            # Feed every candidate. A truthy result records the current
            # length as that candidate's position; DeadState removes the
            # candidate, keeping its recorded position (if any) as a match.
            newState = list()
            for regex, callback, defaultType, pos in self._currentState:
                try:
                    if regex.feed(char):
                        pos[0] = len(self._currentMatch) + 1
                except DeadState:
                    if pos[0]:
                        self._matches.append((pos[0], callback))
                        self._maxPos = max(self._maxPos, pos[0])
                else:
                    newState.append((regex, callback, defaultType, pos))

            # If every surviving candidate reports isDeadEnd(), they are
            # all recorded as matches at the current length and dropped.
            # This is also true (with nothing to record) when none survived.
            if all([regex.isDeadEnd() for regex, callback, defaultType, pos in newState]):
                for regex, callback, defaultType, pos in newState:
                    self._matches.append((len(self._currentMatch) + 1, callback))
                    self._maxPos = max(self._maxPos, len(self._currentMatch) + 1)
                newState = list()

            # Only the matches reaching the largest position are kept.
            self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
            self._currentState = newState

            self._currentMatch.append((char, self.position() if charPos is None else charPos))
            # Candidates remain: wait for more input.
            if self._currentState:
                return

            # No candidate left and no position ever recorded.
            if self._maxPos == 0:
                raise LexerError(char, *self.position())
    except LexerError:
        self.restartLexer()
        raise

    # Matching is over for the current run of characters.
    tok = self._finalizeMatch()
    if tok is not None:
        await yield_(tok)

    if char is EOF:
        self.restartLexer()
        await yield_(EOF)
def __contains__(self, char):
    """
    Membership test: every character belongs here except the line feed.

    :param char: The character to test
    :return: False if *char* is in chars('\\n'), True otherwise
    """
    isLineFeed = char in chars('\n')
    return not isLineFeed