def __call__(self, tokenStream):
    # Drop any token whose class is in the ignore set; this stage never
    # reports issues of its own.
    tokens = PrettyPrintingList()
    for token in tokenStream:
        if token.__class__ not in self._ignore:
            tokens.append(token)
    return (tokens, [])
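# For illustration only (not part of the original): assuming this filter
# lives on a class whose _ignore attribute holds the token classes to drop,
# e.g. whitespace and comment tokens, a call might look like:
#
#     filtered, issues = tokenFilter(tokens)
#
# where 'tokenFilter' is a hypothetical instance name. Every token whose
# class appears in _ignore is dropped, and 'issues' is always the empty list.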
def __call__(self, source):
    # This approach is more memory intensive, but so much simpler to get
    # right.
    characterStream = PrettyPrintingList()
    while True:
        # Be sure to get the position before reading the character.
        position = source.tell()
        char = source.read(1)
        if not char:
            break
        characterStream.append((char, position))
    characterStream.append((EOFSymbol, source.tell()))
    return (characterStream, [])
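# A hedged sanity check (the enclosing class is not shown here, so
# 'characterReader' below is a hypothetical instance name). Each entry in the
# stream pairs a character with the position it was read from, and the
# stream is terminated by an EOFSymbol entry:
#
#     import io
#     chars, issues = characterReader(io.StringIO("ab"))
#     # chars == [('a', 0), ('b', 1), (EOFSymbol, 2)]; issues == []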
def __call__(self, charStream):
    tokenStream = PrettyPrintingList()
    issues = []
    # This offset could be done away with by iterating over the stream
    # instead.
    offset = 0
    while offset < len(charStream):
        (numCharsMatched, tokenType) = self._longestMatch(charStream[offset:])
        if numCharsMatched:
            if tokenType is not None:  # XXX should this be an ASSERT tokenType?
                startIndex = charStream[offset][1]
                try:
                    endIndex = charStream[offset + numCharsMatched][1]
                    matched = charStream[offset : offset + numCharsMatched]
                except IndexError:
                    # If the file doesn't end with a newline.
                    endIndex = charStream[offset + numCharsMatched - 1][1]
                    matched = charStream[offset : offset + numCharsMatched - 1]
                # XXX This is kind of ugly. Only certain token types need to
                # hold on to the original character information for their
                # values. In some cases this even involves re-scanning (e.g.
                # numeric literals).
                lexeme = "".join([item[0] for item in matched])
                nextTok = tokenType(startIndex, endIndex, lexeme)
                tokenStream.append(nextTok)
            # Advance past the match even when no token is produced, so a
            # None tokenType (an ignored match) cannot loop forever.
            offset += numCharsMatched
        else:
            # XXX Lots more smarts needed here for error recovery.
            tokenStream = None
            issues.append(UnexpectedSymbolError(charStream[offset][1],
                                                charStream[offset][1]))
            break
    # XXX Just hack in the insertion of BOF and EOF for now.
    if tokenStream is not None:
        tokenStream.insert(0, token.BOF())
        tokenStream.append(token.EOF())
    return (tokenStream, issues)
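# A minimal sketch of how the three stages above might be chained, assuming
# they live on classes instantiated as characterReader, tokeniser and
# tokenFilter (hypothetical names; the real class names are not shown here),
# and that every stage returns a (result, issues) pair with result set to
# None on failure.

import io

def scan(text, characterReader, tokeniser, tokenFilter):
    # Read characters with positions, then tokenise, then drop ignored
    # tokens; stop at the first stage that reports issues.
    charStream, issues = characterReader(io.StringIO(text))
    if issues:
        return (None, issues)
    tokenStream, issues = tokeniser(charStream)
    if issues:
        return (None, issues)
    return tokenFilter(tokenStream)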