def tokenize_file(fh):
    """Tokenize a C++ source file with the pygments ``CppLexer``.

    Args:
        fh: An open file-like object whose ``read()`` yields the C++ source.

    Returns:
        list[tuple]: ``(token_type, value)`` pairs in source order; the
        character offsets reported by the lexer are deliberately dropped.
    """
    code = fh.read()
    lexer = CppLexer()
    # Comprehension replaces the manual append loop; only (token, value)
    # is kept, discarding the index from get_tokens_unprocessed.
    return [(tok, value) for _index, tok, value in lexer.get_tokens_unprocessed(code)]
def get_tokens_unprocessed(self, text):
    """Yield ``(index, token, value)`` triples, reclassifying extra names.

    Wraps ``CppLexer.get_tokens_unprocessed`` and promotes plain ``Name``
    tokens listed in ``self.EXTRA_CLASSNAMES`` to ``Name.Class`` and those
    in ``self.EXTRA_KEYWORDS`` to ``Keyword``; everything else passes
    through unchanged.
    """
    for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
        if token is Name and value in self.EXTRA_CLASSNAMES:
            yield index, Name.Class, value
        # BUG FIX: original read "else if", which is a Python syntax
        # error — the correct keyword is "elif".
        elif token is Name and value in self.EXTRA_KEYWORDS:
            yield index, Keyword, value
        else:
            yield index, token, value