def setup(app):
    """Sphinx extension entry point.

    Installs 3-space-tab C++/Python highlighters, hooks the autodoc
    pipeline, and registers the ``ystaticattr`` object type (falling
    back to the API name that the running Sphinx major version offers).
    """
    from sphinx.highlighting import lexers
    from pygments.lexers.compiled import CppLexer
    from pygments.lexers.agile import PythonLexer

    # The project uses 3-space tabs; configure every highlighter alias to match.
    for alias in ('cpp', 'c++'):
        lexers[alias] = CppLexer(tabsize=3)
    lexers['python'] = PythonLexer(tabsize=3)

    # Wire the autodoc/source hooks defined elsewhere in this module.
    for event, handler in (
        ('source-read', fixSrc),
        ('autodoc-skip-member', customExclude),
        ('autodoc-process-signature', fixSignature),
        ('autodoc-process-docstring', fixDocstring),
    ):
        app.connect(event, handler)

    import sphinx.versioning
    # In newer Sphinx versions add_description_unit was deprecated in
    # favour of add_object_type; keep the old call for backcompatibility
    # with Sphinx 1.x.
    if sphinx.version_info[0] == 1:
        register = app.add_description_unit
    else:
        register = app.add_object_type
    register('ystaticattr', None,
             objname='static attribute',
             indextemplate='pair: %s; static method',
             parse_node=parse_ystaticattr)
def tokenize_file(fh):
    """Use the Python pygments library to tokenize a C++ file"""
    source = fh.read()
    lexer = CppLexer()
    # Drop the character offsets; keep only (token_type, value) pairs.
    return [(tok, value) for _, tok, value in lexer.get_tokens_unprocessed(source)]
def setup(app):
    """Sphinx extension entry point.

    Registers 3-space-tab C++/Python highlighters, connects the autodoc
    hooks defined in this module, and registers the ``ystaticattr``
    object type.
    """
    from sphinx.highlighting import lexers
    from pygments.lexers.compiled import CppLexer
    lexers['cpp'] = CppLexer(tabsize=3)
    lexers['c++'] = CppLexer(tabsize=3)
    from pygments.lexers.agile import PythonLexer
    lexers['python'] = PythonLexer(tabsize=3)
    app.connect('source-read', fixSrc)
    app.connect('autodoc-skip-member', customExclude)
    app.connect('autodoc-process-signature', fixSignature)
    app.connect('autodoc-process-docstring', fixDocstring)
    import sphinx
    # BUG FIX: add_description_unit was deprecated in Sphinx 2 and later
    # removed, so calling it unconditionally crashes on modern Sphinx.
    # Mirror the version guard used by the other setup() in this file.
    if sphinx.version_info[0] == 1:
        app.add_description_unit('ystaticattr', None,
                                 objname='static attribute',
                                 indextemplate='pair: %s; static method',
                                 parse_node=parse_ystaticattr)
    else:
        app.add_object_type('ystaticattr', None,
                            objname='static attribute',
                            indextemplate='pair: %s; static method',
                            parse_node=parse_ystaticattr)
def get_tokens_unprocessed(self, text):
    """Yield CppLexer tokens, re-tagging known extra identifiers.

    Plain ``Name`` tokens listed in ``self.EXTRA_CLASSNAMES`` are promoted
    to ``Name.Class``; those in ``self.EXTRA_KEYWORDS`` become ``Keyword``.
    Everything else passes through unchanged.
    """
    for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
        if token is Name and value in self.EXTRA_CLASSNAMES:
            yield index, Name.Class, value
        # BUG FIX: the original used C-style "else if", which is a Python
        # SyntaxError; Python spells this "elif".
        elif token is Name and value in self.EXTRA_KEYWORDS:
            yield index, Keyword, value
        else:
            yield index, token, value
class ProgrammingLexer(object):
    """lexes a string with multiple programming lexers and returns tokens"""

    # Mapping of language id -> pygments lexer instance used for detection.
    lexers = {
        'actionscript': ActionScript3Lexer(),
        'c': CLexer(),
        'cpp': CppLexer(),
        'cs': CSharpLexer(),
        'java': JavaLexer(),
        'javascript': JavascriptLexer(),
        'perl': PerlLexer(),
        'php': PhpLexer(startinline=True),
        'python': PythonLexer(),
        'ruby': RubyLexer(),
        'vb': VbNetLexer(),
    }

    def __init__(self, matched_langs, data_string):
        """Store the candidate language ids and the code to be lexed.

        FIX: the original also declared ``matched_languages = []`` and
        ``data = None`` at class level; a mutable class attribute is
        shared across instances and both were dead defaults, since this
        constructor always assigns instance attributes.
        """
        self.matched_languages = matched_langs
        self.data = data_string

    def lex(self):
        """
        For every possible matched language, we run a lexer to see if we can
        eliminate it as a possible match. If we detect errors, or have no
        lexer matches, we remove it from the list.

        :return: the list of lexer results
        :rtype: list
        """
        results = {}
        threads = []

        # Start one lexer thread per candidate language that has a lexer.
        for lexer_id, lexer in self.lexers.items():
            if lexer_id not in self.matched_languages:
                continue
            thread = ProgrammingLexerThread(lexer_id, lexer, self.data)
            thread.start()
            threads.append(thread)

        for thr in threads:
            thr.join()

        # Keep only the languages whose lexer produced a truthy result.
        for thr in threads:
            if thr.result:
                results[thr.thread_id] = thr.result
        return results
def test_lexer_on_cpp_class():
    # Regression test: CppLexer must tag the class name as Name.Class and
    # the inline-defined method as Name.Function (see the "<--" markers in
    # the expected token stream below).
    code = textwrap.dedent('''
    #include <iostream>
    using namespace std;

    class Rectangle {
       int width, height;
      public:
       void set_values (int,int);
       int area() {return width*height;}
    };
    ''')
    # Full expected (token_type, value) stream for the snippet above.
    result = [
        (Token.Comment.Preproc, '#'),
        (Token.Comment.Preproc, 'include'),
        (Token.Text, ' '),
        (Token.Comment.PreprocFile, '<iostream>'),
        (Token.Comment.Preproc, '\n'),
        (Token.Keyword, 'using'),
        (Token.Text, ' '),
        (Token.Keyword, 'namespace'),
        (Token.Text, ' '),
        (Token.Name, 'std'),
        (Token.Punctuation, ';'),
        (Token.Text, '\n'),
        (Token.Text, '\n'),
        (Token.Keyword, 'class'),
        (Token.Text, ' '),
        (Token.Name.Class, 'Rectangle'),  # <-- class name tagged as Name.Class
        (Token.Text, ' '),
        (Token.Punctuation, '{'),
        (Token.Text, '\n'),
        (Token.Text, '   '),
        (Token.Keyword.Type, 'int'),
        (Token.Text, ' '),
        (Token.Name, 'width'),
        (Token.Punctuation, ','),
        (Token.Text, ' '),
        (Token.Name, 'height'),
        (Token.Punctuation, ';'),
        (Token.Text, '\n'),
        (Token.Text, '  '),
        (Token.Keyword, 'public'),
        (Token.Operator, ':'),
        (Token.Text, '\n'),
        (Token.Text, '   '),
        (Token.Keyword.Type, 'void'),
        (Token.Text, ' '),
        (Token.Name, 'set_values'),
        (Token.Text, ' '),
        (Token.Punctuation, '('),
        (Token.Keyword.Type, 'int'),
        (Token.Punctuation, ','),
        (Token.Keyword.Type, 'int'),
        (Token.Punctuation, ')'),
        (Token.Punctuation, ';'),
        (Token.Text, '\n'),
        (Token.Text, '   '),
        (Token.Keyword.Type, 'int'),
        (Token.Text, ' '),
        (Token.Name.Function, 'area'),  # <-- inline method tagged as Name.Function
        (Token.Punctuation, '('),
        (Token.Punctuation, ')'),
        (Token.Text, ' '),
        (Token.Punctuation, '{'),
        (Token.Keyword, 'return'),
        (Token.Text, ' '),
        (Token.Name, 'width'),
        (Token.Operator, '*'),
        (Token.Name, 'height'),
        (Token.Punctuation, ';'),
        (Token.Punctuation, '}'),
        (Token.Text, '\n'),
        (Token.Punctuation, '}'),
        (Token.Punctuation, ';'),
        (Token.Text, '\n')
    ]
    lex = CppLexer()
    tokenList = lex.get_tokens(code)
    assert list(tokenList) == result