def get_tokens_unprocessed(self, text):
    """Tokenize an interactive console transcript line by line.

    Lines matching ``self.input_prompt`` yield a prompt token followed by
    the rest of the line lexed as Python; lines matching
    ``self.output_prompt`` yield a prompt token followed by plain output;
    every other line is emitted as generic output.

    Fixes: the original had an ``elif lstrip.startswith('...')`` branch
    whose body was identical to the ``else`` branch (dead code), and
    created an unused ``PythonTracebackLexer`` instance.
    """
    pylexer = PythonLexer(**self.options)
    for line in text.splitlines():
        lstrip = line.lstrip()
        if lstrip.startswith('Out'):
            # Drop leading whitespace on output lines so the output
            # prompt regex can match at the start of the line.
            line = lstrip + '\n'
        else:
            # splitlines() removed the newline; restore it so prompt
            # regexes behave as they would on raw lines.
            line = line + '\n'
        input_prompt = self.input_prompt.match(line)
        output_prompt = self.output_prompt.match(line)
        if input_prompt is not None:
            yield (0, Generic.Prompt, input_prompt.group())
            code = line[input_prompt.end():]
            for item in pylexer.get_tokens_unprocessed(code):
                yield item
        elif output_prompt is not None:
            # Use the 'error' token for output. We should probably make
            # our own token, but error is typically in a bright color like
            # red, so it works fine for our output prompts.
            yield (0, Generic.Error, output_prompt.group())
            index = output_prompt.end()
            yield index, Generic.Output, line[index:]
        else:
            yield 0, Generic.Output, line
def highlight_code(codebox):
    """Colour the text in *codebox* using the Pygments 'default' style."""
    style = styles.get_style_by_name('default')
    cursor = codebox.createTextCursor()
    cursor.gotoStart(False)
    for token_type, token_text in PythonLexer().get_tokens(codebox.String):
        # Extend the selection over this token's text, apply its colour,
        # then collapse the selection so the next token starts fresh.
        cursor.goRight(len(token_text), True)
        cursor.CharColor = to_rgbint(style.style_for_token(token_type)['color'])
        cursor.goRight(0, False)
def get_tokens_unprocessed(self, text):
    """Retag names of previously detected modules as keywords."""
    for pos, tok, val in PythonLexer.get_tokens_unprocessed(self, text):
        # Colourize previously detected modules
        if tok is Name and val in self.lpy_modules:
            tok = Keyword
        yield pos, tok, val
def get_tokens_unprocessed(self, text):
    """Promote names listed in ``EXTRA_KEYWORDS`` to pseudo-keywords."""
    stream = PythonLexer.get_tokens_unprocessed(self, text)
    for pos, tok, val in stream:
        if tok is Name and val in self.EXTRA_KEYWORDS:
            yield pos, Keyword.Pseudo, val
        else:
            yield pos, tok, val
def get_tokens_unprocessed(self, text):
    """Tokenize ``<python>----<template>`` input.

    If a line consisting of ``----`` is present, everything before the
    first ``----`` is lexed as Python and everything after it as
    HTML+Django; otherwise the whole text is lexed as HTML+Django.
    Delegated token indices are shifted so each token reports its true
    position in the combined input.

    Fixes: the original advanced ``offset`` by ``i + 1`` (correct only
    when the last Python token was one character long) and by 1 instead
    of 4 for the ``----`` separator, misplacing every following token.
    """
    offset = 0
    if re.search(r'^----\s*$', text, re.MULTILINE):
        py, _, text = text.partition('----')
        lexer = PythonLexer(**self.options)
        for i, token, value in lexer.get_tokens_unprocessed(py):
            yield i, token, value
            # Advance past the full token text, not just one character.
            offset = i + len(value)
        yield offset, Text, u'----'
        offset += len(u'----')
    lexer = HtmlDjangoLexer(**self.options)
    for i, token, value in lexer.get_tokens_unprocessed(text):
        yield offset + i, token, value
def get_tokens_unprocessed(self, text):
    """Tokenize a console transcript.

    Prompted input lines are buffered into ``curcode`` and lexed as
    Python; the prompt/comment/warning tokens are spliced back in at
    the recorded offsets via ``do_insertions``.  Any other line flushes
    the buffer and is emitted as generic output.
    """
    pylexer = PythonLexer(**self.options)
    # NOTE(review): tblexer is created but never used in this method --
    # presumably intended for traceback handling; confirm before removing.
    tblexer = PythonTracebackLexer(**self.options)
    curcode = ''      # buffered Python source between non-code lines
    insertions = []   # (offset into curcode, [(0, token, text)]) pairs
    for match in line_re.finditer(text):
        line = match.group()
        input_prompt = self.input_prompt.match(line)
        continue_prompt = self.continue_prompt.match(line.rstrip())
        output_prompt = self.output_prompt.match(line)
        if line.startswith("#"):
            insertions.append((len(curcode),
                               [(0, Comment, line)]))
        elif line.startswith("<warning>"):
            # Strip the 9-character "<warning>" tag and show the rest
            # in the error style.
            insertions.append((len(curcode),
                               [(0, Generic.Error, line[9:])]))
        elif input_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, input_prompt.group())]))
            curcode += line[input_prompt.end():]
        elif continue_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, continue_prompt.group())]))
            curcode += line[continue_prompt.end():]
        elif output_prompt is not None:
            # Use the 'error' token for output. We should probably make
            # our own token, but error is typically in a bright color like
            # red, so it works fine for our output prompts.
            insertions.append((len(curcode),
                               [(0, Generic.Error, output_prompt.group())]))
            curcode += line[output_prompt.end():]
        else:
            if curcode:
                # A plain line ends the current run of code: flush it.
                for item in do_insertions(insertions,
                                          pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        # Flush any code still buffered when the input ends.
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item
def setLexerFromFilename(self, filename):
    """
    Change the lexer based on the filename (actually only the
    extension is needed)

    :param filename: Filename or extension
    """
    try:
        lexer = get_lexer_for_filename(filename)
    except ClassNotFound:
        # No lexer registered for this extension: default to Python.
        lexer = PythonLexer()
    self._lexer = lexer
def get_tokens_unprocessed(self, text):
    """Tokenize an interactive session transcript.

    Prompted input is buffered into ``curcode`` and lexed as Python,
    with prompt/comment tokens re-inserted at the recorded offsets via
    ``do_insertions``; any other line is emitted as generic output.
    """
    pylexer = PythonLexer(**self.options)
    # NOTE(review): tblexer is never used in this method -- presumably
    # meant for traceback lines; confirm before removing.
    tblexer = PythonTracebackLexer(**self.options)
    curcode = ''      # buffered Python source between non-code lines
    insertions = []   # (offset into curcode, [(0, token, text)]) pairs
    for match in line_re.finditer(text):
        line = match.group()
        input_prompt = self.input_prompt.match(line)
        continue_prompt = self.continue_prompt.match(line.rstrip())
        output_prompt = self.output_prompt.match(line)
        if line.startswith("#"):
            insertions.append((len(curcode),
                               [(0, Comment, line)]))
        elif input_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, input_prompt.group())]))
            curcode += line[input_prompt.end():]
        elif continue_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, continue_prompt.group())]))
            curcode += line[continue_prompt.end():]
        elif output_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Output, output_prompt.group())]))
            curcode += line[output_prompt.end():]
        else:
            if curcode:
                # A non-prompt line ends the current code run: flush it.
                for item in do_insertions(insertions,
                                          pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        # Flush whatever code is still buffered at end of input.
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item
def get_tokens_unprocessed(self, text):
    """Remap ``$`` errors, known KEYWORDS and PROPERTIES; drop ``###`` markers."""
    for pos, tok, val in PythonLexer.get_tokens_unprocessed(self, text):
        # Tokens beginning with "###" are swallowed entirely.
        if val.startswith("###"):
            continue
        if tok == Token.Error and val == "$":
            yield pos, Token.Keyword, val
        elif val in KEYWORDS and tok in [Name, Operator.Word]:
            yield pos, Token.Keyword, val
        elif tok in Name and val in PROPERTIES:
            yield pos, Name.Attribute, val
        else:
            yield pos, tok, val
def get_tokens_unprocessed(self, text):
    """Adjust PythonLexer output: ``$`` and KEYWORDS become keywords,
    PROPERTIES become attributes, and ``###`` markers are dropped."""
    stream = PythonLexer.get_tokens_unprocessed(self, text)
    for pos, tok, val in stream:
        if val.startswith("###"):
            # Skip "###" comment markers altogether.
            continue
        dollar_error = (tok == Token.Error and val == "$")
        keywordish = (tok in [Name, Operator.Word] and val in KEYWORDS)
        if dollar_error or keywordish:
            yield pos, Token.Keyword, val
        elif tok in Name and val in PROPERTIES:
            yield pos, Name.Attribute, val
        else:
            yield pos, tok, val
def setup(app):
    """Register tab-size-3 lexers and hook up the Sphinx extension events."""
    from sphinx.highlighting import lexers
    from pygments.lexers.compiled import CppLexer
    lexers['cpp'] = CppLexer(tabsize=3)
    lexers['c++'] = CppLexer(tabsize=3)
    from pygments.lexers.agile import PythonLexer
    lexers['python'] = PythonLexer(tabsize=3)

    # Wire the custom pre-processing / autodoc hooks.
    for event, handler in [('source-read', fixSrc),
                           ('autodoc-skip-member', customExclude),
                           ('autodoc-process-signature', fixSignature),
                           ('autodoc-process-docstring', fixDocstring)]:
        app.connect(event, handler)
    app.add_description_unit('ystaticattr', None,
                             objname='static attribute',
                             indextemplate='pair: %s; static method',
                             parse_node=parse_ystaticattr)
def main(): """ Función principal del script. """ if len(sys.argv) != 2: print 'Usage: {0} <input-file>'.format(os.path.basename(sys.argv[0])) else: input_file = os.path.abspath(sys.argv[1]) if input_file.endswith('.py'): lexer = PythonLexer() elif input_file.endswith('.c') or input_file.endswith('.h'): lexer = CLexer() elif input_file.endswith('.tiger') or input_file.endswith('.tig'): lexer = TigerLexer() else: print 'Error: Invalid input file. Only C, Python and Tiger programs accepted.' sys.exit() dot_index = -len(input_file) + input_file.rfind('.') output_file = '%s.tex' % input_file[:dot_index] with codecs.open(input_file, encoding='utf-8') as input: with codecs.open(output_file, mode='w', encoding='utf-8') as output: highlight(input.read(), lexer, LatexFormatter(), output)
def find_offending_line(mod, clsname, state, idx, pos):
    """ Returns a tuple of (lineno, charpos_start, charpos_end, line_content)

    Re-lexes the source of class ``clsname`` in module ``mod`` to find
    the regex string at position ``idx`` of state ``state`` in the
    class's ``tokens`` dict, then maps character offset ``pos`` within
    that (possibly concatenated) string back to a source location.
    Returns None when the location cannot be determined.
    """
    mod_text = get_module_text(mod)
    # skip as far as possible using regular expressions, then start lexing.
    for m in R_CLASSEXTRACT.finditer(mod_text):
        #print "Got class", m.group(1), '...' + repr(m.group(0)[-40:])
        if m.group(1) == clsname:
            break
    else:
        return None #raise ValueError("Can't find class %r" % (clsname,))

    def match_brace(brace):
        # Consume tokens from the shared iterator ``it`` until the
        # bracket opened by ``brace`` is closed, recursing for nested
        # brackets along the way.
        target = close_braces[open_braces.index(brace)]
        #print "matching brace", brace, target, it
        for x in it:
            #print x
            if x[-2] in Punctuation and x[-1] == target:
                #print " found"
                return True
            elif x[-2] in Punctuation and x[-1] in open_braces:
                # begin again!
                if not match_brace(x[-1]):
                    #print " fail2"
                    return False
        #print " inner fail"
        return False

    def until(t, match_braces=False):
        # Consume tokens up to (and including) the first one whose text
        # equals ``t``; optionally skip over balanced bracket groups.
        #print "until", t
        for x in it:
            if t == x[-1]:
                #print " found", repr(x[-1])
                return
            elif match_braces and x[-1] in open_braces:
                match_brace(x[-1])

    level = 0        # state machine: 0=seeking 'tokens', 1=inside dict,
                     # 2=inside target state's list, 3=inside a tuple
    tuple_idx = 0    # index of the current tuple within the state's list
    string_pos = 0   # running char offset across concatenated strings

    def amal(i):
        # Amalgamate consecutive String tokens into one token so that
        # implicitly concatenated literals are handled as a single string.
        si = None
        pt = None
        pd = None
        for idx, tok, data in i:
            if tok in String:
                tok = String
            if tok == pt == String:
                pd += data
            else:
                if pt:
                    yield si, pt, pd
                pt = tok
                pd = data
                si = idx
        if pt:
            yield si, pt, pd

    def filt(i):
        # Convert (index, token, text) triples into
        # (line, column, token, text), tracking position by hand.
        line = 1 + mod_text[:m.start()].count('\n')
        col = 0
        for _, b, c in i:
            #print "got", b, repr(c)
            yield line, col, b, c
            line += c.count('\n')
            if '\n' in c:
                col = len(c) - c.rindex('\n') - 1
            else:
                col += len(c)

    it = filt(amal(PythonLexer().get_tokens_unprocessed(m.group(0))))
    for x, y, ttyp, text in it:
        #print "Loop", level, ttyp, repr(text)
        if level == 0 and ttyp is Name:
            if text == 'tokens':
                until('=')
                until('{')
                level = 1
        elif level == 1 and ttyp in String:
            #print "key", text
            key = eval(text, {}, {})
            # next is expected to be the colon.
            it.next()
            # next is either a left brace, or maybe a function call
            t = ''
            try:
                while not t.strip():
                    _, _, _, t = it.next()
            except StopIteration:
                return None
            # t is now the first token of the value, either '[' or 'SomeFunc'
            if key != state:
                if t == '[':
                    match_brace('[')
                else:
                    until(',', match_braces=True)
            else:
                level = 2
                if t != '[':
                    return None
        elif level == 2:
            if text == '(':
                # open a tuple
                level = 3
            elif text == ')':
                level = 1
                # honestly this should be able to just return
                #print "too late", idx, tuple_idx
                return
        elif level == 3:
            #print " idx", tuple_idx
            if text == ')':
                level = 2
                tuple_idx += 1
            elif text == '(':
                match_brace('(')
            elif tuple_idx == idx and ttyp in String:
                # this might be it!
                s = eval(text, {}, {})
                #print "maybe", string_pos, pos, (string_pos+len(s))
                if string_pos <= pos < (string_pos + len(s)):
                    # need to point in here
                    (dx, d1, d2) = find_substr_pos(text, pos - string_pos)
                    if dx == 0:
                        # Same source line as the string start: shift by
                        # the string token's starting column.
                        d1 += y
                        d2 += y
                    return (x + dx, d1, d2, mod_text.splitlines()[x + dx - 1])
                else:
                    string_pos += len(s)
            elif tuple_idx == idx and ttyp in Name:
                # If they're concatenating strings with vars, ignore.
                break
class PythonDocumentationFilter(Filter):
    """Dexy filter that documents installed Python packages.

    Reads package names from the input, imports each package, walks its
    modules/classes/functions with ``inspect`` and stores raw, HTML- and
    LaTeX-highlighted source plus docstrings in the artifact's
    key-value output data.
    """
    ALIASES = ["pydoc"]
    INPUT_EXTENSIONS = [".txt"]
    OUTPUT_EXTENSIONS = ['.json']
    COMPOSER = Composer()
    OUTPUT_DATA_TYPE = 'keyvalue'
    # Shared lexer/formatters used for all highlighting in this filter.
    LEXER = PythonLexer()
    LATEX_FORMATTER = LatexFormatter()
    HTML_FORMATTER = HtmlFormatter(lineanchors="pydoc")

    def fetch_item_content(self, key, item):
        """Store source/doc/comments for *item* under *key*.

        Functions and methods get their source split into idiopidae
        sections; other objects are stored as-is if JSON-serializable,
        else as their string representation.
        """
        is_method = inspect.ismethod(item)
        is_function = inspect.isfunction(item)
        if is_method or is_function:
            # Get source code
            try:
                source = inspect.getsource(item)
            except IOError as e:
                source = ""
            # Process any idiopidae tags
            builder = idiopidae.parser.parse('Document', source + "\n\0")
            sections = {}
            for i, s in enumerate(builder.sections):
                lines = builder.statements[i]['lines']
                sections[s] = "\n".join(l[1] for l in builder.statements[i]['lines'])
            if isinstance(sections, dict):
                if len(sections.keys()) > 1 or sections.keys()[0] != '1':
                    # Multiple named sections: store each one separately.
                    for section_name, section_content in sections.iteritems():
                        self.add_source_for_key("%s:%s" % (key, section_name),
                                                section_content)
                else:
                    # Single default section '1': store it directly.
                    self.add_source_for_key(key, sections['1'])
            else:
                self.add_source_for_key(key, sections)
            self.artifact.output_data.append("%s:doc" % key,
                                             inspect.getdoc(item))
            self.artifact.output_data.append("%s:comments" % key,
                                             inspect.getcomments(item))
        else:
            # not a function or a method
            try:
                # If this can be JSON-serialized, leave it alone...
                json.dumps(item)
                self.add_source_for_key(key, item)
            except TypeError:
                # ... if it can't, convert it to a string to avoid problems.
                self.add_source_for_key(key, str(item))

    def highlight_html(self, source):
        """Return *source* highlighted as HTML."""
        return highlight(source, self.LEXER, self.HTML_FORMATTER)

    def highlight_latex(self, source):
        """Return *source* highlighted as LaTeX."""
        return highlight(source, self.LEXER, self.LATEX_FORMATTER)

    def add_source_for_key(self, key, source):
        """
        Appends source code + syntax highlighted source code to persistent
        store.
        """
        self.artifact.output_data.append("%s:value" % key, source)
        if not (type(source) == str or type(source) == unicode):
            # Not already text: fetch the object's source code.
            source = inspect.getsource(source)
        self.artifact.output_data.append("%s:source" % key, source)
        self.artifact.output_data.append("%s:html-source" % key,
                                         self.highlight_html(source))
        self.artifact.output_data.append("%s:latex-source" % key,
                                         self.highlight_latex(source))

    def process_members(self, package_name, mod):
        """
        Process all members of the package or module passed.
        """
        name = mod.__name__
        for k, m in inspect.getmembers(mod):
            self.log.debug("in %s processing element %s" % (mod.__name__, k))
            if not inspect.isclass(m) and hasattr(m, '__module__') and m.__module__ and m.__module__.startswith(package_name):
                # Function/other object defined inside this package.
                key = "%s.%s" % (m.__module__, k)
                self.fetch_item_content(key, m)
            elif inspect.isclass(m) and m.__module__.startswith(package_name):
                # Class defined inside this package: store its source,
                # then recurse into its members.
                key = "%s.%s" % (mod.__name__, k)
                try:
                    item_content = inspect.getsource(m)
                    self.artifact.output_data.append("%s:doc" % key,
                                                     inspect.getdoc(m))
                    self.artifact.output_data.append("%s:comments" % key,
                                                     inspect.getcomments(m))
                    self.add_source_for_key(key, item_content)
                except IOError:
                    self.log.debug("can't get source for %s" % key)
                    self.add_source_for_key(key, "")
                try:
                    for ck, cm in inspect.getmembers(m):
                        key = "%s.%s.%s" % (name, k, ck)
                        self.fetch_item_content(key, cm)
                except AttributeError:
                    pass
            else:
                # Member imported from elsewhere: record under this module.
                key = "%s.%s" % (name, k)
                self.fetch_item_content(key, m)

    def process_module(self, package_name, name):
        """Import module *name* and document its source and members."""
        try:
            self.log.debug("Trying to import %s" % name)
            __import__(name)
            mod = sys.modules[name]
            self.log.debug("Success importing %s" % name)
            try:
                module_source = inspect.getsource(mod)
                # json.dumps is a serializability probe; failures are
                # treated the same as missing source.
                json.dumps(module_source)
                self.add_source_for_key(name, inspect.getsource(mod))
            except (UnicodeDecodeError, IOError, TypeError):
                self.log.debug("Unable to load module source for %s" % name)
            self.process_members(package_name, mod)
        except (ImportError, TypeError) as e:
            self.log.debug(e)

    def process(self):
        """
        input_text should be a list of installed python libraries to
        document.
        """
        package_names = self.artifact.input_data.as_text().split()
        packages = [__import__(package_name) for package_name in package_names]
        for package in packages:
            self.log.debug("processing package %s" % package)
            package_name = package.__name__
            prefix = package.__name__ + "."
            self.process_members(package_name, package)
            if hasattr(package, '__path__'):
                # Real package: walk its submodules too.
                for module_loader, name, ispkg in pkgutil.walk_packages(package.__path__, prefix=prefix):
                    self.log.debug("in package %s processing module %s" % (package_name, name))
                    if not name.endswith("__main__"):
                        self.process_module(package_name, name)
            else:
                # Plain module given as a "package": document just it.
                self.process_module(package.__name__, package.__name__)
        self.artifact.output_data.save()
from bs4 import BeautifulSoup
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers.agile import PythonLexer
import dexy.plugin
import dexy.datas
import dexy.data
import inspect
import json
import os
import shutil

# Shared lexer/formatter used to render each data class's source as
# HTML with per-line anchors and table-style line numbers.
py_lexer = PythonLexer()
fm = HtmlFormatter(lineanchors="l", anchorlinenos=True, linenos='table')

datas_info = {}

# Walk every registered dexy Data plugin and collect its highlighted
# source plus per-member information.
for alias, info in dexy.data.Data.plugins.iteritems():
    data_class, class_args = info
    source = inspect.getsource(data_class)
    html_source = highlight(source, py_lexer, fm)
    members = inspect.getmembers(data_class)
    member_info = {}
    for member_name, member in members:
        member_info[member_name] = {}
        if hasattr(member, 'im_func'):
            # NOTE(review): this chunk appears truncated here -- the loop
            # body continues past the end of this excerpt.
            code = member.im_func.func_code
def prettify_params(args):
    """Pretty-print *args* with 256-colour syntax highlighting, indented."""
    formatted = pprint.pformat(args)
    colored = highlight(formatted, PythonLexer(),
                        Terminal256Formatter(style='native'))
    return indent(colored).rstrip()
def get_tokens_unprocessed(self, text):
    """Delegate to PythonLexer, retagging a stray ``$`` error token
    as a preprocessor comment."""
    lexer = PythonLexer(**self.options)
    for position, token_type, token_text in lexer.get_tokens_unprocessed(text):
        if token_type == Token.Error and token_text == '$':
            token_type = Comment.Preproc
        yield position, token_type, token_text
def get_tokens_unprocessed(self, text):
    """Highlight names listed in ``self._extra_commands`` as builtins."""
    tokens = PythonLexer.get_tokens_unprocessed(self, text)
    for pos, tok, val in tokens:
        is_extra = tok is Name and val in self._extra_commands
        yield pos, (Name.Builtin if is_extra else tok), val
def inspect_order(args, debug, only_file=False, excludes=None,
                  distinguish_from_import=False):
    """Check that imports in the given files follow the canonical order.

    Prints a coloured report (to stderr) for the first misordered
    import of each offending file.

    :param args: arguments understood by ``sort_by_type``
    :param debug: when true, also print the expected and actual orders
    :param only_file: allow running without any local package names
    :param excludes: optional iterable of patterns to skip.  The default
        is ``None`` instead of the original mutable ``[]`` to avoid the
        shared-mutable-default pitfall; ``None`` and ``[]`` behave
        identically here since ``excludes`` is only truth-tested.
    :param distinguish_from_import: treat ``from x import y`` separately
        from ``import x`` when sorting
    :raises ValueError: if no local package names are given and
        *only_file* is false
    :raises SystemExit: with code 1 if any file had misordered imports
    """
    argument = sort_by_type(args)
    if not argument.local_packages and not only_file:
        raise ValueError('At least 1 local package name required.')
    filters = []
    if excludes:
        filters.append(Exclude(excludes))
    files = list_all_argument(argument, filters=filters)
    errored = False
    for filename in files:
        with open(filename, encoding='utf-8') as file_:
            # Files carrying the ignore pragma in their first three lines
            # are skipped entirely.
            if IGNORE_RE.search('\n'.join(file_.readline()
                                          for _ in range(3))):
                continue
            file_.seek(0)
            tree = ast.parse(file_.read(), filename)
        import_names = list(list_import_names(tree))
        canonical_order = sort_import_names(import_names,
                                            argument.local_packages,
                                            distinguish_from_import)
        prev_import = None
        for actual, expected in zip(import_names, canonical_order):
            if actual[0] != expected[0]:
                errored = True
                code_offset = min(expected[1], actual[1])
                code_end = max(expected[1], actual[1])
                print('\x1b[35m{0}\x1b[39m:{1}-{2}:'.format(
                          filename, code_offset, code_end),
                      end=' ', file=sys.stderr)
                if prev_import is None:
                    # first
                    print(expected[0], 'must be the first, not', actual[0],
                          file=sys.stderr)
                else:
                    print(expected[0], 'must be above than', actual[0],
                          file=sys.stderr)
                # Show the offending region with line numbers,
                # syntax-highlighted for the terminal.
                lineno_cols = len(str(code_end))
                format_line = (u'{0:' + str(lineno_cols) + '} {1}').format
                with open(filename, 'rb') as file_:
                    highlighted = highlight(file_.read(), PythonLexer(),
                                            Terminal256Formatter())
                sliced = highlighted.splitlines()[code_offset - 1:code_end]
                codelisting = '\n'.join(
                    format_line(i + code_offset, line)
                    for i, line in enumerate(sliced))
                print(codelisting, file=sys.stderr)
                if debug:
                    print('\x1b[32;49;1mExpected order:\x1b[39;49;00m',
                          debug_import_names(canonical_order,
                                             argument.local_packages,
                                             expected),
                          file=sys.stderr)
                    print('\x1b[31;49;1mActual order:\x1b[39;49;00m ',
                          debug_import_names(import_names,
                                             argument.local_packages,
                                             actual),
                          file=sys.stderr)
                print(file=sys.stderr)
                # Report only the first misordered import per file.
                break
            prev_import = actual
    if errored:
        raise SystemExit(1)
class QPygmentsHighlighter(QSyntaxHighlighter):
    """ Syntax highlighter that uses Pygments for parsing. """

    # Emitted after each block is highlighted.  NOTE(review): the name
    # keeps the original (misspelled) identifier since callers connect
    # to it by this exact name.
    hilighlightingBlock = Signal(unicode, QSyntaxHighlighter)

    #---------------------------------------------------------------------------
    # 'QSyntaxHighlighter' interface
    #---------------------------------------------------------------------------

    def __init__(self, parent, lexer=None):
        super(QPygmentsHighlighter, self).__init__(parent)

        # Scratch document used to derive formats from a CSS stylesheet.
        self._document = QtGui.QTextDocument()
        self._formatter = HtmlFormatter(nowrap=True)
        self._lexer = lexer if lexer else PythonLexer()
        # Goes through the ``style`` property setter defined below.
        self.style = styles.getStyle("Default").pygmentsStyle
        self.enabled = True

    def setLexerFromFilename(self, filename):
        """
        Change the lexer based on the filename (actually only the
        extension is needed)

        :param filename: Filename or extension
        """
        try:
            self._lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            # Unknown extension: fall back to Python highlighting.
            self._lexer = PythonLexer()

    def highlightBlock(self, text):
        """ Highlight a block of text """
        if self.enabled is False:
            return
        text = unicode(text)
        original_text = text
        prev_data = self.currentBlock().previous().userData()

        # Restore (or clear) the lexer state saved with the previous
        # block so multi-line constructs highlight correctly.
        if prev_data is not None:
            self._lexer._saved_state_stack = prev_data.syntax_stack
        elif hasattr(self._lexer, '_saved_state_stack'):
            del self._lexer._saved_state_stack

        # Lex the text using Pygments
        index = 0
        for token, text in self._lexer.get_tokens(text):
            length = len(text)
            self.setFormat(index, length, self._get_format(token))
            index += length

        if hasattr(self._lexer, '_saved_state_stack'):
            # Save the lexer state on the block for the next block's run.
            data = PygmentsBlockUserData(
                syntax_stack=self._lexer._saved_state_stack)
            self.currentBlock().setUserData(data)
            # Clean up for the next go-round.
            del self._lexer._saved_state_stack

        # Spaces: re-apply the whitespace format over every run of
        # whitespace in the original (un-lexed) text.
        expression = QRegExp('\s+')
        index = expression.indexIn(original_text, 0)
        while index >= 0:
            index = expression.pos(0)
            length = len(expression.cap(0))
            self.setFormat(index, length, self._get_format(Whitespace))
            index = expression.indexIn(original_text, index + length)

        self.hilighlightingBlock.emit(original_text, self)

        # expression = QRegExp('\s+')
        # index = expression.indexIn(original_text, 0)
        # while index >= 0:
        #     index = expression.pos(0)
        #     length = len(expression.cap(0))
        #     self.setFormat(index, length, self._get_format(Whitespace))
        #     index = expression.indexIn(original_text, index + length)

    #---------------------------------------------------------------------------
    # 'PygmentsHighlighter' interface
    #---------------------------------------------------------------------------

    def __set_style(self, style):
        """ Sets the style to the specified Pygments style. """
        if (isinstance(style, str) or isinstance(style, unicode)):
            style = get_style_by_name(style)
        self._style = style
        self._clear_caches()

    def set_style_sheet(self, stylesheet):
        """ Sets a CSS stylesheet. The classes in the stylesheet should
        correspond to those generated by:

            pygmentize -S <style> -f html

        Note that 'set_style' and 'set_style_sheet' completely override
        each other, i.e. they cannot be used in conjunction.
        """
        self._document.setDefaultStyleSheet(stylesheet)
        self._style = None
        self._clear_caches()

    def __get_style(self):
        return self._style

    #: gets/sets the **pygments** style.
    style = property(__get_style, __set_style)

    #---------------------------------------------------------------------------
    # Protected interface
    #---------------------------------------------------------------------------

    def _clear_caches(self):
        """ Clear caches for brushes and formats. """
        self._brushes = {}
        self._formats = {}

    def _get_format(self, token):
        """ Returns a QTextCharFormat for token or None. """
        if token in self._formats:
            return self._formats[token]

        if self._style is None:
            result = self._get_format_from_document(token, self._document)
        else:
            result = self._get_format_from_style(token, self._style)

        self._formats[token] = result
        return result

    def _get_format_from_document(self, token, document):
        """ Returns a QTextCharFormat for token by rendering dummy HTML
        for it and reading the char format back from the scratch
        document (used when a CSS stylesheet, not a style, is active).
        """
        code, html = next(self._formatter._format_lines([(token, 'dummy')]))
        self._document.setHtml(html)
        return QtGui.QTextCursor(self._document).charFormat()

    def _get_format_from_style(self, token, style):
        """ Returns a QTextCharFormat for token by reading a Pygments
        style.
        """
        result = QtGui.QTextCharFormat()
        # Map each Pygments style attribute onto the Qt text format.
        for key, value in list(style.style_for_token(token).items()):
            if value:
                if key == 'color':
                    result.setForeground(self._get_brush(value))
                elif key == 'bgcolor':
                    result.setBackground(self._get_brush(value))
                elif key == 'bold':
                    result.setFontWeight(QtGui.QFont.Bold)
                elif key == 'italic':
                    result.setFontItalic(True)
                elif key == 'underline':
                    result.setUnderlineStyle(
                        QtGui.QTextCharFormat.SingleUnderline)
                elif key == 'sans':
                    result.setFontStyleHint(QtGui.QFont.SansSerif)
                elif key == 'roman':
                    result.setFontStyleHint(QtGui.QFont.Times)
                elif key == 'mono':
                    result.setFontStyleHint(QtGui.QFont.TypeWriter)
        return result

    def _get_brush(self, color):
        """ Returns a brush for the color (cached per colour string). """
        result = self._brushes.get(color)
        if result is None:
            qcolor = self._get_color(color)
            result = QtGui.QBrush(qcolor)
            self._brushes[color] = result
        return result

    def _get_color(self, color):
        """ Returns a QColor built from a Pygments color string. """
        color = unicode(color).replace("#", "")
        qcolor = QtGui.QColor()
        qcolor.setRgb(int(color[:2], base=16),
                      int(color[2:4], base=16),
                      int(color[4:6], base=16))
        return qcolor
from pygments import highlight
from pygments.lexers.agile import PythonLexer
from pygments.formatters.terminal256 import Terminal256Formatter
from pygments.formatters.terminal import TerminalFormatter
from pygments.styles import emacs
from pygments.filters import NameHighlightFilter
from pygments.token import Other

from talkstyle import TalkStyle

# Highlight every occurrence of the name 'video_cap' with the Other token.
fltr = NameHighlightFilter(names=['video_cap'], tokentype=Other)

from asciipoint import *

# Render webcam.py for a 256-colour terminal using the talk's style.
slide = open('webcam.py').read()
lex = PythonLexer()
lex.add_filter(fltr)
slide = highlight(slide, lex,
                  Terminal256Formatter(style=TalkStyle)
                  #TerminalFormatter(bg='dark')
                  )

# Regex/effect pairs for emphasising lines during the presentation;
# ``inv`` presumably comes from the ``asciipoint`` star import -- confirm.
hl = [(('(import ecto)', inv),),
      ((r'(from ecto_opencv.*)', inv),),
      ((r'(plasm =.*)', inv),),
      ((r'(VideoCapture)', inv),),
      ((r'(highgui\.VideoCapture)', inv),),
      ((r'(imshow)', inv),),
      ]