def try_lexer(name, filename=None):
    """Try getting a pygments lexer by name, falling back to file name.

    :param name: lexer alias to look up; may be empty/None.
    :param filename: optional file name used as a fallback guess.
    :return: a lexer instance, or None when neither lookup succeeds.
    """
    def _by_filename():
        # Shared fallback (was duplicated in the original): guess from the
        # file name, unless the "file" is actually stdin.
        if filename_is_stdin(filename):
            return None
        try:
            return lexers.get_lexer_for_filename(filename)
        except pygments.util.ClassNotFound:
            return None

    if not name:
        # No lexer name at all; the file name is the only option.
        return _by_filename()
    try:
        # Retrieved by name.
        return lexers.get_lexer_by_name(name)
    except pygments.util.ClassNotFound:
        # Retrieved by falling back to file name (or None).
        return _by_filename()
def iscode(self):
    """Return True when pygments is available and knows a lexer for this file."""
    if pygments is None:
        return False
    try:
        get_lexer_for_filename(self.filename)
    except ClassNotFound:
        return False
    return True
def get_renderer(full_path):
    """Pick a renderer for *full_path*.

    Notebooks get the notebook renderer; files pygments can highlight get
    the pygments renderer; everything else falls back to the raw renderer.
    """
    if full_path.endswith('.ipynb'):
        return nb_renderer
    try:
        get_lexer_for_filename(full_path)
    except ClassNotFound:
        # No lexer registered for this extension.
        return raw_renderer
    # NOTE: the original had an unreachable trailing `return raw_renderer`
    # after the try/except; it has been removed.
    return pygments_renderer
def pygments_lexer(filename):
    """Return a pygments lexer for *filename*, or None when unavailable.

    ``.recipe`` files are highlighted as Python source.
    """
    try:
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound
    except ImportError:
        # pygments is not installed at all.
        return None
    try:
        return get_lexer_for_filename(filename)
    except ClassNotFound:
        if filename.lower().endswith('.recipe'):
            # Treat recipes as Python by asking for a .py lexer.
            return get_lexer_for_filename('a.py')
        return None
def get_lexers():
    """List all registered pygments lexers and probe the .xsd mapping."""
    import pygments.lexers as t
    count = 0
    for item in t.get_all_lexers():
        # Python 3 print function (the original used py2 print statements).
        print(item)
        count += 1
    try:
        print(t.get_lexer_for_filename('this.xsd'))
    except t.ClassNotFound:
        # BUG FIX: the original printed 'found' on *failure*; report the
        # actual outcome and catch only the lookup error, not everything.
        print('not found')
    print('NUMBER OF LEXERS: %s' % count)
def is_parsable(filename):
    u"""Check whether the file name maps to a known lexer.

    :param str filename: file name to test
    :return: whether the file can be parsed
    :rtype: bool
    """
    try:
        lexers.get_lexer_for_filename(filename)
    except util.ClassNotFound:
        return False
    return True
def render_listing(in_name, out_name):
    """Render a highlighted source listing for *in_name* into *out_name*."""
    with open(in_name, 'r') as fd:
        try:
            lexer = get_lexer_for_filename(in_name)
        except Exception:
            # Narrowed from a bare `except`: unknown file type -> plain text.
            lexer = TextLexer()
        code = highlight(fd.read(), lexer,
                         HtmlFormatter(cssclass='code',
                                       linenos="table",
                                       nowrap=False,
                                       # BUG FIX: the original referenced an
                                       # undefined name `f` here; anchor on
                                       # the input file name instead.
                                       lineanchors=utils.slugify(in_name),
                                       anchorlinenos=True))
    title = os.path.basename(in_name)
    crumbs = out_name.split(os.sep)[1:-1] + [title]
    # TODO: write this in human
    paths = ['/'.join(['..'] * (len(crumbs) - 2 - i))
             for i in range(len(crumbs[:-2]))] + ['.', '#']
    context = {
        'code': code,
        'title': title,
        'crumbs': zip(paths, crumbs),
        'lang': kw['default_lang'],
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def colorize(language, title, text):
    """Colorize the text syntax.

    Guess the language of the text and colorize it. Returns a tuple
    containing the colorized text and the language name.
    """
    formatter = HtmlFormatter(linenos=True, style=PygmentsStyle,
                              noclasses=True, nobackground=True)

    def _render(lexer):
        # One exit path: highlighted markup plus the lexer's display name.
        return highlight(text, lexer, formatter), lexer.name

    # 1) explicit language name
    try:
        return _render(get_lexer_by_name(language.lower()))
    except LexerNotFound:
        pass
    # 2) the title treated as a file name
    try:
        return _render(get_lexer_for_filename(title.lower()))
    except LexerNotFound:
        pass
    # 3) content analysis, accepted only when confident enough
    try:
        guessed = guess_lexer(text)
        if guessed.analyse_text(text) > .3:
            return _render(guessed)
    except LexerNotFound:
        pass
    # 4) plain-text fallback
    return _render(get_lexer_by_name('text'))
def set_lexer_from_filename(self, filename):
    """
    Change the lexer based on the filename (actually only the extension
    is needed)

    :param filename: Filename or extension
    """
    if filename.endswith("~"):
        # Backup files ("foo.py~") highlight like the original file.
        filename = filename[:-1]
    try:
        self._lexer = get_lexer_for_filename(filename)
        _logger().debug('lexer for filename (%s): %r', filename, self._lexer)
    except (ClassNotFound, ImportError):
        # ClassNotFound: no lexer registered for that name.
        # ImportError: a broken pygments plugin — the editor must not crash.
        _logger().warning('failed to get lexer from filename: %s, using '
                          'plain text instead...', filename)
        self._lexer = TextLexer()
        return False
    return True
def get_used_tokens(filename):
    """ Get all the tokens that are referenced in this file """
    # BUG FIX: the original left the file handle open and rejoined lines
    # that still carried their newlines (doubling every line break); read
    # the text once inside a context manager instead.
    with open(filename, 'r') as infile:
        text = infile.read()
    lexer = lexers.get_lexer_for_filename(filename)
    seen = set()
    used_tokens = []
    declared_tokens = set()
    token_iter = lexer.get_tokens(text)
    # Drop pure-whitespace tokens before windowing.
    filtered_iter = (tok for tok in token_iter if tok[1].strip())
    prev_iter, curr_iter, next_iter = itertools.tee(filtered_iter, 3)
    # we miss the first token here... whatever
    next(curr_iter)
    next(next_iter)
    next(next_iter)
    # PORTABILITY FIX: builtin zip instead of py2-only itertools.izip.
    for (prev, curr, next_tok) in zip(prev_iter, curr_iter, next_iter):
        # Skip member accesses: "<something>.Name".
        if str(prev[0]) in ['Token.Operator', 'Token.Keyword.Type'] and prev[1] == '.':
            continue
        # Skip names immediately followed by '=>' (assumed to be keys/
        # parameter names — TODO confirm for the target language).
        if next_tok[1] == u'=>':
            continue
        if str(curr[0]) in ['Token.Name.Class', 'Token.Keyword.Type'] and curr[1][0].isupper():
            if is_declaration(prev[1]):
                declared_tokens.add(curr[1])
            elif curr not in seen:
                seen.add(curr)
                used_tokens.append(curr[1].split(".")[0])
    # Only report names that were used but never declared here.
    return [t for t in used_tokens if t not in declared_tokens]
def get_file_lexer(filename, content):
    """
    Try to get a lexer by file extension, guess by content if that fails.
    """
    if filename.lower().endswith('.txt'):
        # Pygments sometimes returns a weird lexer for .txt files.
        lexer = lexers.get_lexer_by_name('text')
        debug('Lexer forced by extension: {:>20} -> {}'.format(
            lexer.name,
            filename,
        ))
        return lexer
    try:
        lexer = lexers.get_lexer_for_filename(filename)
    except ClassNotFound:
        pass
    else:
        debug('Lexer chosen by file name: {:>20} -> {}'.format(
            lexer.name,
            filename,
        ))
        return lexer
    # File name gave nothing: guess by content, then fall back to default.
    try:
        lexer = lexers.guess_lexer(content)
    except ClassNotFound:
        lexer = lexers.get_lexer_by_name(DEFAULT_LEXER)
        debug('Lexer set to default: {:>20} -> {}'.format(
            lexer.name,
            filename,
        ))
    else:
        debug('Lexer guessed by content: {:>20} -> {}'.format(
            lexer.name,
            filename,
        ))
    return lexer
def pipp_code(context, src, code, lexer, docss):
    """Highlight a code block (from a file or inline) as HTML.

    :param context: XSLT processor context carrying the pipp state.
    :param src: optional source file path; when set, *code* is ignored.
    :param code: inline code text, used only when *src* is empty.
    :param lexer: optional explicit lexer alias; required for inline code.
    :param docss: when '1', also write a CSS file and link to it.
    """
    ctx = context.processor.extensionParams[(NAMESPACE, 'context')]
    src = Conversions.StringValue(src)
    if src:
        # File mode: register the file as a dependency and read it.
        abs_src = ctx.abs_in_path(src)
        ctx.add_depends(abs_src[len(ctx.in_root):])
        fname = os.path.basename(src)
        code = open(abs_src).read()
    else:
        fname = 'inline-code'
        code = Conversions.StringValue(code)
    lexer = Conversions.StringValue(lexer)
    if lexer:
        lexer = get_lexer_by_name(lexer)
    elif src:
        # No explicit lexer, but a file name to guess from.
        lexer = get_lexer_for_filename(fname)
    else:
        raise Exception('The lexer must be explicitly specified for inline code blocks')
    formatter = HtmlFormatter(cssclass="source")
    result = highlight(code, lexer, formatter)
    if Conversions.StringValue(docss) == '1':
        # Emit a stylesheet link and write the pygments CSS next to the page.
        result = '<link rel="stylesheet" href="%s.css"/>' % fname + result
        css = open(ctx.abs_out_path(ctx.abs_in_path(fname + '.css')), 'w')
        css.write(formatter.get_style_defs())
        css.close()
    return result
def render_listing(in_name, out_name, folders=[], files=[]):
    """Render a listing page; without an input file an empty page is made.

    NOTE: the mutable default arguments are kept for interface
    compatibility; they are only read, never mutated, here.
    """
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # Narrowed from a bare `except`: fall back to plain text.
                lexer = TextLexer()
            code = highlight(fd.read(), lexer,
                             HtmlFormatter(cssclass='code',
                                           linenos="table",
                                           nowrap=False,
                                           # BUG FIX: the original passed the
                                           # undefined name `f`; anchor on
                                           # the input file name.
                                           lineanchors=utils.slugify(in_name),
                                           anchorlinenos=True))
        title = os.path.basename(in_name)
    else:
        code = ''
        title = ''
    crumbs = utils.get_crumbs(os.path.relpath(out_name, kw['output_folder']),
                              is_file=True)
    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'lang': kw['default_lang'],
        'folders': folders,
        'files': files,
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def display(sphinx_id):
    """Fetch a stored document by id and render it syntax-highlighted."""
    db = oursql.connect(user='******', passwd='codesearch', db='codesearch')
    cursor = db.cursor(oursql.DictCursor)
    # Parameterized query — safe against SQL injection.
    query = 'SELECT project, path, text FROM documents WHERE id = ?'
    cursor.execute(query, (sphinx_id,))
    sourcecode = cursor.fetchone()
    if sourcecode is None:
        # Unknown id -> 404.
        flask.abort(404)
    title = posixpath.join(sourcecode['project'], sourcecode['path'])
    try:
        lexer = get_lexer_for_filename(sourcecode['path'])
    except ClassNotFound:
        # Syntax highlighting not supported: serve the raw text in <pre>.
        # NOTE(review): the text is interpolated unescaped — confirm the
        # template escapes it, otherwise this is an XSS vector.
        code = u'<pre>{}</pre>'.format(sourcecode['text'])
        return flask.render_template('display.html', title=title, code=code)
    formatter = HtmlFormatter()
    # Highlighting large files can be a slow operation. This is a candidate
    # for caching.
    # NOTE(review): zlib.adler32 requires bytes on Python 3 — confirm the
    # 'text' column arrives as bytes here, or this raises TypeError.
    checksum = zlib.adler32(sourcecode['text'])
    key = json.dumps(['HIGHLIGHT', checksum])
    code = cache.get(key)
    if code is None:
        code = highlight(sourcecode['text'], lexer, formatter)
        cache.set(key, code)
    return flask.render_template('display.html', title=title, code=code)
def pcat(filename, target='ipython'):
    """Pretty-print a file (or URL) with pygments.

    For target 'ipython' an IPython HTML object is returned; on any
    failure there (or for other targets) ANSI-highlighted text is printed
    to the terminal instead.
    """
    code = read_file_or_url(filename)
    HTML_TEMPLATE = """<style>
{}
</style>
{}
"""
    from pygments import highlight
    from pygments.lexers import get_lexer_for_filename
    from pygments.formatters import HtmlFormatter, TerminalFormatter
    lexer = get_lexer_for_filename(filename, stripall=True)
    if target == 'ipython':
        # BUG FIX: the original used `assert target=='ipython'` for control
        # flow, which disappears under `python -O`; use a real branch.
        try:
            from IPython.display import HTML
            formatter = HtmlFormatter(linenos=True, cssclass="source")
            html_code = highlight(code, lexer, formatter)
            css = formatter.get_style_defs()
            return HTML(HTML_TEMPLATE.format(css, html_code))
        except Exception as e:
            # Preserve the old best-effort behaviour: report, then fall
            # through to the terminal renderer.
            print(e)
    formatter = TerminalFormatter()
    output = highlight(code, lexer, formatter)
    print(output)
def __init__(self, text, syntax=None, name=None):
    """Store the snippet text and render it to HTML.

    :param text: raw source text (indentation is normalized).
    :param syntax: optional explicit syntax/lexer alias.
    :param name: optional file name used to guess the syntax.
    """
    self.text = strip_indents(text)
    lexer = None
    if name:
        self.name = name
    if syntax is not None:
        self.syntax = syntax.lower()
    elif self.name:
        # No explicit syntax: guess a lexer from the file name.
        # NOTE(review): when `name` is falsy this relies on `self.name`
        # already existing (e.g. as a class attribute) — confirm.
        try:
            lexer = get_lexer_for_filename(self.name)
        except ClassNotFound:
            pass
        else:
            self.syntax = lexer.aliases[0]
    if self.syntax == "markdown":
        # Markdown is rendered to HTML, not syntax-highlighted.
        self.html = markdown(self.text)
    else:
        try:
            lexer = lexer or get_lexer_by_name(self.syntax)
        except ClassNotFound:
            # do nothing - if html is empty then description is a raw text
            pass
        else:
            self.html = highlight(self.text, lexer, formater)
def _lexer_for_filename(filename):
    """Return a Pygments lexer suitable for a file based on its extension.

    Return None if one can't be determined.
    """
    if filename.endswith('.js') or filename.endswith('.jsm'):
        # Use a custom lexer for js/jsm files to highlight preprocessor
        # directives.
        return JavascriptPreprocLexer()
    if filename == 'moz.build':
        return PythonLexer()
    # Lex .h files as C++ so occurrences of "class" and such get colored;
    # Pygments expects .H, .hxx, etc. This is okay even for uses of
    # keywords that would be invalid in C++, like 'int class = 3;'.
    # Also we can syntax highlight XUL as XML, and IDL/WebIDL as CPP.
    if filename.endswith(('.h', '.idl', '.webidl', '.tcc', '.tpp')):
        lookup_name = 'dummy.cpp'
    elif filename.endswith(('.xul', '.svg')):
        lookup_name = 'dummy.xml'
    else:
        lookup_name = filename
    try:
        return get_lexer_for_filename(lookup_name)
    except ClassNotFound:
        return None
def test_example_files():
    """Generator test: every file in examplefiles/ must map to a lexer."""
    testdir = os.path.dirname(__file__)
    outdir = os.path.join(testdir, 'examplefiles', 'output')
    if STORE_OUTPUT and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for fn in os.listdir(os.path.join(testdir, 'examplefiles')):
        # Skip hidden and editor-backup entries.
        if fn.startswith('.') or fn.endswith('#'):
            continue
        absfn = os.path.join(testdir, 'examplefiles', fn)
        if not os.path.isfile(absfn):
            continue
        outfn = os.path.join(outdir, fn)
        try:
            lx = get_lexer_for_filename(absfn)
        except ClassNotFound:
            # Files named "<lexer>_something" explicitly override the
            # extension-based mapping.
            if "_" not in fn:
                raise AssertionError('file %r has no registered extension, '
                                     'nor is of the form <lexer>_filename '
                                     'for overriding, thus no lexer found.'
                                     % fn)
            try:
                name, rest = fn.split("_", 1)
                lx = get_lexer_by_name(name)
            except ClassNotFound:
                raise AssertionError('no lexer found for file %r' % fn)
        yield check_lexer, lx, absfn, outfn
def main(fn, lexer=None):
    """Lex file *fn* and report the first Error token with context.

    Returns 0 when the whole file lexes cleanly, 1 when an Error token
    was produced. (Python 2 code: print statements, file(), decode.)
    """
    if lexer is not None:
        lx = get_lexer_by_name(lexer)
    else:
        try:
            # ClassNotFound subclasses ValueError, so this catch works.
            lx = get_lexer_for_filename(os.path.basename(fn))
        except ValueError:
            try:
                # "<lexer>_rest" file names force a specific lexer.
                name, rest = fn.split('_', 1)
                lx = get_lexer_by_name(name)
            except ValueError:
                raise AssertionError('no lexer found for file %r' % fn)
    debug_lexer = False
    # does not work for e.g. ExtendedRegexLexers
    if lx.__class__.__bases__ == (RegexLexer,):
        # Swap in a debugging base class so the state stack is observable.
        lx.__class__.__bases__ = (DebuggingRegexLexer,)
        debug_lexer = True
    lno = 1
    text = file(fn, 'U').read()
    text = text.strip('\n') + '\n'
    text = text.decode('latin1')
    tokens = []
    states = []

    def show_token(tok, state):
        # Pretty-print one (type, value) pair, plus the lexer state when
        # the debugging base class is active.
        reprs = map(repr, tok)
        print ' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0],
        if debug_lexer:
            print ' ' + ' ' * (29-len(reprs[0])) + repr(state),
        print

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error:
            # First error: dump context (all tokens or the last `num`).
            print 'Error parsing', fn, 'on line', lno
            print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':'
            if showall:
                for tok, state in zip(tokens, states):
                    show_token(tok, state)
            else:
                for i in range(len(tokens) - num, len(tokens)):
                    show_token(tokens[i], states[i])
            print 'Error token:'
            l = len(repr(val))
            print ' ' + repr(val),
            if debug_lexer and hasattr(lx, 'statestack'):
                print ' ' * (60-l) + repr(lx.statestack),
            print
            print
            return 1
        tokens.append((type,val))
        if debug_lexer:
            if hasattr(lx, 'statestack'):
                states.append(lx.statestack[:])
            else:
                states.append(None)
    if showall:
        for tok, state in zip(tokens, states):
            show_token(tok, state)
    return 0
def get_sphinx_data(sphinx_id):
    """Look up a source file by Sphinx id and return highlighted markup.

    :return: dict with 'body', 'path' and 'url' on success.
        NOTE(review): when the indexed file is missing on disk this
        instead returns a (filename, message) tuple — confirm callers
        handle both shapes.
    """
    query = Search(indexes=['sourcecode'], config=BaseSearchConfig)
    query = query.filter(id__eq=sphinx_id)
    results = query.ask()
    if len(results['result']['items']) == 0:
        # No such document in the index.
        flask.abort(404)
    filename = results['result']['items'][0]['path']
    if not os.path.isfile(filename):
        return filename, 'File not found. Please resphinx_id.'
    code = ''
    with open(filename) as f:
        code = f.read()
    try:
        # This is the line that throws the exception.
        lexer = get_lexer_for_filename(filename)
        formatter = HtmlFormatter(noclasses=True)
        result = highlight(code, lexer, formatter)
    except ClassNotFound:
        # Syntax highlighting not supported: fall back to a plain <pre>.
        result = '<pre>{}</pre>'.format(code)
    url = flask.url_for('display', sphinx_id=sphinx_id)
    return {'body': result, 'path': filename, 'url': url}
def refreshSource(self, process = None):
    """Refresh the source view from the process's selected frame.

    Resolves the current PC location, loads the file's contents and
    re-renders them with breakpoints and the PC line marked.
    """
    (self.height, self.width) = self.win.getmaxyx()
    if process is not None:
        # Where is the selected thread/frame stopped?
        loc = process.GetSelectedThread().GetSelectedFrame().GetLineEntry()
        f = loc.GetFileSpec()
        self.pc_line = loc.GetLine()
        if not f.IsValid():
            self.win.addstr(0, 0, "Invalid source file")
            return
        self.filename = f.GetFilename()
        path = os.path.join(f.GetDirectory(), self.filename)
        self.setTitle(path)
        self.content = self.getContent(path)
        self.updateViewline()
    if self.filename is None:
        # Nothing loaded yet and no process to load from.
        return
    if self.formatter is not None:
        # Lazy import: pygments is only needed when highlighting is on.
        from pygments.lexers import get_lexer_for_filename
        self.lexer = get_lexer_for_filename(self.filename)
    # Breakpoints for this file, if any are set.
    bps = [] if not self.filename in self.breakpoints else self.breakpoints[self.filename]
    self.win.erase()
    if self.content:
        self.formatContent(self.content, self.pc_line, bps)
def tokenize(self): """ Tokenizes the input file. Yields (tokentype, val) pairs, where val is a string. The concatenation of all val strings is equal to the input file's content. """ # contains all namespaces and other '{' tokens self.stack = [] # current line number self.lineno = 1 # we're using the pygments lexer (mainly because that was the first # google hit for 'python c++ lexer', and it's fairly awesome to use) lexer = get_lexer_for_filename('.cpp') with open(self.filename) as infile: code = infile.read() for token, val in lexer.get_tokens(code): # ignore whitespaces yield token, val self.lineno += val.count('\n')
def highlight_file(self, linenos=True, style='default'):
    """ Highlight the input file, and return HTML as a string. """
    try:
        lexer = lexers.get_lexer_for_filename(self.input_file)
    except pygments.util.ClassNotFound:
        # Try guessing the lexer (file type) later.
        lexer = None
    try:
        formatter = formatters.HtmlFormatter(
            linenos=linenos, style=style, full=True)
    except pygments.util.ClassNotFound:
        # Unknown style name: report the valid ones and bail out.
        logging.error("\nInvalid style name: {}\nExpecting one of:\n \
{}".format(style, "\n ".join(sorted(styles.STYLE_MAP))))
        sys.exit(1)
    try:
        with open(self.input_file, "r") as f:
            content = f.read()
        try:
            lexer = lexer or lexers.guess_lexer(content)
        except pygments.util.ClassNotFound:
            # No lexer could be guessed.
            lexer = lexers.get_lexer_by_name("text")
    except EnvironmentError as exread:
        # Unreadable input file: report and exit with a distinct code.
        fmt = "\nUnable to read file: {}\n{}"
        logging.error(fmt.format(self.input_file, exread))
        sys.exit(2)
    return pygments.highlight(content, lexer, formatter)
def doc(path):
    """Gathers the documentation.

    Markdown files are rendered to HTML; everything else is
    syntax-highlighted with pygments.
    """
    # BUG FIX: use a context manager instead of open/read/close so the
    # handle is released even if reading fails.
    with open(path, 'r') as file_:
        content = file_.read()
    try:
        lexer = get_lexer_for_filename(path, stripall=True)
    except Exception:
        # Narrowed from a bare `except`: unknown extension -> plain text.
        lexer = TextLexer(stripall=True)
    if path.endswith('.md'):
        return markdown.markdown(
            bleach.clean(content),
            extensions=['markdown.extensions.nl2br',
                        'markdown.extensions.toc']
        )
    elif path.endswith('.less'):
        # .less is close enough to SCSS for highlighting purposes.
        lexer = ScssLexer(stripall=True)
    formatter = HtmlFormatter(
        linenos=True,
        cssclass='codehilight',
        noclobber_cssfile=True,
        title=path[path.rfind(os.sep)+1:]
    )
    return "<div class='table-responsive codehilight'>"+highlight(content, lexer, formatter)+"</div>"
def __init__(self, raw, request, **kw):
    """Pick a pygments lexer for the content being parsed.

    Invoked either for a code block (``format_args`` in *kw*) or for an
    attachment (``filename`` in *kw*, or peeked from the caller's frame
    on older moin versions).
    """
    self.raw = raw
    self.req = request
    if "format_args" in kw:
        # called from a {{{ }}} block
        try:
            self.lexer = get_lexer_by_name(kw['format_args'].strip())
        except ClassNotFound:
            self.lexer = textlexer
        return
    if "filename" in kw:
        # called for an attachment
        filename = kw['filename']
    else:
        # called for an attachment by an older moin
        # HACK: find out the filename by peeking into the execution
        # frame which might not always work
        try:
            frame = sys._getframe(1)
            filename = frame.f_locals['filename']
        except:
            # Deliberate best-effort: any failure falls back to a dummy
            # text file name.
            filename = 'x.txt'
    try:
        self.lexer = get_lexer_for_filename(filename)
    except ClassNotFound:
        self.lexer = textlexer
def get_line_types(repo, repo_uri, rev, path): """Returns an array, where each item means a line of code. Each item is labled 'code', 'comment' or 'empty'""" #profiler_start("Processing LineTypes for revision %s:%s", (self.rev, self.file_path)) uri = os.path.join(repo_uri, path) # concat repo_uri and file_path for full path file_content = _get_file_content(repo, uri, rev) # get file_content if file_content is None or file_content == '': printerr("[get_line_types] Error: No file content for " + str(rev) + ":" + str(path) + " found! Skipping.") line_types = None else: try: lexer = get_lexer_for_filename(path) except ClassNotFound: try: printdbg("[get_line_types] Guessing lexer for" + str(rev) + ":" + str(path) + ".") lexer = guess_lexer(file_content) except ClassNotFound: printdbg("[get_line_types] No guess or lexer found for " + str(rev) + ":" + str(path) + ". Using TextLexer instead.") lexer = TextLexer() if isinstance(lexer, NemerleLexer): # this lexer is broken and yield an unstoppable process # see https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop lexer = TextLexer() # Not shure if this should be skipped, when the language uses off-side rules (e.g. python, # see http://en.wikipedia.org/wiki/Off-side_rule for list) stripped_code = _strip_lines(file_content) lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code)) line_types_str = _comment_empty_or_code(lexer_output) line_types = line_types_str.split("\n") return line_types
def _get_lexer(self, filename):
    """Return the pygments lexer for *filename*.

    :raises ClassNotFound: when no lexer is registered for the name.
    """
    # The original wrapped this in a try/except that only re-raised the
    # exception — a no-op. Let ClassNotFound propagate naturally.
    return get_lexer_for_filename(filename)
def highlightCode(code, fileName):
    """Return *code* highlighted as HTML, choosing the lexer from *fileName*."""
    try:
        lexer = get_lexer_for_filename(fileName)
    except ClassNotFound:
        # No registered lexer: fall back to plain text.
        lexer = TextLexer()
    return highlight(code, lexer, HtmlFormatter())
def code(title):
    """ Return syntax highlighted LaTeX. """
    filename = title.split(' ')[1]
    # open the code file relative from the yml file path
    # BUG FIX: use a context manager so the handle is closed even when
    # highlighting raises.
    source_dir = os.path.dirname(os.path.abspath(source_file))
    with open(os.path.join(source_dir, filename)) as f:
        source = f.read()
    out = "\n\\begin{frame}[fragile,t]"
    out += "\n\t\\frametitle{Code: \"%s\"}" % filename
    try:
        from pygments import highlight
        from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
        from pygments.formatters import LatexFormatter
        try:
            lexer = get_lexer_for_filename(filename)
        except Exception:
            # Narrowed from a bare `except`: unknown file type -> text.
            lexer = get_lexer_by_name('text')
        out += "\n%s\n" % highlight(source, lexer, LatexFormatter(linenos=True))
    except ImportError:
        # pygments unavailable: fall back to a plain lstlisting block.
        out += "\n\t\\begin{lstlisting}\n"
        out += source
        out += "\n\t\end{lstlisting}"
    out += "\n\end{frame}"
    return out
def process_text_to_dict(self, input_text):
    """Split *input_text* into idiopidae sections and highlight each.

    :return: OrderedDict mapping section name -> formatted source lines.
    """
    composer = Composer()
    # Trailing newline + NUL appended before parsing — presumably
    # required by the idiopidae grammar; confirm.
    builder = idiopidae.parser.parse('Document', input_text + "\n\0")
    ext = self.artifact.input_ext
    name = "input_text%s" % ext
    # List any file extensions which don't map neatly to lexers.
    if ext == '.pycon':
        lexer = PythonConsoleLexer()
    elif ext == '.rbcon':
        lexer = RubyConsoleLexer()
    elif ext in ('.json', '.dexy'):
        lexer = JavascriptLexer()
    else:
        lexer = get_lexer_for_filename(name)
    formatter = get_formatter_for_filename(self.artifact.filename(),
                                           lineanchors='l')
    output_dict = OrderedDict()
    for i, s in enumerate(builder.sections):
        lines = builder.statements[i]['lines']
        formatted_lines = composer.format(lines, lexer, formatter)
        output_dict[s] = formatted_lines
    return output_dict
def highlight(text, extension=None, formatter=HtmlFormatter, **formatter_options):
    """Highlight *text*, picking the lexer from *extension* when known.

    Unknown extensions fall back to content-based lexer detection.
    """
    fake_name = 'foo.{}'.format(extension)
    try:
        chosen = get_lexer_for_filename(fake_name)
    except ClassNotFound:
        chosen = find_best_lexer(text)
    return pygment_highlight(text, chosen, formatter(**formatter_options))
def get_lexer_from_filename(name):
    """Return the lexer name for *name*, or '' (with a warning) if unknown."""
    try:
        return get_lexer_for_filename(name).name
    except ClassNotFound:
        console.print(
            'WARNING: Could not determine correct lexer for this file!',
            style='yellow')
        return ''
def highlight_and_paginate_content(file_name, content):
    """Highlight *content* for the terminal and page it.

    Uses a 256-color formatter when the terminal supports it.
    """
    try:
        lexer = get_lexer_for_filename(file_name)
    except Exception:
        # Narrowed from a bare `except`: no lexer -> raw token passthrough.
        lexer = RawTokenLexer()
    if get_terminal_color_support() == 256:
        formatter = Terminal256Formatter()
    else:
        formatter = TerminalFormatter()
    paginate(highlight(content, lexer, formatter))
def __init__(self, filename):
    """
    init by getting a lexer for file name

    If none can be found, the lexer is set to None, which is caught
    in parse.
    """
    try:
        self.lexer = get_lexer_for_filename(filename)
    except Exception:
        # Narrowed from a bare `except`: any lookup failure means
        # "no lexer", which parse() handles.
        self.lexer = None
def guess_lexer_for_filename(filename):
    """Return a lexer for *filename*, defaulting to plain text."""
    from pygments.lexers import get_lexer_for_filename
    from pygments.util import ClassNotFound
    try:
        return get_lexer_for_filename(filename)
    except ClassNotFound:
        # Unknown file type: degrade gracefully to the plain text lexer.
        from pygments.lexers.special import TextLexer
        return TextLexer()
def _get_lexer(self):
    """Choose a lexer: explicit language beats filename beats guessing."""
    if self.language:
        from pygments.lexers import get_lexer_by_name
        return get_lexer_by_name(self.language)
    if self.filename:
        from pygments.lexers import get_lexer_for_filename
        return get_lexer_for_filename(self.filename)
    from pygments.lexers import guess_lexer
    return guess_lexer(self.data)
def print_lines(console_printer, file_dict, sourcerange):
    """
    Prints the lines between the current and the result line. If needed
    they will be shortened.

    :param console_printer: Object to print messages on the console.
    :param file_dict:       A dictionary containing all files as values
                            with filenames as key.
    :param sourcerange:     The SourceRange object referring to the
                            related lines to print.
    """
    no_color = not console_printer.print_colored
    for i in range(sourcerange.start.line, sourcerange.end.line + 1):
        # Print affected file's line number in the sidebar.
        console_printer.print(format_lines(lines='', line_nr=i, symbol='['),
                              color=FILE_LINES_COLOR,
                              end='')
        line = file_dict[sourcerange.file][i - 1].rstrip('\n')
        try:
            lexer = get_lexer_for_filename(sourcerange.file)
        except ClassNotFound:
            # No lexer for this file type: treat it as plain text.
            lexer = TextLexer()
        # Make spaces/tabs visible so whitespace issues can be seen.
        lexer.add_filter(
            VisibleWhitespaceFilter(spaces=True, tabs=True,
                                    tabsize=SpacingHelper.DEFAULT_TAB_WIDTH))
        # highlight() combines lexer and formatter to output a ``str``
        # object.
        printed_chars = 0
        if i == sourcerange.start.line and sourcerange.start.column:
            # Text before the range start gets the "message" styling.
            console_printer.print(highlight_text(
                no_color, line[:sourcerange.start.column - 1],
                BackgroundMessageStyle, lexer), end='')
            printed_chars = sourcerange.start.column - 1
        if i == sourcerange.end.line and sourcerange.end.column:
            # Final line of the range: style the in-range part and the
            # tail, then finish the line.
            console_printer.print(highlight_text(
                no_color, line[printed_chars:sourcerange.end.column - 1],
                BackgroundSourceRangeStyle, lexer), end='')
            console_printer.print(highlight_text(
                no_color, line[sourcerange.end.column - 1:],
                BackgroundSourceRangeStyle, lexer), end='')
            console_printer.print('')
        else:
            # Whole remaining line is message context.
            console_printer.print(highlight_text(
                no_color, line[printed_chars:],
                BackgroundMessageStyle, lexer), end='')
            console_printer.print('')
def render_listing(in_name, out_name, input_folder, output_folder, folders=[], files=[]):
    """Render a code listing page with breadcrumbs and a permalink.

    NOTE(review): the mutable default arguments ([]) are shared between
    calls — confirm they are never mutated.
    """
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except:
                # Bare except kept as-is: any failure -> plain text.
                lexer = TextLexer()
            code = highlight(
                fd.read(), lexer,
                HtmlFormatter(cssclass='code',
                              linenos="table",
                              nowrap=False,
                              lineanchors=utils.slugify(in_name, force=True),
                              anchorlinenos=True))
        # the pygments highlighter uses <div class="codehilite"><pre>
        # for code. We switch it to reST's <pre class="code">.
        code = CODERE.sub('<pre class="code literal-block">\\1</pre>', code)
        title = os.path.basename(in_name)
    else:
        # Directory listing: no code, title is the directory name.
        code = ''
        title = os.path.split(os.path.dirname(out_name))[1]
    crumbs = utils.get_crumbs(os.path.relpath(
        out_name, self.kw['output_folder']), is_file=True)
    permalink = self.site.link(
        'listing',
        os.path.join(
            input_folder,
            os.path.relpath(
                out_name[:-5],  # remove '.html'
                os.path.join(self.kw['output_folder'], output_folder))))
    if self.site.config['COPY_SOURCES']:
        source_link = permalink[:-5]  # remove '.html'
    else:
        source_link = None
    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': self.kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
        'source_link': source_link,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def highlight_url(request):
    """Retrieves a generated source file and syntax highlights it

    Some stack frames are functions that are generated during the build
    process. Thus the stack frame itself isn't particularly helpful since
    the generated source file isn't available anywhere.

    Bug 1389217 and friends adjust the build process to capture the
    generated source and push it to S3.

    This view takes a URL for the generated source, retrieves it from S3,
    runs it through syntax highlighting, and returns that as an HTML page.

    NOTE(willkg): The output of pygments has CSS in the page, but no JS.
    """
    url = request.GET.get("url")
    if not url:
        return http.HttpResponseBadRequest("No url specified.")
    parsed = urlparse(url)
    # We will only pull urls from allowed hosts
    if parsed.netloc not in ALLOWED_SOURCE_HOSTS:
        return http.HttpResponseForbidden("Document at disallowed host.")
    if parsed.scheme not in ALLOWED_SCHEMES:
        return http.HttpResponseForbidden("Document at disallowed scheme.")
    resp = requests.get(url)
    if resp.status_code != 200:
        return http.HttpResponseNotFound("Document at URL does not exist.")
    filename = parsed.path.split("/")[-1]
    if filename.endswith(".h"):
        # Pygments will default to C which we don't want, so override it here.
        lexer = CppLexer()
    else:
        # NOTE(review): get_lexer_for_filename raises ClassNotFound for
        # unknown extensions, which is unhandled here — confirm only
        # known generated-source names reach this point.
        lexer = get_lexer_for_filename(filename)
    lines = []
    if request.GET.get("line"):
        # Optional ?line= parameter highlights a single line.
        try:
            lines = [int(request.GET.get("line"))]
        except ValueError:
            pass
    formatter = HtmlFormatter(full=True, title=parsed.path, linenos="table",
                              lineanchors="L", hl_lines=lines)
    return http.HttpResponse(highlight(resp.text, lexer, formatter),
                             content_type="text/html")
def preview_cheat(cheat):
    """Return the cheat file's content highlighted for the terminal."""
    filepath = CHEAT_DIRECTORY + cheat
    with open(filepath, "r") as f:
        file_content = f.read()
    try:
        lexer = lexers.get_lexer_for_filename(filepath, stripnl=False,
                                              stripall=False)
    except ClassNotFound:
        # Unknown file type: plain-text lexer with the same options.
        lexer = lexers.get_lexer_by_name("text", stripnl=False,
                                         stripall=False)
    formatter = formatters.TerminalFormatter(bg="dark")  # dark or light
    return highlight(file_content, lexer, formatter)
def highlight_data(code: str, filename: str,
                   aliases: Optional[Dict[str, str]] = None) -> Optional[str]:
    """Highlight *code* with a lexer chosen from *filename*.

    When *aliases* is given, the file extension is remapped through it
    before the lookup. Returns None when no lexer matches.
    """
    if aliases:
        stem, extension = os.path.splitext(filename)
        mapped = aliases.get(extension[1:])
        if mapped is not None:
            filename = stem + '.' + mapped
    try:
        lexer = get_lexer_for_filename(filename, stripnl=False)
    except ClassNotFound:
        return None
    return cast(str, highlight(code, lexer, formatter))
def prettyprint(path):
    """Render *path* as syntax-highlighted HTML for IPython display."""
    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        lexer = _custom_guess(path)
    # BUG FIX: the original used open(path).read(), leaking the handle;
    # read through a context manager instead.
    with open(path) as f:
        content = f.read()
    formatter = HtmlFormatter()
    return IPython.display.HTML('<style type="text/css">{}</style>{}'.format(
        formatter.get_style_defs('.highlight'),
        highlight(content, lexer, formatter)))
def snippet_api(request):
    """API endpoint: create a snippet from POSTed content.

    Validates content/lexer/filename/expires, stores the snippet and
    returns it in the requested output format.
    """
    content = request.POST.get('content', '').strip()
    lexer = request.POST.get('lexer', LEXER_DEFAULT).strip()
    filename = request.POST.get('filename', '').strip()
    expires = request.POST.get('expires', '').strip()
    format = request.POST.get('format', 'default').strip()
    if not content:
        return HttpResponseBadRequest('No content given')
    # We need at least a lexer or a filename
    if not lexer and not filename:
        return HttpResponseBadRequest('No lexer or filename given. Unable to '
            'determine a highlight. Valid lexers are: %s' % ', '.join(LEXER_KEYS))
    # A lexer is given, check if its valid at all
    if lexer and lexer not in LEXER_KEYS:
        return HttpResponseBadRequest('Invalid lexer "%s" given. Valid lexers are: %s' % (
            lexer, ', '.join(LEXER_KEYS)))
    # No lexer is given, but we have a filename, try to get the lexer out of it.
    # In case Pygments cannot determine the lexer of the filename, we fallback
    # to 'plain' code.
    if not lexer and filename:
        try:
            lexer_cls = get_lexer_for_filename(filename)
            lexer = lexer_cls.aliases[0]
        except (ClassNotFound, IndexError):
            lexer = PLAIN_CODE
    if expires:
        expire_options = [str(i) for i in dict(EXPIRE_CHOICES).keys()]
        # Idiom fix: `not in` instead of `not x in y`.
        if expires not in expire_options:
            return HttpResponseBadRequest('Invalid expire choice "{}" given. '
                'Valid values are: {}'.format(expires, ', '.join(expire_options)))
        expires, expire_type = get_expire_values(expires)
    else:
        # Default lifetime: 30 days from now.
        expires = datetime.datetime.now() + datetime.timedelta(seconds=60 * 60 * 24 * 30)
        expire_type = Snippet.EXPIRE_TIME
    # objects.create() already persists the row, so the original's extra
    # s.save() was a redundant second write and has been removed.
    s = Snippet.objects.create(
        content=content,
        lexer=lexer,
        expires=expires,
        expire_type=expire_type,
    )
    if format not in FORMAT_MAPPING:
        response = _format_default(s)
    else:
        response = FORMAT_MAPPING[format](s)
    return HttpResponse(response)
def getcode(request):
    """Return a solution's source file highlighted as HTML."""
    solution = Solution.objects.get(id=request.POST['solution'])
    # Rebuild the stored path: "<stem>_<solution-id><ext>" under the
    # user's media directory.
    # NOTE(review): uses find('.') — the FIRST dot — so file names with
    # multiple dots split at the wrong place; confirm inputs.
    solution_path = '{}/{}/{}'.format(
        MEDIA_ROOT, solution.user.login,
        solution.file.name[:solution.file.name.find('.')]
        + '_{}'.format(request.POST['solution'])
        + solution.file.name[solution.file.name.find('.'):])
    with open(solution_path) as file:
        solution_code = file.read()
    # Lexer is chosen from the original upload name, not the rebuilt path.
    lexer_for_code = get_lexer_for_filename(solution.file.name)
    return HttpResponse(highlight(solution_code, lexer_for_code, HtmlFormatter()))
def get_lexer_for_file(filename):
    """Return a lexer for *filename*; unknown .kv files get the Kivy lexer."""
    try:
        return lexers.get_lexer_for_filename(filename)
    except lexers.ClassNotFound:
        # Pygments doesn't know the extension: special-case Kivy files,
        # otherwise fall back to plain text.
        ext = os.path.splitext(filename)[1]
        if ext == '.kv':
            return KivyLexer()
        return lexers.TextLexer()
def _apply_pygments(self, data, filename):
    """Applies Pygments syntax-highlighting to a file's contents.

    The resulting HTML will be returned as a list of lines.
    """
    lexer = get_lexer_for_filename(filename,
                                   stripnl=False,
                                   encoding='utf-8')
    # pygments' 'codetagify' filter marks code-tag words (TODO/FIXME/...).
    lexer.add_filter('codetagify')
    html = highlight(data, lexer, NoWrapperHtmlFormatter())
    return html.splitlines()
def html_src(self):
    """Return the source highlighted as HTML, or a plain <pre> fallback."""
    if not self.src:
        return ''
    try:
        # Map '.s' to '.asm' so pygments picks an assembler lexer.
        # NOTE(review): str.replace hits the first '.s' anywhere in the
        # name, not just the extension — confirm file names are simple.
        lexer = get_lexer_for_filename(self.file.name.replace('.s', '.asm'))
        return highlight(self.src, lexer, HtmlFormatter())
    except Exception:
        # Narrowed from a bare `except`: any highlighting failure falls
        # back to unstyled output.
        return '<pre>' + self.src + '</pre>'
def update_all(self):
    """
    Colorize all text in the widget.
    """
    try:
        lexer = get_lexer_for_filename(self.area.filename, '')
    except Exception:
        # No lexer could be resolved: leave the text unstyled.
        return
    self.tag_tokens(lexer, '1.0', 'end')
def test_render(self):
    """Formatter smoke test: the wrapper div, per-line anchors and inline
    line numbers must all appear in the highlighted output."""
    code = '#!/usr/bin/env python\n'\
        'print "Hello, world!"'
    formatter = utils.LineAnchorCodeHtmlFormatter(cssclass='codehilite',
                                                  linenos='inline')
    lexer = get_lexer_for_filename("some.py", encoding='chardet')
    hl_code = highlight(code, lexer, formatter)
    # Wrapper div from cssclass.
    assert '<div class="codehilite">' in hl_code
    # Anchor div for the first line.
    assert '<div id="l1" class="code_block">' in hl_code
    # Inline line-number span.
    assert_in('<span class="lineno">1 </span>', hl_code)
def update_cache(self):
    """Re-render self.text into self.text_cache as line-numbered HTML."""
    # Prefer guessing from the file name only when no language is set.
    use_filename = self.language == "" and self.filename is not None
    try:
        if use_filename:
            lexer = get_lexer_for_filename(self.filename, stripall=True)
        else:
            lexer = get_lexer_by_name(self.language, stripall=True)
    except pygments.util.ClassNotFound:
        # Unknown language or extension: render as plain text.
        lexer = get_lexer_by_name("text")
    self.text_cache = highlight(self.text, lexer, HtmlFormatter(linenos=True))
def format(self):
    """Highlight the file's contents for terminal output.

    Prefers a lexer guessed from the file name, falling back to the
    explicitly configured language.

    Raises:
        Exception: when neither the file name nor self.language yields
            a usable lexer.
    """
    try:
        self.lexer = get_lexer_for_filename(self.file)
    except Exception:
        # File name not recognized; removed the unused `as e` binding.
        # `is not None` replaces the `!= None` comparison.
        if self.language is not None:
            self.lexer = get_lexer_by_name(self.language)
        else:
            raise Exception("no suitable lexer found")
    formatter = TerminalFormatter()
    return highlight(self._read(), self.lexer, formatter)
def highlighter_for_file(self, filename):
    """Build a callable that highlights code into a list of Markup lines.

    Falls back to NullHighlighting when no formatter is configured or
    Pygments has no lexer for *filename*.
    """
    if self.formatter is None:  # pragma: no cover
        return NullHighlighting.highlighter_for_file(filename)
    import pygments
    from pygments.lexers import get_lexer_for_filename
    from jinja2 import Markup
    try:
        lexer = get_lexer_for_filename(filename, None, stripnl=False)
    except pygments.util.ClassNotFound:  # pragma: no cover
        return NullHighlighting.highlighter_for_file(filename)

    def render(code):
        lines = pygments.highlight(code, lexer, self.formatter).split("\n")
        return [Markup(line.rstrip()) for line in lines]

    return render
def highlight_data(code, filename, aliases=None):
    """Highlight *code* using a lexer guessed from *filename*.

    *aliases* optionally maps extensions (without the dot) to a
    replacement extension used for the lexer lookup.  Returns the
    highlighted markup, or None when no lexer matches.
    """
    if aliases:
        stem, ext = os.path.splitext(filename)
        mapped = aliases.get(ext[1:])
        if mapped is not None:
            filename = '{}.{}'.format(stem, mapped)
    try:
        lexer = get_lexer_for_filename(filename, stripnl=False)
    except ClassNotFound:
        # Unknown file type: explicitly signal "no highlighting".
        return None
    return highlight(code, lexer, formatter)
def test_example_files():
    """Nose test generator: lex every file under tests/examplefiles.

    The lexer is chosen from the ``<lexer>_`` file-name prefix when
    present, otherwise guessed from the file name.  Yields one
    (check_lexer, lexer, filename) check per file, then prints the
    timing statistics that check_lexer accumulated in the module-global
    STATS dict.
    """
    global STATS
    STATS = {}
    outdir = os.path.join(TESTDIR, 'examplefiles', 'output')
    # STORE_OUTPUT is a module-level switch; create the dir only on demand.
    if STORE_OUTPUT and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for fn in os.listdir(os.path.join(TESTDIR, 'examplefiles')):
        # Skip hidden files and editor backup files ("#" suffix).
        if fn.startswith('.') or fn.endswith('#'):
            continue
        absfn = os.path.join(TESTDIR, 'examplefiles', fn)
        if not os.path.isfile(absfn):
            continue
        # Optional filter: only exercise files matching $TEST_EXT.
        extension = os.getenv('TEST_EXT')
        if extension and not absfn.endswith(extension):
            continue
        print(absfn)
        with open(absfn, 'rb') as f:
            code = f.read()
        try:
            code = code.decode('utf-8')
        except UnicodeError:
            # Some example files are intentionally non-UTF-8.
            code = code.decode('latin1')
        lx = None
        if '_' in fn:
            # "<lexername>_foo.ext" explicitly overrides the guess.
            try:
                lx = get_lexer_by_name(fn.split('_')[0])
            except ClassNotFound:
                pass
        if lx is None:
            try:
                lx = get_lexer_for_filename(absfn, code=code)
            except ClassNotFound:
                raise AssertionError('file %r has no registered extension, '
                                     'nor is of the form <lexer>_filename '
                                     'for overriding, thus no lexer found.'
                                     % fn)
        yield check_lexer, lx, fn
    # Report the N slowest files, by absolute time then per-character time.
    N = 7
    stats = list(STATS.items())
    stats.sort(key=lambda x: x[1][1])
    print('\nExample files that took longest absolute time:')
    for fn, t in stats[-N:]:
        print('%-30s %6d chars %8.2f ms %7.3f ms/char' % ((fn,) + t))
    print()
    stats.sort(key=lambda x: x[1][2])
    print('\nExample files that took longest relative time:')
    for fn, t in stats[-N:]:
        print('%-30s %6d chars %8.2f ms %7.3f ms/char' % ((fn,) + t))
def get_language_for(filename, mimetype=None, default='text'):
    """Get language for filename and mimetype"""
    lexer = None
    # First preference: an explicit mimetype, when one was supplied.
    if mimetype is not None:
        try:
            lexer = get_lexer_for_mimetype(mimetype)
        except ClassNotFound:
            lexer = None
    # Second preference: guess from the file name.
    if lexer is None:
        try:
            lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return default
    return get_known_alias(lexer, default)
def generate(inpath, outpath=None, style='autumn'):
    """Render the source file *inpath* to a syntax-highlighted PNG.

    When *outpath* is falsy it defaults to *inpath* with its final
    extension replaced by ``.png``.  Returns the output path.
    """
    # rsplit with maxsplit=1 strips only the LAST extension; the original
    # rsplit('.')[0] truncated at the FIRST dot, mangling "pkg.module.py".
    outpath = outpath or inpath.rsplit('.', 1)[0] + '.png'
    with open(inpath) as infile:
        # ImageFormatter emits PNG bytes, so the output stream must be
        # binary ('wb'); text mode 'w' raises TypeError on Python 3.
        with open(outpath, 'wb') as outfile:
            lexer = get_lexer_for_filename(inpath)
            formatter = ImageFormatter(style=style, line_numbers=False,
                                       font_size=42,
                                       font_name='dejavu sans mono')
            highlight(infile.read(), lexer, formatter, outfile)
    resize(outpath)
    return outpath
def __getLexerByFileName(self, params_dict):
    """Resolve a lexer from the request's file-name parameter.

    Falls back to the default lexer when Pygments does not recognize
    the file name.
    """
    from pygments.lexers import ClassNotFound
    from pygments.lexers import get_lexer_for_filename
    target = getFileName(params_dict[FILE_PARAM_NAME])
    try:
        return get_lexer_for_filename(target, stripall=True)
    except ClassNotFound:
        return self.__getDefaultLexer()
def _sample_source(self, parts):
    """Return the HTML rendering of a source file.

    *parts* is the split URL path; the last component must be of the
    form "source--<mangled-path>.html" as generated by this plugin.
    """
    bn = parts[-1]
    if not bn.startswith('source--') or not bn.endswith('.html'):
        # The link isn't of the form that this plugin generated, and
        # therefore cannot refer to a valid source file.  (The original
        # interpolated the not-yet-assigned local `path` here, which
        # raised NameError instead of the intended error.)
        raise TrackError("Path doesn't refer to source file: %s" % bn)
    repos = self.env.get_repository()
    try:
        # Undo the '-' mangling and strip the "source--"/".html" wrapper.
        sourcefn = parts[-1].replace('-', '/')[8:-5]
        suffix = os.path.splitext(sourcefn)[-1]
        if suffix not in gSourceSuffix:
            # File is not one of the types of sources we want to deal
            # with.  (Same pre-assignment `path` NameError fixed here.)
            raise TrackError("Path doesn't refer to source file: %s" % sourcefn)
        fullpath = '/'.join(parts[1:-1] + [sourcefn])
        path = '/trunk/pyobjc/' + parts[0] + '/Examples/' + fullpath
        node = repos.get_node(path)
        if node is None:
            raise TrackError("Couldn't find repository path: %s" % path)
        src = node.get_content().read()
        sources = []
        rootnode = repos.get_node(os.path.dirname(path))
        self._gather_sources(sources, rootnode, base=None)
        sources.sort(key=lambda x: x[-1])
        # Mark the currently-viewed file as 'selected' (others get None).
        sources = [(a, b, (((b == parts[-1]) or None) and 'selected'))
                   for a, b in sources]
        lexer = get_lexer_for_filename(sourcefn)
        # 'linenos' was misspelled 'linenoes' originally, so Pygments
        # silently ignored the option and used its default.
        formatter = HtmlFormatter(linenos=False, cssclass='source')
        body = highlight(src, lexer, formatter)
        style = formatter.get_style_defs()
        return dict(
            filename=sourcefn,
            zipname=os.path.basename(parts[-2]) + '.zip',
            body=body,
            style=style,
            sources=sources,
        )
    finally:
        repos.close()
def filter_code(code, filename, language=None):
    """Tokenize and filter a code document.

    Replace variable names with V, function names with F, object names
    with O, and strings with S. Return the filtered document and a list
    of offsets indicating how many characters were removed by filtering
    at each index in the resulting document where filtering occurred
    (this is used later to highlight the original code using plagiarism
    detection results on the filtered code).
    """
    try:
        if language is not None:
            lexer = lexers.get_lexer_by_name(language)
        else:
            lexer = lexers.get_lexer_for_filename(filename)
        tokens = lexer.get_tokens(code)
    except pygments.util.ClassNotFound:
        # The original f-string had no placeholder and logged the literal
        # text "(unknown)"; include the actual file name.
        logging.warning(f"{filename} not tokenized: unknown file extension")
        return code, np.array([])
    # The original compared the lexer INSTANCE to the TextLexer class with
    # ==, which is always False; isinstance makes the plaintext guard work.
    if isinstance(lexer, pygments.lexers.TextLexer):
        logging.warning(f"did not tokenize plaintext file {filename}")
        return code, np.array([])

    out_code = ""
    offset = 0  # cumulative count of characters removed so far
    offsets = [[0, 0]]
    for tok_type, tok_text in tokens:
        if tok_type == token.Name:
            out_code += "V"
            offsets.append([len(out_code) - 1, offset])
            offset += len(tok_text) - 1
        elif tok_type in token.Name.Function:
            out_code += "F"
            offsets.append([len(out_code) - 1, offset])
            offset += len(tok_text) - 1
        elif tok_type in token.Name.Class:
            out_code += "O"
            # The original appended the per-token delta (len-1) here while
            # every sibling branch appends the cumulative offset; that
            # looks like a copy-paste slip, fixed for consistency.
            offsets.append([len(out_code) - 1, offset])
            offset += len(tok_text) - 1
        elif tok_type == token.Text or tok_type in token.Comment:
            # Whitespace and comments are dropped entirely.
            offsets.append([len(out_code) - 1, offset])
            offset += len(tok_text)
        elif tok_type in token.Literal.String:
            # Keep quote characters as '"', collapse string bodies to S.
            if tok_text == "'" or tok_text == '"':
                out_code += '"'
            else:
                out_code += "S"
            offsets.append([len(out_code) - 1, offset])
            offset += len(tok_text) - 1
        else:
            out_code += tok_text
    return out_code, np.array(offsets)
def preprocessFile(path, basePath, retainLine):
    """
    Perform preprocessing on the lexer output for one source file.

    Parameters:
    -----------
    path - path of the file to lex
    basePath - corpus root, used to derive (project, relative file) names
    retainLine - do we keep the original line numbers or not; when True,
                 comments/docstrings are collapsed to newlines instead of
                 removed so line counts are preserved

    Returns:
    -----------
    (curProject - The current project or corpora we are in
    curFile - The corresponding original file path
    lexedWoComments - the Pygments token list with preprocessing OR
    (Not yet implemented) Something for English?,
    language - the language of this lexer
    fileErrorCount - count of observed error tokens from Pygments)
    """
    if (True):  # TODO: placeholder for an "is a programming language" check.
        components = path.split(".")
        fileContents = ""
        fileContents = ''.join(open(path, 'r').readlines())
        # The lexer is chosen purely from the file name.
        lexer = get_lexer_for_filename(path)
        tokens = lex(fileContents, lexer)  # returns a generator of tuples
        tokensList = list(tokens)
        language = languageForLexer(lexer)
        (curProject, curFile) = getProjectAndFilename(path, basePath)
        # Debug: what does the original token set look like
        #print(tokensList)
        #quit()
        if (retainLine):
            # Collapse comments/docstrings to newlines to keep line numbers.
            lexedWoComments = reduceToNewLine(tokensList, Token.Comment)
            lexedWoComments = reduceToNewLine(lexedWoComments, Token.Literal.String.Doc)
        else:
            # Strip comments and alter strings
            lexedWoComments = tokensExceptTokenType(tokensList, Token.Comment)
            lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Literal.String.Doc)
        beforeError = len(lexedWoComments)
        # Remove things that didn't lex properly (Pygments Error tokens).
        lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Error)
        fileErrorCount = beforeError - len(lexedWoComments)
        # Alter the pygments lexer types to be more comparable between our
        # languages
        lexedWoComments = fixTypes(lexedWoComments, language)
        lexedWoComments = convertNamespaceTokens(lexedWoComments, language)
        return (curProject, curFile, lexedWoComments, language, fileErrorCount)