def filedisplay(context):
    """Display a file at a given revision.

    According to the mimetype, the file may be displayed as source,
    colored thanks to Pygments, or, for pictures and PDF, directly in
    the browser.
    """
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_for_mimetype, guess_lexer_for_filename
    from pygments.util import ClassNotFound
    import mimetypes

    if 'raw' in context['request'].GET:
        mimetype = ('text/plain', None)
    else:
        mimetype = mimetypes.guess_type(context['file'])

    if mimetype != ('text/plain', None):
        lexer = None
        if mimetype[0] is not None:
            try:
                # BUG FIX: get_lexer_for_mimetype() expects the mimetype
                # *string*; passing the (type, encoding) tuple returned by
                # guess_type() always failed into the except clause.
                lexer = get_lexer_for_mimetype(mimetype[0])
            except ClassNotFound:
                lexer = None
        if lexer is None:
            try:
                lexer = guess_lexer_for_filename(context['file'],
                                                 context['fctx'].data())
            except ClassNotFound:
                lexer = None
    else:
        # A lexer can't be guessed from a plain-text mimetype; append a
        # fake '.txt' extension to force the file to be viewed as text.
        try:
            lexer = guess_lexer_for_filename(context['file'] + '.txt',
                                             context['fctx'].data())
        except ClassNotFound:
            lexer = None

    if lexer:
        formatter = HtmlFormatter(linenos=True, cssclass="source")
        content = highlight(context['fctx'].data(), lexer, formatter)
    else:
        lexer = None
        if mimetype[0] in ('image/png', 'image/jpeg', 'image/gif'):
            content = 'image'
        elif mimetype[0] == 'application/pdf':
            content = 'pdf'
        else:
            content = None

    return {
        'content': content,
        'lexer': lexer,
        'mimetype': mimetype,
        'file': context['file'],
        'size': context['fctx'].size(),
        'name': context['repo'],
        'rev': context['rev'],
    }
def highlight_code(path, src, div=False, **kwargs):
    """Return *src* highlighted as HTML; the lexer is chosen from *path*."""
    src = decode_charset_to_unicode(src)
    try:
        if path.endswith(('.html', '.mako')):
            chosen = MakoHtmlLexer(encoding='utf-8')
        elif path.endswith('.ptl'):
            chosen = PythonLexer(encoding='utf-8')
        elif path.endswith('.md'):
            chosen = RstLexer(encoding='utf-8')
        else:
            if path.endswith(IGNORE_FILE_EXTS):
                # Binary-looking files get a placeholder body instead.
                src = 'Hmm.., this is binary file.'
            chosen = guess_lexer_for_filename(path, src)
            chosen.encoding = 'utf-8'
            chosen.stripnl = False
    except ClassNotFound:
        # No lexer matched: render as plain text.
        chosen = TextLexer(encoding='utf-8')

    formatter_cls = _CodeHtmlFormatter if div else HtmlFormatter
    return highlight(src, chosen,
                     formatter_cls(linenos=True, lineanchors='L',
                                   anchorlinenos=True, encoding='utf-8',
                                   **kwargs))
def get_lexer_for_filename(filename, text="", **options):
    """Gets a lexer from a filename (usually via the filename extension).

    This mimics the behavior of ``pygments.lexers.get_lexer_for_filename()``
    and ``pygments.lexers.guess_lexer_for_filename()``.
    """
    if CACHE is None:
        load_or_build()
    exts = CACHE["lexers"]["exts"]
    fname = os.path.basename(filename)
    # Prefer an exact-filename entry, then fall back to the extension.
    key = fname if fname in exts else os.path.splitext(fname)[1]
    try:
        modname, clsname = exts[key]
    except KeyError:
        # Cache miss: resolve the hard way, then remember it for next time.
        import inspect
        from pygments.lexers import guess_lexer_for_filename
        lexer = guess_lexer_for_filename(filename, text, **options)
        cls = type(lexer)
        exts[fname] = (inspect.getmodule(cls).__name__, cls.__name__)
        write_cache(cache_filename())
    else:
        lexer = getattr(importlib.import_module(modname), clsname)(**options)
    return lexer
def process(context):
    """Main routine for metrics.

    Builds metric instances, lexes each input file with a guessed Pygments
    lexer and collects the computed metrics per file.
    """
    metrics = {}

    # import all the needed metric modules
    metric_modules = __import_metric_modules(context['include_metrics'])
    # instantiate all the desired metric classes
    metric_instance = __instantiate_metric(metric_modules, context)
    cm = ComputeMetrics(metric_instance, context)

    # main loop
    for i, in_file in enumerate(context['in_file_names']):
        try:
            cm.reset()
            # BUG FIX: a context manager closes the handle even when lexing
            # fails (the original leaked it on any error after open()).
            with open(os.path.join(context['base'], in_file), 'r') as fin:
                code = fin.read()
            # define lexographical scanner to use for this run
            try:
                # encoding is 'guess', 'chardet' or 'utf-8'
                lex = guess_lexer_for_filename(in_file, code, encoding='guess')
            except Exception:
                # Narrowed from a bare `except:`; still best-effort — skip.
                continue
            token_list = lex.get_tokens(code)  # parse code
            metrics[in_file] = {}
            metrics[in_file].update(cm(token_list))
            metrics[in_file]['language'] = lex.name  # provide language
        except IOError as e:
            # BUG FIX: `except IOError, e` is Python-2-only syntax.
            sys.stderr.writelines(str(e) + " -- Skipping input file.\n\n")

    # NOTE(review): the original built `metrics` but never returned it;
    # returning is backward-compatible and matches process_file_metrics().
    return metrics
def build_highlighter(lexer, formatter, template, template_file):
    """Build and return a highlighting callable ``H(code) -> html``.

    Uses Pygments when available (guessing the lexer from the template when
    none is given); otherwise falls back to plain HTML escaping. Trailing
    newlines are stripped from the highlighted output unless the input code
    itself ends with a newline.
    """
    try:
        import pygments
        from pygments.lexers import guess_lexer_for_filename, guess_lexer
        from pygments.formatters import HtmlFormatter

        # guess language on the template without {{{}}}:
        clean_template = re.sub('{{{[^}]*}}}', ' ', template)
        if lexer is None:
            if template_file is not None:
                lexer = guess_lexer_for_filename(template_file, clean_template)
            else:
                lexer = guess_lexer(clean_template)
        if formatter is None:
            formatter = HtmlFormatter(
                # inline formatting directives (avoid the need for a
                # separate style section):
                noclasses=True,
                # We'll wrap the whole thing ourselves:
                nowrap=True,
            )

        def pygments_highlight(code):
            return pygments.highlight(code, lexer, formatter)
    except ImportError:
        # Pygments is not available, just escape the code.
        import html

        def pygments_highlight(code):
            # BUG FIX: cgi.escape() was removed from Python (3.8+);
            # html.escape(..., quote=False) matches its default behavior.
            return html.escape(code, quote=False)

    def H(code):
        h = pygments_highlight(code)
        if not code.endswith('\n'):
            h = re.sub(r'[\n\r]+$', '', h)
        return h
    return H
def guess_lang_formatter(paste_plaintext, paste_filename=None):
    """Pick a paste formatter ('plaintext', 'markdown' or
    'syntaxhighlighting') by guessing the language of *paste_plaintext*,
    preferring the filename-based guess when *paste_filename* is given.
    """
    paste_formatter = 'plaintext'
    # Map numpy to python because the numpy lexer gives false positives
    # when guessing.
    lexer_lang_map = {'numpy': 'python'}
    lang = None
    if paste_filename:
        try:
            # BUG FIX: used the global `options.filename` instead of the
            # `paste_filename` argument.
            lang = guess_lexer_for_filename(paste_filename,
                                            paste_plaintext).name.lower()
        except Exception:
            print("No guess by filename")
    else:
        try:
            lang = guess_lexer(paste_plaintext).name.lower()
        except Exception:
            pass
    if lang:
        # BUG FIX: the map above was defined but never applied.
        lang = lexer_lang_map.get(lang, lang)
        if lang == 'markdown':
            paste_formatter = 'markdown'
        # BUG FIX: was a second independent `if`, so markdown pastes were
        # always overwritten with 'syntaxhighlighting'.
        elif lang != 'text only':
            paste_formatter = 'syntaxhighlighting'
    return paste_formatter
def run(self, args):
    """Fetch a remote file via the pupyutils 'cat' helper and print it,
    hex-dumping binaries and optionally syntax-highlighting (Python 2).
    """
    try:
        cat = self.client.remote('pupyutils.basic_cmds', 'cat', False)
        grep = None
        filter_out = False
        if args.G:
            grep = args.G
        elif args.g:
            grep = args.g
            filter_out = True
        r = cat(args.path, args.N, args.n, grep, args.E, filter_out)
        if r:
            # lexer stays None = "not chosen yet"; False = "never highlight".
            lexer = None
            if is_binary(r):
                lexer = False
                try:
                    import hexdump
                    from pygments.lexers.hexdump import HexdumpLexer
                    result = []
                    for line in hexdump.dumpgen(r):
                        if args.color:
                            # Change to something HexdumpLexer knows
                            result.append(line[:8] + ' ' + line[9:60] + '|' + line[60:] + '|')
                        else:
                            result.append(line)
                    r = '\n'.join(result)
                    if args.color:
                        lexer = HexdumpLexer()
                except Exception, e:
                    # hexdump/pygments unavailable or dump failed.
                    r = '[ BINARY FILE ]'
                    lexer = False
            if args.color:
                # Only guess by filename for non-glob paths.
                if lexer is None and '*' not in args.path:
                    try:
                        lexer = guess_lexer_for_filename(args.path, r)
                    except:
                        pass
                if lexer is None and not args.N:
                    try:
                        lexer = guess_lexer(r)
                    except:
                        pass
            if lexer:
                r = Pygment(lexer, r)
            self.log(r)
    except Exception, e:
        # Join only string-ish args of the exception (Py2 str/unicode).
        self.error(' '.join(x for x in e.args if type(x) in (str, unicode)))
def __init__(self, name, main_display, tabsize, multiline_window=1500, number_of_windows=1):
    """Open *name* and set up a Pygments lexer plus editor state.

    Lexer choice: guess from the filename and the first line; on failure
    fall back to the file extension, then to plain text. PythonLexer is
    upgraded to Python3Lexer.
    """
    self.name = name
    self.file = f = open(name)
    try:
        # Guess using only the first line of content.
        lexer = guess_lexer_for_filename(name, f.readline())
    except TypeError:
        # NOTE(review): TypeError presumably from a pygments version
        # quirk — fall back to an extension-based lookup.
        try:
            lexer = get_lexer_by_name(os.path.splitext(name)[1][1:])
        except pygments.util.ClassNotFound:
            lexer = TextLexer()
    except pygments.util.ClassNotFound:
        lexer = TextLexer()
    lexer = Python3Lexer() if isinstance(lexer, PythonLexer) else lexer
    lexer.add_filter(NonEmptyFilter())
    lexer.add_filter('tokenmerge')
    f.seek(0)  # rewind: one line was consumed for guessing
    self.lines = []
    self.focus = 0
    self.clipboard = None
    self.clipboard_pos = None
    self.lexer = lexer
    self.w_pos = {}
    self.all_tokens = None
    self._etext = lambda w: w.edit_text
    self.multiline_window = multiline_window
    self.number_of_windows = number_of_windows
    self.main_display = main_display
    self.line_kwargs = dict(caption="", allow_tab=True, lexer=lexer,
                            wrap='clip', main_display=main_display,
                            smart_home=True, tabsize=tabsize)
def render(self, context):
    """Render the media file at ``self.path``: images go through a wrapper
    template, everything else through Pygments when a lexer is found.

    When ``self.content`` is set, results are stored in the template
    context instead of being returned.
    """
    path = self.path.resolve(context)
    content_editable = False
    content_type = path.content_type
    if content_type in MEDIAFILES_IMAGES:
        rendered = render_to_string('mediafiles/wrapper/image.html', context)
    else:
        lexer = None
        lexer_options = {'stripnl': False, 'tabsize': 4}
        try:
            lexer = guess_lexer_for_filename(path.name, path.content,
                                             **lexer_options)
        except ClassNotFound:
            # Extensionless, non-executable files still render as text.
            if not path.extension and not path.is_executable():
                lexer = get_lexer_by_name('text', **lexer_options)
        if lexer is not None:
            formatter = HtmlFormatter(linenos='inline', lineanchors='l',
                                      nobackground=True,
                                      style=MEDIAFILES_PYGMENTS_STYLE)
            rendered = mark_safe(highlight(path.content, lexer, formatter))
            # Only highlighted (text) files are editable in the UI.
            content_editable = path.is_writeable()
        else:
            rendered = None
    if self.content is not None:
        # Tag was asked to populate context variables, emit nothing.
        context[self.content] = rendered
        context[self.content_editable] = content_editable
        return u''
    return rendered
def show_submission_source_view(request, contest_id, submission_id):
    """Render a ProgramSubmission's source, highlighted when possible."""
    submission = get_object_or_404(ProgramSubmission, id=submission_id)
    if contest_id != submission.problem_instance.contest_id:
        raise Http404
    check_submission_access(request, submission)

    raw_source = submission.source_file.read()
    filename = submission.source_file.file.name

    # Defaults cover the "no lexer found" case.
    body = raw_source
    css = ''
    safe = False
    try:
        lexer = guess_lexer_for_filename(filename, raw_source)
        body = highlight(raw_source, lexer,
                         HtmlFormatter(linenos=True,
                                       cssclass='syntax-highlight'))
        css = HtmlFormatter().get_style_defs('.syntax-highlight')
        safe = True
    except ClassNotFound:
        pass

    return TemplateResponse(request, 'programs/source.html', {
        'source': body,
        'css': css,
        'is_source_safe': safe,
    })
def get_highlighted(self, filename, hl_lines=None):
    """Get the highlighted version of a file.

    Results are cached as gzipped HTML keyed on the file path, mtime,
    CACHE_SERIAL and the highlighted-line set. Always returns text (str).
    """
    hl_lines = sorted(hl_lines or [])
    st = os.stat(filename)
    cache_key = '%s-%d-%s-%s' % (filename, int(st.st_mtime),
                                 CACHE_SERIAL, hl_lines)
    # BUG FIX: hashlib requires bytes, not str, under Python 3.
    digest = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()
    cache_path = os.path.join(self.cache_dir, digest + '.html.gz')
    try:
        with gzip.open(cache_path) as keyfile:
            # BUG FIX: decode so cache hits return str, like misses do.
            return keyfile.read().decode('utf-8')
    except IOError:
        pass

    with open(filename) as infile:
        file_data = infile.read()
    try:
        lexer = lexers.guess_lexer_for_filename(filename, file_data)
    except pygments.util.ClassNotFound:
        try:
            lexer = lexers.guess_lexer(file_data)
        except pygments.util.ClassNotFound:
            lexer = lexers.TextLexer()
    # Renamed from `highlight`, which shadowed pygments.highlight.
    highlighted = pygments.highlight(
        file_data, lexer,
        formatters.HtmlFormatter(
            hl_lines=hl_lines, linenos='table',
            lineanchors='line', anchorlinenos=True))
    with gzip.open(cache_path, 'w') as keyfile:
        keyfile.write(highlighted.encode('utf-8'))
    return highlighted
def __guessLexer(self, text):
    """
    Private method to guess a pygments lexer.

    @param text text to base guessing on (string)
    @return reference to the guessed lexer (pygments.lexer) or None
    """
    # An explicitly configured lexer name wins outright.
    if self.__pygmentsName:
        lexerClass = find_lexer_class(self.__pygmentsName)
        return lexerClass() if lexerClass is not None else None

    if not text:
        return None

    # step 1: guess based on filename and text
    if self.editor is not None:
        fn = self.editor.getFileName()
        if fn:
            try:
                return guess_lexer_for_filename(fn, text)
            except ClassNotFound:
                pass

    # step 2: guess on text only
    try:
        return guess_lexer(text)
    except ClassNotFound:
        return None
def _blob_detail(request, project, branch_name, git_object, path_list, branches):
    """Render a blob. Pretty prints using Pygments."""
    from pygments.util import ClassNotFound

    breadcrumbs = generate_breadcrumbs(path_list)
    file_name = path_list[-1]['name']
    # Hoisted: as_raw_string() was previously called three times.
    raw = git_object.as_raw_string()
    try:
        lexer = guess_lexer_for_filename(file_name, raw)
    except ClassNotFound:
        # Narrowed from a bare `except:` that also swallowed e.g.
        # KeyboardInterrupt; content-based guess as fallback.
        lexer = guess_lexer(raw)
    formatter = HtmlFormatter(linenos=True)
    pretty_printed_file = highlight(raw, lexer, formatter)
    return render_to_response('project/blob.html', {
        'project': project,
        'branch_name': branch_name,
        'object': git_object,
        'path': path_list,
        'breadcrumbs': breadcrumbs,
        'pretty_print': pretty_printed_file,
        'branches': branches,
    }, context_instance=RequestContext(request))
def programas(request, arquivo):
    """Display a source file from ``programas_dir``, syntax-highlighted
    when the language can be guessed. (Comments translated from
    Portuguese.)
    """
    erros = False
    codigo = ''
    nome = 'Erro!'
    caminho = os.path.join(programas_dir, arquivo)
    if os.path.isfile(caminho):
        try:
            with open(caminho) as programa:
                texto = programa.read().decode('utf-8')
        except IOError:
            erros = True
        else:
            nome = os.path.basename(arquivo)
            # If the language can't be guessed, show the file as plain
            # text. (pygments.util.ClassNotFound subclasses ValueError,
            # so these handlers do catch the lexer-lookup failures.)
            try:
                lexer = guess_lexer_for_filename(arquivo, texto)
            except ValueError:
                try:
                    lexer = guess_lexer(texto)
                except ValueError:
                    # BUG FIX: highlight() needs a lexer *instance*;
                    # the original assigned the TextLexer class itself.
                    lexer = TextLexer()
            # linenos may be inline, table, True, or ''
            # replace() works around an issue with {% spaceless %}
            codigo = highlight(texto, lexer,
                               HtmlFormatter(linenos=False)).replace('\n', '<br>\n')
    else:
        erros = True
    return render_to_response('programas.html',
                              {'erros': erros, 'nome': nome, 'codigo': codigo},
                              context_instance=RequestContext(request))
def __init__(self, parent, lexer=None, filename="a.txt"):
    """Set up the highlighter, guessing a lexer from *filename*.

    Guessing with empty content is effectively an extension-based lookup;
    the previous code crashed with ClassNotFound on unknown extensions
    and left a debug print in place.
    """
    super(GenericHighlighter, self).__init__(parent=parent, lexer=lexer)
    self._document = self.document()
    self._formatter = HtmlFormatter()
    try:
        self._lexer = guess_lexer_for_filename(filename, "")
    except Exception:
        # BUG FIX: unknown extensions raised an uncaught ClassNotFound;
        # fall back to plain text.
        from pygments.lexers import TextLexer
        self._lexer = TextLexer()
    # Removed stray debug print(self._lexer).
    self.set_style('default')
def define_ajax(action):
    """AJAX dispatcher for a Tool built from the posted form.

    'show'     -> highlighted Turtle RDF
    'download' -> RDF returned as a file attachment
    'generate' -> code rendered from the chosen template, highlighted
    Anything else yields an error response.
    """
    form = ToolForm()
    tool = Tool(**form.data)
    if action == 'show':
        return highlight(tool.toRDF(rdf_format='turtle'), Notation3Lexer(),
                         HtmlFormatter())
    elif action == 'download':
        return make_response(tool.toRDF(rdf_format='turtle'), 200, [
            ('Content-Type', 'rdf/ttl'),
            ('Content-disposition', 'attachment; filename=%s.ttl' % tool.name)
        ])
    elif action == 'generate':
        # BUG FIX: the template file was opened but never closed.
        with open(os.path.join(app.config['TEMPLATE_DIR'],
                               request.form['template_name']), 'r') as target_file:
            target = Template(target_file.read())
        code = tool.toTarget(target)
        try:
            lexer = guess_lexer_for_filename(request.form['template_name'], code)
        except ClassNotFound:
            lexer = PythonLexer()  # default
        return highlight(code, lexer, HtmlFormatter())
    else:
        # NOTE(review): 401 means "Unauthorized"; 400 looks intended — confirm.
        return make_response("Unknown action '%s'" % action, 401)
def handleMatch(self, match):
    """Expand a markdown include directive: read the referenced file,
    slice out the requested portion, and return it as a highlighted
    <div> element.
    """
    params = match.group('params') or ''
    rel_include_path = match.group('path')
    # Includes are resolved relative to the including markdown file.
    source_dir = os.path.dirname(self.source_path)
    include_path = os.path.join(source_dir, rel_include_path)
    try:
        with open(include_path) as include_file:
            file_text = include_file.read()
    except IOError as e:
        raise IOError('Markdown file {0} tried to include file {1}, got '
                      '{2}'.format(self.source_path, rel_include_path,
                                   e.strerror))
    include_text = choose_include_text(file_text, params, self.source_path)
    if not include_text:
        raise TaskError('Markdown file {0} tried to include file {1} but '
                        'filtered out everything'.format(self.source_path,
                                                         rel_include_path))
    el = markdown.util.etree.Element('div')
    el.set('class', 'md-included-snippet')
    try:
        # Guess on the whole file even though only a slice is rendered.
        lexer = guess_lexer_for_filename(include_path, file_text)
    except ClassNotFound:
        # e.g., ClassNotFound: no lexer for filename u'BUILD' found
        if 'BUILD' in include_path:
            lexer = PythonLexer()
        else:
            lexer = TextLexer()  # the boring plain-text lexer
    html_snippet = highlight(include_text, lexer,
                             HtmlFormatter(cssclass='codehilite'))
    el.text = html_snippet
    return el
def guess_code_present(filepath):
    """Guess whether a file contains "code" or not.

    Structured text (as listed in NOT_CODE) does not count as "code", but
    anything else the Pygments lexer-guesser finds a probable lexer for
    counts as "code" for these purposes.

    Parameters
    ----------
    filepath : string
        Path to the file that may contain code.

    Returns
    -------
    boolean
        True if the file contains "code" (as a best guess), False otherwise.
    """
    text = get_file_text(filepath)
    filename = os.path.split(filepath)[1]
    try:
        lexer = lexers.guess_lexer_for_filename(filename, text)
    except lexers.ClassNotFound:
        return False
    return lexer.name not in NOT_CODE
def show_submission_source_view(request, contest_id, submission_id):
    """Show a submission's source, highlighted when a lexer matches."""
    submission = get_submission_or_404(request, contest_id, submission_id,
                                       ProgramSubmission)
    raw_source, decode_error = decode_str(submission.source_file.read())
    filename = submission.source_file.file.name

    # Fallback values: raw, unstyled, flagged unsafe.
    formatted_source = raw_source
    formatted_source_css = ""
    is_source_safe = False
    try:
        lexer = guess_lexer_for_filename(filename, raw_source)
        formatted_source = highlight(
            raw_source, lexer,
            HtmlFormatter(linenos=True, line_number_chars=3,
                          cssclass="syntax-highlight"))
        formatted_source_css = HtmlFormatter().get_style_defs(
            ".syntax-highlight")
        is_source_safe = True
    except ClassNotFound:
        pass

    download_url = reverse(
        "download_submission_source",
        kwargs={"contest_id": request.contest.id,
                "submission_id": submission_id},
    )
    return TemplateResponse(
        request,
        "programs/source.html",
        {
            "source": formatted_source,
            "css": formatted_source_css,
            "is_source_safe": is_source_safe,
            "download_url": download_url,
            "decode_error": decode_error,
        },
    )
def _generate_preview_html(self, data):
    """Return the first few truncated lines of the text file.

    Args:
        data (bytes):
            The contents of the attachment.

    Returns:
        django.utils.safestring.SafeText:
        The resulting HTML-safe thumbnail content.
    """
    from reviewboard.diffviewer.chunk_generator import \
        NoWrapperHtmlFormatter

    charset = self.mimetype[2].get('charset', 'ascii')
    try:
        text = data.decode(charset)
    except UnicodeDecodeError:
        logging.error(
            'Could not decode text file attachment %s using '
            'charset "%s"', self.attachment.pk, charset)
        text = data.decode('utf-8', 'replace')

    try:
        lexer = guess_lexer_for_filename(self.attachment.filename, text)
    except ClassNotFound:
        lexer = TextLexer()

    head = highlight(text, lexer, NoWrapperHtmlFormatter()) \
        .splitlines()[:self.TEXT_CROP_NUM_HEIGHT]
    return format_html_join('', '<pre>{0}</pre>',
                            ((mark_safe(line),) for line in head))
def pretty_print(fn, data):
    """Pretty-print *data* for filename *fn*.

    JSON is re-indented; otherwise a lexer is guessed from the filename,
    then the content; the last resort is a hex dump. Without Pygments the
    data is printed verbatim.
    """
    if not hasPygments:
        print(data)
        return
    lx = None
    # guess_lexer will not guess json in priority, so we first try to
    # decode json with the json module.
    try:
        data = json.dumps(json.loads(data), indent=2)
        lx = JsonLexer()
    except ValueError:
        pass
    if lx is None:
        try:
            lx = guess_lexer_for_filename(fn, data)
        except Exception:
            pass
    if lx is None:
        try:
            lx = guess_lexer(data)
        except Exception:
            pass
    if lx is None:
        lx = HexdumpLexer()
        data = hexdump(data, result='return')
    if isinstance(lx, JsonLexer):
        try:
            # Idempotent when data was already re-dumped above.
            data = json.dumps(json.loads(data), indent=2)
        except ValueError:
            # BUG FIX: a guessed JsonLexer on data json.loads rejects used
            # to raise an uncaught ValueError here; keep the data as-is.
            pass
    try:
        print(highlight(data, lx, Terminal256Formatter()))
    except UnicodeEncodeError:
        print(
            highlight(hexdump(data, result='return'), HexdumpLexer(),
                      Terminal256Formatter()))
def get_highlighted_code(name, code, type="terminal"):
    """
    If pygments are available on the system then returned output is colored.
    Otherwise unchanged content is returned.
    """
    import logging
    try:
        import pygments  # noqa: availability probe only
    except ImportError:
        return code
    from pygments import highlight
    from pygments.lexers import guess_lexer_for_filename, ClassNotFound
    from pygments.formatters import TerminalFormatter

    try:
        lexer = guess_lexer_for_filename(name, code)
    except ClassNotFound:
        logging.debug("Couldn't guess Lexer, will not use pygments.")
        return code
    return highlight(code, lexer, TerminalFormatter())
def _apply_pygments(self, data, filename):
    """Apply Pygments syntax-highlighting to a file's contents.

    This will only apply syntax highlighting if a lexer is available and
    the file extension is not blacklisted.

    Args:
        data (unicode):
            The data to syntax highlight.

        filename (unicode):
            The name of the file. This is used to help determine a
            suitable lexer.

    Returns:
        list of unicode:
        A list of lines, all syntax-highlighted, if a lexer is found.
        If no lexer is available, this will return ``None``.
    """
    if filename.endswith(self.STYLED_EXT_BLACKLIST):
        return None

    try:
        lexer = guess_lexer_for_filename(filename,
                                         data,
                                         stripnl=False,
                                         encoding='utf-8')
    except pygments.util.ClassNotFound:
        return None

    lexer.add_filter('codetagify')
    rendered = highlight(data, lexer, NoWrapperHtmlFormatter())
    return split_line_endings(rendered)
def show_submission_source_view_unsafe(request, submission_id, source_file,
                                       download_url):
    """Render submission source; falls back to raw text when Pygments has
    no lexer for the filename."""
    raw_source, decode_error = decode_str(source_file.read())
    filename = source_file.file.name

    try:
        lexer = guess_lexer_for_filename(filename, raw_source)
    except ClassNotFound:
        formatted_source, formatted_source_css = raw_source, ''
        is_source_safe = False
    else:
        formatter = HtmlFormatter(linenos=True,
                                  line_number_chars=3,
                                  cssclass='syntax-highlight')
        formatted_source = highlight(raw_source, lexer, formatter)
        formatted_source_css = HtmlFormatter() \
            .get_style_defs('.syntax-highlight')
        is_source_safe = True

    return TemplateResponse(request, 'programs/source.html', {
        'source': formatted_source,
        'css': formatted_source_css,
        'is_source_safe': is_source_safe,
        'download_url': download_url,
        'decode_error': decode_error,
        'submission_id': submission_id
    })
def _highlight_syntax(self):
    """If we have pygments, syntax highlight the file.

    The lexer comes from an explicit ``syntax_format`` name, else a
    filename-based guess, else a content-based guess; highlighted lines
    are pushed back into ``self._lines``.
    """
    if self.syntax_format:
        # Decode the stored byte lines into one text blob for the lexer.
        stringified_text = \
            ''.join(str(s.decode("utf-8")) for s in self.read_lines())
        if self.syntax_format is not True:
            # An explicit lexer name was configured.
            lexer = get_lexer_by_name(self.syntax_format)
        elif self.file_name:
            try:
                lexer = \
                    guess_lexer_for_filename(
                        self.file_name, stringified_text)
            except TypeError:
                # XXX pygments py3 incompatibility workaround; fixed in tip
                lexer = guess_lexer(stringified_text)
        else:
            lexer = guess_lexer(stringified_text)
        highlighted_text = \
            highlight(
                stringified_text, lexer,
                TerminalFormatter(bg="dark")).split('\n')
        line_num = 1
        try:
            for line in highlighted_text:
                self._lines[
                    self._find_line_index(line_num)].update_highlight(line)
                line_num += 1
        except KeyError:
            # our conversions to strings sometimes adds a trailing line
            pass
def to_html(self):
    """Convert file document to html.

    Returns an HTML fragment: a header (path, author, modification date)
    followed by the Pygments-highlighted file contents.
    """
    # BUG FIX: the file handle was never closed; removed the dead
    # source-highlight command and the unused css_path local with it.
    with open(self.path, 'r') as fin:
        code = fin.read()
    file_path_name = os.path.relpath(self.path, get_current_directory())
    html_string = """<h1>File: {0}</h1>
<h3>Created By: {1}, Date: {2}</h3>
""".format(file_path_name, self.username, str(self.modifytime))
    # BUG FIX: the lexer was guessed from the hard-coded name 'test.py'
    # (always Python-biased); guess from the real path first and keep the
    # old behavior as the fallback.
    try:
        lexer = guess_lexer_for_filename(self.path, code, stripall=True)
    except Exception:
        lexer = guess_lexer_for_filename('test.py', code, stripall=True)
    linenos = 'inline' if self.args.linenumbers else False
    formatter = HtmlFormatter(style='bw', linenos=linenos)
    html_string += highlight(code, lexer, formatter)
    return html_string
def process_file_metrics(context, file_processors):
    """Main routine for metrics.

    Reads every input file (honoring .gitignore), lexes it with a guessed
    Pygments lexer, and accumulates per-file metrics keyed by *key*.
    """
    file_metrics = OrderedDict()

    # TODO make available the includes and excludes feature
    gitignore = []
    if os.path.isfile('.gitignore'):
        with open('.gitignore', 'r') as ifile:
            gitignore = ifile.read().splitlines()

    in_files = glob_files(context['root_dir'], context['in_file_names'],
                          gitignore=gitignore)

    # main loop
    for in_file, key in in_files:
        try:
            with open(in_file, 'rb') as ifile:
                code = ifile.read()
            # lookup lexicographical scanner to use for this run
            try:
                # encoding is 'guess', 'chardet' or 'utf-8'
                lex = guess_lexer_for_filename(in_file, code, encoding='guess')
            except Exception:
                # BUG FIX: narrowed from a bare `except:` which also
                # swallowed KeyboardInterrupt; still best-effort — skip.
                continue
            token_list = lex.get_tokens(code)  # parse code
            file_metrics[key] = OrderedDict()
            file_metrics[key].update(compute_file_metrics(file_processors,
                                                          lex.name, key,
                                                          token_list))
            file_metrics[key]['language'] = lex.name
        except IOError as e:
            sys.stderr.writelines(str(e) + " -- Skipping input file.\n\n")

    return file_metrics
def handleMatch(self, match):
    """Expand a markdown include directive into a highlighted <div>.

    Reads the referenced file (relative to the including document),
    filters it down to the requested portion, and syntax-highlights it.
    """
    params = match.group('params') or ''
    rel_include_path = match.group('path')
    source_dir = os.path.dirname(self.source_path)
    include_path = os.path.join(source_dir, rel_include_path)
    try:
        with open(include_path) as include_file:
            file_text = include_file.read()
    except IOError as e:
        raise IOError('Markdown file {0} tried to include file {1}, got '
                      '{2}'.format(self.source_path, rel_include_path,
                                   e.strerror))
    include_text = choose_include_text(file_text, params, self.source_path)
    if not include_text:
        raise TaskError('Markdown file {0} tried to include file {1} but '
                        'filtered out everything'.format(
                            self.source_path, rel_include_path))
    el = markdown.util.etree.Element('div')
    el.set('class', 'md-included-snippet')
    try:
        # Guess on the whole file even though only a slice is rendered.
        lexer = guess_lexer_for_filename(include_path, file_text)
    except ClassNotFound:
        # e.g., ClassNotFound: no lexer for filename u'BUILD' found
        if 'BUILD' in include_path:
            lexer = PythonLexer()
        else:
            lexer = TextLexer()  # the boring plain-text lexer
    html_snippet = highlight(include_text, lexer,
                             HtmlFormatter(cssclass='codehilite'))
    el.text = html_snippet
    return el
def show_submission_source_view(request, submission_id):
    """Display one submission's source with optional highlighting."""
    source_file = get_submission_source_file_or_error(request, submission_id)
    raw_source, decode_error = decode_str(source_file.read())
    filename = source_file.file.name

    # Assume the raw fallback until highlighting succeeds.
    formatted_source, formatted_source_css = raw_source, ''
    is_source_safe = False
    try:
        lexer = guess_lexer_for_filename(filename, raw_source)
        highlighter = HtmlFormatter(linenos=True, line_number_chars=3,
                                    cssclass='syntax-highlight')
        formatted_source = highlight(raw_source, lexer, highlighter)
        formatted_source_css = \
            HtmlFormatter().get_style_defs('.syntax-highlight')
        is_source_safe = True
    except ClassNotFound:
        formatted_source, formatted_source_css = raw_source, ''

    download_url = reverse('download_submission_source',
                           kwargs={'submission_id': submission_id})
    return TemplateResponse(request, 'programs/source.html', {
        'source': formatted_source,
        'css': formatted_source_css,
        'is_source_safe': is_source_safe,
        'download_url': download_url,
        'decode_error': decode_error,
        'submission_id': submission_id
    })
def _run_cycle(self):
    """Colorize and type-print every file under ``self.path``.

    Files with no guessable lexer are skipped. Raises
    PathNotFoundException when the path is missing or yields no files.
    """
    # validate path
    if not os.path.exists(self.path):
        raise exception.PathNotFoundException(self.path)

    # get the list of available files
    file_list = []
    if os.path.isdir(self.path):
        for dirpath, dirnames, filenames in os.walk(self.path):
            for f in filenames:
                file_list.append(os.path.join(dirpath, f))
    else:
        file_list.append(self.path)
    if not file_list:
        raise exception.PathNotFoundException(self.path)

    self.clear_screen()
    for path in file_list:
        # BUG FIX: the handle was opened with a bare open() and leaked
        # whenever the lexer guess failed (`continue` skipped f.close()).
        with open(path, 'r') as f:
            try:
                # Guess from the filename plus the first line only.
                lexer = guess_lexer_for_filename(f.name, f.readline())
            except ClassNotFound:
                continue
            h = highlight(f.read(), lexer, TerminalFormatter())
        self.typing_print(h)
        if self.cleanup_per_file:
            self.clear_screen()

def _message_no_path(self):
    """Usage hint shown when no path argument was supplied."""
    return 'Please specify a file or directory to colorize'
def highlight_blob(blob):
    """Return the blob's content as HTML with inline Pygments CSS."""
    # BUG FIX: read the stream exactly once — the original called
    # blob.data_stream.read() again inside highlight(), and a second read
    # of the already-consumed git data stream returns empty content.
    data = blob.data_stream.read()
    try:
        lexer = guess_lexer_for_filename(blob.name, data)
    except ClassNotFound:
        lexer = TextLexer()
    formater = HtmlFormatter(nobackground=True, linenos='table',
                             cssclass="source")
    return "<style>%s</style>%s" % (formater.get_style_defs('.source'),
                                    highlight(data, lexer, formater))
def pygmentize(self, tmpl, fctx, field):
    """hgweb hook: colorize *field* of the template for file context
    *fctx* with Pygments (Python 2 mercurial extension).
    """
    # append a <link ...> to the syntax highlighting css
    old_header = ''.join(tmpl('header'))
    if SYNTAX_CSS not in old_header:
        new_header = old_header + SYNTAX_CSS
        tmpl.cache['header'] = new_header
    text = fctx.data()
    if util.binary(text):
        # Binary files are left untouched.
        return
    style = self.config("web", "pygments_style", "colorful")
    # To get multi-line strings right, we can't format line-by-line
    try:
        lexer = guess_lexer_for_filename(fctx.path(), text,
                                         encoding=util._encoding)
    except (ClassNotFound, ValueError):
        try:
            lexer = guess_lexer(text, encoding=util._encoding)
        except (ClassNotFound, ValueError):
            lexer = TextLexer(encoding=util._encoding)
    formatter = HtmlFormatter(style=style, encoding=util._encoding)
    colorized = highlight(text, lexer, formatter)
    # strip wrapping div
    colorized = colorized[:colorized.find('\n</pre>')]
    colorized = colorized[colorized.find('<pre>')+5:]
    coloriter = iter(colorized.splitlines())
    # Each 'line|colorize' filter call consumes the next pre-colorized
    # line from the iterator (Py2 .next()).
    filters['colorize'] = lambda x: coloriter.next()
    oldl = tmpl.cache[field]
    newl = oldl.replace('line|escape', 'line|colorize')
    tmpl.cache[field] = newl
def render_highlight_code(text, path, **kwargs):
    """Return *text* highlighted as HTML, choosing the lexer from *path*."""
    try:
        if path.endswith(('.html', '.mako')):
            chosen = MakoHtmlLexer(encoding='utf-8')
        elif path.endswith('.ptl'):
            chosen = PythonLexer(encoding='utf-8')
        elif path.endswith('.md'):
            chosen = RstLexer(encoding='utf-8')
        else:
            if path.endswith(IGNORE_FILE_EXTS):
                # Binary-looking files get a placeholder body instead.
                text = 'Hmm.., this is binary file.'
            chosen = guess_lexer_for_filename(path, text)
            chosen.encoding = 'utf-8'
            chosen.stripnl = False
    except ClassNotFound:
        # No lexer matched: render as plain text.
        chosen = TextLexer(encoding='utf-8')
    return highlight(text, chosen,
                     CodeHtmlFormatter(linenos='inline',
                                       lineanchors='L',
                                       anchorlinenos=True,
                                       encoding='utf-8',
                                       **kwargs))
def _detect_lang_name(self, subject, paste_content):
    """Guess a paste's language and return the primary Pygments alias
    (e.g. 'python'), or '' when even the plain-text lexer fails.

    Tries, in order: filename-based guess from *subject* (when it looks
    like a filename with a recognized extension), content-based guess for
    reasonably long pastes, then the plain 'text' lexer.
    """
    lexer = None
    if '.' in subject:
        # Strip a trailing "(...)" annotation from the subject, if any.
        if subject[-1] == ')':
            if ' (' in subject:
                name = subject.split(' (')[0]
            elif '(' in subject:
                name = subject.split('(')[0]
            else:
                name = subject
        else:
            name = subject
        if name.split('.')[-1] in self.recognized_extenstions:
            try:
                lexer = guess_lexer_for_filename(name, paste_content,
                                                 encoding='utf-8')
            except (ClassNotFound, ImportError):
                pass
    # Content-based guessing only for pastes long enough to be useful.
    if lexer is None and len(paste_content) >= 20:
        try:
            lexer = guess_lexer(paste_content, encoding='utf-8')
        except (ClassNotFound, ImportError):
            pass
    if lexer is None:
        try:
            lexer = get_lexer_by_name('text', encoding='utf-8')
        except (ClassNotFound, ImportError) as e:
            self.log(self.logger.WARNING, '%s: %s' % (subject, e))
            return ''
    return lexer.aliases[0]
def get_lines(finding='', path=''):
    """Read a (decompiled) source file and return its lines, highlighted
    per line when a lexer can be guessed. Best effort: never raises.
    """
    formatter = HtmlFormatter(linenos=False, cssclass="source")
    if (finding):
        # Derive the on-disk path from the finding's APK decompile dir.
        APK_PATH = settings.BASE_DIR + finding.scan.apk.url
        DECOMPILE_PATH = os.path.splitext(APK_PATH)[0]
        path = DECOMPILE_PATH + finding.path
    lines = []
    try:
        extension = os.path.splitext(path)[1]
        # .html/.js files are returned raw.
        # NOTE(review): assumption — confirm why these are excluded.
        if (not extension == '.html' and not extension == '.js'):
            with open(path, encoding="utf-8") as f:
                for i, line in enumerate(f):
                    try:
                        if (i == 1):
                            # The lexer is only guessed at the second line;
                            # before that `lexer` is unbound, so the
                            # highlight() call below raises NameError and
                            # the except appends the raw line instead.
                            lexer = guess_lexer_for_filename(path, line)
                        highlighted = highlight(line, lexer, formatter)
                        lines.append(highlighted)
                    except Exception as e:
                        if (line):
                            lines.append(line)
    except Exception as e:
        # Highlighting path failed entirely: fall back to raw lines.
        try:
            with open(path, encoding="utf-8") as f:
                for i, line in enumerate(f):
                    lines.append(line)
        except Exception as e:
            logger.error(e)
    return lines
def filename(self, value):
    "Set the file being displayed by the view"
    # Only reload when the filename actually changes.
    if self._filename != value:
        self.code.delete('1.0', END)
        with open(value) as code:
            all_content = code.read()
            if self.lexer:
                # A lexer was configured explicitly; use it.
                lexer = self.lexer
            else:
                lexer = guess_lexer_for_filename(value, all_content,
                                                 stripnl=False)
            # Insert each token's text tagged with its token type so the
            # Text widget can style it.
            for token, content in lex(all_content, lexer):
                self.code.insert(END, content, str(token))
        # Now update the text for the linenumbers
        end_index = self.code.index(END)
        line_count = int(end_index.split('.')[0])
        lineNumbers = '\n'.join('%5d' % i for i in range(1, line_count))
        self.lines.config(state=NORMAL)
        self.lines.delete('1.0', END)
        self.lines.insert('1.0', lineNumbers)
        self.lines.config(state=DISABLED)
        # Store the new filename, and clear any current line
        self._filename = value
        self._line = None
def get_lexer(
        lexer=None,
        alias=None,
        filename=None,
        mimetype=None,
        code=None,
        **options):
    """Resolve a Pygments lexer from whichever hint is provided.

    Precedence: an explicit ``lexer`` object, then ``alias`` (the lexer's
    `short name <http://pygments.org/docs/lexers/>`_), then ``filename``
    (with ``code`` enabling a content-assisted guess), then ``mimetype``,
    then a pure content guess from ``code``. Returns None when no hint is
    given. Extra keyword ``options`` are passed to the lexer; ``tabsize``
    defaults to 4.
    """
    options.setdefault("tabsize", 4)
    if lexer:
        return lexer
    if alias:
        return get_lexer_by_name(alias, **options)
    if filename:
        if code:
            return guess_lexer_for_filename(filename, code, **options)
        return get_lexer_for_filename(filename, **options)
    if mimetype:
        return get_lexer_for_mimetype(mimetype, **options)
    if code:
        return guess_lexer(code, **options)
def _generate_preview_html(self, data):
    """Returns the first few truncated lines of the text file."""
    from reviewboard.diffviewer.chunk_generator import \
        NoWrapperHtmlFormatter

    charset = self.mimetype[2].get('charset', 'ascii')
    try:
        text = data.decode(charset)
    except UnicodeDecodeError:
        logging.error(
            'Could not decode text file attachment %s using '
            'charset "%s"', self.attachment.pk, charset)
        text = data.decode('utf-8', 'replace')

    try:
        lexer = guess_lexer_for_filename(self.attachment.filename, text)
    except ClassNotFound:
        lexer = TextLexer()

    rendered = highlight(text, lexer, NoWrapperHtmlFormatter())
    cropped = rendered.splitlines()[:self.TEXT_CROP_NUM_HEIGHT]
    return ''.join('<pre>%s</pre>' % line for line in cropped)
def _generate_preview_html(self, data):
    """Return the first few truncated lines of the text file."""
    from reviewboard.diffviewer.chunk_generator import \
        NoWrapperHtmlFormatter

    # Charset comes from the mimetype parameters, ASCII by default.
    charset = self.mimetype[2].get('charset', 'ascii')

    try:
        text = data.decode(charset)
    except UnicodeDecodeError:
        logging.error('Could not decode text file attachment %s using '
                      'charset "%s"', self.attachment.pk, charset)
        text = data.decode('utf-8', 'replace')

    # Pick a lexer by filename, defaulting to plain text.
    try:
        source_lexer = guess_lexer_for_filename(self.attachment.filename,
                                                text)
    except ClassNotFound:
        source_lexer = TextLexer()

    rendered = highlight(text, source_lexer, NoWrapperHtmlFormatter())
    cropped = rendered.splitlines()[:self.TEXT_CROP_NUM_HEIGHT]
    return ''.join('<pre>%s</pre>' % line for line in cropped)
def show(name):
    """Render the file ``name`` from ./f: markdown is converted to HTML with
    fenced code blocks highlighted; anything else is syntax-highlighted."""
    # NOTE(review): stripping ".." is a weak traversal guard, and the second
    # replace swaps a space for an identical space (possibly meant to strip a
    # different character) — TODO confirm original intent.
    name = name.replace("..", "", -1).replace(" ", " ", -1)
    file_path = os.path.join("./f", name)
    # Security: `name` is untrusted; reject anything resolving outside ./f.
    base_dir = os.path.abspath("./f")
    if not os.path.abspath(file_path).startswith(base_dir + os.sep):
        abort(404, u"不存在这个文件哈")
    # BUG FIX: the original tested `not (exists or isfile)`, which is just
    # `not exists` (isfile implies exists), so directories slipped through
    # and open() crashed with a 500.  Require a regular file.
    if not os.path.isfile(file_path):
        abort(404, u"不存在这个文件哈")
    # BUG FIX: close the file handle (was leaked via open().read()).
    with open(file_path) as fobj:
        content = fobj.read().decode("utf8")
    if name.endswith(".md") or name.endswith(".markdown"):
        html = misaka.html(
            content,
            extensions=misaka.EXT_AUTOLINK | misaka.EXT_LAX_HTML_BLOCKS |
            misaka.EXT_SPACE_HEADERS | misaka.EXT_SUPERSCRIPT |
            misaka.EXT_FENCED_CODE | misaka.EXT_NO_INTRA_EMPHASIS |
            misaka.EXT_STRIKETHROUGH | misaka.EXT_TABLES)

        def _r(m):
            # Highlight a fenced code block when Pygments knows its
            # language; otherwise return the block unchanged.
            try:
                lexer_name = m.group(1)
                code = m.group(2)
                lexer = get_lexer_by_name(lexer_name)
                code = HTMLParser.HTMLParser().unescape(code)
                return highlight(code, lexer, HtmlFormatter())
            except pygments.util.ClassNotFound:
                return m.group()

        p = re.compile(
            r'''<pre><code class="([0-9a-zA-Z._-]+)">(.+?)</code></pre>''',
            re.DOTALL)
        html = p.sub(lambda m: _r(m), html)
    else:
        try:
            lexer = guess_lexer_for_filename(file_path, content)
        except pygments.util.ClassNotFound:
            lexer = get_lexer_by_name("python")
        html = highlight(content, lexer, HtmlFormatter())
    return render_template("gfm.html", **locals())
def text_content(self):
    """Return the file rendered as HTML — highlighted source, or markdown/
    textile markup — or None when the file is too large, an image, or
    cannot be decoded/recognized."""
    if self.size <= MAX_TEXTFILE_SIZE and not self.is_image:
        possible_markdown = self.extension in (MARKDOWN_FILE_EXTENSIONS +
                                               TEXTILE_FILE_EXTENSIONS)
        # Markup files are lexed under a fake .txt name so the TextLexer
        # check below can route them to markdown/textile rendering.
        fake_extension = self.extension if not possible_markdown else u'txt'
        fake_filename = u'.'.join((self.filename, fake_extension,))

        style = styles.get_style_by_name('friendly')
        formatter = formatters.HtmlFormatter(style=style)
        style = formatter.get_style_defs()

        # BUG FIX: the HTTP response object was leaked when read() raised;
        # close it in a finally block.
        f = urlopen(self.file_obj.cdn_url)
        try:
            data = f.read()
        finally:
            f.close()

        try:
            data = data.decode('utf-8')
            lexer = lexers.guess_lexer_for_filename(fake_filename, data)
        except (ClassNotFound, UnicodeDecodeError):
            # Binary/undecodable or unrecognized content: no preview.
            return None

        if isinstance(lexer, lexers.TextLexer) and possible_markdown:
            format_string = u'<div class="%s">%s</div>'
            if self.extension in MARKDOWN_FILE_EXTENSIONS:
                data = format_string % ('markdown', markdown(data))
            if self.extension in TEXTILE_FILE_EXTENSIONS:
                data = format_string % ('textile', textile(data))
        else:
            data = u'<style>%s</style>\n%s' % (
                style, highlight(data, lexer, formatter))
        return data
def __init__(self, file_content, file_name=None, language=None, private=False):
    """Create a paste record; when no language is given, guess one with
    Pygments from the file name and/or content.

    :param file_content: raw text of the paste.
    :param file_name: optional original filename, used as a lexer hint.
    :param language: optional explicit language alias; skips guessing.
    :param private: whether the paste is private.
    """
    self.set_file_content(file_content)
    self.file_name = file_name
    self.language = language
    self.private = private
    self.pub_date = datetime.utcnow()
    # guess language, if needed
    if self.language is None:
        if self.file_name is None:
            # No filename hint: guess purely from the content.
            lexer = guess_lexer(self.file_content)
        else:
            try:
                lexer = guess_lexer_for_filename(self.file_name,
                                                 self.file_content)
            except:  # NOTE(review): bare except also hides unrelated errors
                lexer = guess_lexer(self.file_content)
            # verify if lexer is ok for filename
            found = False
            for pattern in lexer.filenames:
                if fnmatch(self.file_name, pattern):
                    found = True
                    break
            else:
                # NOTE(review): this for-else runs whenever the loop ends
                # without break — including "no pattern matched" — so
                # `found` is True on every path and the TextLexer fallback
                # below looks unreachable.  Presumably only the empty
                # `lexer.filenames` case was meant; verify against callers.
                found = True
            if not found:
                # Fall back to plain text.  TextLexer is assigned as a
                # class, not an instance; only `.aliases` is read below,
                # which works on the class object.
                lexer = TextLexer
        self.language = lexer.aliases[0]
def highlightedCode(submission):
    """Return the submission's source rendered as HTML with inline styles
    and line numbers, falling back to plain text when no lexer matches."""
    data = submission.source.read()
    formatter = HtmlFormatter(linenos=True, noclasses=True)
    try:
        lexer = guess_lexer_for_filename(submission.source.path, data)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt.  Pygments signals "no lexer" with
        # pygments.util.ClassNotFound; Exception is the narrowest catch
        # available here without adding a new import.
        lexer = TextLexer()
    return highlight(data, lexer, formatter)
def blob(request, repo, commit):
    """Serve a repository blob, syntax-highlighted, as an HTTP response."""
    path = request.GET.get('file', '')
    target = get_blob(repo, commit, path)
    # Lexer is chosen from the blob's basename, using its data as a hint.
    chosen = guess_lexer_for_filename(target.basename, target.data)
    formatter = HtmlFormatter(cssclass="pygment_highlight", linenos='inline')
    return HttpResponse(highlight(target.data, chosen, formatter))
def format_html(filename, source):
    """Format the source text given as HTML with Pygments."""
    try:
        chosen = guess_lexer_for_filename(filename, source, stripnl=False)
    except ClassNotFound:
        # Unknown filename/content: render as plain text.
        chosen = TextLexer(stripnl=False)
    return highlight(source, chosen, HtmlFormatter(nowrap=True))
def format_HTML(filepath):
    """Read *filepath* and return it highlighted as an HTML line-number
    table using the monokai style."""
    with open(filepath, 'r') as handle:
        source = handle.read()
    chosen = guess_lexer_for_filename(filepath, source)
    html_formatter = get_formatter_by_name('html', linenos='table',
                                           style='monokai')
    return highlight(source, chosen, html_formatter)
def lexer(self):
    """Return a Pygments lexer: the explicit ``self.format`` when set,
    otherwise one guessed from the filename/content (plain text when the
    guess fails)."""
    if self.format is not None:
        return get_lexer_by_name(self.format)
    try:
        return guess_lexer_for_filename(self.filename, self.content)
    except ClassNotFound:
        return TextLexer()
def include_source(self, path):
    """Read the file at *path* and emit it, syntax-highlighted with inline
    styles, wrapped in <pre> tags."""
    self.log('invoke_source: %s' % path)
    self.write('<pre>\n')
    with open(path) as handle:
        source = handle.read()
    markup = highlight(source,
                       guess_lexer_for_filename(path, source),
                       HtmlFormatter(noclasses=True))
    self.write(markup)
    self.write('</pre>\n')
def render_pygments(filename, data):
    """Highlight *data* with a lexer guessed from *filename*.

    Returns an ``(html, meta)`` tuple where ``meta`` carries the lexer's
    language name; plain text is used when no lexer matches.
    """
    from pygments import lexers, formatters, highlight
    from pygments.util import ClassNotFound

    try:
        lexer = lexers.guess_lexer_for_filename(filename, data)
    except ClassNotFound:
        # BUG FIX: was a bare `except:` that swallowed every exception,
        # including SystemExit/KeyboardInterrupt.  Pygments signals "no
        # lexer found" with ClassNotFound, so catch exactly that.
        lexer = lexers.TextLexer()
    formatter = formatters.HtmlFormatter(linenos=False)
    return highlight(data, lexer, formatter), {'language': lexer.name}
def highlight_file(style, filename):
    """Highlight *filename* (lexer guessed from its name and contents) and
    return the rendered image as a PIL Image, set in Ubuntu Mono."""
    with open(filename) as handle:
        code_txt = handle.read()
    chosen = guess_lexer_for_filename(filename, code_txt)
    renderer = JpgImageFormatter(font_name="Ubuntu Mono", style=style)
    return Image.open(io.BytesIO(highlight(code_txt, chosen, renderer)))
def highlight(filename, code):
    """Syntax-highlight *code*: guess a lexer from its content first, then
    from *filename*; return the code unchanged when neither guess works."""
    try:
        chosen = guess_lexer(code)
    except ClassNotFound:
        try:
            chosen = guess_lexer_for_filename(filename, code)
        except ClassNotFound:
            # No lexer at all: pass the text through untouched.
            return code
    return highlight_(code, chosen, get_output_type())
def formatPygmentsCodeSnippet(data: dict, html, filepath, lineno):
    """Render the code snippet described by *data* as syntax-highlighted
    HTML with line numbers, an optional name heading, and a GitHub/GitLab
    link when the file has a git remote.

    Returns a ``(html_fragment, resolved_file_path)`` tuple.
    NOTE(review): the ``html`` parameter is unused in this function.
    """
    startline = data.get('startline', None)
    endline = data.get('endline', None)
    name = data.get('name', None)
    file = full_filepath(data['file'], filepath)
    # Get the GitHub/GitLab links for this file
    git_service, git_link = get_git_remote_link(file, startline, endline)
    # Select the lines between startline and endline
    filecontents, ctrstart = clip_file_contents(file, startline, endline)
    # Select the right lexer based on the filename and contents
    if 'lexer' in data:
        lexer = find_lexer_class_by_name(data['lexer'])()
    else:
        lex_filename = path.basename(file)
        if lex_filename == 'CMakeLists.txt':
            # Append .cmake so the filename-based guess below selects the
            # CMake lexer for plain CMakeLists.txt files.
            lex_filename += '.cmake'
        lexer = guess_lexer_for_filename(lex_filename, filecontents)
    # Select the right formatter based on the lexer
    cssclass = 'pygments{}'.format(lineno)
    if lexer.name == "Arduino" and not "style" in data:
        formatter = HtmlFormatter(cssclass=cssclass, style='arduino')
    else:
        style = data.get('style', VSCodeStyle)
        formatter = HtmlFormatter(cssclass=cssclass, style=style)
    # Extract the CSS from the formatter, and set the line number offset
    css = formatter.get_style_defs('.' + cssclass)
    css += '\n.pygments{} pre.snippet{} {{ counter-reset: line {}; }}' \
        .format(lineno, lineno, ctrstart)
    # Syntax highlight the code
    htmlc = highlight(filecontents, lexer, formatter)
    # Set the right classes
    htmlc = htmlc.replace('<pre>',
                          '<pre class="lineNumbers snippet{}">'.format(lineno))
    htmlc = htmlc.replace('\n</pre></div>', '</pre></div>')
    # Construct the final HTML code
    datastr = ''
    if name is not None:
        datastr += '<h4 class="snippet-name">' + name + '</h4>\n'
    datastr += '<div class="codesnippet"><style>' + css + '</style>\n'
    if git_link is not None and git_service == 'github':
        datastr += '<a href="' + git_link + '" title="Open on GitHub">'
        datastr += '<img class="github-mark" src="/Images/GitHub-Mark.svg"/>'
        datastr += '</a>\n'
    if git_link is not None and git_service == 'gitlab':
        datastr += '<a href="' + git_link + '" title="Open on GitLab">'
        datastr += '<img class="gitlab-mark" src="/Images/GitLab-Mark.svg"/>'
        datastr += '</a>\n'
    datastr += htmlc + '</div>'
    return datastr, file
def find_language(filename):
    """
    Find the language used in `filename`.

    :param filename: The name of the file.
    :return: The language used.
    """
    # The file is never opened: a fixed placeholder stands in for its
    # contents, so the guess is driven by the filename alone.
    placeholder = 'Error, no file found'
    return guess_lexer_for_filename(filename, placeholder).name