def __guessLexer(self):
    """ Guesses a pygments lexer """
    self.__lexerGuessed = True
    if self.__pygmentsName:
        lexerClass = find_lexer_class(self.__pygmentsName)
        if lexerClass is not None:
            self.__language = "Guessed: " + lexerClass.name
            return lexerClass()
    else:
        # Unfortunately, guessing a lexer by text leads to unpredictable
        # behaviour in some cases. E.g. national non-english characters
        # are mis-displayed or even a core dump is generated. So the part
        # of guessing by text has been removed.
        if self.editor is not None:
            if self.__fileName != "":
                filename = self.__fileName
            else:
                filename = self.editor.getFileName()
            try:
                # get_lexer_for_filename() returns an already instantiated
                # lexer, so it is returned as-is (no trailing parentheses).
                lexer = get_lexer_for_filename(filename)
                self.__language = "Guessed: " + lexer.name
                return lexer
            except ClassNotFound:
                pass
    # Last resort - text only
    lexerClass = find_lexer_class("Text only")
    if lexerClass is not None:
        self.__language = "Guessed: " + lexerClass.name
        return lexerClass()
    return None
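The snippet above hinges on a pygments API asymmetry worth spelling out: find_lexer_class() returns a lexer class or None, while get_lexer_for_filename() returns an already-instantiated lexer or raises ClassNotFound. A minimal standalone sketch (plain pygments, nothing from the class above):

from pygments.lexers import find_lexer_class, get_lexer_for_filename
from pygments.util import ClassNotFound

# find_lexer_class() looks up by the lexer's human-readable name and
# returns a class (or None on a miss), so it must be instantiated.
cls = find_lexer_class("Python")
lexer = cls() if cls is not None else None

# get_lexer_for_filename() returns an instance directly and signals a
# miss by raising ClassNotFound instead of returning None.
try:
    lexer = get_lexer_for_filename("example.py")
except ClassNotFound:
    lexer = None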
def serialize(self, highlight_code=False):
    """
    Convert the codelet into a dictionary that can be sent as JSON.

    :param highlight_code: Whether to return code as pygments-highlighted
        HTML or as plain source.
    :type highlight_code: bool

    :return: The codelet as a dictionary.
    :rtype: dict
    """
    lang = LANGS[self.language]
    code = self.code
    if highlight_code:
        # find_lexer_class() returns None on a miss, so check before
        # instantiating; `find_lexer_class(lang)() or ...` would raise
        # TypeError (calling None) before the fallback ever applied.
        lexer_class = find_lexer_class(lang)
        lexer = lexer_class() if lexer_class else get_lexer_by_name("text")
        symbols = reduce(concat, self.symbols.values(), [])
        lines = reduce(concat,
                       [[loc[0] for loc in sym[1] + sym[2]] for sym in symbols],
                       [])
        formatter = HtmlFormatter(linenos=True, hl_lines=lines)
        code = highlight(code, lexer, formatter)

    return {
        "name": self.name,
        "code": code,
        "lang": lang,
        "authors": self.authors,
        "url": self.url,
        "created": self.date_created.isoformat(),
        "modified": self.date_modified.isoformat(),
        "origin": self.origin
    }
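A self-contained sketch of the highlighting call used above; the symbol locations are hypothetical stand-ins for the codelet's real `symbols` data, but `HtmlFormatter(linenos=..., hl_lines=...)` and `highlight()` are standard pygments API:

from functools import reduce
from operator import concat

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name

code = "def f(x):\n    return x + 1\n"
# Hypothetical symbol locations: lists of (line, column) pairs, mirroring
# the sym[1] + sym[2] structure the method above flattens with reduce().
symbol_locs = [[(1, 4)], [(2, 11)]]
lines = reduce(concat, [[loc[0] for loc in sym] for sym in symbol_locs], [])

formatter = HtmlFormatter(linenos=True, hl_lines=lines)
html = highlight(code, get_lexer_by_name("python"), formatter)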
def test_get_lexers():
    # test that the lexers functions work
    def verify(func, args):
        x = func(opt='val', *args)
        assert isinstance(x, lexers.PythonLexer)
        assert x.options["opt"] == "val"

    for func, args in [(lexers.get_lexer_by_name, ("python",)),
                       (lexers.get_lexer_for_filename, ("test.py",)),
                       (lexers.get_lexer_for_mimetype, ("text/x-python",)),
                       (lexers.guess_lexer, ("#!/usr/bin/python -O\nprint",)),
                       (lexers.guess_lexer_for_filename,
                        ("a.py", "<%= @foo %>"))]:
        yield verify, func, args

    for cls, (_, lname, aliases, _, mimetypes) in lexers.LEXERS.items():
        assert cls == lexers.find_lexer_class(lname).__name__
        for alias in aliases:
            assert cls == lexers.get_lexer_by_name(alias).__class__.__name__
        for mimetype in mimetypes:
            assert cls == lexers.get_lexer_for_mimetype(mimetype).__class__.__name__

    try:
        lexers.get_lexer_by_name(None)
    except ClassNotFound:
        pass
    else:
        raise Exception
def __guessLexer(self, text):
    """
    Private method to guess a pygments lexer.

    @param text text to base guessing on (string)
    @return reference to the guessed lexer (pygments.lexer)
    """
    lexer = None
    if self.__pygmentsName:
        lexerClass = find_lexer_class(self.__pygmentsName)
        if lexerClass is not None:
            lexer = lexerClass()
    elif text:
        # step 1: guess based on filename and text
        if self.editor is not None:
            fn = self.editor.getFileName()
            if fn:
                try:
                    lexer = guess_lexer_for_filename(fn, text)
                except ClassNotFound:
                    pass
        # step 2: guess on text only
        if lexer is None:
            try:
                lexer = guess_lexer(text)
            except ClassNotFound:
                pass
    return lexer
def lexer(self):
    """
    Internal: Get the lexer of the blob.

    Returns a Lexer.
    """
    return (self.language.lexer if self.language
            else lexers.find_lexer_class('Text only'))
def view(paste_id):
    model = Paste.query.get_or_404(paste_id)
    output = request.args.get('output', None)
    if output == 'raw':
        resp = make_response(model.content, 200)
        resp.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return resp
    user = getUserObject()
    form = CommentForm(request.form, csrf_enabled=False)
    if request.method == 'POST':
        if user:
            if form.validate():
                content, users = get_usernames_from_comment(form.content.data)
                comment = PasteComment(user.id, model.id, content)
                model.comment_num = model.comment_num + 1
                if user.id != model.user_id:
                    if user not in model.followers:
                        model.followers.append(user)
                db.session.add(comment)
                # Notify everyone following this paste.
                mt = MessageTemplate.query.filter_by(used_for='new_comment').first()
                if mt:
                    if model.followers:
                        for to_user in model.followers:
                            if to_user.id != user.id:
                                message = Message(
                                    user.id, to_user.id,
                                    mt.title % model.title,
                                    mt.content % (model.title,
                                                  url_for('pasteview.view',
                                                          paste_id=paste_id)))
                                db.session.add(message)
                # Notify users @-mentioned in the comment body.
                mt = MessageTemplate.query.filter_by(used_for='new_comment_has_user').first()
                if mt:
                    if users:
                        for to_user in users:
                            if to_user.id != user.id:
                                message = Message(
                                    user.id, to_user.id, mt.title,
                                    mt.content % url_for('pasteview.view',
                                                         paste_id=paste_id))
                                db.session.add(message)
                return redirect(url_for('pasteview.view', paste_id=paste_id))
    updateViewTimes(model, paste_id)
    lexer = find_lexer_class(model.syntax.name)
    formatter = HtmlFormatter(linenos='table', cssclass="source")
    g.code = highlight(model.content, lexer(stripall=True), formatter)
    g.model = model
    g.user = user
    g.form = form
    g.top_users = User.query.order_by('-paste_num')[:PAGE_SIZE]
    g.top_tags = Tag.query.order_by('-times')[:PAGE_SIZE]
    g.syntax_theme = request.args.get('css',
                                      app.config.get('DEFAULT_SYNTAX_CSS_FILE'))
    g.css_file = "/static/css/themes/%s.css" % g.syntax_theme
    #g.syntax_themes = SyntaxTheme.get_all_syntax_themes()
    return render('pasteview/view.html')
def __init__(self, attributes={}): # name is required if "name" not in attributes.keys(): raise KeyError("missing name") self.name = attributes["name"] # Set type self.type = attributes.get("type") if self.type and self.type not in self.TYPES: raise ValueError("invalid type: %s" % self.type) self.color = attributes["color"] # Set aliases aliases = attributes.get("aliases") or [] self.aliases = [self.default_alias_name] + aliases # Lookup Lexer object lexer = attributes.get("lexer") or self.name self.lexer = lexers.find_lexer_class(lexer) if not self.lexer: raise TypeError("%s is missing lexer" % self.name) self.ace_mode = attributes["ace_mode"] self.wrap = attributes.get("wrap") or False # Set legacy search term self.search_term = attributes.get("search_term") or self.default_alias_name # Set extensions or default to []. self.extensions = attributes.get("extensions") or [] self.filenames = attributes.get("filenames") or [] self.primary_extension = attributes.get("primary_extension") if not self.primary_extension: raise KeyError("%s is missing primary extension" % self.name) # Prepend primary extension unless its already included if self.primary_extension not in self.extensions: self.extensions = [self.primary_extension] + self.extensions # Set popular, and searchable flags self.popular = attributes["popular"] if attributes.has_key("popular") else False self.searchable = attributes["searchable"] if attributes.has_key("searchable") else True # If group name is set, save the name so we can lazy load it later group_name = attributes.get("group_name") if group_name: self._group = None self.group_name = group_name else: self._group = self
def __init__(self, attributes={}):
    # name is required
    if 'name' not in attributes:
        raise KeyError('missing name')
    self.name = attributes['name']

    # Set type
    self.type = attributes.get('type')
    if self.type and self.type not in self.TYPES:
        raise ValueError('invalid type: %s' % self.type)

    self.color = attributes['color']

    # Set aliases
    aliases = attributes.get('aliases', [])
    self.aliases = [self.default_alias_name] + aliases

    # Lookup Lexer object
    lexer = attributes.get('lexer') or self.name
    self.lexer = lexers.find_lexer_class(lexer)
    if not self.lexer:
        raise TypeError('%s is missing lexer' % self.name)

    self.ace_mode = attributes['ace_mode']
    self.wrap = attributes.get('wrap') or False

    # Set legacy search term
    self.search_term = attributes.get('search_term') or self.default_alias_name

    # Set extensions or default to [].
    self.extensions = attributes.get('extensions', [])
    self.filenames = attributes.get('filenames', [])

    self.primary_extension = attributes.get('primary_extension')
    if not self.primary_extension:
        raise KeyError('%s is missing primary extension' % self.name)

    # Prepend primary extension unless it's already included
    if self.primary_extension not in self.extensions:
        self.extensions = [self.primary_extension] + self.extensions

    # Set popular, and searchable flags
    self.popular = attributes.get('popular', False)
    self.searchable = attributes.get('searchable', True)

    # If group name is set, save the name so we can lazy load it later
    group_name = attributes.get('group_name')
    if group_name:
        self._group = None
        self.group_name = group_name
    else:
        self._group = self
def get_pygments_lexer(name):
    name = name.lower()
    if name == 'ipython2':
        from IPython.lib.lexers import IPythonLexer
        return IPythonLexer
    elif name == 'ipython3':
        from IPython.lib.lexers import IPython3Lexer
        return IPython3Lexer
    else:
        for module_name, cls_name, aliases, _, _ in LEXERS.values():
            if name in aliases:
                return find_lexer_class(cls_name)

        warn("No lexer found for language %r. Treating as plain text." % name)
        from pygments.lexers.special import TextLexer
        return TextLexer
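A hedged usage sketch of the helper above; the imports mirror what its module context presumably provides (`warn`, `LEXERS`, `find_lexer_class` — all assumptions about that context, though the pygments names themselves are real):

from warnings import warn                    # assumption: any warn() suffices
from pygments.lexers import LEXERS, find_lexer_class

# The second field of each LEXERS entry is the lexer's long name, which is
# exactly what find_lexer_class() matches on; the aliases are field three.
lexer_class = get_pygments_lexer('python')   # resolved via the 'python' alias
lexer = lexer_class()                        # a class is returned, not an instance

fallback = get_pygments_lexer('no-such-language')  # warns, returns TextLexer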
def _print_help(what, name):
    try:
        if what == 'lexer':
            cls = find_lexer_class(name)
            print "Help on the %s lexer:" % cls.name
            print dedent(cls.__doc__)
        elif what == 'formatter':
            cls = find_formatter_class(name)
            print "Help on the %s formatter:" % cls.name
            print dedent(cls.__doc__)
        elif what == 'filter':
            cls = find_filter_class(name)
            print "Help on the %s filter:" % name
            print dedent(cls.__doc__)
    except AttributeError:
        print >>sys.stderr, "%s not found!" % what
def _print_help(what, name): try: if what == "lexer": cls = find_lexer_class(name) print("Help on the %s lexer:" % cls.name) print(dedent(cls.__doc__)) elif what == "formatter": cls = find_formatter_class(name) print("Help on the %s formatter:" % cls.name) print(dedent(cls.__doc__)) elif what == "filter": cls = find_filter_class(name) print("Help on the %s filter:" % name) print(dedent(cls.__doc__)) except AttributeError: print("%s not found!" % what, file=sys.stderr)
def prepare(self):
    super(Pygmentize, self).prepare()

    lexer = None
    # Try very hard to get any lexer
    if self.filename:
        try:
            lexer = get_lexer_for_filename(self.filename,
                                           self.source or None,
                                           **self.lexer_args)
        except ClassNotFound:
            pass
    if self.lexer_name:
        lexer = find_lexer_class(self.lexer_name)
        if lexer:
            lexer = lexer(**self.lexer_args)
        try:
            lexer = get_lexer_by_name(self.lexer_name, **self.lexer_args)
        except ClassNotFound:
            pass
        try:
            lexer = get_lexer_for_mimetype(self.lexer_name, **self.lexer_args)
        except ClassNotFound:
            pass
    if not lexer:
        # Fallback, so that we at least have line numbering
        lexer = get_lexer_by_name('text', **self.lexer_args)

    formatter_args = dict(self.formatter_args)
    if self.name:
        for k in 'lineanchors', 'linespans':
            if k in formatter_args:
                formatter_args[k] = self.name + '_' + formatter_args[k]
    formatter_args['style'] = self.style
    formatter_args['noclasses'] = self.noclasses
    formatter = self.formatter_class(**formatter_args)

    self.source = highlight(self.source, lexer, formatter)
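The "try very hard" cascade above can be distilled into a standalone helper; `resolve_lexer` and its signature are illustrative, not part of the widget, but every lookup call is standard pygments API:

from pygments.lexers import (get_lexer_by_name, get_lexer_for_filename,
                             get_lexer_for_mimetype)
from pygments.util import ClassNotFound

def resolve_lexer(filename=None, name=None, **lexer_args):
    """Illustrative helper: filename first, then name/mimetype, then text."""
    if filename:
        try:
            return get_lexer_for_filename(filename, **lexer_args)
        except ClassNotFound:
            pass
    if name:
        for lookup in (get_lexer_by_name, get_lexer_for_mimetype):
            try:
                return lookup(name, **lexer_args)
            except ClassNotFound:
                pass
    # Fallback, so that we at least get line numbering over plain text
    return get_lexer_by_name('text', **lexer_args)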
def main():
    uses = {}
    for name, aliases, filenames, mimetypes in get_all_lexers():
        cls = find_lexer_class(name)
        for f in filenames:
            if f not in uses:
                uses[f] = []
            uses[f].append(cls)

    ret = 0
    for k, v in uses.iteritems():
        if len(v) > 1:
            #print "Multiple for", k, v
            for i in v:
                if i.analyse_text is None:
                    print i, "has a None analyse_text"
                    ret |= 1
                elif Lexer.analyse_text.__doc__ == i.analyse_text.__doc__:
                    print i, "needs analyse_text, multiple lexers for", k
                    ret |= 2
    return ret
def test_lexer(self):
    assert find_lexer_class('ActionScript 3') == Language.find_by_name('ActionScript').lexer
    assert find_lexer_class('ActionScript 3') == Language.find_by_name('ActionScript').lexer
    assert find_lexer_class('Bash') == Language.find_by_name('Gentoo Ebuild').lexer
    assert find_lexer_class('Bash') == Language.find_by_name('Gentoo Eclass').lexer
    assert find_lexer_class('Bash') == Language.find_by_name('Shell').lexer
    assert find_lexer_class('C') == Language.find_by_name('OpenCL').lexer
    assert find_lexer_class('C') == Language.find_by_name('XS').lexer
    assert find_lexer_class('C++') == Language.find_by_name('C++').lexer
    assert find_lexer_class('Coldfusion HTML') == Language.find_by_name('ColdFusion').lexer
    assert find_lexer_class('Coq') == Language.find_by_name('Coq').lexer
    assert find_lexer_class('FSharp') == Language.find_by_name('F#').lexer
    assert find_lexer_class('FSharp') == Language.find_by_name('F#').lexer
    assert find_lexer_class('Fortran') == Language.find_by_name('FORTRAN').lexer
    assert find_lexer_class('Gherkin') == Language.find_by_name('Cucumber').lexer
    assert find_lexer_class('Groovy') == Language.find_by_name('Groovy').lexer
    assert find_lexer_class('HTML') == Language.find_by_name('HTML').lexer
    assert find_lexer_class('HTML+Django/Jinja') == Language.find_by_name('HTML+Django').lexer
    assert find_lexer_class('HTML+PHP') == Language.find_by_name('HTML+PHP').lexer
    assert find_lexer_class('HTTP') == Language.find_by_name('HTTP').lexer
    assert find_lexer_class('JSON') == Language.find_by_name('JSON').lexer
    assert find_lexer_class('Java') == Language.find_by_name('ChucK').lexer
    assert find_lexer_class('Java') == Language.find_by_name('Java').lexer
    assert find_lexer_class('JavaScript') == Language.find_by_name('JavaScript').lexer
    assert find_lexer_class('MOOCode') == Language.find_by_name('Moocode').lexer
    assert find_lexer_class('MuPAD') == Language.find_by_name('mupad').lexer
    assert find_lexer_class('NASM') == Language.find_by_name('Assembly').lexer
    assert find_lexer_class('OCaml') == Language.find_by_name('OCaml').lexer
    assert find_lexer_class('Ooc') == Language.find_by_name('ooc').lexer
    assert find_lexer_class('OpenEdge ABL') == Language.find_by_name('OpenEdge ABL').lexer
    assert find_lexer_class('REBOL') == Language.find_by_name('Rebol').lexer
    assert find_lexer_class('RHTML') == Language.find_by_name('HTML+ERB').lexer
    assert find_lexer_class('RHTML') == Language.find_by_name('RHTML').lexer
    assert find_lexer_class('Ruby') == Language.find_by_name('Mirah').lexer
    assert find_lexer_class('Ruby') == Language.find_by_name('Ruby').lexer
    assert find_lexer_class('S') == Language.find_by_name('R').lexer
    assert find_lexer_class('Scheme') == Language.find_by_name('Emacs Lisp').lexer
    assert find_lexer_class('Scheme') == Language.find_by_name('Nu').lexer
    assert find_lexer_class('Racket') == Language.find_by_name('Racket').lexer
    assert find_lexer_class('Scheme') == Language.find_by_name('Scheme').lexer
    assert find_lexer_class('Standard ML') == Language.find_by_name('Standard ML').lexer
    assert find_lexer_class('TeX') == Language.find_by_name('TeX').lexer
    #FIXME bug by pygments
    # assert find_lexer_class('Verilog') == Language.find_by_name('Verilog').lexer
    assert find_lexer_class('XSLT') == Language.find_by_name('XSLT').lexer
    assert find_lexer_class('aspx-vb') == Language.find_by_name('ASP').lexer
    assert find_lexer_class('haXe') == Language.find_by_name('Haxe').lexer
    assert find_lexer_class('reStructuredText') == Language.find_by_name('reStructuredText').lexer
def get_lexer_by_lang(lang=None):
    cls = pygments_lexers.find_lexer_class(lang or 'Python')
    return lexers.PygmentsLexer(cls)
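Presumably this helper feeds a prompt_toolkit session; a sketch of that usage, under the assumption that `lexers` above is `prompt_toolkit.lexers` and `pygments_lexers` is `pygments.lexers` (both standard modules):

from prompt_toolkit import PromptSession
from prompt_toolkit import lexers                 # provides PygmentsLexer
from pygments import lexers as pygments_lexers    # provides find_lexer_class

# PygmentsLexer wraps a pygments lexer *class* for prompt_toolkit, which is
# why get_lexer_by_lang() passes the class from find_lexer_class() unwrapped.
session = PromptSession(lexer=get_lexer_by_lang('Python'))
# text = session.prompt('>>> ')  # typed input is now highlighted as Python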
def main(fn, lexer=None, options={}):
    if fn == '-':
        text = sys.stdin.read()
    else:
        try:
            with open(fn, 'rb') as fp:
                text = fp.read().decode('utf-8')
        except UnicodeError:
            if decode_strategy == 'latin1':
                print('Warning: non-UTF8 input, using latin1')
                with open(fn, 'rb') as fp:
                    text = fp.read().decode('latin1')
            elif decode_strategy == 'utf8-ignore':
                print('Warning: ignoring non-UTF8 bytes in input')
                with open(fn, 'rb') as fp:
                    text = fp.read().decode('utf-8', 'ignore')
    text = text.strip('\n') + '\n'

    if lexer is not None:
        lxcls = get_lexer_by_name(lexer).__class__
    elif guess:
        lxcls = guess_lexer(text).__class__
        print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
                                           lxcls.__name__))
    else:
        lxcls = find_lexer_class_for_filename(os.path.basename(fn))
        if lxcls is None:
            name, rest = fn.split('_', 1)
            lxcls = find_lexer_class(name)
            if lxcls is None:
                raise AssertionError('no lexer found for file %r' % fn)
        print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
                                           lxcls.__name__))

    debug_lexer = False
    # if profile:
    #     # does not work for e.g. ExtendedRegexLexers
    #     if lxcls.__bases__ == (RegexLexer,):
    #         # yes we can! (change the metaclass)
    #         lxcls.__class__ = ProfilingRegexLexerMeta
    #         lxcls.__bases__ = (ProfilingRegexLexer,)
    #         lxcls._prof_sort_index = profsort
    # else:
    #     if lxcls.__bases__ == (RegexLexer,):
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True
    #     elif lxcls.__bases__ == (DebuggingRegexLexer,):
    #         # already debugged before
    #         debug_lexer = True
    #     else:
    #         # HACK: ExtendedRegexLexer subclasses will only partially work here.
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True
    lx = lxcls(**options)
    lno = 1
    tokens = []
    states = []

    def show_token(tok, state):
        reprs = list(map(repr, tok))
        print(' ' + reprs[1] + ' ' + ' ' * (29 - len(reprs[1])) + reprs[0],
              end=' ')
        if debug_lexer:
            print(' ' + ' ' * (29 - len(reprs[0])) +
                  ' : '.join(state) if state else '', end=' ')
        print()

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error and not ignerror:
            print('Error parsing', fn, 'on line', lno)
            if not showall:
                print('Previous tokens' +
                      (debug_lexer and ' and states' or '') + ':')
                for i in range(max(len(tokens) - num, 0), len(tokens)):
                    if debug_lexer:
                        show_token(tokens[i], states[i])
                    else:
                        show_token(tokens[i], None)
            print('Error token:')
            l = len(repr(val))
            print(' ' + repr(val), end=' ')
            if debug_lexer and hasattr(lx, 'ctx'):
                print(' ' * (60 - l) + ' : '.join(lx.ctx.stack), end=' ')
            print()
            print()
            return 1
        tokens.append((type, val))
        if debug_lexer:
            if hasattr(lx, 'ctx'):
                states.append(lx.ctx.stack[:])
            else:
                states.append(None)
        if showall:
            show_token((type, val), states[-1] if debug_lexer else None)
    return 0
def test_lexer(self):
    assert find_lexer_class('Ruby') == self.blob("Ruby/foo.rb").lexer
def main(fn, lexer=None, options={}):
    if lexer is not None:
        lxcls = get_lexer_by_name(lexer).__class__
    else:
        lxcls = find_lexer_class_for_filename(os.path.basename(fn))
        if lxcls is None:
            name, rest = fn.split('_', 1)
            lxcls = find_lexer_class(name)
            if lxcls is None:
                raise AssertionError('no lexer found for file %r' % fn)

    debug_lexer = False
    if profile:
        # does not work for e.g. ExtendedRegexLexers
        if lxcls.__bases__ == (RegexLexer, ):
            # yes we can! (change the metaclass)
            lxcls.__class__ = ProfilingRegexLexerMeta
            lxcls.__bases__ = (ProfilingRegexLexer, )
            lxcls._prof_sort_index = profsort
    else:
        if lxcls.__bases__ == (RegexLexer, ):
            lxcls.__bases__ = (DebuggingRegexLexer, )
            debug_lexer = True
        elif lxcls.__bases__ == (DebuggingRegexLexer, ):
            # already debugged before
            debug_lexer = True
        else:
            # HACK: ExtendedRegexLexer subclasses will only partially work here.
            lxcls.__bases__ = (DebuggingRegexLexer, )
            debug_lexer = True

    lx = lxcls(**options)
    lno = 1
    if fn == '-':
        text = sys.stdin.read()
    else:
        with open(fn, 'rb') as fp:
            text = fp.read().decode('utf-8')
    text = text.strip('\n') + '\n'
    tokens = []
    states = []

    def show_token(tok, state):
        reprs = list(map(repr, tok))
        print(' ' + reprs[1] + ' ' + ' ' * (29 - len(reprs[1])) + reprs[0],
              end=' ')
        if debug_lexer:
            print(' ' + ' ' * (29 - len(reprs[0])) + ' : '.join(state),
                  end=' ')
        print()

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error:
            print('Error parsing', fn, 'on line', lno)
            print('Previous tokens' +
                  (debug_lexer and ' and states' or '') + ':')
            if showall:
                # NB: map(None, seq1, seq2) is the Python 2 idiom for
                # zip-with-None-padding; under Python 3 this raises TypeError.
                for tok, state in map(None, tokens, states):
                    show_token(tok, state)
            else:
                for i in range(max(len(tokens) - num, 0), len(tokens)):
                    if debug_lexer:
                        show_token(tokens[i], states[i])
                    else:
                        show_token(tokens[i], None)
            print('Error token:')
            l = len(repr(val))
            print(' ' + repr(val), end=' ')
            if debug_lexer and hasattr(lx, 'ctx'):
                print(' ' * (60 - l) + ' : '.join(lx.ctx.stack), end=' ')
            print()
            print()
            return 1
        tokens.append((type, val))
        if debug_lexer:
            if hasattr(lx, 'ctx'):
                states.append(lx.ctx.stack[:])
            else:
                states.append(None)
    if showall:
        for tok, state in zip(tokens, states):
            show_token(tok, state)
    return 0
def _discover_lexers():
    import inspect
    from pygments.lexers import get_all_lexers, find_lexer_class

    # maps file extension (and names) to (module, classname) tuples
    default_exts = {
        # C / C++
        ".h": ("pygments.lexers.c_cpp", "CLexer"),
        ".hh": ("pygments.lexers.c_cpp", "CppLexer"),
        ".cp": ("pygments.lexers.c_cpp", "CppLexer"),
        # python
        ".py": ("pygments.lexers.python", "Python3Lexer"),
        ".pyw": ("pygments.lexers.python", "Python3Lexer"),
        ".sc": ("pygments.lexers.python", "Python3Lexer"),
        ".tac": ("pygments.lexers.python", "Python3Lexer"),
        "SConstruct": ("pygments.lexers.python", "Python3Lexer"),
        "SConscript": ("pygments.lexers.python", "Python3Lexer"),
        ".sage": ("pygments.lexers.python", "Python3Lexer"),
        ".pytb": ("pygments.lexers.python", "Python3TracebackLexer"),
        # perl
        ".t": ("pygments.lexers.perl", "Perl6Lexer"),
        ".pl": ("pygments.lexers.perl", "Perl6Lexer"),
        ".pm": ("pygments.lexers.perl", "Perl6Lexer"),
        # asm
        ".s": ("pygments.lexers.asm", "GasLexer"),
        ".S": ("pygments.lexers.asm", "GasLexer"),
        ".asm": ("pygments.lexers.asm", "NasmLexer"),
        ".ASM": ("pygments.lexers.asm", "NasmLexer"),
        # Antlr
        ".g": ("pygments.lexers.parsers", "AntlrCppLexer"),
        ".G": ("pygments.lexers.parsers", "AntlrCppLexer"),
        # XML
        ".xml": ("pygments.lexers.html", "XmlLexer"),
        ".xsl": ("pygments.lexers.html", "XsltLexer"),
        ".xslt": ("pygments.lexers.html", "XsltLexer"),
        # ASP
        ".axd": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".asax": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".ascx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".ashx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".asmx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".aspx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        # misc
        ".b": ("pygments.lexers.esoteric", "BrainfuckLexer"),
        ".j": ("pygments.lexers.jvm", "JasminLexer"),
        ".m": ("pygments.lexers.matlab", "MatlabLexer"),
        ".n": ("pygments.lexers.dotnet", "NemerleLexer"),
        ".p": ("pygments.lexers.pawn", "PawnLexer"),
        ".v": ("pygments.lexers.theorem", "CoqLexer"),
        ".as": ("pygments.lexers.actionscript", "ActionScript3Lexer"),
        ".fs": ("pygments.lexers.forth", "ForthLexer"),
        ".hy": ("pygments.lexers.lisp", "HyLexer"),
        ".ts": ("pygments.lexers.javascript", "TypeScriptLexer"),
        ".rl": ("pygments.lexers.parsers", "RagelCppLexer"),
        ".bas": ("pygments.lexers.basic", "QBasicLexer"),
        ".bug": ("pygments.lexers.modeling", "BugsLexer"),
        ".ecl": ("pygments.lexers.ecl", "ECLLexer"),
        ".inc": ("pygments.lexers.php", "PhpLexer"),
        ".inf": ("pygments.lexers.configs", "IniLexer"),
        ".pro": ("pygments.lexers.prolog", "PrologLexer"),
        ".sql": ("pygments.lexers.sql", "SqlLexer"),
        ".txt": ("pygments.lexers.special", "TextLexer"),
        ".html": ("pygments.lexers.html", "HtmlLexer"),
    }
    exts = {}
    lexers = {"exts": exts}
    if DEBUG:
        from collections import defaultdict
        duplicates = defaultdict(set)
    for longname, aliases, filenames, mimetypes in get_all_lexers():
        cls = find_lexer_class(longname)
        mod = inspect.getmodule(cls)
        val = (mod.__name__, cls.__name__)
        for filename in filenames:
            if filename.startswith("*."):
                filename = filename[1:]
            if "*" in filename:
                continue
            if (
                DEBUG
                and filename in exts
                and exts[filename] != val
                and filename not in default_exts
            ):
                duplicates[filename].add(val)
                duplicates[filename].add(exts[filename])
            exts[filename] = val
    # remove some ambiguity
    exts.update(default_exts)
    # print duplicate message
    if DEBUG:
        _print_duplicate_message(duplicates)
    return lexers
def test_lexer(self):
    assert find_lexer_class('ActionScript 3') == Language['ActionScript'].lexer
    assert find_lexer_class('Bash') == Language['Gentoo Ebuild'].lexer
    assert find_lexer_class('Bash') == Language['Gentoo Eclass'].lexer
    assert find_lexer_class('Bash') == Language['Shell'].lexer
    assert find_lexer_class('C') == Language['OpenCL'].lexer
    assert find_lexer_class('C') == Language['XS'].lexer
    assert find_lexer_class('C++') == Language['C++'].lexer
    assert find_lexer_class('Coldfusion HTML') == Language['ColdFusion'].lexer
    assert find_lexer_class('Coq') == Language['Coq'].lexer
    assert find_lexer_class('FSharp') == Language['F#'].lexer
    assert find_lexer_class('FSharp') == Language['F#'].lexer
    assert find_lexer_class('Fortran') == Language['FORTRAN'].lexer
    assert find_lexer_class('Gherkin') == Language['Cucumber'].lexer
    assert find_lexer_class('Groovy') == Language['Groovy'].lexer
    assert find_lexer_class('HTML') == Language['HTML'].lexer
    assert find_lexer_class('HTML+Django/Jinja') == Language['HTML+Django'].lexer
    assert find_lexer_class('HTML+PHP') == Language['HTML+PHP'].lexer
    assert find_lexer_class('HTTP') == Language['HTTP'].lexer
    assert find_lexer_class('JSON') == Language['JSON'].lexer
    assert find_lexer_class('Java') == Language['ChucK'].lexer
    assert find_lexer_class('Java') == Language['Java'].lexer
    assert find_lexer_class('JavaScript') == Language['JavaScript'].lexer
    assert find_lexer_class('MOOCode') == Language['Moocode'].lexer
    assert find_lexer_class('MuPAD') == Language['mupad'].lexer
    assert find_lexer_class('NASM') == Language['Assembly'].lexer
    assert find_lexer_class('OCaml') == Language['OCaml'].lexer
    assert find_lexer_class('Ooc') == Language['ooc'].lexer
    assert find_lexer_class('OpenEdge ABL') == Language['OpenEdge ABL'].lexer
    assert find_lexer_class('REBOL') == Language['Rebol'].lexer
    assert find_lexer_class('RHTML') == Language['HTML+ERB'].lexer
    assert find_lexer_class('RHTML') == Language['RHTML'].lexer
    assert find_lexer_class('Ruby') == Language['Mirah'].lexer
    assert find_lexer_class('Ruby') == Language['Ruby'].lexer
    assert find_lexer_class('S') == Language['R'].lexer
    assert find_lexer_class('Scheme') == Language['Emacs Lisp'].lexer
    assert find_lexer_class('Scheme') == Language['Nu'].lexer
    assert find_lexer_class('Racket') == Language['Racket'].lexer
    assert find_lexer_class('Scheme') == Language['Scheme'].lexer
    assert find_lexer_class('Standard ML') == Language['Standard ML'].lexer
    assert find_lexer_class('TeX') == Language['TeX'].lexer
    assert find_lexer_class('verilog') == Language['Verilog'].lexer
    assert find_lexer_class('XSLT') == Language['XSLT'].lexer
    assert find_lexer_class('aspx-vb') == Language['ASP'].lexer
    assert find_lexer_class('haXe') == Language['Haxe'].lexer
    assert find_lexer_class('reStructuredText') == Language['reStructuredText'].lexer
def test_lexer(self):
    assert find_lexer_class('ActionScript 3') == Language['ActionScript'].lexer
    assert find_lexer_class('Bash') == Language['Gentoo Ebuild'].lexer
    assert find_lexer_class('Bash') == Language['Gentoo Eclass'].lexer
    assert find_lexer_class('Bash') == Language['Shell'].lexer
    assert find_lexer_class('C') == Language['OpenCL'].lexer
    assert find_lexer_class('C') == Language['XS'].lexer
    assert find_lexer_class('C++') == Language['C++'].lexer
    assert find_lexer_class('Coldfusion HTML') == Language['ColdFusion'].lexer
    assert find_lexer_class('Coq') == Language['Coq'].lexer
    assert find_lexer_class('FSharp') == Language['F#'].lexer
    assert find_lexer_class('FSharp') == Language['F#'].lexer
    assert find_lexer_class('Fortran') == Language['FORTRAN'].lexer
    assert find_lexer_class('Gherkin') == Language['Cucumber'].lexer
    assert find_lexer_class('Groovy') == Language['Groovy'].lexer
    assert find_lexer_class('HTML') == Language['HTML'].lexer
    assert find_lexer_class('HTML+Django/Jinja') == Language['HTML+Django'].lexer
    assert find_lexer_class('HTML+PHP') == Language['HTML+PHP'].lexer
    assert find_lexer_class('HTTP') == Language['HTTP'].lexer
    assert find_lexer_class('JSON') == Language['JSON'].lexer
    assert find_lexer_class('Java') == Language['ChucK'].lexer
    assert find_lexer_class('Java') == Language['Java'].lexer
    assert find_lexer_class('JavaScript') == Language['JavaScript'].lexer
    assert find_lexer_class('MOOCode') == Language['Moocode'].lexer
    assert find_lexer_class('MuPAD') == Language['mupad'].lexer
    assert find_lexer_class('NASM') == Language['Assembly'].lexer
    assert find_lexer_class('OCaml') == Language['OCaml'].lexer
    assert find_lexer_class('Ooc') == Language['ooc'].lexer
    assert find_lexer_class('OpenEdge ABL') == Language['OpenEdge ABL'].lexer
    assert find_lexer_class('REBOL') == Language['Rebol'].lexer
    assert find_lexer_class('RHTML') == Language['HTML+ERB'].lexer
    assert find_lexer_class('RHTML') == Language['RHTML'].lexer
    assert find_lexer_class('Ruby') == Language['Mirah'].lexer
    assert find_lexer_class('Ruby') == Language['Ruby'].lexer
    assert find_lexer_class('S') == Language['R'].lexer
    assert find_lexer_class('Scheme') == Language['Emacs Lisp'].lexer
    assert find_lexer_class('Scheme') == Language['Nu'].lexer
    assert find_lexer_class('Racket') == Language['Racket'].lexer
    assert find_lexer_class('Scheme') == Language['Scheme'].lexer
    assert find_lexer_class('Standard ML') == Language['Standard ML'].lexer
    assert find_lexer_class('TeX') == Language['TeX'].lexer
    assert find_lexer_class('verilog') == Language['Verilog'].lexer
    assert find_lexer_class('XSLT') == Language['XSLT'].lexer
    assert find_lexer_class('aspx-vb') == Language['ASP'].lexer
    assert find_lexer_class('haXe') == Language['Haxe'].lexer
    assert find_lexer_class('reStructuredText') == Language['reStructuredText'].lexer
def main(fn, lexer=None, options={}):
    if lexer is not None:
        lxcls = get_lexer_by_name(lexer).__class__
    else:
        lxcls = find_lexer_class_for_filename(os.path.basename(fn))
        if lxcls is None:
            name, rest = fn.split('_', 1)
            lxcls = find_lexer_class(name)
            if lxcls is None:
                raise AssertionError('no lexer found for file %r' % fn)
        print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
                                           lxcls.__name__))

    debug_lexer = False
    # if profile:
    #     # does not work for e.g. ExtendedRegexLexers
    #     if lxcls.__bases__ == (RegexLexer,):
    #         # yes we can! (change the metaclass)
    #         lxcls.__class__ = ProfilingRegexLexerMeta
    #         lxcls.__bases__ = (ProfilingRegexLexer,)
    #         lxcls._prof_sort_index = profsort
    # else:
    #     if lxcls.__bases__ == (RegexLexer,):
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True
    #     elif lxcls.__bases__ == (DebuggingRegexLexer,):
    #         # already debugged before
    #         debug_lexer = True
    #     else:
    #         # HACK: ExtendedRegexLexer subclasses will only partially work here.
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True
    lx = lxcls(**options)
    lno = 1
    if fn == '-':
        text = sys.stdin.read()
    else:
        with open(fn, 'rb') as fp:
            text = fp.read().decode('utf-8')
    text = text.strip('\n') + '\n'
    tokens = []
    states = []

    def show_token(tok, state):
        reprs = list(map(repr, tok))
        print(' ' + reprs[1] + ' ' + ' ' * (29 - len(reprs[1])) + reprs[0],
              end=' ')
        if debug_lexer:
            print(' ' + ' ' * (29 - len(reprs[0])) +
                  ' : '.join(state) if state else '', end=' ')
        print()

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error and not ignerror:
            print('Error parsing', fn, 'on line', lno)
            if not showall:
                print('Previous tokens' +
                      (debug_lexer and ' and states' or '') + ':')
                for i in range(max(len(tokens) - num, 0), len(tokens)):
                    if debug_lexer:
                        show_token(tokens[i], states[i])
                    else:
                        show_token(tokens[i], None)
            print('Error token:')
            l = len(repr(val))
            print(' ' + repr(val), end=' ')
            if debug_lexer and hasattr(lx, 'ctx'):
                print(' ' * (60 - l) + ' : '.join(lx.ctx.stack), end=' ')
            print()
            print()
            return 1
        tokens.append((type, val))
        if debug_lexer:
            if hasattr(lx, 'ctx'):
                states.append(lx.ctx.stack[:])
            else:
                states.append(None)
        if showall:
            show_token((type, val), states[-1] if debug_lexer else None)
    return 0
def _discover_lexers():
    import inspect
    from pygments.lexers import get_all_lexers, find_lexer_class

    # maps file extension (and names) to (module, classname) tuples
    default_exts = {
        # C / C++
        '.h': ('pygments.lexers.c_cpp', 'CLexer'),
        '.hh': ('pygments.lexers.c_cpp', 'CppLexer'),
        '.cp': ('pygments.lexers.c_cpp', 'CppLexer'),
        # python
        '.py': ('pygments.lexers.python', 'Python3Lexer'),
        '.pyw': ('pygments.lexers.python', 'Python3Lexer'),
        '.sc': ('pygments.lexers.python', 'Python3Lexer'),
        '.tac': ('pygments.lexers.python', 'Python3Lexer'),
        'SConstruct': ('pygments.lexers.python', 'Python3Lexer'),
        'SConscript': ('pygments.lexers.python', 'Python3Lexer'),
        '.sage': ('pygments.lexers.python', 'Python3Lexer'),
        '.pytb': ('pygments.lexers.python', 'Python3TracebackLexer'),
        # perl
        '.t': ('pygments.lexers.perl', 'Perl6Lexer'),
        '.pl': ('pygments.lexers.perl', 'Perl6Lexer'),
        '.pm': ('pygments.lexers.perl', 'Perl6Lexer'),
        # asm
        '.s': ('pygments.lexers.asm', 'GasLexer'),
        '.S': ('pygments.lexers.asm', 'GasLexer'),
        '.asm': ('pygments.lexers.asm', 'NasmLexer'),
        '.ASM': ('pygments.lexers.asm', 'NasmLexer'),
        # Antlr
        '.g': ('pygments.lexers.parsers', 'AntlrCppLexer'),
        '.G': ('pygments.lexers.parsers', 'AntlrCppLexer'),
        # XML
        '.xml': ('pygments.lexers.html', 'XmlLexer'),
        '.xsl': ('pygments.lexers.html', 'XsltLexer'),
        '.xslt': ('pygments.lexers.html', 'XsltLexer'),
        # ASP
        '.axd': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        '.asax': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        '.ascx': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        '.ashx': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        '.asmx': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        '.aspx': ('pygments.lexers.dotnet', 'CSharpAspxLexer'),
        # misc
        '.b': ('pygments.lexers.esoteric', 'BrainfuckLexer'),
        '.j': ('pygments.lexers.jvm', 'JasminLexer'),
        '.m': ('pygments.lexers.matlab', 'MatlabLexer'),
        '.n': ('pygments.lexers.dotnet', 'NemerleLexer'),
        '.p': ('pygments.lexers.pawn', 'PawnLexer'),
        '.v': ('pygments.lexers.theorem', 'CoqLexer'),
        '.as': ('pygments.lexers.actionscript', 'ActionScript3Lexer'),
        '.fs': ('pygments.lexers.forth', 'ForthLexer'),
        '.hy': ('pygments.lexers.lisp', 'HyLexer'),
        '.ts': ('pygments.lexers.javascript', 'TypeScriptLexer'),
        '.rl': ('pygments.lexers.parsers', 'RagelCppLexer'),
        '.bas': ('pygments.lexers.basic', 'QBasicLexer'),
        '.bug': ('pygments.lexers.modeling', 'BugsLexer'),
        '.ecl': ('pygments.lexers.ecl', 'ECLLexer'),
        '.inc': ('pygments.lexers.php', 'PhpLexer'),
        '.inf': ('pygments.lexers.configs', 'IniLexer'),
        '.pro': ('pygments.lexers.prolog', 'PrologLexer'),
        '.sql': ('pygments.lexers.sql', 'SqlLexer'),
        '.txt': ('pygments.lexers.special', 'TextLexer'),
        '.html': ('pygments.lexers.html', 'HtmlLexer'),
    }
    exts = {}
    lexers = {'exts': exts}
    if DEBUG:
        from collections import defaultdict
        duplicates = defaultdict(set)
    for longname, aliases, filenames, mimetypes in get_all_lexers():
        cls = find_lexer_class(longname)
        mod = inspect.getmodule(cls)
        val = (mod.__name__, cls.__name__)
        for filename in filenames:
            if filename.startswith('*.'):
                filename = filename[1:]
            if '*' in filename:
                continue
            if (DEBUG and filename in exts and exts[filename] != val
                    and filename not in default_exts):
                duplicates[filename].add(val)
                duplicates[filename].add(exts[filename])
            exts[filename] = val
    # remove some ambiguity
    exts.update(default_exts)
    # print duplicate message
    if DEBUG:
        _print_duplicate_message(duplicates)
    return lexers
def tokenize(language, source):
    # have you found yourself looking here after getting a UnicodeDecodeError?
    # thought so. i think upgrading to pygments 1.5 should do the trick.
    lexer = find_lexer_class(language)(encoding="guess")
    return lex(source, lexer)
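A short usage sketch of the wrapper above; the imports are what the snippet's module context presumably provides, and the sample source is invented:

from pygments import lex
from pygments.lexers import find_lexer_class

# tokenize() streams (token_type, value) pairs from pygments' lex().
for token_type, value in tokenize("Python", "print('hi')\n"):
    print(token_type, repr(value))
# e.g. Token.Name.Builtin "print", Token.Punctuation "(", ...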
def test_lexer(self): assert find_lexer_class("ActionScript 3") == Language["ActionScript"].lexer assert find_lexer_class("Bash") == Language["Gentoo Ebuild"].lexer assert find_lexer_class("Bash") == Language["Gentoo Eclass"].lexer assert find_lexer_class("Bash") == Language["Shell"].lexer assert find_lexer_class("C") == Language["OpenCL"].lexer assert find_lexer_class("C") == Language["XS"].lexer assert find_lexer_class("C++") == Language["C++"].lexer assert find_lexer_class("Coldfusion HTML") == Language["ColdFusion"].lexer assert find_lexer_class("Coq") == Language["Coq"].lexer assert find_lexer_class("FSharp") == Language["F#"].lexer assert find_lexer_class("FSharp") == Language["F#"].lexer assert find_lexer_class("Fortran") == Language["FORTRAN"].lexer assert find_lexer_class("Gherkin") == Language["Cucumber"].lexer assert find_lexer_class("Groovy") == Language["Groovy"].lexer assert find_lexer_class("HTML") == Language["HTML"].lexer assert find_lexer_class("HTML+Django/Jinja") == Language["HTML+Django"].lexer assert find_lexer_class("HTML+PHP") == Language["HTML+PHP"].lexer assert find_lexer_class("HTTP") == Language["HTTP"].lexer assert find_lexer_class("JSON") == Language["JSON"].lexer assert find_lexer_class("Java") == Language["ChucK"].lexer assert find_lexer_class("Java") == Language["Java"].lexer assert find_lexer_class("JavaScript") == Language["JavaScript"].lexer assert find_lexer_class("MOOCode") == Language["Moocode"].lexer assert find_lexer_class("MuPAD") == Language["mupad"].lexer assert find_lexer_class("NASM") == Language["Assembly"].lexer assert find_lexer_class("OCaml") == Language["OCaml"].lexer assert find_lexer_class("Ooc") == Language["ooc"].lexer assert find_lexer_class("OpenEdge ABL") == Language["OpenEdge ABL"].lexer assert find_lexer_class("REBOL") == Language["Rebol"].lexer assert find_lexer_class("RHTML") == Language["HTML+ERB"].lexer assert find_lexer_class("RHTML") == Language["RHTML"].lexer assert find_lexer_class("Ruby") == Language["Mirah"].lexer assert find_lexer_class("Ruby") == Language["Ruby"].lexer assert find_lexer_class("S") == Language["R"].lexer assert find_lexer_class("Scheme") == Language["Emacs Lisp"].lexer assert find_lexer_class("Scheme") == Language["Nu"].lexer assert find_lexer_class("Racket") == Language["Racket"].lexer assert find_lexer_class("Scheme") == Language["Scheme"].lexer assert find_lexer_class("Standard ML") == Language["Standard ML"].lexer assert find_lexer_class("TeX") == Language["TeX"].lexer assert find_lexer_class("verilog") == Language["Verilog"].lexer assert find_lexer_class("XSLT") == Language["XSLT"].lexer assert find_lexer_class("aspx-vb") == Language["ASP"].lexer # assert find_lexer_class('haXe') == Language['Haxe'].lexer assert find_lexer_class("reStructuredText") == Language["reStructuredText"].lexer