class GenshiTextLexer(RegexLexer): """ A lexer that highlights `genshi <http://genshi.edgewall.org/>`_ text templates. """ name = 'Genshi Text' aliases = ['genshitext'] mimetypes = ['application/x-genshi-text', 'text/x-genshi'] tokens = { 'root': [ (r'[^#\$\s]+', Other), (r'^(\s*)(##.*)$', bygroups(Text, Comment)), (r'^(\s*)(#)', bygroups(Text, Comment.Preproc), 'directive'), include('variable'), (r'[#\$\s]', Other), ], 'directive': [ (r'\n', Text, '#pop'), (r'(?:def|for|if)\s+.*', using(PythonLexer), '#pop'), (r'(choose|when|with)([^\S\n]+)(.*)', bygroups(Keyword, Text, using(PythonLexer)), '#pop'), (r'(choose|otherwise)\b', Keyword, '#pop'), (r'(end\w*)([^\S\n]*)(.*)', bygroups(Keyword, Text, Comment), '#pop'), ], 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', Name.Variable), ] }
class HtmlLexer(RegexLexer): """ For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted by the appropriate lexer. """ name = 'HTML' aliases = ['html'] filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt'] mimetypes = ['text/html', 'application/xhtml+xml'] flags = re.IGNORECASE | re.DOTALL tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), (r'<\s*style\s*', Name.Tag, ('style-content', 'tag')), (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), (r'[a-zA-Z0-9_:-]+', Name.Attribute), (r'/?\s*>', Name.Tag, '#pop'), ], 'script-content': [ (r'<\s*/\s*script\s*>', Name.Tag, '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), ], 'style-content': [ (r'<\s*/\s*style\s*>', Name.Tag, '#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], } def analyse_text(text): if html_doctype_matches(text): return 0.5
class GenshiMarkupLexer(RegexLexer): """ Base lexer for Genshi markup, used by `HtmlGenshiLexer` and `GenshiLexer`. """ flags = re.DOTALL tokens = { 'root': [ (r'[^<\$]+', Other), (r'(<\?python)(.*?)(\?>)', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), # yield style and script blocks as Other (r'<\s*(script|style)\s*.*?>.*?<\s*/\1\s*>', Other), (r'<\s*py:[a-zA-Z0-9]+', Name.Tag, 'pytag'), (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), include('variable'), (r'[<\$]', Other), ], 'pytag': [ (r'\s+', Text), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'pyattr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'pyattr': [ ('(")(.*?)(")', bygroups(String, using(PythonLexer), String), '#pop'), ("(')(.*?)(')", bygroups(String, using(PythonLexer), String), '#pop'), (r'[^\s>]+', String, '#pop'), ], 'tag': [ (r'\s+', Text), (r'py:[a-zA-Z0-9_-]+\s*=', Name.Attribute, 'pyattr'), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [('"', String, 'attr-dstring'), ("'", String, 'attr-sstring'), (r'[^\s>]*', String, '#pop')], 'attr-dstring': [('"', String, '#pop'), include('strings'), ("'", String)], 'attr-sstring': [("'", String, '#pop'), include('strings'), ("'", String)], 'strings': [('[^"\'$]+', String), include('variable')], 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', Name.Variable), ] }
class EvoqueLexer(RegexLexer): """ For files using the Evoque templating system. *New in Pygments 1.1.* """ name = 'Evoque' aliases = ['evoque'] filenames = ['*.evoque'] mimetypes = ['application/x-evoque'] flags = re.DOTALL tokens = { 'root': [ (r'[^#$]+', Other), (r'#\[', Comment.Multiline, 'comment'), (r'\$\$', Other), # svn keywords (r'\$\w+:[^$\n]*\$', Comment.Multiline), # directives: begin, end (r'(\$)(begin|end)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, String, Punctuation, None)), # directives: evoque, overlay # see doc for handling first name arg: /directives/evoque/ #+ minor inconsistency: the "name" in e.g. $overlay{name=site_base} # should be using(PythonLexer), not passed out as String (r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?' r'(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, String, using(PythonLexer), Punctuation, None)), # directives: if, for, prefer, test (r'(\$)(\w+)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, using(PythonLexer), Punctuation, None)), # directive clauses (no {} expression) (r'(\$)(else|rof|fi)', bygroups(Punctuation, Name.Builtin)), # expressions (r'(\$\{(%)?)(.*?)((!)(.*?))?((?(2)%)\})', bygroups(Punctuation, None, using(PythonLexer), Name.Builtin, None, None, Punctuation, None)), (r'#', Other), ], 'comment': [(r'[^\]#]', Comment.Multiline), (r'#\[', Comment.Multiline, '#push'), (r'\]#', Comment.Multiline, '#pop'), (r'[\]#]', Comment.Multiline)], }
class ObjdumpLexer(RegexLexer): """ For the output of 'objdump -dr' """ name = 'objdump' aliases = ['objdump'] filenames = ['*.objdump'] mimetypes = ['text/x-objdump'] hex = r'[0-9A-Za-z]' tokens = { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('(' + hex + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('(' + hex + '+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(GasLexer))), # Code line with ascii ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes ('\t\.\.\.$', Text), # Relocation line # (With offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), ('[^\n]+\n', Other) ] }
class GenericAspxLexer(RegexLexer): """ Lexer for ASP.NET pages. """ name = 'aspx-gen' filenames = [] mimetypes = [] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(XmlLexer), Other, using(XmlLexer))), (r'(.+?)(?=<)', using(XmlLexer)), (r'.+', using(XmlLexer)), ], }
class CheetahLexer(RegexLexer): """ Generic `cheetah templates`_ lexer. Code that isn't Cheetah markup is yielded as `Token.Other`. This also works for `spitfire templates`_ which use the same syntax. .. _cheetah templates: http://www.cheetahtemplate.org/ .. _spitfire templates: http://code.google.com/p/spitfire/ """ name = 'Cheetah' aliases = ['cheetah', 'spitfire'] filenames = ['*.tmpl', '*.spt'] mimetypes = ['application/x-cheetah', 'application/x-spitfire'] tokens = { 'root': [ (r'(##[^\n]*)$', (bygroups(Comment))), (r'#[*](.|\n)*?[*]#', Comment), (r'#end[^#\n]*(?:#|$)', Comment.Preproc), (r'#slurp$', Comment.Preproc), (r'(#[a-zA-Z]+)([^#\n]*)(#|$)', (bygroups(Comment.Preproc, using(CheetahPythonLexer), Comment.Preproc))), # TODO support other Python syntax like $foo['bar'] (r'(\$)([a-zA-Z_][a-zA-Z0-9_\.]*[a-zA-Z0-9_])', bygroups(Comment.Preproc, using(CheetahPythonLexer))), (r'(\$\{!?)(.*?)(\})(?s)', bygroups(Comment.Preproc, using(CheetahPythonLexer), Comment.Preproc)), (r'''(?sx) (.+?) # anything, followed by: (?: (?=[#][#a-zA-Z]*) | # an eval comment (?=\$[a-zA-Z_{]) | # a substitution \Z # end of string ) ''', Other), (r'\s+', Text), ], }
class SmartyLexer(RegexLexer): """ Generic `Smarty <http://smarty.php.net/>`_ template lexer. Just highlights smarty code between the preprocessor directives, other data is left untouched by the lexer. """ name = 'Smarty' aliases = ['smarty'] filenames = ['*.tpl'] mimetypes = ['application/x-smarty'] flags = re.MULTILINE | re.DOTALL tokens = { 'root': [(r'[^{]+', Other), (r'(\{)(\*.*?\*)(\})', bygroups(Comment.Preproc, Comment, Comment.Preproc)), (r'(\{php\})(.*?)(\{/php\})', bygroups(Comment.Preproc, using(PhpLexer, startinline=True), Comment.Preproc)), (r'(\{)(/?[a-zA-Z_][a-zA-Z0-9_]*)(\s*)', bygroups(Comment.Preproc, Name.Function, Text), 'smarty'), (r'\{', Comment.Preproc, 'smarty')], 'smarty': [(r'\s+', Text), (r'\}', Comment.Preproc, '#pop'), (r'#[a-zA-Z_][a-zA-Z0-9_]*#', Name.Variable), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z0-9_]+)*', Name.Variable), (r'[~!%^&*()+=|\[\]:;,.<>/?{}@-]', Operator), ('(true|false|null)\b', Keyword.Constant), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Attribute)] } def analyse_text(text): rv = 0.0 if re.search('\{if\s+.*?\}.*?\{/if\}', text): rv += 0.15 if re.search('\{include\s+file=.*?\}', text): rv += 0.15 if re.search('\{foreach\s+.*?\}.*?\{/foreach\}', text): rv += 0.15 if re.search('\{\$.*?\}', text): rv += 0.01 return rv
class MxmlLexer(RegexLexer): """ For MXML markup. Nested AS3 in <script> tags is highlighted by the appropriate lexer. """ flags = re.MULTILINE | re.DOTALL name = 'MXML' aliases = ['mxml'] filenames = ['*.mxml'] mimetimes = ['text/xml', 'application/xml'] tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'(\<\!\[CDATA\[)(.*?)(\]\]\>)', bygroups(String, using(ActionScript3Lexer), String)), ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'), (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ ('\s+', Text), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }
class MyghtyLexer(RegexLexer): """ Generic `myghty templates`_ lexer. Code that isn't Myghty markup is yielded as `Token.Other`. *New in Pygments 0.6.* .. _myghty templates: http://www.myghty.org/ """ name = 'Myghty' aliases = ['myghty'] filenames = ['*.myt', 'autodelegate'] mimetypes = ['application/x-myghty'] tokens = { 'root': [ (r'\s+', Text), (r'(<%(def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', bygroups(Name.Tag, None, Text, Name.Function, Name.Tag, using(this), Name.Tag)), (r'(<%(\w+))(.*?)(>)(.*?)(</%\2\s*>)(?s)', bygroups(Name.Tag, None, Name.Function, Name.Tag, using(PythonLexer), Name.Tag)), (r'(<&[^|])(.*?)(,.*?)?(&>)', bygroups(Name.Tag, Name.Function, using(PythonLexer), Name.Tag)), (r'(<&\|)(.*?)(,.*?)?(&>)(?s)', bygroups(Name.Tag, Name.Function, using(PythonLexer), Name.Tag)), (r'</&>', Name.Tag), (r'(<%!?)(.*?)(%>)(?s)', bygroups(Name.Tag, using(PythonLexer), Name.Tag)), (r'(?<=^)#[^\n]*(\n|\Z)', Comment), (r'(?<=^)(%)([^\n]*)(\n|\Z)', bygroups(Name.Tag, using(PythonLexer), Other)), (r"""(?sx) (.+?) # anything, followed by: (?: (?<=\n)(?=[%#]) | # an eval or comment line (?=</?[%&]) | # a substitution or block or # call start or end # - don't consume (\\\n) | # an escaped newline \Z # end of string )""", bygroups(Other, Operator)), ] }
class JspRootLexer(RegexLexer): """ Base for the `JspLexer`. Yields `Token.Other` for area outside of JSP tags. *New in Pygments 0.7.* """ tokens = { 'root': [ (r'<%\S?', Keyword, 'sec'), # FIXME: I want to make these keywords but still parse attributes. (r'</?jsp:(forward|getProperty|include|plugin|setProperty|useBean).*?>', Keyword), (r'[^<]+', Other), (r'<', Other), ], 'sec': [ (r'%>', Keyword, '#pop'), # note: '\w\W' != '.' without DOTALL. (r'[\w\W]+?(?=%>|\Z)', using(JavaLexer)), ], }
class MakoLexer(RegexLexer): """ Generic `mako templates`_ lexer. Code that isn't Mako markup is yielded as `Token.Other`. *New in Pygments 0.7.* .. _mako templates: http://www.makotemplates.org/ """ name = 'Mako' aliases = ['mako'] filenames = ['*.mao'] mimetypes = ['application/x-mako'] tokens = { 'root': [ (r'(\s*)(%)(\s*end(?:\w+))(\n|\Z)', bygroups(Text, Comment.Preproc, Keyword, Other)), (r'(\s*)(%)([^\n]*)(\n|\Z)', bygroups(Text, Comment.Preproc, using(PythonLexer), Other)), (r'(\s*)(##[^\n]*)(\n|\Z)', bygroups(Text, Comment.Preproc, Other)), (r'(?s)<%doc>.*?</%doc>', Comment.Preproc), (r'(<%)([\w\.\:]+)', bygroups(Comment.Preproc, Name.Builtin), 'tag'), (r'(</%)([\w\.\:]+)(>)', bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc)), (r'<%(?=([\w\.\:]+))', Comment.Preproc, 'ondeftags'), (r'(<%(?:!?))(.*?)(%>)(?s)', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(\$\{)(.*?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'''(?sx) (.+?) # anything, followed by: (?: (?<=\n)(?=%|\#\#) | # an eval or comment line (?=\#\*) | # multiline comment (?=</?%) | # a python block # call start or end (?=\$\{) | # a substitution (?<=\n)(?=\s*%) | # - don't consume (\\\n) | # an escaped newline \Z # end of string ) ''', bygroups(Other, Operator)), (r'\s+', Text), ], 'ondeftags': [ (r'<%', Comment.Preproc), (r'(?<=<%)(include|inherit|namespace|page)', Name.Builtin), include('tag'), ], 'tag': [ (r'((?:\w+)\s*=)\s*(".*?")', bygroups(Name.Attribute, String)), (r'/?\s*>', Comment.Preproc, '#pop'), (r'\s+', Text), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }
class PhpLexer(RegexLexer): """ For `PHP <http://www.php.net/>`_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``<?php`` required). The default is ``False``. `funcnamehighlighting` If given and ``True``, highlight builtin function names (default: ``True``). `disabledmodules` If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted except the special ``'unknown'`` module that includes functions that are known to php but are undocumented. To get a list of allowed modules have a look into the `_phpbuiltins` module: .. sourcecode:: pycon >>> from pygments3.lexers._phpbuiltins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]'] mimetypes = ['text/x-php'] flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other)], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String), (r'\s+', Text), (r'#.*?\n', Comment), (r'//.*?\n', Comment), (r'/\*\*/', Comment), # put the empty comment here, it is otherwise # seen as the start of a docstring (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment), (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/?@-]+', Operator), (r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|__FILE__|' r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|' r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this)\b', Keyword), ('(true|false|null)\b', Keyword.Constant), (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable), (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'classname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'functionname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')], 'string': [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]+', String.Double)], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt(options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt(options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from pygments3.lexers._phpbuiltins import MODULES for key, value in MODULES.items(): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 if '?>' in text: rv += 0.1 return rv
class ActionScript3Lexer(RegexLexer): """ For ActionScript 3 source code. *New in Pygments 0.11.* """ name = 'ActionScript 3' aliases = ['as3', 'actionscript3'] filenames = ['*.as'] mimetypes = [ 'application/x-actionscript', 'text/x-actionscript', 'text/actionscript' ] identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*' flags = re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'\s+', Text), (r'(function\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword.Declaration, Name.Function, Text, Operator), 'funcparams'), (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r')', bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text, Keyword.Type)), (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)', bygroups(Keyword, Text, Name.Namespace, Text)), (r'(new)(\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex), (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' r'switch|import|include|as|is)\b', Keyword), (r'(class|public|final|internal|native|override|private|protected|' r'static|import|extends|implements|interface|intrinsic|return|super|' r'dynamic|function|const|get|namespace|package|set)\b', Keyword.Declaration), (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b', Keyword.Constant), (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|' r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|' r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|' r'unescape)\b', Name.Function), (identifier, Name), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'[~\^\*!%&<>\|+=:;,/?\\{}\[\]();.-]+', Operator), ], 'funcparams': [(r'\s+', Text), (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r'|\*)(\s*)', bygroups(Text, Punctuation, Name, Text, Operator, Text, Keyword.Type, Text), 'defval'), (r'\)', Operator, 'type')], 'type': [(r'(\s*)(:)(\s*)(' + identifier + r'|\*)', bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'), (r'\s*', Text, '#pop:2')], 'defval': [(r'(=)(\s*)([^(),]+)(\s*)(,?)', bygroups(Operator, Text, using(this), Text, Operator), '#pop'), (r',?', Operator, '#pop')] } def analyse_text(text): if re.match(r'\w+\s*:\s*\w', text): return 0.3 return 0.1
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. *New in Pygments 0.8.* """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' + '[^' + _escape( uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc')) + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in list(levels.items()): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment), (r'/[*](.|\n)*?[*]/', Comment), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(\\\\|\\"|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|base|break|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|lock|new|null|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [(cs_ident, Name.Class, '#pop')], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)