class GenshiTextLexer(RegexLexer): """ A lexer that highlights `genshi <http://genshi.edgewall.org/>`_ text templates. """ name = 'Genshi Text' aliases = ['genshitext'] mimetypes = ['application/x-genshi-text', 'text/x-genshi'] tokens = { 'root': [ (r'[^#\$\s]+', Other), (r'^(\s*)(##.*)$', bygroups(Text, Comment)), (r'^(\s*)(#)', bygroups(Text, Comment.Preproc), 'directive'), include('variable'), (r'[#\$\s]', Other), ], 'directive': [ (r'\n', Text, '#pop'), (r'(?:def|for|if)\s+.*', using(PythonLexer), '#pop'), (r'(choose|when|with)([^\S\n]+)(.*)', bygroups(Keyword, Text, using(PythonLexer)), '#pop'), (r'(choose|otherwise)\b', Keyword, '#pop'), (r'(end\w*)([^\S\n]*)(.*)', bygroups(Keyword, Text, Comment), '#pop'), ], 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', Name.Variable), ] }
class GasLexer(RegexLexer): """ For Gas (AT&T) assembly code. """ name = 'GAS' aliases = ['gas'] filenames = ['*.s', '*.S'] mimetypes = ['text/x-gas'] #: optional Comment or Whitespace string = r'"(\\"|[^"])*"' char = r'[a-zA-Z$._0-9@]' identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)' number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' tokens = { 'root': [ include('whitespace'), (identifier + ':', Name.Label), (r'\.' + identifier, Name.Attribute, 'directive-args'), (r'lock|rep(n?z)?|data\d+', Name.Attribute), (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text) ], 'directive-args': [(identifier, Name.Constant), (string, String), ('@' + identifier, Name.Attribute), (number, Number.Integer), (r'[\r\n]+', Text, '#pop'), (r'#.*?$', Comment, '#pop'), include('punctuation'), include('whitespace')], 'instruction-args': [ # For objdump-disassembled code, shouldn't occur in # actual assembler input ('([a-z0-9]+)( )(<)(' + identifier + ')(>)', bygroups(Number.Hex, Text, Punctuation, Name.Constant, Punctuation)), ('([a-z0-9]+)( )(<)(' + identifier + ')([-+])(' + number + ')(>)', bygroups(Number.Hex, Text, Punctuation, Name.Constant, Punctuation, Number.Integer, Punctuation)), # Address constants (identifier, Name.Constant), (number, Number.Integer), # Registers ('%' + identifier, Name.Variable), # Numeric constants ('$' + number, Number.Integer), (r'[\r\n]+', Text, '#pop'), (r'#.*?$', Comment, '#pop'), include('punctuation'), include('whitespace') ], 'whitespace': [(r'\n', Text), (r'\s+', Text), (r'#.*?\n', Comment)], 'punctuation': [(r'[-*,.():]+', Punctuation)] } def analyse_text(text): return re.match(r'^\.\w+', text, re.M)
class BooLexer(RegexLexer): """ For `Boo <http://boo.codehaus.org/>`_ source code. """ name = 'Boo' aliases = ['boo'] filenames = ['*.boo'] mimetypes = ['text/x-boo'] tokens = { 'root': [ (r'\s+', Text), (r'(#|//).*$', Comment), (r'/[*]', Comment, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'/(\\\\|\\/|[^/\s])/', String.Regex), (r'@/(\\\\|\\/|[^/])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' r'partial|private|protected|public|return|set|static|' r'struct|transient|virtual|yield|super|and|break|cast|' r'continue|elif|else|ensure|except|for|given|goto|if|in|' r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|' r'while|from|as)\b', Keyword), (r'def(?=\s+\(.*?\))', Keyword), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|' r'assert|checked|enumerate|filter|getter|len|lock|map|' r'matrix|max|min|normalArrayIndexing|print|property|range|' r'rawArrayIndexing|required|typeof|unchecked|using|' r'yieldAll|zip)\b', Name.Builtin), ('"""(\\\\|\\"|.*?)"""', String.Double), ('"(\\\\|\\"|[^"]*?)"', String.Double), ("'(\\\\|\\'|[^']*?)'", String.Single), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'[0-9][0-9\.]*(m|ms|d|h|s)', Number), (r'0\d+', Number.Oct), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer), ], 'comment': [('/[*]', Comment.Multiline, '#push'), ('[*]/', Comment.Multiline, '#pop'), ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline)], 'funcname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')], 'classname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'namespace': [('[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop')] }
class GenshiMarkupLexer(RegexLexer): """ Base lexer for Genshi markup, used by `HtmlGenshiLexer` and `GenshiLexer`. """ flags = re.DOTALL tokens = { 'root': [ (r'[^<\$]+', Other), (r'(<\?python)(.*?)(\?>)', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), # yield style and script blocks as Other (r'<\s*(script|style)\s*.*?>.*?<\s*/\1\s*>', Other), (r'<\s*py:[a-zA-Z0-9]+', Name.Tag, 'pytag'), (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), include('variable'), (r'[<\$]', Other), ], 'pytag': [ (r'\s+', Text), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'pyattr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'pyattr': [ ('(")(.*?)(")', bygroups(String, using(PythonLexer), String), '#pop'), ("(')(.*?)(')", bygroups(String, using(PythonLexer), String), '#pop'), (r'[^\s>]+', String, '#pop'), ], 'tag': [ (r'\s+', Text), (r'py:[a-zA-Z0-9_-]+\s*=', Name.Attribute, 'pyattr'), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [('"', String, 'attr-dstring'), ("'", String, 'attr-sstring'), (r'[^\s>]*', String, '#pop')], 'attr-dstring': [('"', String, '#pop'), include('strings'), ("'", String)], 'attr-sstring': [("'", String, '#pop'), include('strings'), ("'", String)], 'strings': [('[^"\'$]+', String), include('variable')], 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', Name.Variable), ] }
class ObjdumpLexer(RegexLexer): """ For the output of 'objdump -dr' """ name = 'objdump' aliases = ['objdump'] filenames = ['*.objdump'] mimetypes = ['text/x-objdump'] hex = r'[0-9A-Za-z]' tokens = { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('(' + hex + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('(' + hex + '+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(GasLexer))), # Code line with ascii ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes ('\t\.\.\.$', Text), # Relocation line # (With offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), ('[^\n]+\n', Other) ] }
class SmartyLexer(RegexLexer): """ Generic `Smarty <http://smarty.php.net/>`_ template lexer. Just highlights smarty code between the preprocessor directives, other data is left untouched by the lexer. """ name = 'Smarty' aliases = ['smarty'] filenames = ['*.tpl'] mimetypes = ['application/x-smarty'] flags = re.MULTILINE | re.DOTALL tokens = { 'root': [(r'[^{]+', Other), (r'(\{)(\*.*?\*)(\})', bygroups(Comment.Preproc, Comment, Comment.Preproc)), (r'(\{php\})(.*?)(\{/php\})', bygroups(Comment.Preproc, using(PhpLexer, startinline=True), Comment.Preproc)), (r'(\{)(/?[a-zA-Z_][a-zA-Z0-9_]*)(\s*)', bygroups(Comment.Preproc, Name.Function, Text), 'smarty'), (r'\{', Comment.Preproc, 'smarty')], 'smarty': [(r'\s+', Text), (r'\}', Comment.Preproc, '#pop'), (r'#[a-zA-Z_][a-zA-Z0-9_]*#', Name.Variable), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z0-9_]+)*', Name.Variable), (r'[~!%^&*()+=|\[\]:;,.<>/?{}@-]', Operator), ('(true|false|null)\b', Keyword.Constant), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Attribute)] } def analyse_text(text): rv = 0.0 if re.search('\{if\s+.*?\}.*?\{/if\}', text): rv += 0.15 if re.search('\{include\s+file=.*?\}', text): rv += 0.15 if re.search('\{foreach\s+.*?\}.*?\{/foreach\}', text): rv += 0.15 if re.search('\{\$.*?\}', text): rv += 0.01 return rv
class EvoqueLexer(RegexLexer): """ For files using the Evoque templating system. *New in Pygments 1.1.* """ name = 'Evoque' aliases = ['evoque'] filenames = ['*.evoque'] mimetypes = ['application/x-evoque'] flags = re.DOTALL tokens = { 'root': [ (r'[^#$]+', Other), (r'#\[', Comment.Multiline, 'comment'), (r'\$\$', Other), # svn keywords (r'\$\w+:[^$\n]*\$', Comment.Multiline), # directives: begin, end (r'(\$)(begin|end)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, String, Punctuation, None)), # directives: evoque, overlay # see doc for handling first name arg: /directives/evoque/ #+ minor inconsistency: the "name" in e.g. $overlay{name=site_base} # should be using(PythonLexer), not passed out as String (r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?' r'(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, String, using(PythonLexer), Punctuation, None)), # directives: if, for, prefer, test (r'(\$)(\w+)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, using(PythonLexer), Punctuation, None)), # directive clauses (no {} expression) (r'(\$)(else|rof|fi)', bygroups(Punctuation, Name.Builtin)), # expressions (r'(\$\{(%)?)(.*?)((!)(.*?))?((?(2)%)\})', bygroups(Punctuation, None, using(PythonLexer), Name.Builtin, None, None, Punctuation, None)), (r'#', Other), ], 'comment': [(r'[^\]#]', Comment.Multiline), (r'#\[', Comment.Multiline, '#push'), (r'\]#', Comment.Multiline, '#pop'), (r'[\]#]', Comment.Multiline)], }
class GenericAspxLexer(RegexLexer): """ Lexer for ASP.NET pages. """ name = 'aspx-gen' filenames = [] mimetypes = [] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(XmlLexer), Other, using(XmlLexer))), (r'(.+?)(?=<)', using(XmlLexer)), (r'.+', using(XmlLexer)), ], }
class NasmLexer(RegexLexer): """ For Nasm (Intel) assembly code. """ name = 'NASM' aliases = ['nasm'] filenames = ['*.asm', '*.ASM'] mimetypes = ['text/x-nasm'] identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*' hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9a-fA-F]+h)' octn = r'[0-7]+q' binn = r'[01]+b' decn = r'[0-9]+' floatn = decn + r'\.e?' + decn string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'" declkw = r'(?:res|d)[bwdqt]|times' register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|' r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]') wordop = r'seg|wrt|strict' type = r'byte|[dq]?word' directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' r'COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE') flags = re.IGNORECASE | re.MULTILINE tokens = { 'root': [ include('whitespace'), (r'^\s*%', Comment.Preproc, 'preproc'), (identifier + ':', Name.Label), (directives, Keyword, 'instruction-args'), (r'(%s)\s+(equ)' % identifier, bygroups(Name.Constant, Keyword.Declaration), 'instruction-args'), (declkw, Keyword.Declaration, 'instruction-args'), (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text) ], 'instruction-args': [(string, String), (hexn, Number.Hex), (octn, Number.Oct), (binn, Number), (floatn, Number.Float), (decn, Number.Integer), include('punctuation'), (register, Name.Builtin), (identifier, Name.Variable), (r'[\r\n]+', Text, '#pop'), include('whitespace')], 'preproc': [ (r'[^;\n]+', Comment.Preproc), (r';.*?\n', Comment.Single, '#pop'), (r'\n', Comment.Preproc, '#pop'), ], 'whitespace': [(r'\n', Text), (r'[ \t]+', Text), (r';.*', Comment.Single)], 'punctuation': [(r'[,():\[\]]+', Punctuation), (r'[&|^<>+*/%~-]+', Operator), (r'[$]+', Keyword.Constant), (wordop, Operator.Word), (type, Keyword.Type)], }
class CheetahLexer(RegexLexer): """ Generic `cheetah templates`_ lexer. Code that isn't Cheetah markup is yielded as `Token.Other`. This also works for `spitfire templates`_ which use the same syntax. .. _cheetah templates: http://www.cheetahtemplate.org/ .. _spitfire templates: http://code.google.com/p/spitfire/ """ name = 'Cheetah' aliases = ['cheetah', 'spitfire'] filenames = ['*.tmpl', '*.spt'] mimetypes = ['application/x-cheetah', 'application/x-spitfire'] tokens = { 'root': [ (r'(##[^\n]*)$', (bygroups(Comment))), (r'#[*](.|\n)*?[*]#', Comment), (r'#end[^#\n]*(?:#|$)', Comment.Preproc), (r'#slurp$', Comment.Preproc), (r'(#[a-zA-Z]+)([^#\n]*)(#|$)', (bygroups(Comment.Preproc, using(CheetahPythonLexer), Comment.Preproc))), # TODO support other Python syntax like $foo['bar'] (r'(\$)([a-zA-Z_][a-zA-Z0-9_\.]*[a-zA-Z0-9_])', bygroups(Comment.Preproc, using(CheetahPythonLexer))), (r'(\$\{!?)(.*?)(\})(?s)', bygroups(Comment.Preproc, using(CheetahPythonLexer), Comment.Preproc)), (r'''(?sx) (.+?) # anything, followed by: (?: (?=[#][#a-zA-Z]*) | # an eval comment (?=\$[a-zA-Z_{]) | # a substitution \Z # end of string ) ''', Other), (r'\s+', Text), ], }
class MyghtyLexer(RegexLexer): """ Generic `myghty templates`_ lexer. Code that isn't Myghty markup is yielded as `Token.Other`. *New in Pygments 0.6.* .. _myghty templates: http://www.myghty.org/ """ name = 'Myghty' aliases = ['myghty'] filenames = ['*.myt', 'autodelegate'] mimetypes = ['application/x-myghty'] tokens = { 'root': [ (r'\s+', Text), (r'(<%(def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', bygroups(Name.Tag, None, Text, Name.Function, Name.Tag, using(this), Name.Tag)), (r'(<%(\w+))(.*?)(>)(.*?)(</%\2\s*>)(?s)', bygroups(Name.Tag, None, Name.Function, Name.Tag, using(PythonLexer), Name.Tag)), (r'(<&[^|])(.*?)(,.*?)?(&>)', bygroups(Name.Tag, Name.Function, using(PythonLexer), Name.Tag)), (r'(<&\|)(.*?)(,.*?)?(&>)(?s)', bygroups(Name.Tag, Name.Function, using(PythonLexer), Name.Tag)), (r'</&>', Name.Tag), (r'(<%!?)(.*?)(%>)(?s)', bygroups(Name.Tag, using(PythonLexer), Name.Tag)), (r'(?<=^)#[^\n]*(\n|\Z)', Comment), (r'(?<=^)(%)([^\n]*)(\n|\Z)', bygroups(Name.Tag, using(PythonLexer), Other)), (r"""(?sx) (.+?) # anything, followed by: (?: (?<=\n)(?=[%#]) | # an eval or comment line (?=</?[%&]) | # a substitution or block or # call start or end # - don't consume (\\\n) | # an escaped newline \Z # end of string )""", bygroups(Other, Operator)), ] }
class MxmlLexer(RegexLexer): """ For MXML markup. Nested AS3 in <script> tags is highlighted by the appropriate lexer. """ flags = re.MULTILINE | re.DOTALL name = 'MXML' aliases = ['mxml'] filenames = ['*.mxml'] mimetimes = ['text/xml', 'application/xml'] tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'(\<\!\[CDATA\[)(.*?)(\]\]\>)', bygroups(String, using(ActionScript3Lexer), String)), ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'), (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ ('\s+', Text), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }
class CommonLispLexer(RegexLexer): """ A Common Lisp lexer. *New in Pygments 0.9.* """ name = 'Common Lisp' aliases = ['common-lisp', 'cl'] filenames = ['*.cl', '*.lisp', '*.el'] # use for Elisp too mimetypes = ['text/x-common-lisp'] flags = re.IGNORECASE | re.MULTILINE ### couple of useful regexes # characters that are not macro-characters and can be used to begin a symbol nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]' constituent = nonmacro + '|[#.:]' terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters ### symbol token, reverse-engineered from hyperspec # Take a deep breath... symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent) def __init__(self, **options): from pygments3.lexers._clbuiltins import BUILTIN_FUNCTIONS, \ SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \ BUILTIN_TYPES, BUILTIN_CLASSES self.builtin_function = BUILTIN_FUNCTIONS self.special_forms = SPECIAL_FORMS self.macros = MACROS self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS self.declarations = DECLARATIONS self.builtin_types = BUILTIN_TYPES self.builtin_classes = BUILTIN_CLASSES RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Variable: if value in self.builtin_function: yield index, Name.Builtin, value continue if value in self.special_forms: yield index, Keyword, value continue if value in self.macros: yield index, Name.Builtin, value continue if value in self.lambda_list_keywords: yield index, Keyword, value continue if value in self.declarations: yield index, Keyword, value continue if value in self.builtin_types: yield index, Keyword.Type, value continue if value in self.builtin_classes: yield index, Name.Class, value continue yield index, token, value tokens = { 'root' : [ ('', Text, 'body'), ], 'multiline-comment' : [ (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19) (r'\|#', Comment.Multiline, '#pop'), (r'[^|#]+', Comment.Multiline), (r'[|#]', Comment.Multiline), ], 'commented-form' : [ (r'\(', Comment.Preproc, '#push'), (r'\)', Comment.Preproc, '#pop'), (r'[^()]+', Comment.Preproc), ], 'body' : [ # whitespace (r'\s+', Text), # single-line comment (r';.*$', Comment.Single), # multi-line comment (r'#\|', Comment.Multiline, 'multiline-comment'), # encoding comment (?) (r'#\d*Y.*$', Comment.Special), # strings and characters (r'"(\\.|[^"])*"', String), # quoting (r":" + symbol, String.Symbol), (r"'" + symbol, String.Symbol), (r"'", Operator), (r"`", Operator), # decimal numbers (r'[-+]?\d+\.?' + terminated, Number.Integer), (r'[-+]?\d+/\d+' + terminated, Number), (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \ + terminated, Number.Float), # sharpsign strings and characters (r"#\\." + terminated, String.Char), (r"#\\" + symbol, String.Char), # vector (r'#\(', Operator, 'body'), # bitstring (r'#\d*\*[01]*', Literal.Other), # uninterned symbol (r'#:' + symbol, String.Symbol), # read-time and load-time evaluation (r'#[.,]', Operator), # function shorthand (r'#\'', Name.Function), # binary rational (r'#[bB][+-]?[01]+(/[01]+)?', Number), # octal rational (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct), # hex rational (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex), # radix rational (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number), # complex (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'), # array (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'), # structure (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'), # path (r'#[pP]?"(\\.|[^"])*"', Literal.Other), # reference (r'#\d+=', Operator), (r'#\d+#', Operator), # read-time comment (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'), # read-time conditional (r'#[+-]', Operator), # special operators that should have been parsed already (r'(,@|,|\.)', Operator), # special constants (r'(t|nil)' + terminated, Name.Constant), # functions and variables (r'\*' + symbol + '\*', Name.Variable.Global), (symbol, Name.Variable), # parentheses (r'\(', Punctuation, 'body'), (r'\)', Punctuation, '#pop'), ], }
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. *New in Pygments 0.8.* """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' + '[^' + _escape( uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc')) + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in list(levels.items()): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment), (r'/[*](.|\n)*?[*]/', Comment), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(\\\\|\\"|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|base|break|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|lock|new|null|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [(cs_ident, Name.Class, '#pop')], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class ActionScript3Lexer(RegexLexer): """ For ActionScript 3 source code. *New in Pygments 0.11.* """ name = 'ActionScript 3' aliases = ['as3', 'actionscript3'] filenames = ['*.as'] mimetypes = [ 'application/x-actionscript', 'text/x-actionscript', 'text/actionscript' ] identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*' flags = re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'\s+', Text), (r'(function\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword.Declaration, Name.Function, Text, Operator), 'funcparams'), (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r')', bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text, Keyword.Type)), (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)', bygroups(Keyword, Text, Name.Namespace, Text)), (r'(new)(\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex), (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' r'switch|import|include|as|is)\b', Keyword), (r'(class|public|final|internal|native|override|private|protected|' r'static|import|extends|implements|interface|intrinsic|return|super|' r'dynamic|function|const|get|namespace|package|set)\b', Keyword.Declaration), (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b', Keyword.Constant), (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|' r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|' r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|' r'unescape)\b', Name.Function), (identifier, Name), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'[~\^\*!%&<>\|+=:;,/?\\{}\[\]();.-]+', Operator), ], 'funcparams': [(r'\s+', Text), (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r'|\*)(\s*)', bygroups(Text, Punctuation, Name, Text, Operator, Text, Keyword.Type, Text), 'defval'), (r'\)', Operator, 'type')], 'type': [(r'(\s*)(:)(\s*)(' + identifier + r'|\*)', bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'), (r'\s*', Text, '#pop:2')], 'defval': [(r'(=)(\s*)([^(),]+)(\s*)(,?)', bygroups(Operator, Text, using(this), Text, Operator), '#pop'), (r',?', Operator, '#pop')] } def analyse_text(text): if re.match(r'\w+\s*:\s*\w', text): return 0.3 return 0.1
class PhpLexer(RegexLexer): """ For `PHP <http://www.php.net/>`_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``<?php`` required). The default is ``False``. `funcnamehighlighting` If given and ``True``, highlight builtin function names (default: ``True``). `disabledmodules` If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted except the special ``'unknown'`` module that includes functions that are known to php but are undocumented. To get a list of allowed modules have a look into the `_phpbuiltins` module: .. sourcecode:: pycon >>> from pygments3.lexers._phpbuiltins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]'] mimetypes = ['text/x-php'] flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other)], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String), (r'\s+', Text), (r'#.*?\n', Comment), (r'//.*?\n', Comment), (r'/\*\*/', Comment), # put the empty comment here, it is otherwise # seen as the start of a docstring (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment), (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/?@-]+', Operator), (r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|__FILE__|' r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|' r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this)\b', Keyword), ('(true|false|null)\b', Keyword.Constant), (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable), (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'classname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'functionname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')], 'string': [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]+', String.Double)], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt(options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt(options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from pygments3.lexers._phpbuiltins import MODULES for key, value in MODULES.items(): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 if '?>' in text: rv += 0.1 return rv
class MakoLexer(RegexLexer): """ Generic `mako templates`_ lexer. Code that isn't Mako markup is yielded as `Token.Other`. *New in Pygments 0.7.* .. _mako templates: http://www.makotemplates.org/ """ name = 'Mako' aliases = ['mako'] filenames = ['*.mao'] mimetypes = ['application/x-mako'] tokens = { 'root': [ (r'(\s*)(%)(\s*end(?:\w+))(\n|\Z)', bygroups(Text, Comment.Preproc, Keyword, Other)), (r'(\s*)(%)([^\n]*)(\n|\Z)', bygroups(Text, Comment.Preproc, using(PythonLexer), Other)), (r'(\s*)(##[^\n]*)(\n|\Z)', bygroups(Text, Comment.Preproc, Other)), (r'(?s)<%doc>.*?</%doc>', Comment.Preproc), (r'(<%)([\w\.\:]+)', bygroups(Comment.Preproc, Name.Builtin), 'tag'), (r'(</%)([\w\.\:]+)(>)', bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc)), (r'<%(?=([\w\.\:]+))', Comment.Preproc, 'ondeftags'), (r'(<%(?:!?))(.*?)(%>)(?s)', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'(\$\{)(.*?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'''(?sx) (.+?) # anything, followed by: (?: (?<=\n)(?=%|\#\#) | # an eval or comment line (?=\#\*) | # multiline comment (?=</?%) | # a python block # call start or end (?=\$\{) | # a substitution (?<=\n)(?=\s*%) | # - don't consume (\\\n) | # an escaped newline \Z # end of string ) ''', bygroups(Other, Operator)), (r'\s+', Text), ], 'ondeftags': [ (r'<%', Comment.Preproc), (r'(?<=<%)(include|inherit|namespace|page)', Name.Builtin), include('tag'), ], 'tag': [ (r'((?:\w+)\s*=)\s*(".*?")', bygroups(Name.Attribute, String)), (r'/?\s*>', Comment.Preproc, '#pop'), (r'\s+', Text), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }
class VbNetLexer(RegexLexer): """ For `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_ source code. """ name = 'VB.net' aliases = ['vb.net', 'vbnet'] filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^\s*<.*?>', Name.Attribute), (r'\s+', Text), (r'\n', Text), (r'rem\b.*?\n', Comment), (r"'.*?\n", Comment), (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#End\s+If|#Const|' r'#ExternalSource.*?\n|#End\s+ExternalSource|' r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc), (r'[\(\){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), (r'(?<!\.)(AddHandler|Alias|' r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|' r'CDec|CDbl|CInt|CLng|CObj|Const|Continue|CSByte|CShort|' r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|' r'Default|Delegate|Dim|DirectCast|Do|Each|Else|ElseIf|' r'End|EndIf|Enum|Erase|Error|Event|Exit|False|Finally|For|' r'Friend|Function|Get|Global|GoSub|GoTo|Handles|If|' r'Implements|Imports|Inherits|Interface|' r'Let|Lib|Loop|Me|Module|MustInherit|' r'MustOverride|MyBase|MyClass|Namespace|Narrowing|New|Next|' r'Not|Nothing|NotInheritable|NotOverridable|Of|On|' r'Operator|Option|Optional|Overloads|Overridable|' r'Overrides|ParamArray|Partial|Private|Property|Protected|' r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|' r'Return|Select|Set|Shadows|Shared|Single|' r'Static|Step|Stop|Structure|Sub|SyncLock|Then|' r'Throw|To|True|Try|TryCast|Wend|' r'Using|When|While|Widening|With|WithEvents|' r'WriteOnly)\b', Keyword), (r'(?<!\.)(Function|Sub|Property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(?<!\.)(Class|Structure|Enum)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(?<!\.)(Namespace|Imports)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|' r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|' r'UShort)\b', Keyword.Type), (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|' r'Or|OrElse|TypeOf|Xor)\b', Operator.Word), (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|' r'<=|>=|<>|[-&*/\\^+=<>]', Operator), ('"', String, 'string'), ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name), ('#.*?#', Literal.Date), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'_\n', Text), # Line continuation ], 'string': [ (r'""', String), (r'"C?', String, '#pop'), (r'[^"]+', String), ], 'funcname': [(r'[a-z_][a-z0-9_]*', Name.Function, '#pop')], 'classname': [(r'[a-z_][a-z0-9_]*', Name.Class, '#pop')], 'namespace': [(r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop')], }
class HaskellLexer(RegexLexer): """ A Haskell lexer based on the lexemes defined in the Haskell 98 Report. *New in Pygments 0.8.* """ name = 'Haskell' aliases = ['haskell', 'hs'] filenames = ['*.hs'] mimetypes = ['text/x-haskell'] reserved = ['case','class','data','default','deriving','do','else', 'if','in','infix[lr]?','instance', 'let','newtype','of','then','type','where','_'] ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK', 'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE', 'DC[1-4]','NAK','SYN','ETB','CAN', 'EM','SUB','ESC','[FGRU]S','SP','DEL'] tokens = { 'root': [ # Whitespace: (r'\s+', Text), #(r'--\s*|.*$', Comment.Doc), (r'--.*$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), # Lexemes: # Identifiers (r'\bimport\b', Keyword.Reserved, 'import'), (r'\bmodule\b', Keyword.Reserved, 'module'), (r'\berror\b', Name.Exception), (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved), (r'^[_a-z][\w\']*', Name.Function), (r'[_a-z][\w\']*', Name), (r'[A-Z][\w\']*', Keyword.Type), # Operators (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators # Numbers (r'\d+[eE][+-]?\d+', Number.Float), (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float), (r'0[oO][0-7]+', Number.Oct), (r'0[xX][\da-fA-F]+', Number.Hex), (r'\d+', Number.Integer), # Character/String Literals (r"'", String.Char, 'character'), (r'"', String, 'string'), # Special (r'\[\]', Keyword.Type), (r'\(\)', Name.Builtin), (r'[][(),;`{}]', Punctuation), ], 'import': [ # Import statements (r'\s+', Text), # after "funclist" state (r'\)', Punctuation, '#pop'), (r'qualified\b', Keyword), # import X as Y (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)', bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'), # import X hiding (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()', bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'), # import X (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), # import X (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'), ], 'module': [ (r'\s+', Text), (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), ], 'funclist': [ (r'\s+', Text), (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), (r'[_a-z][\w\']+', Name.Function), (r'--.*$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), (r',', Punctuation), (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # (HACK, but it makes sense to push two instances, believe me) (r'\(', Punctuation, ('funclist', 'funclist')), (r'\)', Punctuation, '#pop:2'), ], 'comment': [ # Multiline Comments (r'[^-{}]+', Comment.Multiline), (r'{-', Comment.Multiline, '#push'), (r'-}', Comment.Multiline, '#pop'), (r'[-{}]', Comment.Multiline), ], 'character': [ # Allows multi-chars, incorrectly. (r"[^\\']", String.Char), (r"\\", String.Escape, 'escape'), ("'", String.Char, '#pop'), ], 'string': [ (r'[^\\"]+', String), (r"\\", String.Escape, 'escape'), ('"', String, '#pop'), ], 'escape': [ (r'[abfnrtv"\'&\\]', String.Escape, '#pop'), (r'\^[][A-Z@\^_]', String.Escape, '#pop'), ('|'.join(ascii), String.Escape, '#pop'), (r'o[0-7]+', String.Escape, '#pop'), (r'x[\da-fA-F]+', String.Escape, '#pop'), (r'\d+', String.Escape, '#pop'), (r'\n\s+\\', String.Escape, '#pop'), ], }
class MatlabLexer(RegexLexer): """ For Matlab (or GNU Octave) source code. Contributed by Ken Schutte <*****@*****.**>. *New in Pygments 0.10.* """ name = 'Matlab' aliases = ['matlab', 'octave'] filenames = ['*.m'] mimetypes = ['text/matlab'] # # These lists are generated automatically. # Run the following in bash shell: # # for f in elfun specfun elmat; do # echo -n "$f = " # matlab -nojvm -r "help $f;exit;" | perl -ne \ # 'push(@c,$1) if /^ (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}' # done # # elfun: Elementary math functions # specfun: Special Math functions # elmat: Elementary matrices and matrix manipulation # # taken from Matlab version 7.4.0.336 (R2007a) # elfun = [ "sin", "sind", "sinh", "asin", "asind", "asinh", "cos", "cosd", "cosh", "acos", "acosd", "acosh", "tan", "tand", "tanh", "atan", "atand", "atan2", "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd", "csch", "acsc", "acscd", "acsch", "cot", "cotd", "coth", "acot", "acotd", "acoth", "hypot", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2", "realpow", "reallog", "realsqrt", "sqrt", "nthroot", "nextpow2", "abs", "angle", "complex", "conj", "imag", "real", "unwrap", "isreal", "cplxpair", "fix", "floor", "ceil", "round", "mod", "rem", "sign" ] specfun = [ "airy", "besselj", "bessely", "besselh", "besseli", "besselk", "beta", "betainc", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx", "erfinv", "expint", "gamma", "gammainc", "gammaln", "psi", "legendre", "cross", "dot", "factor", "isprime", "primes", "gcd", "lcm", "rat", "rats", "perms", "nchoosek", "factorial", "cart2sph", "cart2pol", "pol2cart", "sph2cart", "hsv2rgb", "rgb2hsv" ] elmat = [ "zeros", "ones", "eye", "repmat", "rand", "randn", "linspace", "logspace", "freqspace", "meshgrid", "accumarray", "size", "length", "ndims", "numel", "disp", "isempty", "isequal", "isequalwithequalnans", "cat", "reshape", "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flipdim", "rot90", "find", "end", "sub2ind", "ind2sub", "bsxfun", "ndgrid", "permute", "ipermute", "shiftdim", "circshift", "squeeze", "isscalar", "isvector", "ans", "eps", "realmax", "realmin", "pi", "i", "inf", "nan", "isnan", "isinf", "isfinite", "j", "why", "compan", "gallery", "hadamard", "hankel", "hilb", "invhilb", "magic", "pascal", "rosser", "toeplitz", "vander", "wilkinson" ] tokens = { 'root': [ # line starting with '!' is sent as a system command. not sure what # label to use... (r'^!.*', String.Other), (r'%.*$', Comment), (r'^\s*function', Keyword, 'deffunc'), # from 'iskeyword' on version 7.4.0.336 (R2007a): (r'(break|case|catch|classdef|continue|else|elseif|end|for|function|' r'global|if|otherwise|parfor|persistent|return|switch|try|while)\b', Keyword), ("(" + "|".join(elfun + specfun + elmat) + r')\b', Name.Builtin), # operators: (r'-|==|~=|<|>|<=|>=|&&|&|~', Operator), # operators requiring escape for re: (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator), # punctuation: (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation), (r'=|:|;', Punctuation), # quote can be transpose, instead of string: # (not great, but handles common cases...) (r'([\w\)\]]+)(\')', bygroups(Text, Operator)), (r'\'', String, 'string'), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'.', Text), ], 'string': [(r'[^\']*\'', String, '#pop')], 'deffunc': [ (r'(\s*)(.+)(\s*)(=)(\s*)(.+)(\()(.*)(\))(\s*)', bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation, Text.Whitespace, Name.Function, Punctuation, Text, Punctuation, Text.Whitespace), '#pop'), ], } def analyse_text(text): if re.match('^\s*%', text, re.M): # comment return 0.9 elif re.match('^!\w+', text, re.M): # system cmd return 0.9 return 0.1
class MuPADLexer(RegexLexer): """ A `MuPAD <http://www.mupad.com>`_ lexer. Contributed by Christopher Creutzig <*****@*****.**>. *New in Pygments 0.8.* """ name = 'MuPAD' aliases = ['mupad'] filenames = ['*.mu'] tokens = { 'root': [ (r'//.*?$', Comment.Single), (r'/\*', Comment.Multiline, 'comment'), (r'"(?:[^"\\]|\\.)*"', String), (r'\(|\)|\[|\]|\{|\}', Punctuation), (r'''(?x)\b(?: next|break|end| axiom|end_axiom|category|end_category|domain|end_domain|inherits| if|%if|then|elif|else|end_if| case|of|do|otherwise|end_case| while|end_while| repeat|until|end_repeat| for|from|to|downto|step|end_for| proc|local|option|save|begin|end_proc| delete|frame )\b''', Keyword), (r'''(?x)\b(?: DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR| DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT| DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC| DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR )\b''', Name.Class), (r'''(?x)\b(?: PI|EULER|E|CATALAN| NIL|FAIL|undefined|infinity| TRUE|FALSE|UNKNOWN )\b''', Name.Constant), (r'\b(?:dom|procname)\b', Name.Builtin.Pseudo), (r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator), (r'''(?x)\b(?: and|or|not|xor| assuming| div|mod| union|minus|intersect|in|subset )\b''', Operator.Word), (r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number), #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin), (r'''(?x) ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''', bygroups(Name.Function, Punctuation)), (r'''(?x) (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable), (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number), (r'\.[0-9]+(?:e[0-9]+)?', Number), (r'.', Text) ], 'comment': [(r'[^*/]', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline)] }
class ErlangLexer(RegexLexer): """ For the Erlang functional programming language. Blame Jeremy Thurgood (http://jerith.za.net/). *New in Pygments 0.9.* """ name = 'Erlang' aliases = ['erlang'] filenames = ['*.erl', '*.hrl'] mimetypes = ['text/x-erlang'] keywords = [ 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive', 'try', 'when', ] builtins = [ # See erlang(3) man page 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', 'float', 'float_to_list', 'fun_info', 'fun_to_list', 'function_exported', 'garbage_collect', 'get', 'get_keys', 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', 'pid_to_list', 'port_close', 'port_command', 'port_connect', 'port_control', 'port_call', 'port_info', 'port_to_list', 'process_display', 'process_flag', 'process_info', 'purge_module', 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', 'spawn_opt', 'split_binary', 'start_timer', 'statistics', 'suspend_process', 'system_flag', 'system_info', 'system_monitor', 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' ] operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)' word_operators = [ 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div', 'not', 'or', 'orelse', 'rem', 'xor' ] atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')" variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)' escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' tokens = { 'root': [ (r'\s+', Text), (r'%.*\n', Comment), ('(' + '|'.join(keywords) + r')\b', Keyword), ('(' + '|'.join(builtins) + r')\b', Name.Builtin), ('(' + '|'.join(word_operators) + r')\b', Operator.Word), (r'^-', Punctuation, 'directive'), (operators, Operator), (r'"', String, 'string'), (r'<<', Name.Label), (r'>>', Name.Label), (r'('+atom_re+')(:)', bygroups(Name.Namespace, Punctuation)), (r'^('+atom_re+r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer), (r'[+-]?\d+', Number.Integer), (r'[+-]?\d+.\d+', Number.Float), (r'[][:_@\".{}()|;,]', Punctuation), (variable_re, Name.Variable), (atom_re, Name), (r'\?'+macro_re, Name.Constant), (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), ], 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], 'directive': [ (r'(define)(\s*)(\()('+macro_re+r')', bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), (r'(record)(\s*)(\()('+macro_re+r')', bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), (atom_re, Name.Entity, '#pop'), ], }
class DjangoLexer(RegexLexer): """ Generic `django <http://www.djangoproject.com/documentation/templates/>`_ and `jinja <http://wsgiarea.pocoo.org/jinja/>`_ template lexer. It just highlights django/jinja code between the preprocessor directives, other data is left untouched by the lexer. """ name = 'Django/Jinja' aliases = ['django', 'jinja'] mimetypes = ['application/x-django-templating', 'application/x-jinja'] flags = re.M | re.S tokens = { 'root': [ (r'[^{]+', Other), (r'\{\{', Comment.Preproc, 'var'), # jinja/django comments (r'\{[*#].*?[*#]\}', Comment), # django comments (r'(\{%)(-?\s*)(comment)(\s*-?)(%\})(.*?)' r'(\{%)(-?\s*)(endcomment)(\s*-?)(%\})', bygroups(Comment.Preproc, Text, Keyword, Text, Comment.Preproc, Comment, Comment.Preproc, Text, Keyword, Text, Comment.Preproc)), # raw jinja blocks (r'(\{%)(-?\s*)(raw)(\s*-?)(%\})(.*?)' r'(\{%)(-?\s*)(endraw)(\s*-?)(%\})', bygroups(Comment.Preproc, Text, Keyword, Text, Comment.Preproc, Text, Comment.Preproc, Text, Keyword, Text, Comment.Preproc)), # filter blocks (r'(\{%)(-?\s*)(filter)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Comment.Preproc, Text, Keyword, Text, Name.Function), 'block'), (r'(\{%)(-?\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Comment.Preproc, Text, Keyword), 'block'), (r'\{', Other) ], 'varnames': [ (r'(\|)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Text, Name.Function)), (r'(is)(\s+)(not)?(\s+)?([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Keyword, Text, Name.Function)), (r'(_|true|false|none|True|False|None)\b', Keyword.Pseudo), (r'(in|as|reversed|recursive|not|and|or|is|if|else|import|' r'with(?:(?:out)?\s*context)?)\b', Keyword), (r'(loop|block|super|forloop)\b', Name.Builtin), (r'[a-zA-Z][a-zA-Z0-9_]*', Name.Variable), (r'\.[a-zA-Z0-9_]+', Name.Variable), (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), (r'([{}()\[\]+\-*/,:]|[><=]=?)', Operator), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), ], 'var': [(r'\s+', Text), (r'(-?)(\}\})', bygroups(Text, Comment.Preproc), '#pop'), include('varnames')], 'block': [(r'\s+', Text), (r'(-?)(%\})', bygroups(Text, Comment.Preproc), '#pop'), include('varnames'), (r'.', Punctuation)] } def analyse_text(text): rv = 0.0 if re.search(r'\{%\s*(block|extends)', text) is not None: rv += 0.4 if re.search(r'\{%\s*if\s*.*?%\}', text) is not None: rv += 0.1 if re.search(r'\{\{.*?\}\}', text) is not None: rv += 0.1 return rv