class GasLexer(RegexLexer): """ For Gas (AT&T) assembly code. """ name = 'GAS' aliases = ['gas'] filenames = ['*.s', '*.S'] mimetypes = ['text/x-gas'] #: optional Comment or Whitespace string = r'"(\\"|[^"])*"' char = r'[a-zA-Z$._0-9@]' identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)' number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' tokens = { 'root': [ include('whitespace'), (identifier + ':', Name.Label), (r'\.' + identifier, Name.Attribute, 'directive-args'), (r'lock|rep(n?z)?|data\d+', Name.Attribute), (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text) ], 'directive-args': [(identifier, Name.Constant), (string, String), ('@' + identifier, Name.Attribute), (number, Number.Integer), (r'[\r\n]+', Text, '#pop'), (r'#.*?$', Comment, '#pop'), include('punctuation'), include('whitespace')], 'instruction-args': [ # For objdump-disassembled code, shouldn't occur in # actual assembler input ('([a-z0-9]+)( )(<)(' + identifier + ')(>)', bygroups(Number.Hex, Text, Punctuation, Name.Constant, Punctuation)), ('([a-z0-9]+)( )(<)(' + identifier + ')([-+])(' + number + ')(>)', bygroups(Number.Hex, Text, Punctuation, Name.Constant, Punctuation, Number.Integer, Punctuation)), # Address constants (identifier, Name.Constant), (number, Number.Integer), # Registers ('%' + identifier, Name.Variable), # Numeric constants ('$' + number, Number.Integer), (r'[\r\n]+', Text, '#pop'), (r'#.*?$', Comment, '#pop'), include('punctuation'), include('whitespace') ], 'whitespace': [(r'\n', Text), (r'\s+', Text), (r'#.*?\n', Comment)], 'punctuation': [(r'[-*,.():]+', Punctuation)] } def analyse_text(text): return re.match(r'^\.\w+', text, re.M)
class BooLexer(RegexLexer): """ For `Boo <http://boo.codehaus.org/>`_ source code. """ name = 'Boo' aliases = ['boo'] filenames = ['*.boo'] mimetypes = ['text/x-boo'] tokens = { 'root': [ (r'\s+', Text), (r'(#|//).*$', Comment.Single), (r'/[*]', Comment.Multiline, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'/(\\\\|\\/|[^/\s])/', String.Regex), (r'@/(\\\\|\\/|[^/])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' r'partial|private|protected|public|return|set|static|' r'struct|transient|virtual|yield|super|and|break|cast|' r'continue|elif|else|ensure|except|for|given|goto|if|in|' r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|' r'while|from|as)\b', Keyword), (r'def(?=\s+\(.*?\))', Keyword), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|' r'assert|checked|enumerate|filter|getter|len|lock|map|' r'matrix|max|min|normalArrayIndexing|print|property|range|' r'rawArrayIndexing|required|typeof|unchecked|using|' r'yieldAll|zip)\b', Name.Builtin), ('"""(\\\\|\\"|.*?)"""', String.Double), ('"(\\\\|\\"|[^"]*?)"', String.Double), ("'(\\\\|\\'|[^']*?)'", String.Single), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'[0-9][0-9\.]*(m|ms|d|h|s)', Number), (r'0\d+', Number.Oct), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer), ], 'comment': [('/[*]', Comment.Multiline, '#push'), ('[*]/', Comment.Multiline, '#pop'), ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline)], 'funcname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')], 'classname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'namespace': [('[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop')] }
class ObjdumpLexer(RegexLexer): """ For the output of 'objdump -dr' """ name = 'objdump' aliases = ['objdump'] filenames = ['*.objdump'] mimetypes = ['text/x-objdump'] hex = r'[0-9A-Za-z]' tokens = { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('(' + hex + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('(' + hex + '+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(GasLexer))), # Code line with ascii ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes ('\t\.\.\.$', Text), # Relocation line # (With offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), ('[^\n]+\n', Other) ] }
class NasmLexer(RegexLexer): """ For Nasm (Intel) assembly code. """ name = 'NASM' aliases = ['nasm'] filenames = ['*.asm', '*.ASM'] mimetypes = ['text/x-nasm'] identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*' hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)' octn = r'[0-7]+q' binn = r'[01]+b' decn = r'[0-9]+' floatn = decn + r'\.e?' + decn string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'" declkw = r'(?:res|d)[bwdqt]|times' register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|' r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]') wordop = r'seg|wrt|strict' type = r'byte|[dq]?word' directives = ( r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|' r'EXPORT|LIBRARY|MODULE') flags = re.IGNORECASE | re.MULTILINE tokens = { 'root': [ include('whitespace'), (r'^\s*%', Comment.Preproc, 'preproc'), (identifier + ':', Name.Label), (r'(%s)(\s+)(equ)' % identifier, bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration), 'instruction-args'), (directives, Keyword, 'instruction-args'), (declkw, Keyword.Declaration, 'instruction-args'), (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text) ], 'instruction-args': [(string, String), (hexn, Number.Hex), (octn, Number.Oct), (binn, Number), (floatn, Number.Float), (decn, Number.Integer), include('punctuation'), (register, Name.Builtin), (identifier, Name.Variable), (r'[\r\n]+', Text, '#pop'), include('whitespace')], 'preproc': [ (r'[^;\n]+', Comment.Preproc), (r';.*?\n', Comment.Single, '#pop'), (r'\n', Comment.Preproc, '#pop'), ], 'whitespace': [(r'\n', Text), (r'[ \t]+', Text), (r';.*', Comment.Single)], 'punctuation': [(r'[,():\[\]]+', Punctuation), (r'[&|^<>+*/%~-]+', Operator), (r'[$]+', Keyword.Constant), (wordop, Operator.Word), (type, Keyword.Type)], }
class GenericAspxLexer(RegexLexer): """ Lexer for ASP.NET pages. """ name = 'aspx-gen' filenames = [] mimetypes = [] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(XmlLexer), Other, using(XmlLexer))), (r'(.+?)(?=<)', using(XmlLexer)), (r'.+', using(XmlLexer)), ], }
class MatlabLexer(RegexLexer): """ For Matlab (or GNU Octave) source code. Contributed by Ken Schutte <*****@*****.**>. *New in Pygments 0.10.* """ name = 'Matlab' aliases = ['matlab', 'octave'] filenames = ['*.m'] mimetypes = ['text/matlab'] # # These lists are generated automatically. # Run the following in bash shell: # # for f in elfun specfun elmat; do # echo -n "$f = " # matlab -nojvm -r "help $f;exit;" | perl -ne \ # 'push(@c,$1) if /^ (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}' # done # # elfun: Elementary math functions # specfun: Special Math functions # elmat: Elementary matrices and matrix manipulation # # taken from Matlab version 7.4.0.336 (R2007a) # elfun = ["sin","sind","sinh","asin","asind","asinh","cos","cosd","cosh", "acos","acosd","acosh","tan","tand","tanh","atan","atand","atan2", "atanh","sec","secd","sech","asec","asecd","asech","csc","cscd", "csch","acsc","acscd","acsch","cot","cotd","coth","acot","acotd", "acoth","hypot","exp","expm1","log","log1p","log10","log2","pow2", "realpow","reallog","realsqrt","sqrt","nthroot","nextpow2","abs", "angle","complex","conj","imag","real","unwrap","isreal","cplxpair", "fix","floor","ceil","round","mod","rem","sign"] specfun = ["airy","besselj","bessely","besselh","besseli","besselk","beta", "betainc","betaln","ellipj","ellipke","erf","erfc","erfcx", "erfinv","expint","gamma","gammainc","gammaln","psi","legendre", "cross","dot","factor","isprime","primes","gcd","lcm","rat", "rats","perms","nchoosek","factorial","cart2sph","cart2pol", "pol2cart","sph2cart","hsv2rgb","rgb2hsv"] elmat = ["zeros","ones","eye","repmat","rand","randn","linspace","logspace", "freqspace","meshgrid","accumarray","size","length","ndims","numel", "disp","isempty","isequal","isequalwithequalnans","cat","reshape", "diag","blkdiag","tril","triu","fliplr","flipud","flipdim","rot90", "find","end","sub2ind","ind2sub","bsxfun","ndgrid","permute", "ipermute","shiftdim","circshift","squeeze","isscalar","isvector", "ans","eps","realmax","realmin","pi","i","inf","nan","isnan", "isinf","isfinite","j","why","compan","gallery","hadamard","hankel", "hilb","invhilb","magic","pascal","rosser","toeplitz","vander", "wilkinson"] tokens = { 'root': [ # line starting with '!' is sent as a system command. not sure what # label to use... (r'^!.*', String.Other), (r'%.*$', Comment), (r'^\s*function', Keyword, 'deffunc'), # from 'iskeyword' on version 7.11 (R2010): (r'(break|case|catch|classdef|continue|else|elseif|end|enumerated|' r'events|for|function|global|if|methods|otherwise|parfor|' r'persistent|properties|return|spmd|switch|try|while)\b', Keyword), ("(" + "|".join(elfun+specfun+elmat) + r')\b', Name.Builtin), # operators: (r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator), # operators requiring escape for re: (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator), # punctuation: (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation), (r'=|:|;', Punctuation), # quote can be transpose, instead of string: # (not great, but handles common cases...) (r'(?<=[\w\)\]])\'', Operator), (r'(?<![\w\)\]])\'', String, 'string'), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'.', Text), ], 'string': [ (r'[^\']*\'', String, '#pop') ], 'deffunc': [ (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)', bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation, Text.Whitespace, Name.Function, Punctuation, Text, Punctuation, Text.Whitespace), '#pop'), ], } def analyse_text(text): if re.match('^\s*%', text, re.M): # comment return 0.9 elif re.match('^!\w+', text, re.M): # system cmd return 0.9 return 0.1
class MuPADLexer(RegexLexer): """ A `MuPAD <http://www.mupad.com>`_ lexer. Contributed by Christopher Creutzig <*****@*****.**>. *New in Pygments 0.8.* """ name = 'MuPAD' aliases = ['mupad'] filenames = ['*.mu'] tokens = { 'root' : [ (r'//.*?$', Comment.Single), (r'/\*', Comment.Multiline, 'comment'), (r'"(?:[^"\\]|\\.)*"', String), (r'\(|\)|\[|\]|\{|\}', Punctuation), (r'''(?x)\b(?: next|break|end| axiom|end_axiom|category|end_category|domain|end_domain|inherits| if|%if|then|elif|else|end_if| case|of|do|otherwise|end_case| while|end_while| repeat|until|end_repeat| for|from|to|downto|step|end_for| proc|local|option|save|begin|end_proc| delete|frame )\b''', Keyword), (r'''(?x)\b(?: DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR| DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT| DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC| DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR )\b''', Name.Class), (r'''(?x)\b(?: PI|EULER|E|CATALAN| NIL|FAIL|undefined|infinity| TRUE|FALSE|UNKNOWN )\b''', Name.Constant), (r'\b(?:dom|procname)\b', Name.Builtin.Pseudo), (r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator), (r'''(?x)\b(?: and|or|not|xor| assuming| div|mod| union|minus|intersect|in|subset )\b''', Operator.Word), (r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number), #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin), (r'''(?x) ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''', bygroups(Name.Function, Punctuation)), (r'''(?x) (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable), (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number), (r'\.[0-9]+(?:e[0-9]+)?', Number), (r'.', Text) ], 'comment' : [ (r'[^*/]', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline) ] }
class AntlrLexer(RegexLexer): """ Generic `ANTLR`_ Lexer. Should not be called directly, instead use DelegatingLexer for your target language. *New in Pygments 1.1.* .. _ANTLR: http://www.antlr.org/ """ name = 'ANTLR' aliases = ['antlr'] filenames = [] _id = r'[A-Za-z][A-Za-z_0-9]*' _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*' _RULE_REF = r'[a-z][A-Za-z_0-9]*' _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\'' _INT = r'[0-9]+' tokens = { 'whitespace': [ (r'\s+', Whitespace), ], 'comments': [ (r'//.*$', Comment), (r'/\*(.|\n)*?\*/', Comment), ], 'root': [ include('whitespace'), include('comments'), (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)', bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class, Punctuation)), # optionsSpec (r'options\b', Keyword, 'options'), # tokensSpec (r'tokens\b', Keyword, 'tokens'), # attrScope (r'(scope)(\s*)(' + _id + ')(\s*)({)', bygroups(Keyword, Whitespace, Name.Variable, Whitespace, Punctuation), 'action'), # exception (r'(catch|finally)\b', Keyword, 'exception'), # action (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, Name.Label, Whitespace, Punctuation), 'action'), # rule (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \ bygroups(Keyword, Whitespace, Name.Label, Punctuation), ('rule-alts', 'rule-prelims')), ], 'exception': [ (r'\n', Whitespace, '#pop'), (r'\s', Whitespace), include('comments'), (r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), ], 'rule-prelims': [ include('whitespace'), include('comments'), (r'returns\b', Keyword), (r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), # throwsSpec (r'(throws)(\s+)(' + _id + ')', bygroups(Keyword, Whitespace, Name.Label)), (r'(?:(,)(\s*)(' + _id + '))+', bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws # optionsSpec (r'options\b', Keyword, 'options'), # ruleScopeSpec - scope followed by target language code or name of action # TODO finish implementing other possibilities for scope # L173 ANTLRv3.g from ANTLR book (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation), 'action'), (r'(scope)(\s+)(' + _id + ')(\s*)(;)', bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)), # ruleAction (r'(@' + _id + ')(\s*)({)', bygroups(Name.Label, Whitespace, Punctuation), 'action'), # finished prelims, go to rule alts! (r':', Punctuation, '#pop') ], 'rule-alts': [ include('whitespace'), include('comments'), # These might need to go in a separate 'block' state triggered by ( (r'options\b', Keyword, 'options'), (r':', Punctuation), # literals (r"'(\\\\|\\'|[^'])*'", String), (r'"(\\\\|\\"|[^"])*"', String), (r'<<([^>]|>[^>])>>', String), # identifiers # Tokens start with capital letter. (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant), # Rules start with small letter. (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable), # operators (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator), (r',', Punctuation), (r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), (r';', Punctuation, '#pop') ], 'tokens': [ include('whitespace'), include('comments'), (r'{', Punctuation), (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL + ')?(\s*)(;)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, String, Whitespace, Punctuation)), (r'}', Punctuation, '#pop'), ], 'options': [ include('whitespace'), include('comments'), (r'{', Punctuation), (r'(' + _id + r')(\s*)(=)(\s*)(' + '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)', bygroups(Name.Variable, Whitespace, Punctuation, Whitespace, Text, Whitespace, Punctuation)), (r'}', Punctuation, '#pop'), ], 'action': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^\${}\'"/\\]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. r'/(?!\*)(\\\\|\\/|[^/])*/', # backslashes are okay, as long as we are not backslashing a % r'\\(?!%)', # Now that we've handled regex and javadoc comments # it's safe to let / through. r'/', )) + r')+', Other), (r'(\\)(%)', bygroups(Punctuation, Other)), (r'(\$[a-zA-Z]+)(\.?)(text|value)?', bygroups(Name.Variable, Punctuation, Name.Property)), (r'{', Punctuation, '#push'), (r'}', Punctuation, '#pop'), ], 'nested-arg-action': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks. r'[^\$\[\]\'"/]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. r'/(?!\*)(\\\\|\\/|[^/])*/', # Now that we've handled regex and javadoc comments # it's safe to let / through. r'/', )) + r')+', Other), (r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), (r'(\$[a-zA-Z]+)(\.?)(text|value)?', bygroups(Name.Variable, Punctuation, Name.Property)), (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other), ] } def analyse_text(text): return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
class RagelEmbeddedLexer(RegexLexer): """ A lexer for `Ragel`_ embedded in a host language file. This will only highlight Ragel statements. If you want host language highlighting then call the language-specific Ragel lexer. *New in Pygments 1.1.* """ name = 'Embedded Ragel' aliases = ['ragel-em'] filenames = ['*.rl'] tokens = { 'root': [ ( r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^%\'"/#]+', # exclude unsafe characters r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression # / is safe now that we've handled regex and javadoc comments r'/', )) + r')+', Other), # Single Line FSM. # Please don't put a quoted newline in a single line FSM. # That's just mean. It will break this. (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation, using(RagelLexer), Punctuation, Text)), # Multi Line FSM. (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'), ], 'multi-line-fsm': [ ( r'(' + r'|'.join(( # keep ragel code in largest possible chunks. r'(' + r'|'.join(( r'[^}\'"\[/#]', # exclude unsafe characters r'}(?=[^%]|$)', # } is okay as long as it's not followed by % r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two... r'[^\\][\\][{}]', # ...and } is okay if it's escaped # allow / if it's preceded with one of these symbols # (ragel EOF actions) r'(>|\$|%|<|@|<>)/', # specifically allow regex followed immediately by * # so it doesn't get mistaken for a comment r'/(?!\*)(\\\\|\\/|[^/])*/\*', # allow / as long as it's not followed by another / or by a * r'/(?=[^/\*]|$)', # We want to match as many of these as we can in one block. # Not sure if we need the + sign here, # does it help performance? )) + r')+', # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment )) + r')+', using(RagelLexer)), (r'}%%', Punctuation, '#pop'), ] } def analyse_text(text): return '@LANG: indep' in text or 0.1
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. *New in Pygments 0.8.* """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' + '[^' + _escape( uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc')) + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in levels.items(): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(\\\\|\\"|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|base|break|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|lock|new|null|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [(cs_ident, Name.Class, '#pop')], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class VbNetLexer(RegexLexer): """ For `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_ source code. """ name = 'VB.net' aliases = ['vb.net', 'vbnet'] filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^\s*<.*?>', Name.Attribute), (r'\s+', Text), (r'\n', Text), (r'rem\b.*?\n', Comment), (r"'.*?\n", Comment), (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#End\s+If|#Const|' r'#ExternalSource.*?\n|#End\s+ExternalSource|' r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc), (r'[\(\){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), (r'(?<!\.)(AddHandler|Alias|' r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|' r'CDec|CDbl|CInt|CLng|CObj|Continue|CSByte|CShort|' r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|' r'Default|Delegate|DirectCast|Do|Each|Else|ElseIf|' r'EndIf|Erase|Error|Event|Exit|False|Finally|For|' r'Friend|Get|Global|GoSub|GoTo|Handles|If|' r'Implements|Inherits|Interface|' r'Let|Lib|Loop|Me|MustInherit|' r'MustOverride|MyBase|MyClass|Narrowing|New|Next|' r'Not|Nothing|NotInheritable|NotOverridable|Of|On|' r'Operator|Option|Optional|Overloads|Overridable|' r'Overrides|ParamArray|Partial|Private|Protected|' r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|' r'Return|Select|Set|Shadows|Shared|Single|' r'Static|Step|Stop|SyncLock|Then|' r'Throw|To|True|Try|TryCast|Wend|' r'Using|When|While|Widening|With|WithEvents|' r'WriteOnly)\b', Keyword), (r'(?<!\.)End\b', Keyword, 'end'), (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'), (r'(?<!\.)(Function|Sub|Property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(?<!\.)(Class|Structure|Enum)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(?<!\.)(Module|Namespace|Imports)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|' r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|' r'UShort)\b', Keyword.Type), (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|' r'Or|OrElse|TypeOf|Xor)\b', Operator.Word), (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|' r'<=|>=|<>|[-&*/\\^+=<>]', Operator), ('"', String, 'string'), ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name), ('#.*?#', Literal.Date), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'_\n', Text), # Line continuation ], 'string': [ (r'""', String), (r'"C?', String, '#pop'), (r'[^"]+', String), ], 'dim': [ (r'[a-z_][a-z0-9_]*', Name.Variable, '#pop'), (r'', Text, '#pop'), # any other syntax ], 'funcname': [ (r'[a-z_][a-z0-9_]*', Name.Function, '#pop'), ], 'classname': [ (r'[a-z_][a-z0-9_]*', Name.Class, '#pop'), ], 'namespace': [ (r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop'), ], 'end': [ (r'\s+', Text), (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b', Keyword, '#pop'), (r'', Text, '#pop'), ] }
class VerilogLexer(RegexLexer): """ For verilog source code with preprocessor directives. *New in Pygments 1.4.* """ name = 'verilog' aliases = ['v'] filenames = ['*.v', '*.sv'] mimetypes = ['text/x-verilog'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' tokens = { 'root': [ (r'^\s*`define', Comment.Preproc, 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), (r'[{}#@]', Punctuation), (r'L?"', String, 'string'), (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'([0-9]+)|(\'h)[0-9a-fA-F]+', Number.Hex), (r'([0-9]+)|(\'b)[0-1]+', Number.Hex), # should be binary (r'([0-9]+)|(\'d)[0-9]+', Number.Integer), (r'([0-9]+)|(\'o)[0-7]+', Number.Oct), (r'\'[01xz]', Number), (r'\d+[Ll]?', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.;\']', Punctuation), (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), (r'^\s*(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'^\s*(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'(always|always_comb|always_ff|always_latch|and|assign|automatic|' r'begin|break|buf|bufif0|bufif1|case|casex|casez|cmos|const|' r'continue|deassign|default|defparam|disable|do|edge|else|end|endcase|' r'endfunction|endgenerate|endselectedModulee|endpackage|endprimitive|endspecify|' r'endtable|endtask|enum|event|final|for|force|forever|fork|function|' r'generate|genvar|highz0|highz1|if|initial|inout|input|' r'integer|join|large|localparam|macroselectedModulee|medium|selectedModulee|' r'nand|negedge|nmos|nor|not|notif0|notif1|or|output|packed|' r'parameter|pmos|posedge|primitive|pull0|pull1|pulldown|pullup|rcmos|' r'ref|release|repeat|return|rnmos|rpmos|rtran|rtranif0|' r'rtranif1|scalared|signed|small|specify|specparam|strength|' r'string|strong0|strong1|struct|table|task|' r'tran|tranif0|tranif1|type|typedef|' r'unsigned|var|vectored|void|wait|weak0|weak1|while|' r'xnor|xor)\b', Keyword), (r'(`accelerate|`autoexpand_vectornets|`celldefine|`default_nettype|' r'`else|`elsif|`endcelldefine|`endif|`endprotect|`endprotected|' r'`expand_vectornets|`ifdef|`ifndef|`include|`noaccelerate|`noexpand_vectornets|' r'`noremove_gatenames|`noremove_netnames|`nounconnected_drive|' r'`protect|`protected|`remove_gatenames|`remove_netnames|`resetall|' r'`timescale|`unconnected_drive|`undef)\b', Comment.Preproc), (r'(\$cits|\$citstoreal|\$citstoshortreal|\$countdrivers|\$display|\$fclose|' r'\$fdisplay|\$finish|\$floor|\$fmonitor|\$fopen|\$fstrobe|\$fwrite|' r'\$getpattern|\$history|\$incsave|\$input|\$itor|\$key|\$list|\$log|' r'\$monitor|\$monitoroff|\$monitoron|\$nokey|\$nolog|\$printtimescale|' r'\$random|\$readmemb|\$readmemh|\$realtime|\$realtobits|\$reset|\$reset_count|' r'\$reset_value|\$restart|\$rtoi|\$save|\$scale|\$scope|\$shortrealtobits|' r'\$showscopes|\$showvariables|\$showvars|\$sreadmemb|\$sreadmemh|' r'\$stime|\$stop|\$strobe|\$time|\$timeformat|\$write)\b', Name.Builtin), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(byte|shortint|int|longint|interger|time|' r'bit|logic|reg|' r'supply0|supply1|tri|triand|trior|tri0|tri1|trireg|uwire|wire|wand|wor' r'shortreal|real|realtime)\b', Keyword.Type), ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'classname': [ (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'import': [(r'[a-zA-Z0-9_:]+\*?', Name.Namespace, '#pop')] } def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): # Convention: mark all upper case names as constants if token is Name: if value.isupper(): token = Name.Constant yield index, token, value
class ErlangLexer(RegexLexer): """ For the Erlang functional programming language. Blame Jeremy Thurgood (http://jerith.za.net/). *New in Pygments 0.9.* """ name = 'Erlang' aliases = ['erlang'] filenames = ['*.erl', '*.hrl'] mimetypes = ['text/x-erlang'] keywords = [ 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive', 'try', 'when', ] builtins = [ # See erlang(3) man page 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', 'byte_size', 'cancel_timer', 'check_process_code', 'delete_selectedModulee', 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', 'float', 'float_to_list', 'fun_info', 'fun_to_list', 'function_exported', 'garbage_collect', 'get', 'get_keys', 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', 'load_selectedModulee', 'localtime_to_universaltime', 'make_tuple', 'md5', 'md5_final', 'md5_update', 'memory', 'selectedModulee_loaded', 'monitor', 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', 'pid_to_list', 'port_close', 'port_command', 'port_connect', 'port_control', 'port_call', 'port_info', 'port_to_list', 'process_display', 'process_flag', 'process_info', 'purge_selectedModulee', 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', 'spawn_opt', 'split_binary', 'start_timer', 'statistics', 'suspend_process', 'system_flag', 'system_info', 'system_monitor', 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' ] operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)' word_operators = [ 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div', 'not', 'or', 'orelse', 'rem', 'xor' ] atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')" variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)' escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' macro_re = r'(?:' + variable_re + r'|' + atom_re + r')' base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' tokens = { 'root': [ (r'\s+', Text), (r'%.*\n', Comment), ('(' + '|'.join(keywords) + r')\b', Keyword), ('(' + '|'.join(builtins) + r')\b', Name.Builtin), ('(' + '|'.join(word_operators) + r')\b', Operator.Word), (r'^-', Punctuation, 'directive'), (operators, Operator), (r'"', String, 'string'), (r'<<', Name.Label), (r'>>', Name.Label), (r'(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)), (r'^(' + atom_re + r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer), (r'[+-]?\d+', Number.Integer), (r'[+-]?\d+.\d+', Number.Float), (r'[]\[:_@\".{}()|;,]', Punctuation), (variable_re, Name.Variable), (atom_re, Name), (r'\?' + macro_re, Name.Constant), (r'\$(?:' + escape_re + r'|\\[ %]|[^\\])', String.Char), (r'#' + atom_re + r'(:?\.' + atom_re + r')?', Name.Label), ], 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], 'directive': [ (r'(define)(\s*)(\()(' + macro_re + r')', bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), (r'(record)(\s*)(\()(' + macro_re + r')', bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), (atom_re, Name.Entity, '#pop'), ], }
class HaskellLexer(RegexLexer): """ A Haskell lexer based on the lexemes defined in the Haskell 98 Report. *New in Pygments 0.8.* """ name = 'Haskell' aliases = ['haskell', 'hs'] filenames = ['*.hs'] mimetypes = ['text/x-haskell'] reserved = [ 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'in', 'infix[lr]?', 'instance', 'let', 'newtype', 'of', 'then', 'type', 'where', '_' ] ascii = [ 'NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK', 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE', 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN', 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL' ] tokens = { 'root': [ # Whitespace: (r'\s+', Text), #(r'--\s*|.*$', Comment.Doc), (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), # Lexemes: # Identifiers (r'\bimport\b', Keyword.Reserved, 'import'), (r'\bselectedModulee\b', Keyword.Reserved, 'selectedModulee'), (r'\berror\b', Name.Exception), (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved), (r'^[_a-z][\w\']*', Name.Function), (r'[_a-z][\w\']*', Name), (r'[A-Z][\w\']*', Keyword.Type), # Operators (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function ), # lambda operator (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators # Numbers (r'\d+[eE][+-]?\d+', Number.Float), (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float), (r'0[oO][0-7]+', Number.Oct), (r'0[xX][\da-fA-F]+', Number.Hex), (r'\d+', Number.Integer), # Character/String Literals (r"'", String.Char, 'character'), (r'"', String, 'string'), # Special (r'\[\]', Keyword.Type), (r'\(\)', Name.Builtin), (r'[][(),;`{}]', Punctuation), ], 'import': [ # Import statements (r'\s+', Text), (r'"', String, 'string'), # after "funclist" state (r'\)', Punctuation, '#pop'), (r'qualified\b', Keyword), # import X as Y (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)', bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'), # import X hiding (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()', bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'), # import X (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), # import X (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'), ], 'selectedModulee': [ (r'\s+', Text), (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), ], 'funclist': [ (r'\s+', Text), (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), (r'(_[\w\']+|[a-z][\w\']*)', Name.Function), (r'--.*$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), (r',', Punctuation), (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # (HACK, but it makes sense to push two instances, believe me) (r'\(', Punctuation, ('funclist', 'funclist')), (r'\)', Punctuation, '#pop:2'), ], 'comment': [ # Multiline Comments (r'[^-{}]+', Comment.Multiline), (r'{-', Comment.Multiline, '#push'), (r'-}', Comment.Multiline, '#pop'), (r'[-{}]', Comment.Multiline), ], 'character': [ # Allows multi-chars, incorrectly. (r"[^\\']", String.Char), (r"\\", String.Escape, 'escape'), ("'", String.Char, '#pop'), ], 'string': [ (r'[^\\"]+', String), (r"\\", String.Escape, 'escape'), ('"', String, '#pop'), ], 'escape': [ (r'[abfnrtv"\'&\\]', String.Escape, '#pop'), (r'\^[][A-Z@\^_]', String.Escape, '#pop'), ('|'.join(ascii), String.Escape, '#pop'), (r'o[0-7]+', String.Escape, '#pop'), (r'x[\da-fA-F]+', String.Escape, '#pop'), (r'\d+', String.Escape, '#pop'), (r'\s+\\', String.Escape, '#pop'), ], }
class CommonLispLexer(RegexLexer): """ A Common Lisp lexer. *New in Pygments 0.9.* """ name = 'Common Lisp' aliases = ['common-lisp', 'cl'] filenames = ['*.cl', '*.lisp', '*.el'] # use for Elisp too mimetypes = ['text/x-common-lisp'] flags = re.IGNORECASE | re.MULTILINE ### couple of useful regexes # characters that are not macro-characters and can be used to begin a symbol nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]' constituent = nonmacro + '|[#.:]' terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters ### symbol token, reverse-engineered from hyperspec # Take a deep breath... symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent) def __init__(self, **options): from plushcms.syntaxhighlight.pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \ SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \ BUILTIN_TYPES, BUILTIN_CLASSES self.builtin_function = BUILTIN_FUNCTIONS self.special_forms = SPECIAL_FORMS self.macros = MACROS self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS self.declarations = DECLARATIONS self.builtin_types = BUILTIN_TYPES self.builtin_classes = BUILTIN_CLASSES RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] for index, token, value in RegexLexer.get_tokens_unprocessed( self, text, stack): if token is Name.Variable: if value in self.builtin_function: yield index, Name.Builtin, value continue if value in self.special_forms: yield index, Keyword, value continue if value in self.macros: yield index, Name.Builtin, value continue if value in self.lambda_list_keywords: yield index, Keyword, value continue if value in self.declarations: yield index, Keyword, value continue if value in self.builtin_types: yield index, Keyword.Type, value continue if value in self.builtin_classes: yield index, Name.Class, value continue yield index, token, value tokens = { 'root' : [ ('', Text, 'body'), ], 'multiline-comment' : [ (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19) (r'\|#', Comment.Multiline, '#pop'), (r'[^|#]+', Comment.Multiline), (r'[|#]', Comment.Multiline), ], 'commented-form' : [ (r'\(', Comment.Preproc, '#push'), (r'\)', Comment.Preproc, '#pop'), (r'[^()]+', Comment.Preproc), ], 'body' : [ # whitespace (r'\s+', Text), # single-line comment (r';.*$', Comment.Single), # multi-line comment (r'#\|', Comment.Multiline, 'multiline-comment'), # encoding comment (?) (r'#\d*Y.*$', Comment.Special), # strings and characters (r'"(\\.|[^"\\])*"', String), # quoting (r":" + symbol, String.Symbol), (r"'" + symbol, String.Symbol), (r"'", Operator), (r"`", Operator), # decimal numbers (r'[-+]?\d+\.?' + terminated, Number.Integer), (r'[-+]?\d+/\d+' + terminated, Number), (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \ + terminated, Number.Float), # sharpsign strings and characters (r"#\\." + terminated, String.Char), (r"#\\" + symbol, String.Char), # vector (r'#\(', Operator, 'body'), # bitstring (r'#\d*\*[01]*', Literal.Other), # uninterned symbol (r'#:' + symbol, String.Symbol), # read-time and load-time evaluation (r'#[.,]', Operator), # function shorthand (r'#\'', Name.Function), # binary rational (r'#[bB][+-]?[01]+(/[01]+)?', Number), # octal rational (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct), # hex rational (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex), # radix rational (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number), # complex (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'), # array (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'), # structure (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'), # path (r'#[pP]?"(\\.|[^"])*"', Literal.Other), # reference (r'#\d+=', Operator), (r'#\d+#', Operator), # read-time comment (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'), # read-time conditional (r'#[+-]', Operator), # special operators that should have been parsed already (r'(,@|,|\.)', Operator), # special constants (r'(t|nil)' + terminated, Name.Constant), # functions and variables (r'\*' + symbol + '\*', Name.Variable.Global), (symbol, Name.Variable), # parentheses (r'\(', Punctuation, 'body'), (r'\)', Punctuation, '#pop'), ], }