def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _ws=_ws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' % (_nl, _ws, _nl, _ws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ]
def _objdump_lexer_tokens(asm_lexer): """ Common objdump lexer tokens to wrap an ASM lexer. """ hex_re = r'[0-9A-Za-z]' return { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('('+hex_re+'+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(asm_lexer))), # Code line with ascii ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes (r'\t\.\.\.$', Text), # Relocation line # (With offset) (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), (r'[^\n]+\n', Other) ] }
def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable, _ws=_ws): op = r'=+\-*/!~' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'[(),]+', Punctuation), (r'([%s]|%%|\^\^)+' % op, Operator), (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' % (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws, r'[^)]' if compound else r'[\w\W]'), using(this, state='variable')), (r'(?=[\x00|&])', Text, '#pop'), include('follow') ] return state
from pygments.lexer import ( Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using, ) from pygments.token import ( Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error, ) from pygments.util import get_bool_opt # Local from IPython.testing.skipdoctest import skip_doctest line_re = re.compile('.*?\n') ipython_tokens = [ (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword, using(BashLexer), Text)), (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)), (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), ] def build_ipy_lexer(python3): """Builds IPython lexers depending on the value of `python3`. The lexer inherits from an appropriate Python lexer and then adds information about IPython specific keywords (i.e. magic commands, shell commands, etc.) Parameters ---------- python3 : bool If `True`, then build an IPython lexer from a Python 3 lexer.
def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % suffix) ]
class NedLexer(RegexLexer): name = 'ned' filenames = ['*.ned'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'(L?)(")', bygroups(String.Affix, String), 'string'), (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(true|false)\b', Name.Builtin), (r'(<-->|-->|<--|\.\.)', Keyword), (r'(bool|double|int|xml)\b', Keyword.Type), (r'(inout|input|output)\b', Keyword.Type), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words(("channel", "channelinterface", "simple", "module", "network", "moduleinterface"), suffix=r'\b'), Keyword), (words( ("parameters", "gates", "types", "submodules", "connections"), suffix=r'\b'), Keyword), (words(("volatile", "allowunconnected", "extends", "for", "if", "import", "like", "package", "property"), suffix=r'\b'), Keyword), (words(("sizeof", "const", "default", "ask", "this", "index", "typename", "xmldoc"), suffix=r'\b'), Keyword), (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta", "binomial", "cauchy", "ceil", "chi_square", "cos", "erlang_k", "exp", "exponential", "fabs", "floor", "fmod", "gamma_d", "genk_exponential", "genk_intuniform", "genk_normal", "genk_truncnormal", "genk_uniform", "geometric", "hypergeometric", "hypot", "intuniform", "log", "log10", "lognormal", "max", "min", "negbinomial", "normal", "pareto_shifted", "poisson", "pow", "simTime", "sin", "sqrt", "student_t", "tan", "triang", "truncnormal", "uniform", "weibull", "xml", "xmldoc"), suffix=r'\b'), Name.Builtin), ('@[a-zA-Z_]\w*', Name.Builtin), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ] }
from pygments.lexer import using, bygroups from pygments.token import Punctuation, Other, Generic from pygments.styles import get_style_by_name SQL_STYLE = get_style_by_name('colorful') JSON_STYLE = get_style_by_name('tango') class InsideStringJavascriptLexer(JavascriptLexer): def get_tokens_unprocessed(self, text, stack=('root',)): text = text[1:-1] text = text.replace('\\n', '\n') return JavascriptLexer.get_tokens_unprocessed(self, text, stack) mongo_tokens = {} mongo_tokens.update(JavascriptLexer.tokens) mongo_tokens['root'].insert(0, (r'"(function(\\\\|\\"|[^"])*)"', using(InsideStringJavascriptLexer))) class MongoLexer(JavascriptLexer): tokens = mongo_tokens HAVE_PYGMENTS = True except ImportError: # pragma: no cover HAVE_PYGMENTS = False def get_root_controller(): """Return the root controller of the application.""" module = config['application_root_module'] if module not in sys.modules: __import__(module) return sys.modules[module].RootController
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. .. versionadded:: 0.8 """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in iteritems(levels): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*].*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|async|await|base|break|by|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|let|lock|new|null|on|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value|alias|ascending|' r'descending|from|group|into|orderby|select|thenby|where|' r'join|equals)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|' r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [ (cs_ident, Name.Class, '#pop'), default('#pop'), ], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'), ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class ActionScript3Lexer(RegexLexer): """ For ActionScript 3 source code. .. versionadded:: 0.11 """ name = 'ActionScript 3' aliases = ['as3', 'actionscript3'] filenames = ['*.as'] mimetypes = [ 'application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3' ] identifier = r'[$a-zA-Z_]\w*' typeidentifier = identifier + r'(?:\.<\w+>)?' flags = re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'\s+', Text), (r'(function\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword.Declaration, Name.Function, Text, Operator), 'funcparams'), (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + typeidentifier + r')', bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text, Keyword.Type)), (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)', bygroups(Keyword, Text, Name.Namespace, Text)), (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()', bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'/(\\\\|\\[^\\]|[^\\\n])*/[gisx]*', String.Regex), (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' r'switch|import|include|as|is)\b', Keyword), (r'(class|public|final|internal|native|override|private|protected|' r'static|import|extends|implements|interface|intrinsic|return|super|' r'dynamic|function|const|get|namespace|package|set)\b', Keyword.Declaration), (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b', Keyword.Constant), (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|' r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|' r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|' r'unescape)\b', Name.Function), (identifier, Name), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'[~^*!%&<>|+=:;,/?\\{}\[\]().-]+', Operator), ], 'funcparams': [(r'\s+', Text), (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' + typeidentifier + r'|\*)(\s*)', bygroups(Text, Punctuation, Name, Text, Operator, Text, Keyword.Type, Text), 'defval'), (r'\)', Operator, 'type')], 'type': [(r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)', bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'), (r'\s+', Text, '#pop:2'), default('#pop:2')], 'defval': [(r'(=)(\s*)([^(),]+)(\s*)(,?)', bygroups(Operator, Text, using(this), Text, Operator), '#pop'), (r',', Operator, '#pop'), default('#pop')] } def analyse_text(text): if re.match(r'\w+\s*:\s*\w', text): return 0.3 return 0
class PhpLexer(RegexLexer): """ For `PHP <http://www.php.net/>`_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``<?php`` required). The default is ``False``. `funcnamehighlighting` If given and ``True``, highlight builtin function names (default: ``True``). `disabledmodules` If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted except the special ``'unknown'`` module that includes functions that are known to php but are undocumented. To get a list of allowed modules have a look into the `_phpbuiltins` module: .. sourcecode:: pycon >>> from pygments.lexers._phpbuiltins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]'] mimetypes = ['text/x-php'] flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other)], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String), (r'\s+', Text), (r'#.*?\n', Comment), (r'//.*?\n', Comment), (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment), (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/?@-]+', Operator), (r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|__FILE__|' r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|' r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this)\b', Keyword), ('(true|false|null)\b', Keyword.Constant), (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable), (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'classname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'functionname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')], 'string': [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]+', String.Double)], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt(options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt(options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from pygments.lexers._phpbuiltins import MODULES for key, value in MODULES.iteritems(): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 if '?>' in text: rv += 0.1 return rv
class AdaLexer(RegexLexer): """ For Ada source code. .. versionadded:: 1.3 """ name = 'Ada' aliases = ['ada', 'ada95', 'ada2005'] filenames = ['*.adb', '*.ads', '*.ada'] mimetypes = ['text/x-ada'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'[^\S\n]+', Text), (r'--.*?\n', Comment.Single), (r'[^\S\n]+', Text), (r'function|procedure|entry', Keyword.Declaration, 'subprogram'), (r'(subtype|type)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'), (r'task|protected', Keyword.Declaration), (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)), (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, Comment.Preproc)), (r'(true|false|null)\b', Keyword.Constant), (words( ('Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type', 'Short_Float', 'Short_Integer', 'Short_Short_Float', 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'), suffix=r'\b'), Keyword.Type), (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word), (r'generic|private', Keyword.Declaration), (r'package', Keyword.Declaration, 'package'), (r'array\b', Keyword.Reserved, 'array_def'), (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'(\w+)(\s*)(:)(\s*)(constant)', bygroups(Name.Constant, Text, Punctuation, Text, Keyword.Reserved)), (r'<<\w+>>', Name.Label), (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)', bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)), (words( ('abort', 'abs', 'abstract', 'accept', 'access', 'aliased', 'all', 'array', 'at', 'begin', 'body', 'case', 'constant', 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding', 'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue', 'return', 'reverse', 'select', 'separate', 'some', 'subtype', 'synchronized', 'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), (r'"[^"]*"', String), include('attribute'), include('numbers'), (r"'[^']'", String.Character), (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))), (r"(<>|=>|:=|[()|:;,.'])", Punctuation), (r'[*<>+=/&-]', Operator), (r'\n+', Text), ], 'numbers': [ (r'[0-9_]+#[0-9a-f_\.]+#', Number.Hex), (r'[0-9_]+\.[0-9_]*', Number.Float), (r'[0-9_]+', Number.Integer), ], 'attribute': [ (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)), ], 'subprogram': [ (r'\(', Punctuation, ('#pop', 'formal_part')), (r';', Punctuation, '#pop'), (r'is\b', Keyword.Reserved, '#pop'), (r'"[^"]+"|\w+', Name.Function), include('root'), ], 'end': [ ('(if|case|record|loop|select)', Keyword.Reserved), (r'"[^"]+"|[\w.]+', Name.Function), (r'\s+', Text), (';', Punctuation, '#pop'), ], 'type_def': [ (r';', Punctuation, '#pop'), (r'\(', Punctuation, 'formal_part'), (r'with|and|use', Keyword.Reserved), (r'array\b', Keyword.Reserved, ('#pop', 'array_def')), (r'record\b', Keyword.Reserved, ('record_def')), (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation), '#pop'), include('root'), ], 'array_def': [ (r';', Punctuation, '#pop'), (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text, Keyword.Reserved)), include('root'), ], 'record_def': [ (r'end record', Keyword.Reserved, '#pop'), include('root'), ], 'import': [ (r'[\w.]+', Name.Namespace, '#pop'), default('#pop'), ], 'formal_part': [ (r'\)', Punctuation, '#pop'), (r'\w+', Name.Variable), (r',|:[^=]', Punctuation), (r'(in|not|null|out|access)\b', Keyword.Reserved), include('root'), ], 'package': [ ('body', Keyword.Declaration), (r'is\s+new|renames', Keyword.Reserved), ('is', Keyword.Reserved, '#pop'), (';', Punctuation, '#pop'), (r'\(', Punctuation, 'package_instantiation'), (r'([\w.]+)', Name.Class), include('root'), ], 'package_instantiation': [ (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text, Punctuation)), (r'[\w.\'"]', Text), (r'\)', Punctuation, '#pop'), include('root'), ], }
class FantomLexer(RegexLexer): """ For Fantom source code. .. versionadded:: 1.5 """ name = 'Fantom' aliases = ['fan'] filenames = ['*.fan'] mimetypes = ['application/x-fantom'] # often used regexes def s(str): return Template(str).substitute( dict( pod=r'[\"\w\.]+', eos=r'\n|;', id=r'[a-zA-Z_]\w*', # all chars which can be part of type definition. Starts with # either letter, or [ (maps), or | (funcs) type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?', ) ) tokens = { 'comments': [ (r'(?s)/\*.*?\*/', Comment.Multiline), # Multiline (r'//.*?\n', Comment.Single), # Single line # TODO: highlight references in fandocs (r'\*\*.*?\n', Comment.Special), # Fandoc (r'#.*\n', Comment.Single) # Shell-style ], 'literals': [ (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration with dot (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float), # Float/Decimal (r'\b-?0x[0-9a-fA-F_]+', Number.Hex), # Hex (r'\b-?[\d_]+', Number.Integer), # Int (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), # Char (r'"', Punctuation, 'insideStr'), # Opening quote (r'`', Punctuation, 'insideUri'), # Opening accent (r'\b(true|false|null)\b', Keyword.Constant), # Bool & null (r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)', # DSL bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation, String, Punctuation)), (r'(?:(\w+)(::))?(\w+)?(#)(\w+)?', # Type/slot literal bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation, Name.Function)), (r'\[,\]', Literal), # Empty list (s(r'($type)(\[,\])'), # Typed empty list bygroups(using(this, state='inType'), Literal)), (r'\[:\]', Literal), # Empty Map (s(r'($type)(\[:\])'), bygroups(using(this, state='inType'), Literal)), ], 'insideStr': [ (r'\\\\', String.Escape), # Escaped backslash (r'\\"', String.Escape), # Escaped " (r'\\`', String.Escape), # Escaped ` (r'\$\w+', String.Interpol), # Subst var (r'\$\{.*?\}', String.Interpol), # Subst expr (r'"', Punctuation, '#pop'), # Closing quot (r'.', String) # String content ], 'insideUri': [ # TODO: remove copy/paste str/uri (r'\\\\', String.Escape), # Escaped backslash (r'\\"', String.Escape), # Escaped " (r'\\`', String.Escape), # Escaped ` (r'\$\w+', String.Interpol), # Subst var (r'\$\{.*?\}', String.Interpol), # Subst expr (r'`', Punctuation, '#pop'), # Closing tick (r'.', String.Backtick) # URI content ], 'protectionKeywords': [ (r'\b(public|protected|private|internal)\b', Keyword), ], 'typeKeywords': [ (r'\b(abstract|final|const|native|facet|enum)\b', Keyword), ], 'methodKeywords': [ (r'\b(abstract|native|once|override|static|virtual|final)\b', Keyword), ], 'fieldKeywords': [ (r'\b(abstract|const|final|native|override|static|virtual|' r'readonly)\b', Keyword) ], 'otherKeywords': [ (words(( 'try', 'catch', 'throw', 'finally', 'for', 'if', 'else', 'while', 'as', 'is', 'isnot', 'switch', 'case', 'default', 'continue', 'break', 'do', 'return', 'get', 'set'), prefix=r'\b', suffix=r'\b'), Keyword), (r'\b(it|this|super)\b', Name.Builtin.Pseudo), ], 'operators': [ (r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator) ], 'inType': [ (r'[\[\]|\->:?]', Punctuation), (s(r'$id'), Name.Class), default('#pop'), ], 'root': [ include('comments'), include('protectionKeywords'), include('typeKeywords'), include('methodKeywords'), include('fieldKeywords'), include('literals'), include('otherKeywords'), include('operators'), (r'using\b', Keyword.Namespace, 'using'), # Using stmt (r'@\w+', Name.Decorator, 'facet'), # Symbol (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class), 'inheritance'), # Inheritance list # Type var := val (s(r'($type)([ \t]+)($id)(\s*)(:=)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Operator)), # var := val (s(r'($id)(\s*)(:=)'), bygroups(Name.Variable, Text, Operator)), # .someId( or ->someId( ### (s(r'(\.|(?:\->))($id)(\s*)(\()'), bygroups(Operator, Name.Function, Text, Punctuation), 'insideParen'), # .someId or ->someId (s(r'(\.|(?:\->))($id)'), bygroups(Operator, Name.Function)), # new makeXXX ( (r'(new)(\s+)(make\w*)(\s*)(\()', bygroups(Keyword, Text, Name.Function, Text, Punctuation), 'insideMethodDeclArgs'), # Type name ( (s(r'($type)([ \t]+)' # Return type and whitespace r'($id)(\s*)(\()'), # method name + open brace bygroups(using(this, state='inType'), Text, Name.Function, Text, Punctuation), 'insideMethodDeclArgs'), # ArgType argName, (s(r'($type)(\s+)($id)(\s*)(,)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), # ArgType argName) # Covered in 'insideParen' state # ArgType argName -> ArgType| (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation, Text, using(this, state='inType'), Punctuation)), # ArgType argName| (s(r'($type)(\s+)($id)(\s*)(\|)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), # Type var (s(r'($type)([ \t]+)($id)'), bygroups(using(this, state='inType'), Text, Name.Variable)), (r'\(', Punctuation, 'insideParen'), (r'\{', Punctuation, 'insideBrace'), (r'.', Text) ], 'insideParen': [ (r'\)', Punctuation, '#pop'), include('root'), ], 'insideMethodDeclArgs': [ (r'\)', Punctuation, '#pop'), (s(r'($type)(\s+)($id)(\s*)(\))'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation), '#pop'), include('root'), ], 'insideBrace': [ (r'\}', Punctuation, '#pop'), include('root'), ], 'inheritance': [ (r'\s+', Text), # Whitespace (r':|,', Punctuation), (r'(?:(\w+)(::))?(\w+)', bygroups(Name.Namespace, Punctuation, Name.Class)), (r'\{', Punctuation, '#pop') ], 'using': [ (r'[ \t]+', Text), # consume whitespaces (r'(\[)(\w+)(\])', bygroups(Punctuation, Comment.Special, Punctuation)), # ffi (r'(\")?([\w.]+)(\")?', bygroups(Punctuation, Name.Namespace, Punctuation)), # podname (r'::', Punctuation, 'usingClass'), default('#pop') ], 'usingClass': [ (r'[ \t]+', Text), # consume whitespaces (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Name.Class), '#pop:2'), (r'[\w$]+', Name.Class), default('#pop:2') # jump out to root state ], 'facet': [ (r'\s+', Text), (r'\{', Punctuation, 'facetFields'), default('#pop') ], 'facetFields': [ include('comments'), include('literals'), include('operators'), (r'\s+', Text), (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)), (r'\}', Punctuation, '#pop'), (r'.', Text) ], }
class GeneratedObjectiveCVariant(baselexer): """ Implements Objective-C syntax on top of an existing C family lexer. """ tokens = { 'statements': [ (r'@"', String, 'string'), (r'@(YES|NO)', Number), (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'@0[0-7]+[Ll]?', Number.Oct), (r'@\d+[Ll]?', Number.Integer), (r'@\(', Literal, 'literal_number'), (r'@\[', Literal, 'literal_array'), (r'@\{', Literal, 'literal_dictionary'), (words( ('@selector', '@private', '@protected', '@public', '@encode', '@synchronized', '@try', '@throw', '@catch', '@finally', '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer', '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong', 'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic', 'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in', 'out', 'inout', 'release', 'class', '@dynamic', '@optional', '@required', '@autoreleasepool'), suffix=r'\b'), Keyword), (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL', 'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'), Keyword.Type), (r'@(true|false|YES|NO)\n', Name.Builtin), (r'(YES|NO|nil|self|super)\b', Name.Builtin), # Carbon types (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type), # Carbon built-ins (r'(TRUE|FALSE)\b', Name.Builtin), (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_classname')), (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_forward_classname')), # @ can also prefix other expressions like @{...} or @(...) (r'@', Punctuation), inherit, ], 'oc_classname': [ # interface definition that inherits ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)', bygroups(Name.Class, Text, Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)', bygroups(Name.Class, Text, Name.Label, Text, Punctuation), ('#pop', 'oc_ivars')), ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation ('([a-zA-Z$_][\w$]*)(\s*)(\{)', bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), ('([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'oc_forward_classname': [('([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text), 'oc_forward_classname'), ('([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class, Text), '#pop')], 'oc_ivars': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'root': [ # methods ( r'^([-+])(\s*)' # method marker r'(\(.*?\))?(\s*)' # return type r'([a-zA-Z$_][\w$]*:?)', # begin of method name bygroups(Punctuation, Text, using(this), Text, Name.Function), 'method'), inherit, ], 'method': [ include('whitespace'), # TODO unsure if ellipses are allowed elsewhere, see # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)', bygroups(using(this), Text, Name.Variable)), (r'[a-zA-Z$_][\w$]*:', Name.Function), (';', Punctuation, '#pop'), (r'\{', Punctuation, 'function'), default('#pop'), ], 'literal_number': [ (r'\(', Punctuation, 'literal_number_inner'), (r'\)', Literal, '#pop'), include('statement'), ], 'literal_number_inner': [ (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop'), include('statement'), ], 'literal_array': [ (r'\[', Punctuation, 'literal_array_inner'), (r'\]', Literal, '#pop'), include('statement'), ], 'literal_array_inner': [ (r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), include('statement'), ], 'literal_dictionary': [ (r'\}', Literal, '#pop'), include('statement'), ], } def analyse_text(text): if _oc_keywords.search(text): return 1.0 elif '@"' in text: # strings return 0.8 elif re.search('@[0-9]+', text): return 0.7 elif _oc_message.search(text): return 0.8 return 0 def get_tokens_unprocessed(self, text): from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \ COCOA_PROTOCOLS, COCOA_PRIMITIVES for index, token, value in \ baselexer.get_tokens_unprocessed(self, text): if token is Name or token is Name.Class: if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \ or value in COCOA_PRIMITIVES: token = Name.Builtin.Pseudo yield index, token, value
class ModelicaLexer(RegexLexer): """ For `Modelica <http://www.modelica.org/>`_ source code. .. versionadded:: 1.1 """ name = 'Modelica' aliases = ['modelica'] filenames = ['*.mo'] mimetypes = ['text/x-modelica'] flags = re.DOTALL | re.MULTILINE _name = r"(?:'(?:[^\\']|\\.)+'|[a-zA-Z_]\w*)" tokens = { 'whitespace': [ (r'[\s\ufeff]+', Text), (r'//[^\n]*\n?', Comment.Single), (r'/\*.*?\*/', Comment.Multiline) ], 'root': [ include('whitespace'), (r'"', String.Double, 'string'), (r'[()\[\]{},;]+', Punctuation), (r'\.?[*^/+-]|\.|<>|[<>:=]=?', Operator), (r'\d+(\.?\d*[eE][-+]?\d+|\.\d*)', Number.Float), (r'\d+', Number.Integer), (r'(abs|acos|actualStream|array|asin|assert|AssertionLevel|atan|' r'atan2|backSample|Boolean|cardinality|cat|ceil|change|Clock|' r'Connections|cos|cosh|cross|delay|diagonal|div|edge|exp|' r'ExternalObject|fill|floor|getInstanceName|hold|homotopy|' r'identity|inStream|integer|Integer|interval|inverse|isPresent|' r'linspace|log|log10|matrix|max|min|mod|ndims|noClock|noEvent|' r'ones|outerProduct|pre|previous|product|Real|reinit|rem|rooted|' r'sample|scalar|semiLinear|shiftSample|sign|sin|sinh|size|skew|' r'smooth|spatialDistribution|sqrt|StateSelect|String|subSample|' r'sum|superSample|symmetric|tan|tanh|terminal|terminate|time|' r'transpose|vector|zeros)\b', Name.Builtin), (r'(algorithm|annotation|break|connect|constant|constrainedby|der|' r'discrete|each|else|elseif|elsewhen|encapsulated|enumeration|' r'equation|exit|expandable|extends|external|firstTick|final|flow|for|if|' r'import|impure|in|initial|inner|input|interval|loop|nondiscrete|outer|' r'output|parameter|partial|protected|public|pure|redeclare|' r'replaceable|return|stream|then|when|while)\b', Keyword.Reserved), (r'(and|not|or)\b', Operator.Word), (r'(block|class|connector|end|function|model|operator|package|' r'record|type)\b', Keyword.Reserved, 'class'), (r'(false|true)\b', Keyword.Constant), (r'within\b', Keyword.Reserved, 'package-prefix'), (_name, Name) ], 'class': [ include('whitespace'), (r'(function|record)\b', Keyword.Reserved), (r'(if|for|when|while)\b', Keyword.Reserved, '#pop'), (_name, Name.Class, '#pop'), default('#pop') ], 'package-prefix': [ include('whitespace'), (_name, Name.Namespace, '#pop'), default('#pop') ], 'string': [ (r'"', String.Double, '#pop'), (r'\\[\'"?\\abfnrtv]', String.Escape), (r'(?i)<\s*html\s*>([^\\"]|\\.)+?(<\s*/\s*html\s*>|(?="))', using(HtmlLexer)), (r'<|\\?[^"\\<]+', String.Double) ] }
class LlvmMirLexer(RegexLexer): """ Lexer for the overall LLVM MIR document format. MIR is a human readable serialization format that's used to represent LLVM's machine specific intermediate representation. It allows LLVM's developers to see the state of the compilation process at various points, as well as test individual pieces of the compiler. For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html. .. versionadded:: 2.6 """ name = 'LLVM-MIR' aliases = ['llvm-mir'] filenames = ['*.mir'] tokens = { 'root': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents starting with | are LLVM-IR (r'--- \|$', Keyword, 'llvm_ir'), # Other documents are MIR (r'---', Keyword, 'llvm_mir'), # Consume everything else in one token for efficiency (r'[^-#]+|.', Text), ], 'llvm_ir': [ # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Delegate to the LlvmLexer (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))), ], 'llvm_mir': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Handle the simple attributes (r'name:', Keyword, 'name'), (words(('alignment', ), suffix=':'), Keyword, 'number'), (words(('legalized', 'regBankSelected', 'tracksRegLiveness', 'selected', 'exposesReturnsTwice'), suffix=':'), Keyword, 'boolean'), # Handle the attributes don't highlight inside (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo', 'machineFunctionInfo'), suffix=':'), Keyword), # Delegate the body block to the LlvmMirBodyLexer (r'body: *\|', Keyword, 'llvm_mir_body'), # Consume everything else (r'.+', Text), (r'\n', Text), ], 'name': [(r'[^\n]+', Name), default('#pop')], 'boolean': [(r' *(true|false)', Name.Builtin), default('#pop')], 'number': [(r' *[0-9]+', Number), default('#pop')], 'llvm_mir_body': [ # Documents end with '...' or '---'. # We have to pop llvm_mir_body and llvm_mir (r'(\.\.\.|(?=---))', Keyword, '#pop:2'), # Delegate the body block to the LlvmMirBodyLexer (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))), # The '...' is optional. If we didn't already find it then it isn't # there. There might be a '---' instead though. (r'(?!\.\.\.|---)((.|\n)+)', bygroups(using(LlvmMirBodyLexer), Keyword)), ], }
class ScdocLexer(RegexLexer): """ `scdoc` is a simple man page generator for POSIX systems written in C99. https://git.sr.ht/~sircmpwn/scdoc .. versionadded:: 2.5 """ name = 'scdoc' aliases = ['scdoc', 'scd'] filenames = ['*.scd', '*.scdoc'] flags = re.MULTILINE tokens = { 'root': [ # comment (r'^(;.+\n)', bygroups(Comment)), # heading with pound prefix (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)), # bulleted lists (r'^(\s*)([*-])(\s)(.+\n)', bygroups(Text, Keyword, Text, using(this, state='inline'))), # numbered lists (r'^(\s*)(\.+\.)( .+\n)', bygroups(Text, Keyword, using(this, state='inline'))), # quote (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), # text block (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), include('inline'), ], 'inline': [ # escape (r'\\.', Text), # underlines (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)), # bold (r'(\s)(\*[^*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)), # inline code (r'`[^`]+`', String.Backtick), # general text, must come last! (r'[^\\\s]+', Text), (r'.', Text), ], } def analyse_text(text): """This is very similar to markdown, save for the escape characters needed for * and _.""" result = 0 if '\\*' in text: result += 0.01 if '\\_' in text: result += 0.01 return result
class SlimLexer(ExtendedRegexLexer): """ For Slim markup. .. versionadded:: 2.0 """ name = 'Slim' aliases = ['slim'] filenames = ['*.slim'] mimetypes = ['text/x-slim'] flags = re.IGNORECASE _dot = r'(?: \|\n(?=.* \|)|.)' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'([ \t]*==?)(.*\n)', bygroups(Punctuation, using(RubyLexer)), 'root'), (r'[ \t]+[\w:-]+(?==)', Name.Attribute, 'html-attributes'), default('plain'), ], 'content': [ include('css'), (r'[\w:-]+:[ \t]*\n', Text, 'plain'), (r'(-)(.*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'), (r'\|' + _dot + r'*\n', _starts_block(Text, 'plain'), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment.Preproc, 'slim-comment-block'), '#pop'), (r'[\w:-]+', Name.Tag, 'tag'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), (r'[ \t]+\n', Punctuation, '#pop:2'), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(.*?)(\})', bygroups(String.Interpol, using(RubyLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'=', Punctuation), (r'"[^"]+"', using(RubyLexer), 'tag'), (r'\'[^\']+\'', using(RubyLexer), 'tag'), (r'\w+', Text, 'tag'), ], 'slim-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], }
from pygments.token import Punctuation, Other, Generic from pygments.styles import get_style_by_name SQL_STYLE = get_style_by_name('colorful') JSON_STYLE = get_style_by_name('tango') class InsideStringJavascriptLexer(JavascriptLexer): def get_tokens_unprocessed(self, text, stack=('root', )): text = text[1:-1] text = text.replace('\\n', '\n') return JavascriptLexer.get_tokens_unprocessed(self, text, stack) mongo_tokens = {} mongo_tokens.update(JavascriptLexer.tokens) mongo_tokens['root'].insert( 0, (r'"(function(\\\\|\\"|[^"])*)"', using(InsideStringJavascriptLexer))) class MongoLexer(JavascriptLexer): tokens = mongo_tokens HAVE_PYGMENTS = True except ImportError: # pragma: no cover HAVE_PYGMENTS = False def get_root_controller(): """Return the root controller of the application.""" module = config['application_root_module'] if module not in sys.modules: __import__(module) return sys.modules[module].RootController
class CFamilyLexer(RegexLexer): """ For C family source code. This is used as a base class to avoid repetitious definitions. """ #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' # Hexadecimal part in an hexadecimal integer/floating-point literal. # This includes decimal separators matching. _hexpart = r'[0-9a-fA-F](\'?[0-9a-fA-F])*' # Decimal part in an decimal integer/floating-point literal. # This includes decimal separators matching. _decpart = r'\d(\'?\d)*' # Integer literal suffix (e.g. 'ull' or 'll'). _intsuffix = r'(([uU][lL]{0,2})|[lL]{1,2}[uU]?)?' # Identifier regex with C and C++ Universal Character Name (UCN) support. _ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*' _namespaced_ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)*' tokens = { 'whitespace': [ # preprocessor directives: without whitespace (r'^#if\s+0', Comment.Preproc, 'if0'), ('^#', Comment.Preproc, 'macro'), # or with whitespace ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this), Comment.Preproc), 'if0'), ('^(' + _ws1 + ')(#)', bygroups(using(this), Comment.Preproc), 'macro'), (r'\n', Whitespace), (r'[^\S\n]+', Whitespace), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ include('keywords'), include('types'), (r'([LuU]|u8)?(")', bygroups(String.Affix, String), 'string'), (r"([LuU]|u8)?(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), # Hexadecimal floating-point literals (C11, C++17) (r'0[xX](' + _hexpart + r'\.' + _hexpart + r'|\.' + _hexpart + r'|' + _hexpart + r')[pP][+-]?' + _hexpart + r'[lL]?', Number.Float), (r'(-)?(' + _decpart + r'\.' + _decpart + r'|\.' + _decpart + r'|' + _decpart + r')[eE][+-]?' + _decpart + r'[fFlL]?', Number.Float), (r'(-)?((' + _decpart + r'\.(' + _decpart + r')?|\.' + _decpart + r')[fFlL]?)|(' + _decpart + r'[fFlL])', Number.Float), (r'(-)?0[xX]' + _hexpart + _intsuffix, Number.Hex), (r'(-)?0[bB][01](\'?[01])*' + _intsuffix, Number.Bin), (r'(-)?0(\'?[0-7])+' + _intsuffix, Number.Oct), (r'(-)?' + _decpart + _intsuffix, Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (r'(true|false|NULL)\b', Name.Builtin), (r'(' + _ident + r')(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), (_ident, Name) ], 'types': [(words(('int8', 'int16', 'int32', 'int64', 'wchar_t'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), (words(('bool', 'int', 'long', 'float', 'short', 'double', 'char', 'unsigned', 'signed', 'void'), suffix=r'\b'), Keyword.Type)], 'keywords': [ (r'(struct|union)(\s+)', bygroups(Keyword, Text), 'classname'), (words(('asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register', 'restricted', 'return', 'sizeof', 'struct', 'static', 'switch', 'typedef', 'volatile', 'while', 'union', 'thread_local', 'alignas', 'alignof', 'static_assert', '_Pragma'), suffix=r'\b'), Keyword), (words(('inline', '_inline', '__inline', 'naked', 'restrict', 'thread'), suffix=r'\b'), Keyword.Reserved), # Vector intrinsics (r'(__m(128i|128d|128|64))\b', Keyword.Reserved), # Microsoft-isms (words(('asm', 'based', 'except', 'stdcall', 'cdecl', 'fastcall', 'declspec', 'finally', 'try', 'leave', 'w64', 'unaligned', 'raise', 'noop', 'identifier', 'forceinline', 'assume'), prefix=r'__', suffix=r'\b'), Keyword.Reserved) ], 'root': [ include('whitespace'), include('keywords'), # functions ( r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments r'(' + _namespaced_ident + r')' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments r'(' + _namespaced_ident + r')' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), (r'\}', Punctuation), (r'[{;]', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'(' + _ws1 + r')(include)(' + _ws1 + r')("[^"]+")([^\n]*)', bygroups(using(this), Comment.Preproc, using(this), Comment.PreprocFile, Comment.Single)), (r'(' + _ws1 + r')(include)(' + _ws1 + r')(<[^>]+>)([^\n]*)', bygroups(using(this), Comment.Preproc, using(this), Comment.PreprocFile, Comment.Single)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ], 'classname': [ (_ident, Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), default('#pop') ] } stdlib_types = { 'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t' } c99_types = { 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t' } linux_types = { 'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t' } c11_atomic_types = { 'atomic_bool', 'atomic_char', 'atomic_schar', 'atomic_uchar', 'atomic_short', 'atomic_ushort', 'atomic_int', 'atomic_uint', 'atomic_long', 'atomic_ulong', 'atomic_llong', 'atomic_ullong', 'atomic_char16_t', 'atomic_char32_t', 'atomic_wchar_t', 'atomic_int_least8_t', 'atomic_uint_least8_t', 'atomic_int_least16_t', 'atomic_uint_least16_t', 'atomic_int_least32_t', 'atomic_uint_least32_t', 'atomic_int_least64_t', 'atomic_uint_least64_t', 'atomic_int_fast8_t', 'atomic_uint_fast8_t', 'atomic_int_fast16_t', 'atomic_uint_fast16_t', 'atomic_int_fast32_t', 'atomic_uint_fast32_t', 'atomic_int_fast64_t', 'atomic_uint_fast64_t', 'atomic_intptr_t', 'atomic_uintptr_t', 'atomic_size_t', 'atomic_ptrdiff_t', 'atomic_intmax_t', 'atomic_uintmax_t' } def __init__(self, **options): self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True) self.c99highlighting = get_bool_opt(options, 'c99highlighting', True) self.c11highlighting = get_bool_opt(options, 'c11highlighting', True) self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type elif self.c99highlighting and value in self.c99_types: token = Keyword.Type elif self.c11highlighting and value in self.c11_atomic_types: token = Keyword.Type elif self.platformhighlighting and value in self.linux_types: token = Keyword.Type yield index, token, value
class HttpPromptLexer(RegexLexer): name = 'HttpPrompt' aliases = ['http-prompt'] filenames = ['*.http-prompt'] tokens = { 'root': [(r'\s+', Text), (r'(cd)(\s*)', bygroups(Keyword, Text), 'cd'), (r'(rm)(\s*)', bygroups(Keyword, Text), 'rm_option'), (r'(httpie|curl)(\s*)', bygroups(Keyword, Text), 'action'), (words(HTTP_METHODS, prefix='(?i)', suffix='(?!\S)(\s*)'), bygroups(Keyword, Text), combined('redir_out', 'urlpath')), (r'(clear)(\s*)', bygroups(Keyword, Text), 'end'), (r'(exit)(\s*)', bygroups(Keyword, Text), 'end'), (r'(help)(\s)*', bygroups(Keyword, Text), 'end'), (r'(env)(\s*)', bygroups(Keyword, Text), combined('redir_out', 'pipe')), (r'(source)(\s*)', bygroups(Keyword, Text), 'file_path'), (r'(exec)(\s*)', bygroups(Keyword, Text), 'file_path'), (r'(ls)(\s*)', bygroups(Keyword, Text), combined('redir_out', 'urlpath')), (r'', Text, 'concat_mut')], 'cd': string_rules('end'), 'rm_option': [(r'(\-(?:h|o|b|q))(\s*)', bygroups(Name, Text), 'rm_name'), (r'(\*)(\s*)', bygroups(Name, Text), 'end')], 'rm_name': string_rules('end'), 'shell_command': [ (r'(`)([^`]*)(`)', bygroups(Text, using(BashLexer), Text)), ], 'pipe': [ (r'(\s*)(\|)(.*)', bygroups(Text, Operator, using(BashLexer))), ], 'concat_mut': [ (r'$', Text, 'end'), (r'\s+', Text), # Flag options, such as (--form) and (--json) (words(FLAG_OPTIONS, suffix=r'\b'), Name, 'concat_mut'), # Options with values, such as (--style=default) and (--pretty all) (words(VALUE_OPTIONS, suffix=r'\b'), Name, combined('shell_command', 'option_op')), include('shell_command'), # Unquoted or value-quoted request mutation, # such as (name="John Doe") and (name=John\ Doe) (r'((?:[^\s\'"\\=:]|(?:\\.))*)(:=|:|==|=)', bygroups(Name, Operator), combined('shell_command', 'unquoted_mut')), # Full single-quoted request mutation, such as ('name=John Doe') (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)(:=|:|==|=)", bygroups(Text, Name, Operator), combined('shell_command', 'squoted_mut')), # Full double-quoted request mutation, such as ("name=John Doe") (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)(:=|:|==|=)', bygroups(Text, Name, Operator), combined('shell_command', 'dquoted_mut')) ], 'option_op': [ (r'(\s+|=)', Operator, 'option_value'), ], 'option_value': string_rules('#pop:2'), 'file_path': string_rules('end'), 'redir_out': [(r'(?i)(>>?)(\s*)', bygroups(Operator, Text), 'file_path')], 'unquoted_mut': string_rules('#pop'), 'squoted_mut': [(r"((?:[^\r\n'\\]|(?:\\.))+)(')", bygroups(String, Text), '#pop'), (r"([^\r\n'\\]|(\\.))+", String, '#pop')], 'dquoted_mut': [(r'((?:[^\r\n"\\]|(?:\\.))+)(")', bygroups(String, Text), '#pop'), (r'([^\r\n"\\]|(\\.))+', String, '#pop')], 'action': [(words(HTTP_METHODS, prefix='(?i)', suffix='(\s*)'), bygroups(Keyword, Text), combined('redir_out', 'pipe', 'urlpath')), (r'', Text, combined('redir_out', 'pipe', 'urlpath'))], 'urlpath': [(r'https?://([^\s"\'\\]|(\\.))+', String, combined('concat_mut', 'redir_out', 'pipe')), (r'(")(https?://(?:[^\r\n"\\]|(?:\\.))+)(")', bygroups(Text, String, Text), combined('concat_mut', 'redir_out', 'pipe')), (r'(")(https?://(?:[^\r\n"\\]|(?:\\.))+)', bygroups(Text, String)), (r"(')(https?://(?:[^\r\n'\\]|(?:\\.))+)(')", bygroups(Text, String, Text), combined('concat_mut', 'redir_out', 'pipe')), (r"(')(https?://(?:[^\r\n'\\]|(?:\\.))+)", bygroups(Text, String)), (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)(")', bygroups(Text, String, Text), combined('concat_mut', 'redir_out', 'pipe')), (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)', bygroups(Text, String)), (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)(')", bygroups(Text, String, Text), combined('concat_mut', 'redir_out', 'pipe')), (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)", bygroups(Text, String)), (r'([^\-](?:[^\s"\'\\=:]|(?:\\.))+)(\s+|$)', bygroups(String, Text), combined('concat_mut', 'redir_out', 'pipe')), (r'', Text, combined('concat_mut', 'redir_out', 'pipe'))], 'end': [(r'\n', Text, 'root')] }
class AsymptoteLexer(RegexLexer): """ For `Asymptote <http://asymptote.sf.net/>`_ source code. .. versionadded:: 1.2 """ name = 'Asymptote' aliases = ['asy', 'asymptote'] filenames = ['*.asy'] mimetypes = ['text/x-asymptote'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+' tokens = { 'whitespace': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment), (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment), ], 'statements': [ # simple string (TeX friendly) (r'"(\\\\|\\"|[^"])*"', String), # C style string (with character escapes) (r"'", String, 'string'), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)), (r'(and|controls|tension|atleast|curl|if|else|while|for|do|' r'return|break|continue|struct|typedef|new|access|import|' r'unravel|from|include|quote|static|public|private|restricted|' r'this|explicit|true|false|null|cycle|newframe|operator)\b', Keyword), # Since an asy-type-name can be also an asy-function-name, # in the following we test if the string " [a-zA-Z]" follows # the Keyword.Type. # Of course it is not perfect ! (r'(Braid|FitResult|Label|Legend|TreeNode|abscissa|arc|arrowhead|' r'binarytree|binarytreeNode|block|bool|bool3|bounds|bqe|circle|' r'conic|coord|coordsys|cputime|ellipse|file|filltype|frame|grid3|' r'guide|horner|hsv|hyperbola|indexedTransform|int|inversion|key|' r'light|line|linefit|marginT|marker|mass|object|pair|parabola|path|' r'path3|pen|picture|point|position|projection|real|revolution|' r'scaleT|scientific|segment|side|slice|splitface|string|surface|' r'tensionSpecifier|ticklocate|ticksgridT|tickvalues|transform|' r'transformation|tree|triangle|trilinear|triple|vector|' r'vertex|void)(?=\s+[a-zA-Z])', Keyword.Type), # Now the asy-type-name which are not asy-function-name # except yours ! # Perhaps useless (r'(Braid|FitResult|TreeNode|abscissa|arrowhead|block|bool|bool3|' r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|' r'picture|position|real|revolution|slice|splitface|ticksgridT|' r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type), ('[a-zA-Z_]\w*:(?!:)', Name.Label), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions (r'((?:[\w*\s])+?(?:\s|\*))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations (r'((?:[\w*\s])+?(?:\s|\*))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r"'", String, '#pop'), (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'\n', String), (r"[^\\'\n]+", String), # all other characters (r'\\\n', String), (r'\\n', String), # line continuation (r'\\', String), # stray backslash ], } def get_tokens_unprocessed(self, text): from pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name and value in ASYFUNCNAME: token = Name.Function elif token is Name and value in ASYVARNAME: token = Name.Variable yield index, token, value
class BatchLexer(RegexLexer): """ Lexer for the DOS/Windows Batch file format. .. versionadded:: 0.7 """ name = 'Batchfile' aliases = ['bat', 'batch', 'dosbatch', 'winbatch'] filenames = ['*.bat', '*.cmd'] mimetypes = ['application/x-dos-batch'] flags = re.MULTILINE | re.IGNORECASE _nl = r'\n\x1a' _punct = r'&<>|' _ws = r'\t\v\f\r ,;=\xa0' _nlws = r'\s\x1a\xa0,;=' _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws) _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' % (_nl, _ws, _nl, _punct)) _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl) _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws _label = r'(?:(?:[^%s%s+:^]|\^[%s]?[\w\W])*)' % (_nlws, _punct, _nl) _label_compound = r'(?:(?:[^%s%s+:^)]|\^[%s]?[^)])*)' % (_nlws, _punct, _nl) _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator _opword = r'(?:equ|geq|gtr|leq|lss|neq)' _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl) _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|' r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|' r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|' r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:' r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' % (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl)) _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s])+)' % (_nl, _nlws, _punct) _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s)])+)' % (_nl, _nlws, _punct) _token = r'(?:[%s]+|%s)' % (_punct, _core_token) _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound) _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token)) def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % suffix) ] def _make_follow_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _space=_space, _start_label=_start_label, _token=_token, _token_compound=_token_compound, _ws=_ws): suffix = '/compound' if compound else '' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'%s([%s]*)(%s)(.*)' % (_start_label, _ws, _label_compound if compound else _label), bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)), include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'), (r'\|\|?|&&?', Punctuation, '#pop'), include('text') ] return state def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable, _ws=_ws, _nlws=_nlws): op = r'=+\-*/!~' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [(r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'[(),]+', Punctuation), (r'([%s]|%%|\^\^)+' % op, Operator), (r'(%s|%s|(\^[%s]?)?[^()%s%%\^"%s%s]|\^[%s]?%s)+' % (_string, _variable, _nl, op, _nlws, _punct, _nlws, r'[^)]' if compound else r'[\w\W]'), using(this, state='variable')), (r'(?=[\x00|&])', Text, '#pop'), include('follow')] return state def _make_call_state(compound, _label=_label, _label_compound=_label_compound): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(:?)(%s)' % (_label_compound if compound else _label), bygroups(Punctuation, Name.Label), '#pop')) return state def _make_label_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append( (r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' % (_label_compound if compound else _label, _string, _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl, _punct, r')' if compound else ''), bygroups(Name.Label, Comment.Single), '#pop')) return state def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _nlws=_nlws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s])\d)?)(>>?&|<&)([%s]*)(\d)' % (_nlws, _nlws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nlws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ] tokens = { 'root': _make_begin_state(False), 'follow': _make_follow_state(False), 'arithmetic': _make_arithmetic_state(False), 'call': _make_call_state(False), 'label': _make_label_state(False), 'redirect': _make_redirect_state(False), 'root/compound': _make_begin_state(True), 'follow/compound': _make_follow_state(True), 'arithmetic/compound': _make_arithmetic_state(True), 'call/compound': _make_call_state(True), 'label/compound': _make_label_state(True), 'redirect/compound': _make_redirect_state(True), 'variable-or-escape': [(_variable, Name.Variable), (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)], 'string': [(r'"', String.Double, '#pop'), (_variable, Name.Variable), (r'\^!|%%', String.Escape), (r'[^"%%^%s]+|[%%^]' % _nl, String.Double), default('#pop')], 'sqstring': [include('variable-or-escape'), (r'[^%]+|%', String.Single)], 'bqstring': [include('variable-or-escape'), (r'[^%]+|%', String.Backtick)], 'text': [(r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s%s\d)]+|.' % (_nlws, _punct), Text)], 'variable': [(r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s]+|.' % _nl, Name.Variable)], 'for': [(r'(%s)(in)(%s)(\()' % (_space, _space), bygroups(using(this, state='text'), Keyword, using(this, state='text'), Punctuation), '#pop'), include('follow')], 'for2': [(r'\)', Punctuation), (r'(%s)(do%s)' % (_space, _token_terminator), bygroups(using(this, state='text'), Keyword), '#pop'), (r'[%s]+' % _nl, Text), include('follow')], 'for/f': [(r'(")((?:%s|[^"])*?")([%s]*)(\))' % (_variable, _nlws), bygroups(String.Double, using(this, state='string'), Text, Punctuation)), (r'"', String.Double, ('#pop', 'for2', 'string')), (r"('(?:%%%%|%s|[\w\W])*?')([%s]*)(\))" % (_variable, _nlws), bygroups(using(this, state='sqstring'), Text, Punctuation)), (r'(`(?:%%%%|%s|[\w\W])*?`)([%s]*)(\))' % (_variable, _nlws), bygroups(using(this, state='bqstring'), Text, Punctuation)), include('for2')], 'for/l': [(r'-?\d+', Number.Integer), include('for2')], 'if': [ (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' % (_token_terminator, _space), bygroups(Keyword, using(this, state='text'), Number.Integer), '#pop'), (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text'), using(this, state='variable')), '#pop'), (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text')), '#pop'), (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number), bygroups(using(this, state='arithmetic'), Operator.Word, using(this, state='arithmetic')), '#pop'), (_stoken, using(this, state='text'), ('#pop', 'if2')), ], 'if2': [(r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken), bygroups(using(this, state='text'), Operator, using(this, state='text')), '#pop'), (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken), bygroups(using(this, state='text'), Operator.Word, using(this, state='text')), '#pop')], '(?': [(_space, using(this, state='text')), (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')), default('#pop')], 'else?': [(_space, using(this, state='text')), (r'else%s' % _token_terminator, Keyword, '#pop'), default('#pop')] }
class NemerleLexer(RegexLexer): """ For `Nemerle <http://nemerle.org>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. .. versionadded:: 1.5 """ name = 'Nemerle' aliases = ['nemerle'] filenames = ['*.n'] mimetypes = ['text/x-nemerle'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in iteritems(levels): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*].*?[*]/', Comment.Multiline), (r'\n', Text), (r'\$\s*"', String, 'splice-string'), (r'\$\s*<#', String, 'splice-string2'), (r'<#', String, 'recursive-string'), (r'(<\[)\s*(' + cs_ident + ':)?', Keyword), (r'\]\>', Keyword), # quasiquotation only (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|and|as|base|catch|def|delegate|' r'enum|event|extern|false|finally|' r'fun|implements|interface|internal|' r'is|macro|match|matches|module|mutable|new|' r'null|out|override|params|partial|private|' r'protected|public|ref|sealed|static|' r'syntax|this|throw|true|try|type|typeof|' r'virtual|volatile|when|where|with|' r'assert|assert2|async|break|checked|continue|do|else|' r'ensures|for|foreach|if|late|lock|new|nolate|' r'otherwise|regexp|repeat|requires|return|surroundwith|' r'unchecked|unless|using|while|yield)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort|void|array|list)\b\??', Keyword.Type), (r'(:>?)\s*(' + cs_ident + r'\??)', bygroups(Punctuation, Keyword.Type)), (r'(class|struct|variant|module)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [(cs_ident, Name.Class, '#pop')], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ], 'splice-string': [(r'[^"$]', String), (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'\\"', String), (r'"', String, '#pop')], 'splice-string2': [(r'[^#<>$]', String), (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'<#', String, '#push'), (r'#>', String, '#pop')], 'recursive-string': [(r'[^#<>]', String), (r'<#', String, '#push'), (r'#>', String, '#pop')], 'splice-string-content': [(r'if|match', Keyword), (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation), (cs_ident, Name), (r'\d+', Number), (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop')] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class ThriftLexer(RegexLexer): """ For `Thrift <https://thrift.apache.org/>`__ interface definitions. .. versionadded:: 2.1 """ name = 'Thrift' aliases = ['thrift'] filenames = ['*.thrift'] mimetypes = ['application/x-thrift'] tokens = { 'root': [ include('whitespace'), include('comments'), (r'"', String.Double, combined('stringescape', 'dqs')), (r'\'', String.Single, combined('stringescape', 'sqs')), (r'(namespace)(\s+)', bygroups(Keyword.Namespace, Text.Whitespace), 'namespace'), (r'(enum|union|struct|service|exception)(\s+)', bygroups(Keyword.Declaration, Text.Whitespace), 'class'), (r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)' # return arguments r'((?:[^\W\d]|\$)[\w$]*)' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), include('keywords'), include('numbers'), (r'[&=]', Operator), (r'[:;,{}()<>\[\]]', Punctuation), (r'[a-zA-Z_](\.\w|\w)*', Name), ], 'whitespace': [ (r'\n', Text.Whitespace), (r'\s+', Text.Whitespace), ], 'comments': [ (r'#.*$', Comment), (r'//.*?\n', Comment), (r'/\*[\w\W]*?\*/', Comment.Multiline), ], 'stringescape': [ (r'\\([\\nrt"\'])', String.Escape), ], 'dqs': [ (r'"', String.Double, '#pop'), (r'[^\\"\n]+', String.Double), ], 'sqs': [ (r"'", String.Single, '#pop'), (r'[^\\\'\n]+', String.Single), ], 'namespace': [ (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), default('#pop'), ], 'class': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop'), default('#pop'), ], 'keywords': [ (r'(async|oneway|extends|throws|required|optional)\b', Keyword), (r'(true|false)\b', Keyword.Constant), (r'(const|typedef)\b', Keyword.Declaration), (words(( 'cpp_namespace', 'cpp_include', 'cpp_type', 'java_package', 'cocoa_prefix', 'csharp_namespace', 'delphi_namespace', 'php_namespace', 'py_module', 'perl_package', 'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix', 'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace', 'xsd_attrs', 'include'), suffix=r'\b'), Keyword.Namespace), (words(( 'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double', 'string', 'binary', 'map', 'list', 'set', 'slist', 'senum'), suffix=r'\b'), Keyword.Type), (words(( 'BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__', '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__', 'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin', 'break', 'case', 'catch', 'class', 'clone', 'continue', 'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic', 'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare', 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile', 'ensure', 'except', 'exec', 'finally', 'float', 'for', 'foreach', 'function', 'global', 'goto', 'if', 'implements', 'import', 'in', 'inline', 'instanceof', 'interface', 'is', 'lambda', 'module', 'native', 'new', 'next', 'nil', 'not', 'or', 'pass', 'public', 'print', 'private', 'protected', 'raise', 'redo', 'rescue', 'retry', 'register', 'return', 'self', 'sizeof', 'static', 'super', 'switch', 'synchronized', 'then', 'this', 'throw', 'transient', 'try', 'undef', 'unless', 'unsigned', 'until', 'use', 'var', 'virtual', 'volatile', 'when', 'while', 'with', 'xor', 'yield'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), ], 'numbers': [ (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float), (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex), (r'[+-]?[0-9]+', Number.Integer), ], }
class HamlLexer(ExtendedRegexLexer): """ For Haml markup. .. versionadded:: 1.3 """ name = 'Haml' aliases = ['haml'] filenames = ['*.haml'] mimetypes = ['text/x-haml'] flags = re.IGNORECASE # Haml can include " |\n" anywhere, # which is ignored and used to wrap long lines. # To accommodate this, use this custom faux dot instead. _dot = r'(?: \|\n(?=.* \|)|.)' # In certain places, a comma at the end of the line # allows line wrapping as well. _comma_dot = r'(?:,\s*\n|' + _dot + ')' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'), (r'(-)(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)), (r'\[' + _dot + r'*?\]', using(RubyLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(RubyLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'haml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(RubyLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
class CFamilyLexer(RegexLexer): """ For C family source code. This is used as a base class to avoid repetitious definitions. """ #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)*' tokens = { 'whitespace': [ # preprocessor directives: without whitespace ('^#if\s+0', Comment.Preproc, 'if0'), ('^#', Comment.Preproc, 'macro'), # or with whitespace ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this), Comment.Preproc), 'if0'), ('^(' + _ws1 + ')(#)', bygroups(using(this), Comment.Preproc), 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ (r'L?"', String, 'string'), (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words( ('auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register', 'restricted', 'return', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'), suffix=r'\b'), Keyword), (r'(bool|int|long|float|short|double|char|unsigned|signed|void|' r'[a-z_][a-z0-9_]*_t)\b', Keyword.Type), (words(('inline', '_inline', '__inline', 'naked', 'restrict', 'thread', 'typename'), suffix=r'\b'), Keyword.Reserved), # Vector intrinsics (r'(__m(128i|128d|128|64))\b', Keyword.Reserved), # Microsoft-isms (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall', 'cdecl', 'fastcall', 'int32', 'declspec', 'finally', 'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned', 'raise', 'noop', 'identifier', 'forceinline', 'assume'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')?(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')?(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ] } stdlib_types = [ 'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t' ] c99_types = [ '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t' ] def __init__(self, **options): self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True) self.c99highlighting = get_bool_opt(options, 'c99highlighting', True) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type elif self.c99highlighting and value in self.c99_types: token = Keyword.Type yield index, token, value
class HtmlLexer(RegexLexer): """ For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted by the appropriate lexer. """ name = 'HTML' aliases = ['html'] filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt'] mimetypes = ['text/html', 'application/xhtml+xml'] flags = re.IGNORECASE | re.DOTALL tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), (r'<!--(.|\n)*?-->', Comment.Multiline), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'(<)(\s*)(script)(\s*)', bygroups(Punctuation, Text, Name.Tag, Text), ('script-content', 'tag')), (r'(<)(\s*)(style)(\s*)', bygroups(Punctuation, Text, Name.Tag, Text), ('style-content', 'tag')), # note: this allows tag names not used in HTML like <x:with-dash>, # this is to support yet-unknown template engines and the like (r'(<)(\s*)([\w:.-]+)', bygroups(Punctuation, Text, Name.Tag), 'tag'), (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation)), ], 'tag': [ (r'\s+', Text), (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text), 'attr'), (r'[\w:-]+', Name.Attribute), (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'), ], 'script-content': [ (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation), '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), # fallback cases for when there is no closing script tag # first look for newline and then go back into root state # if that fails just read the rest of the file # this is similar to the error handling logic in lexer.py (r'.+?\n', using(JavascriptLexer), '#pop'), (r'.+', using(JavascriptLexer), '#pop'), ], 'style-content': [ (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation), '#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), # fallback cases for when there is no closing style tag # first look for newline and then go back into root state # if that fails just read the rest of the file # this is similar to the error handling logic in lexer.py (r'.+?\n', using(CssLexer), '#pop'), (r'.+', using(CssLexer), '#pop'), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], } def analyse_text(text): if html_doctype_matches(text): return 0.5
class VhdlLexer(RegexLexer): """ For VHDL source code. .. versionadded:: 1.5 """ name = 'vhdl' aliases = ['vhdl'] filenames = ['*.vhdl', '*.vhd'] mimetypes = ['text/x-vhdl'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single), (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char), (r'[~!%^&*+=|?:<>/-]', Operator), (r"'[a-zA-Z_][a-zA-Z0-9_]*", Name.Attribute), (r'[()\[\],.;\']', Punctuation), (r'"[^\n\\]*"', String), (r'(library)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Namespace)), (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)), (r'(use)(\s+)([a-zA-Z_][\.a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Namespace)), (r'(entity|component)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Class)), (r'(architecture|configuration)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)' r'(of)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)(is)', bygroups(Keyword, Text, Name.Class, Text, Keyword, Text, Name.Class, Text, Keyword)), (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'), include('types'), include('keywords'), include('numbers'), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'endblock': [ include('keywords'), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class), (r'(\s+)', Text), (r';', Punctuation, '#pop'), ], 'types': [ (r'(boolean|bit|character|severity_level|integer|time|delay_length|' r'natural|positive|string|bit_vector|file_open_kind|' r'file_open_status|std_ulogic|std_ulogic_vector|std_logic|' r'std_logic_vector)\b', Keyword.Type), ], 'keywords': [ (r'(abs|access|after|alias|all|and|' r'architecture|array|assert|attribute|begin|block|' r'body|buffer|bus|case|component|configuration|' r'constant|disconnect|downto|else|elsif|end|' r'entity|exit|file|for|function|generate|' r'generic|group|guarded|if|impure|in|' r'inertial|inout|is|label|library|linkage|' r'literal|loop|map|mod|nand|new|' r'next|nor|not|null|of|on|' r'open|or|others|out|package|port|' r'postponed|procedure|process|pure|range|record|' r'register|reject|return|rol|ror|select|' r'severity|signal|shared|sla|sli|sra|' r'srl|subtype|then|to|transport|type|' r'units|until|use|variable|wait|when|' r'while|with|xnor|xor)\b', Keyword), ], 'numbers': [ (r'\d{1,2}#[0-9a-fA-F_]+#?', Number.Integer), (r'[0-1_]+(\.[0-1_])', Number.Integer), (r'\d+', Number.Integer), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float), (r'H"[0-9a-fA-F_]+"', Number.Oct), (r'O"[0-7_]+"', Number.Oct), (r'B"[0-1_]+"', Number.Oct), ], }
class ScamlLexer(ExtendedRegexLexer): """ For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala. .. versionadded:: 1.4 """ name = 'Scaml' aliases = ['scaml'] filenames = ['*.scaml'] mimetypes = ['text/x-scaml'] flags = re.IGNORECASE # Scaml does not yet support the " |\n" notation to # wrap long lines. Once it does, use the custom faux # dot instead. # _dot = r'(?: \|\n(?=.* \|)|.)' _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)), (r'\[' + _dot + r'*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
from pygments.token import ( Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error, ) from pygments.util import get_bool_opt # Local line_re = re.compile('.*?\n') __all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer', 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer', 'IPythonConsoleLexer', 'IPyLexer'] ipython_tokens = [ (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)), (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))), (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)), (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)), (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword, using(BashLexer), Text)), (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)), (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)), (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)), ] def build_ipy_lexer(python3): """Builds IPython lexers depending on the value of `python3`. The lexer inherits from an appropriate Python lexer and then adds
class PugLexer(ExtendedRegexLexer): """ For Pug markup. Pug is a variant of Scaml, see: http://scalate.fusesource.org/documentation/scaml-reference.html .. versionadded:: 1.4 """ name = 'Pug' aliases = ['pug', 'jade'] filenames = ['*.pug', '*.jade'] mimetypes = ['text/x-pug', 'text/x-jade'] flags = re.IGNORECASE _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), (r'[\w:-]+', Name.Tag, 'tag'), (r'\|', Text, 'eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)), (r'\[' + _dot + r'*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
tokens = {"root": [(r"`[^`]*?`", String.Backtick), inherit]} ROOT_TOKENS = [ (r"\?", Keyword), (r"\$\w+", Name.Variable), (r"\$\{", Keyword, ("pymode",)), (r"\$\(", Keyword, ("subproc",)), (r"\$\[", Keyword, ("subproc",)), (r"@\(", Keyword, ("pymode",)), inherit, ] PYMODE_TOKENS = [ (r"(.+)(\))", bygroups(using(this), Keyword), "#pop"), (r"(.+)(\})", bygroups(using(this), Keyword), "#pop"), ] SUBPROC_TOKENS = [ (r"(.+)(\))", bygroups(using(XonshSubprocLexer), Keyword), "#pop"), (r"(.+)(\])", bygroups(using(XonshSubprocLexer), Keyword), "#pop"), ] class XonshLexer(PythonLexer): """Xonsh console lexer for pygments.""" name = "Xonsh lexer" aliases = ["xonsh", "xsh"] filenames = ["*.xsh", "*xonshrc"]
class RagelEmbeddedLexer(RegexLexer): """ A lexer for `Ragel`_ embedded in a host language file. This will only highlight Ragel statements. If you want host language highlighting then call the language-specific Ragel lexer. *New in Pygments 1.1.* """ name = 'Embedded Ragel' aliases = ['ragel-em'] filenames = ['*.rl'] tokens = { 'root': [ ( r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^%\'"/#]+', # exclude unsafe characters r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression # / is safe now that we've handled regex and javadoc comments r'/', )) + r')+', Other), # Single Line FSM. # Please don't put a quoted newline in a single line FSM. # That's just mean. It will break this. (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation, using(RagelLexer), Punctuation, Text)), # Multi Line FSM. (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'), ], 'multi-line-fsm': [ ( r'(' + r'|'.join(( # keep ragel code in largest possible chunks. r'(' + r'|'.join(( r'[^}\'"\[/#]', # exclude unsafe characters r'}(?=[^%]|$)', # } is okay as long as it's not followed by % r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two... r'[^\\][\\][{}]', # ...and } is okay if it's escaped # allow / if it's preceded with one of these symbols # (ragel EOF actions) r'(>|\$|%|<|@|<>)/', # specifically allow regex followed immediately by * # so it doesn't get mistaken for a comment r'/(?!\*)(\\\\|\\/|[^/])*/\*', # allow / as long as it's not followed by another / or by a * r'/(?=[^/\*]|$)', # We want to match as many of these as we can in one block. # Not sure if we need the + sign here, # does it help performance? )) + r')+', # strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment )) + r')+', using(RagelLexer)), (r'}%%', Punctuation, '#pop'), ] } def analyse_text(text): return '@LANG: indep' in text or 0.1
class GosuLexer(RegexLexer): """ For Gosu source code. *New in Pygments 1.5.* """ name = 'Gosu' aliases = ['gosu'] filenames = ['*.gs', '*.gsx', '*.gsp', '*.vark'] mimetypes = ['text/x-gosu'] flags = re.MULTILINE | re.DOTALL tokens = { 'root': [ # method names ( r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # modifiers etc. r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), (r'(in|as|typeof|statictypeof|typeis|typeas|if|else|foreach|for|' r'index|while|do|continue|break|return|try|catch|finally|this|' r'throw|new|switch|case|default|eval|super|outer|classpath|' r'using)\b', Keyword), (r'(var|delegate|construct|function|private|internal|protected|' r'public|abstract|override|final|static|extends|transient|' r'implements|represents|readonly)\b', Keyword.Declaration), (r'(property\s+)(get|set)?', Keyword.Declaration), (r'(boolean|byte|char|double|float|int|long|short|void|block)\b', Keyword.Type), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null|NaN|Infinity)\b', Keyword.Constant), (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword.Declaration, Text, Name.Class)), (r'(uses)(\s+)([a-zA-Z0-9_.]+\*?)', bygroups(Keyword.Namespace, Text, Name.Namespace)), (r'"', String, 'string'), (r'(\??[\.#])([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'(:)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), (r'and|or|not|[\\~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'[0-9]+', Number.Integer), (r'\n', Text) ], 'templateText': [(r'(\\<)|(\\\$)', String), (r'(<%@\s+)(extends|params)', bygroups(Operator, Name.Decorator), 'stringTemplate'), (r'<%!--.*?--%>', Comment.Multiline), (r'(<%)|(<%=)', Operator, 'stringTemplate'), (r'\$\{', Operator, 'stringTemplateShorthand'), (r'.', String)], 'string': [(r'"', String, '#pop'), include('templateText')], 'stringTemplate': [(r'"', String, 'string'), (r'%>', Operator, '#pop'), include('root')], 'stringTemplateShorthand': [(r'"', String, 'string'), (r'\{', Operator, 'stringTemplateShorthand'), (r'\}', Operator, '#pop'), include('root')], }
class CeylonLexer(RegexLexer): """ For `Ceylon <http://ceylon-lang.org/>`_ source code. *New in Pygments 1.6.* """ name = 'Ceylon' aliases = ['ceylon'] filenames = ['*.ceylon'] mimetypes = ['text/x-ceylon'] flags = re.MULTILINE | re.DOTALL #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' tokens = { 'root': [ # method names ( r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'(variable|shared|abstract|doc|by|formal|actual|late|native)', Name.Decorator), (r'(break|case|catch|continue|default|else|finally|for|in|' r'variable|if|return|switch|this|throw|try|while|is|exists|dynamic|' r'nonempty|then|outer|assert)\b', Keyword), (r'(abstracts|extends|satisfies|adapts|' r'super|given|of|out|assign|' r'transient|volatile)\b', Keyword.Declaration), (r'(function|value|void)\b', Keyword.Type), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null)\b', Keyword.Constant), (r'(class|interface|object|alias)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char), (r'".*``.*``.*"', String.Interpol), (r'(\.)([a-z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'\d{1,3}(_\d{3})+\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float), (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', Number.Float), (r'[0-9][0-9]*\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float), (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', Number.Float), (r'#([0-9a-fA-F]{4})(_[0-9a-fA-F]{4})+', Number.Hex), (r'#[0-9a-fA-F]+', Number.Hex), (r'\$([01]{4})(_[01]{4})+', Number.Integer), (r'\$[01]+', Number.Integer), (r'\d{1,3}(_\d{3})+[kMGTP]?', Number.Integer), (r'[0-9]+[kMGTP]?', Number.Integer), (r'\n', Text) ], 'class': [(r'[A-Za-z_][a-zA-Z0-9_]*', Name.Class, '#pop')], 'import': [(r'[a-z][a-zA-Z0-9_.]*', Name.Namespace, '#pop')], }
def build_ipy_lexer(python3): """Builds IPython lexers depending on the value of `python3`. The lexer inherits from an appropriate Python lexer and then adds information about IPython specific keywords (i.e. magic commands, shell commands, etc.) Parameters ---------- python3 : bool If `True`, then build an IPython lexer from a Python 3 lexer. """ # It would be nice to have a single IPython lexer class which takes # a boolean `python3`. But since there are two Python lexer classes, # we will also have two IPython lexer classes. if python3: PyLexer = Python3Lexer name = 'IPython3' aliases = ['ipython3'] doc = """IPython3 Lexer""" else: PyLexer = PythonLexer name = 'IPython' aliases = ['ipython2', 'ipython'] doc = """IPython Lexer""" ipython_tokens = [ (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))), (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))), (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))), (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))), (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))), (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))), (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))), (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))), (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)), (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))), (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)), (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)), (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword, using(BashLexer), Text)), (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)), (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)), (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)), ] tokens = PyLexer.tokens.copy() tokens['root'] = ipython_tokens + tokens['root'] attrs = {'name': name, 'aliases': aliases, 'filenames': [], '__doc__': doc, 'tokens': tokens} return type(name, (PyLexer,), attrs)
class KotlinLexer(RegexLexer): """ For `Kotlin <http://confluence.jetbrains.net/display/Kotlin/>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. *New in Pygments 1.5.* """ name = 'Kotlin' aliases = ['kotlin'] filenames = ['*.kt'] mimetypes = ['text/x-kotlin'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in list(levels.items()): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|break|catch|' r'fun|continue|default|delegate|' r'do|else|enum|extern|false|finally|' r'fixed|for|goto|if|implicit|in|interface|' r'internal|is|lock|null|' r'out|override|private|protected|public|readonly|' r'ref|return|sealed|sizeof|' r'when|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|val|var)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|dynamic|float|int|long|' r'short)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(package|using)(\s+)', bygroups(Keyword, Text), 'package'), (cs_ident, Name), ], 'class': [(cs_ident, Name.Class, '#pop')], 'package': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class XonshSubprocLexer(BashLexer): """Lexer for xonsh subproc mode.""" name = 'Xonsh subprocess lexer' tokens = {'root': [(SearchPath, String.Backtick), inherit, ]} ROOT_TOKENS = [(r'\?', Keyword), (r'\$\w+', Name.Variable), (r'\$\{', Keyword, ('pymode', )), (r'[\!\$]\(', Keyword, ('subproc', )), (r'[\!\$]\[', Keyword, ('subproc', )), (r'@\$\(', Keyword, ('subproc', )), (r'@\(', Keyword, ('pymode', )), inherit, ] PYMODE_TOKENS = [(r'(.+)(\))', bygroups(using(this), Keyword), '#pop'), (r'(.+)(\})', bygroups(using(this), Keyword), '#pop'), ] SUBPROC_TOKENS = [ (r'(.+)(\))', bygroups(using(XonshSubprocLexer), Keyword), '#pop'), (r'(.+)(\])', bygroups(using(XonshSubprocLexer), Keyword), '#pop'), ] class XonshLexer(PythonLexer): """Xonsh console lexer for pygments.""" name = 'Xonsh lexer' aliases = ['xonsh', 'xsh'] filenames = ['*.xsh', '*xonshrc']
class VCLLexer(RegexLexer): """ For Varnish Configuration Language (VCL). .. versionadded:: 2.2 """ name = 'VCL' aliases = ['vcl'] filenames = ['*.vcl'] mimetypes = ['text/x-vclsrc'] def analyse_text(text): # If the very first line is 'vcl 4.0;' it's pretty much guaranteed # that this is VCL if text.startswith('vcl 4.0;'): return 1.0 # Skip over comments and blank lines # This is accurate enough that returning 0.9 is reasonable. # Almost no VCL files start without some comments. elif '\nvcl 4\.0;' in text[:1000]: return 0.9 tokens = { 'probe': [ include('whitespace'), include('comments'), (r'(\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'\}', Punctuation, '#pop'), ], 'acl': [ include('whitespace'), include('comments'), (r'[!/]+', Operator), (r';', Punctuation), (r'\d+', Number), (r'\}', Punctuation, '#pop'), ], 'backend': [ include('whitespace'), (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), (r'(\.probe)(\s*=\s*)(\{)', bygroups(Name.Attribute, Operator, Punctuation), 'probe'), (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'statements': [ (r'(\d\.)?\d+[sdwhmy]', Literal.Date), (r'(\d\.)?\d+ms', Literal.Date), (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|' r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|' r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|' r'miss|fetch|restart)\b', Name.Constant), (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable), (words( ('obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip', 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip', 'bereq.uncacheable'), suffix=r'\b'), Name.Variable), (r'[!%&+*\-,/<.}{>=|~]+', Operator), (r'[();]', Punctuation), (r'[,]+', Punctuation), (words(('hash_data', 'regsub', 'regsuball', 'if', 'else', 'elsif', 'elif', 'synth', 'synthetic', 'ban', 'return', 'set', 'unset', 'import', 'include', 'new', 'rollback', 'call'), suffix=r'\b'), Keyword), (r'storage\.\w+\.\w+\b', Name.Variable), (words(('true', 'false')), Name.Builtin), (r'\d+\b', Number), (r'(backend)(\s+\w+)(\s*\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), (r'(probe\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), (r'(acl\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved, Name.Constant, Punctuation)), (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)', bygroups(Keyword, Name.Function, Punctuation)), (r'([a-zA-Z_]\w*)' r'(\.)' r'([a-zA-Z_]\w*)' r'(\s*\(.*\))', bygroups(Name.Function, Punctuation, Name.Function, using(this))), ('[a-zA-Z_]\w*', Name), ], 'comment': [ (r'[^*/]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline), ], 'comments': [ (r'#.*$', Comment), (r'/\*', Comment.Multiline, 'comment'), (r'//.*$', Comment), ], 'string': [ (r'"', String, '#pop'), (r'[^"\n]+', String), # all other characters ], 'multistring': [ (r'[^"}]', String), (r'"\}', String, '#pop'), (r'["}]', String), ], 'whitespace': [ (r'L?"', String, 'string'), (r'\{"', String, 'multistring'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation ], 'root': [ include('whitespace'), include('comments'), include('statements'), (r'\s+', Text), ], }