class ProtoBufLexer(RegexLexer):
    """
    Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_
    definition files.

    .. versionadded:: 1.4
    """

    name = 'Protocol Buffer'
    aliases = ['protobuf', 'proto']
    filenames = ['*.proto']

    tokens = {
        'root': [
            (r'[ \t]+', Text),
            (r'[,;{}\[\]()<>]', Punctuation),
            # C-style line and block comments (with line continuations).
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
            (words((
                'import', 'option', 'optional', 'required', 'repeated',
                'default', 'packed', 'ctype', 'extensions', 'to', 'max',
                'rpc', 'returns', 'oneof'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words((
                'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64',
                'float', 'double', 'bool', 'string', 'bytes'),
                suffix=r'\b'),
             Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            # Declarations push a state that consumes the declared name.
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'),
            (r'(message|extend)(\s+)',
             bygroups(Keyword.Declaration, Text), 'message'),
            (r'(enum|group|service)(\s+)',
             bygroups(Keyword.Declaration, Text), 'type'),
            (r'\".*?\"', String),
            (r'\'.*?\'', String),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'(\-?(inf|nan))\b', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # BUGFIX: was r'[+-=]'.  Inside a character class '+-=' is the
            # range U+002B..U+003D, which accidentally also matched digits
            # and ',./:;<' as Operator.  With '-' first it is literal and
            # only '+', '-', '=' match, as intended.
            (r'[-+=]', Operator),
            (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
             bygroups(Name.Attribute, Text, Operator)),
            (r'[a-zA-Z_][\w.]*', Name),
        ],
        # Each of the three helper states consumes one identifier and pops.
        'package': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'message': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'type': [
            (r'[a-zA-Z_]\w*', Name, '#pop'),
            default('#pop'),
        ],
    }
class ScssLexer(RegexLexer):
    """
    For SCSS stylesheets.
    """

    name = 'SCSS'
    aliases = ['scss']
    filenames = ['*.scss']
    mimetypes = ['text/x-scss']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # @-rules: each transitions to the state that lexes its argument.
            (r'@import', Keyword, 'value'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'),
            (r'@[\w-]+', Keyword, 'selector'),
            # Variable assignment: "$name:" switches to value lexing.
            (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'),
            # TODO: broken, and prone to infinite loops.
            # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
            # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
            # Anything else is treated as (the start of) a selector.
            default('selector'),
        ],

        'attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*:', Operator, 'value'),
            default('#pop'),
        ],

        'inline-comment': [
            # Slurp comment text, stopping before '#{' interpolation and '*/'.
            (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }

    # The bulk of the states is shared with the other Sass-family lexers;
    # copy them in, then add SCSS-specific terminators (';', braces).
    for group, common in common_sass_tokens.items():
        tokens[group] = copy.copy(common)
    tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
    tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            # Lines prefixed with a facility/level field ("kern :info : ...")
            # dispatch on the level; the lookahead keeps the "[timestamp]"
            # for the per-level state to consume.
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            # Bare "[timestamp] ..." lines: guess the level from keywords.
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            # "[  12.345678] " timestamp, then an optional "subsystem:" tag.
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        # Remainder of the line is styled according to the severity.
        'debug': [include('base'), (r'.+\n', Comment, '#pop')],
        'info': [include('base'), (r'.+\n', Text, '#pop')],
        'warn': [include('base'), (r'.+\n', Generic.Strong, '#pop')],
        'error': [include('base'), (r'.+\n', Generic.Error, '#pop')]
    }
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        # Entered after tokens that may legally be followed by a regex
        # literal ('/...'); pops back as soon as one is (or is not) found.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'/', Operator, '#pop'),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            # BUGFIX: 'fetch' appeared twice in this alternation; the second
            # copy was dead.  One occurrence suffices.
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            # BUGFIX: 'char' appeared twice in this alternation; the second
            # copy was dead.  'uchar' is kept.
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    .. versionadded:: 0.9
    """
    name = 'Debian Control file'
    aliases = ['control', 'debcontrol']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text),
             'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            # Generic "Field: value" fallback.
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            # BUGFIX: the '$'-anchored rule must come before the unanchored
            # one -- previously it was shadowed (the unanchored rule matches
            # everywhere the anchored one does), so the state never popped
            # on the final address and swallowed the rest of the file.
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r'<[^>]+>', Generic.Strong),
            (r',\n?', Text),
            # BUGFIX: pop at end of line so a Maintainer field without an
            # e-mail address does not leave the lexer stuck in this state.
            (r'\n', Text, '#pop'),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )(\S+)',
             bygroups(Text, String, Name, Name.Class)),
            (r':.*\n', Generic.Strong),
            # Continuation lines of the long description are indented.
            (r' .*\n', Text),
            default('#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            # Substitution variables, e.g. ${shlibs:Depends}.
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[})]\s*$', Text, '#pop'),
            (r'\}', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
            # Architecture qualifiers, e.g. [amd64 i386].
            (r'\[.*?\]', Name.Entity),
        ],
        'depend_vers': [
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number))
        ]
    }
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # Rules shared by text mode and math mode.
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # Math-mode delimiters: \[..\] / $$..$$ are display math,
            # \(..\) / $..$ are inline math.
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # Control sequences; 'command' lexes optional [..] arguments.
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        # Shared math-mode rules, included by both math states below.
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            # A lone '$' inside display math is not a terminator.
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        # Recognize files that open with a common (La)TeX preamble command.
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            # Request lines start with a dot (".TH name ...").
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # Line continuation inside a request.
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Score the likelihood that *text* is a (g)roff document."""
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # BUGFIX: use a slice instead of text[3] -- indexing raised
        # IndexError for three-character inputs such as ".TH"; a slice
        # yields '' (falsy via .isspace()) instead.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
class CirruLexer(RegexLexer):
    r"""
    Syntax rules of Cirru can be found at:
    http://cirru.org/

    * using ``()`` for expressions, but restricted in a same line
    * using ``""`` for strings, with ``\`` for escaping chars
    * using ``$`` as folding operator
    * using ``,`` as unfolding operator
    * using indentations for nested blocks

    .. versionadded:: 2.0
    """

    name = 'Cirru'
    aliases = ['cirru']
    filenames = ['*.cirru']
    mimetypes = ['text/x-cirru']
    flags = re.MULTILINE

    tokens = {
        'string': [
            (r'[^"\\\n]', String),
            (r'\\', String.Escape, 'escape'),
            (r'"', String, '#pop'),
        ],
        # One escaped character after a backslash, then back to the string.
        'escape': [
            (r'.', String.Escape, '#pop'),
        ],
        # The head position of an expression: the first token is lexed as
        # the function name; each rule pops back to 'line' afterwards.
        'function': [
            (r'\,', Operator, '#pop'),
            (r'[^\s"()]+', Name.Function, '#pop'),
            (r'\)', Operator, '#pop'),
            (r'(?=\n)', Text, '#pop'),
            (r'\(', Operator, '#push'),
            (r'"', String, ('#pop', 'string')),
            (r'[ ]+', Text.Whitespace),
        ],
        'line': [
            # '$' folds the rest of the line into a sub-expression.
            (r'(?<!\w)\$(?!\w)', Operator, 'function'),
            (r'\(', Operator, 'function'),
            (r'\)', Operator),
            (r'\n', Text, '#pop'),
            (r'"', String, 'string'),
            (r'[ ]+', Text.Whitespace),
            (r'[+-]?[\d.]+\b', Number),
            (r'[^\s"()]+', Name.Variable)
        ],
        'root': [
            (r'^\n+', Text.Whitespace),
            # Every line starts in head ('function') position.
            default(('line', 'function')),
        ]
    }
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens that may be followed by an ERE literal.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # BUGFIX: the list contained 'ORFS', which is not an AWK
            # variable; the real builtin is ORS (output record separator).
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
class CapnProtoLexer(RegexLexer):
    """
    For `Cap'n Proto <https://capnproto.org>`_ source.

    .. versionadded:: 2.2
    """
    name = 'Cap\'n Proto'
    filenames = ['*.capnp']
    aliases = ['capnp']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # Field/type ordinals, e.g. @0, @1.
            (r'@[0-9a-zA-Z]*', Name.Decorator),
            # '=' starts a default value, ':' a type, '$' an annotation;
            # each has its own state plus a bracket-balancing sub-state.
            (r'=', Literal, 'expression'),
            (r':', Name.Class, 'type'),
            (r'\$', Name.Attribute, 'annotation'),
            (r'(struct|enum|interface|union|import|using|const|annotation|'
             r'extends|in|of|on|as|with|from|fixed)\b',
             Keyword),
            (r'[\w.]+', Name),
            (r'[^#@=:$\w]+', Text),
        ],
        'type': [
            (r'[^][=;,(){}$]+', Name.Class),
            (r'[\[(]', Name.Class, 'parentype'),
            default('#pop'),
        ],
        # Inside '(...)' or '[...]' of a type: nests via #push.
        'parentype': [
            (r'[^][;()]+', Name.Class),
            (r'[\[(]', Name.Class, '#push'),
            (r'[])]', Name.Class, '#pop'),
            default('#pop'),
        ],
        'expression': [
            (r'[^][;,(){}$]+', Literal),
            (r'[\[(]', Literal, 'parenexp'),
            default('#pop'),
        ],
        'parenexp': [
            (r'[^][;()]+', Literal),
            (r'[\[(]', Literal, '#push'),
            (r'[])]', Literal, '#pop'),
            default('#pop'),
        ],
        'annotation': [
            (r'[^][;,(){}=:]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, 'annexp'),
            default('#pop'),
        ],
        'annexp': [
            (r'[^][;()]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, '#push'),
            (r'[])]', Name.Attribute, '#pop'),
            default('#pop'),
        ],
    }
def gen_elixir_sigil_rules():
    """Build the token states for Elixir sigils (``~r/.../`` etc.).

    Returns a dict of lexer states: a 'sigils' entry listing the opening
    rules, plus, for every terminator, paired states for interpolating
    (lowercase sigil letter) and non-interpolating (uppercase) bodies.
    """
    # all valid sigil terminators (excluding heredocs)
    # (open-regex, close-regex, close-char-class, state-name-suffix)
    terminators = [
        (r'\{', r'\}', '}', 'cb'),
        (r'\[', r'\]', r'\]', 'sb'),
        (r'\(', r'\)', ')', 'pa'),
        ('<', '>', '>', 'ab'),
        ('/', '/', '/', 'slas'),
        (r'\|', r'\|', '|', 'pipe'),
        ('"', '"', '"', 'quot'),
        ("'", "'", "'", 'apos'),
    ]

    # heredocs have slightly different rules
    triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

    token = String.Other
    states = {'sigils': []}

    for term, name in triquotes:
        # Lowercase sigil letters allow interpolation, uppercase do not;
        # both push an '-end' state to consume trailing modifier letters.
        states['sigils'] += [
            (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                (name + '-end', name + '-intp')),
            (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                (name + '-end', name + '-no-intp')),
        ]

        states[name + '-end'] = [
            (r'[a-zA-Z]+', token, '#pop'),
            default('#pop'),
        ]
        states[name + '-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ]
        states[name + '-no-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_no_interpol'),
        ]

    for lterm, rterm, rterm_class, name in terminators:
        states['sigils'] += [
            (r'~[a-z]' + lterm, token, name + '-intp'),
            (r'~[A-Z]' + lterm, token, name + '-no-intp'),
        ]
        # Body states are generated per terminator by the shared helper.
        states[name + '-intp'] = \
            gen_elixir_sigstr_rules(rterm, rterm_class, token)
        states[name + '-no-intp'] = \
            gen_elixir_sigstr_rules(rterm, rterm_class, token,
                                    interpol=False)

    return states
class BSTLexer(RegexLexer):
    """
    A lexer for BibTeX bibliography styles.

    .. versionadded:: 2.2
    """

    name = 'BST'
    aliases = ['bst', 'bst-pybtex']
    filenames = ['*.bst']
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            include('whitespace'),
            # Top-level commands differ only in how many {...} groups follow.
            (words(['read', 'sort']), Keyword),
            (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
             Keyword, ('group')),
            (words(['function', 'macro']), Keyword, ('group', 'group')),
            (words(['entry']), Keyword, ('group', 'group', 'group')),
        ],
        'group': [
            include('whitespace'),
            (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
        ],
        'group-end': [
            include('whitespace'),
            (r'\}', Punctuation, '#pop'),
        ],
        'body': [
            include('whitespace'),
            # 'name  -> quoted function, name$ -> builtin, name -> variable.
            (r"\'[^#\"\{\}\s]+", Name.Function),
            (r'[^#\"\{\}\s]+\$', Name.Builtin),
            (r'[^#\"\{\}\s]+', Name.Variable),
            (r'"[^\"]*"', String),
            (r'#-?\d+', Number),
            (r'\{', Punctuation, ('group-end', 'body')),
            default('#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
            # NOTE(review): Comment.SingleLine is not a standard Pygments
            # token (Comment.Single is); it is auto-created and falls back
            # to Comment styling -- confirm whether this is intentional.
            ('%.*?$', Comment.SingleLine),
        ],
    }
class FortranFixedLexer(RegexLexer):
    """
    Lexer for fixed format Fortran.

    .. versionadded:: 2.1
    """
    name = 'FortranFixed'
    aliases = ['fortranfixed']
    filenames = ['*.f', '*.F']

    flags = re.IGNORECASE

    def _lex_fortran(self, match, ctx=None):
        """Lex a line just as free form fortran without line break."""
        lexer = FortranLexer()
        text = match.group(0) + "\n"
        for index, token, value in lexer.get_tokens_unprocessed(text):
            value = value.replace('\n', '')
            if value != '':
                yield index, token, value

    tokens = {
        'root': [
            # Fixed-form comment markers in column 1 ('C' or '*').
            (r'[C*].*\n', Comment),
            (r'#.*\n', Comment.Preproc),
            (r' {0,4}!.*\n', Comment),
            # Columns 1-5 are the statement label field; column 6 (handled
            # in 'cont-char') is the continuation character.
            (r'(.{5})', Name.Label, 'cont-char'),
            (r'.*\n', using(FortranLexer)),
        ],
        'cont-char': [
            (' ', Text, 'code'),
            ('0', Comment, 'code'),
            # Any other character in column 6 marks a continuation line.
            ('.', Generic.Strong, 'code'),
        ],
        'code': [
            # Columns 7-72 are code; anything after column 72 is ignored
            # by compilers, so it is lexed as a comment here.
            (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text),
             'root'),
            (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'),
            default('root'),
        ]
    }
class RegeditLexer(RegexLexer):
    """
    Lexer for `Windows Registry
    <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced
    by regedit.

    .. versionadded:: 1.6
    """

    name = 'reg'
    aliases = ['registry']
    filenames = ['*.reg']
    mimetypes = ['text/x-windows-registry']

    tokens = {
        'root': [
            # File header line.
            (r'Windows Registry Editor.*', Text),
            (r'\s+', Text),
            (r'[;#].*', Comment.Single),
            # Key path header, e.g. [HKEY_LOCAL_MACHINE\...]; a leading '-'
            # marks the key for deletion.
            (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$',
             bygroups(Keyword, Operator, Name.Builtin, Keyword)),
            # String keys, which obey somewhat normal escaping
            (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text), 'value'),
            # Bare keys (includes @)
            (r'(.*?)([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text), 'value'),
        ],
        'value': [
            (r'-', Operator, '#pop'),  # delete value
            (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)',
             bygroups(Name.Variable, Punctuation, Number), '#pop'),
            # As far as I know, .reg files do not support line continuation.
            (r'.+', String, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        return text.startswith('Windows Registry Editor')
class GDScriptLexer(RegexLexer):
    """
    For `GDScript source code <https://www.godotengine.org>`_.
    """

    name = "GDScript"
    aliases = ["gdscript", "gd"]
    filenames = ["*.gd"]
    mimetypes = ["text/x-gdscript", "application/x-gdscript"]

    def innerstring_rules(ttype):
        # Shared interior rules for single/double-quoted string states.
        return [
            # the old style '%s' % (...) string formatting
            (
                r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?"
                "[hlL]?[E-GXc-giorsux%]",
                String.Interpol,
            ),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r"%", ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        "root": [
            (r"\n", Text),
            # Triple-quoted strings at statement start are docstrings.
            (
                r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
                bygroups(Text, String.Affix, String.Doc),
            ),
            (
                r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
                bygroups(Text, String.Affix, String.Doc),
            ),
            (r"[^\S\n]+", Text),
            (r"#.*$", Comment.Single),
            (r"[]{}:(),;[]", Punctuation),
            (r"\\\n", Text),
            (r"\\", Text),
            (r"(in|and|or|not)\b", Operator.Word),
            (
                r"!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|$]",
                Operator,
            ),
            include("keywords"),
            (r"(func)((?:\s|\\\s)+)", bygroups(Keyword, Text), "funcname"),
            (r"(class)((?:\s|\\\s)+)", bygroups(Keyword, Text), "classname"),
            include("builtins"),
            # Prefixed (raw/bytes) string openers, then plain ones; each
            # dispatches to the matching quote state.
            (
                '([rR]|[uUbB][rR]|[rR][uUbB])(""")',
                bygroups(String.Affix, String.Double),
                "tdqs",
            ),
            (
                "([rR]|[uUbB][rR]|[rR][uUbB])(''')",
                bygroups(String.Affix, String.Single),
                "tsqs",
            ),
            (
                '([rR]|[uUbB][rR]|[rR][uUbB])(")',
                bygroups(String.Affix, String.Double),
                "dqs",
            ),
            (
                "([rR]|[uUbB][rR]|[rR][uUbB])(')",
                bygroups(String.Affix, String.Single),
                "sqs",
            ),
            (
                '([uUbB]?)(""")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "tdqs"),
            ),
            (
                "([uUbB]?)(''')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "tsqs"),
            ),
            (
                '([uUbB]?)(")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "dqs"),
            ),
            (
                "([uUbB]?)(')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "sqs"),
            ),
            include("name"),
            include("numbers"),
        ],
        "keywords": [
            (
                words(
                    (
                        "and",
                        "in",
                        "not",
                        "or",
                        "as",
                        "breakpoint",
                        "class",
                        "class_name",
                        "extends",
                        "is",
                        "func",
                        "setget",
                        "signal",
                        "tool",
                        "const",
                        "enum",
                        "export",
                        "onready",
                        "static",
                        "var",
                        "break",
                        "continue",
                        "if",
                        "elif",
                        "else",
                        "for",
                        "pass",
                        "return",
                        "match",
                        "while",
                        "remote",
                        "master",
                        "puppet",
                        "remotesync",
                        "mastersync",
                        "puppetsync",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
        ],
        "builtins": [
            # NOTE(review): "tan" appears twice in this tuple; harmless
            # (words() builds an alternation) but redundant.
            (
                words(
                    (
                        "Color8",
                        "ColorN",
                        "abs",
                        "acos",
                        "asin",
                        "assert",
                        "atan",
                        "atan2",
                        "bytes2var",
                        "ceil",
                        "char",
                        "clamp",
                        "convert",
                        "cos",
                        "cosh",
                        "db2linear",
                        "decimals",
                        "dectime",
                        "deg2rad",
                        "dict2inst",
                        "ease",
                        "exp",
                        "floor",
                        "fmod",
                        "fposmod",
                        "funcref",
                        "hash",
                        "inst2dict",
                        "instance_from_id",
                        "is_inf",
                        "is_nan",
                        "lerp",
                        "linear2db",
                        "load",
                        "log",
                        "max",
                        "min",
                        "nearest_po2",
                        "pow",
                        "preload",
                        "print",
                        "print_stack",
                        "printerr",
                        "printraw",
                        "prints",
                        "printt",
                        "rad2deg",
                        "rand_range",
                        "rand_seed",
                        "randf",
                        "randi",
                        "randomize",
                        "range",
                        "round",
                        "seed",
                        "sign",
                        "sin",
                        "sinh",
                        "sqrt",
                        "stepify",
                        "str",
                        "str2var",
                        "tan",
                        "tan",
                        "tanh",
                        "type_exist",
                        "typeof",
                        "var2bytes",
                        "var2str",
                        "weakref",
                        "yield",
                    ),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Builtin,
            ),
            (r"((?<!\.)(self|false|true)|(PI|TAU|NAN|INF)" r")\b",
             Name.Builtin.Pseudo),
            # NOTE(review): "NodePath" appears twice in this tuple; harmless
            # but redundant.
            (
                words(
                    (
                        "bool",
                        "int",
                        "float",
                        "String",
                        "NodePath",
                        "Vector2",
                        "Rect2",
                        "Transform2D",
                        "Vector3",
                        "Rect3",
                        "Plane",
                        "Quat",
                        "Basis",
                        "Transform",
                        "Color",
                        "RID",
                        "Object",
                        "NodePath",
                        "Dictionary",
                        "Array",
                        "PackedByteArray",
                        "PackedInt32Array",
                        "PackedInt64Array",
                        "PackedFloat32Array",
                        "PackedFloat64Array",
                        "PackedStringArray",
                        "PackedVector2Array",
                        "PackedVector3Array",
                        "PackedColorArray",
                        "null",
                    ),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Builtin.Type,
            ),
        ],
        "numbers": [
            (r"(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?", Number.Float),
            (r"\d+[eE][+-]?[0-9]+j?", Number.Float),
            (r"0[xX][a-fA-F0-9]+", Number.Hex),
            (r"\d+j?", Number.Integer),
        ],
        "name": [(r"[a-zA-Z_]\w*", Name)],
        "funcname": [(r"[a-zA-Z_]\w*", Name.Function, "#pop"),
                     default("#pop")],
        "classname": [(r"[a-zA-Z_]\w*", Name.Class, "#pop")],
        "stringescape": [(
            r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
            r"U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})",
            String.Escape,
        )],
        "strings-single": innerstring_rules(String.Single),
        "strings-double": innerstring_rules(String.Double),
        "dqs": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("strings-double"),
        ],
        "sqs": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("strings-single"),
        ],
        "tdqs": [
            (r'"""', String.Double, "#pop"),
            include("strings-double"),
            (r"\n", String.Double),
        ],
        "tsqs": [
            (r"'''", String.Single, "#pop"),
            include("strings-single"),
            (r"\n", String.Single),
        ],
    }

    def analyse_text(text):
        # Heuristic score based on common GDScript callbacks and keywords.
        score = 0.0

        if re.search(r"func (_ready|_init|_input|_process|_unhandled_input)",
                     text):
            score += 0.8

        if re.search(
            r"(extends |class_name |onready |preload|load|setget|func [^_])",
            text):
            score += 0.4

        if re.search(r"(var|const|enum|export|signal|tool)", text):
            score += 0.2

        return min(score, 1.0)
class VBScriptLexer(RegexLexer):
    """
    VBScript is scripting language that is modeled on Visual Basic.

    .. versionadded:: 2.4
    """
    name = 'VBScript'
    aliases = ['vbscript']
    filenames = ['*.vbs', '*.VBS']
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r"'[^\n]*", Comment.Single),
            (r'\s+', Whitespace),
            ('"', String.Double, 'string'),
            ('&h[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            ('#.+#', String),  # date or time value
            (r'(dim)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Variable),
             'dim_more'),
            (r'(function|sub)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Function)),
            (r'(class)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Class)),
            (r'(const)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Constant)),
            (r'(end)(\s+)(class|function|if|property|sub|with)',
             bygroups(Keyword, Whitespace, Keyword)),
            (r'(on)(\s+)(error)(\s+)(goto)(\s+)(0)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword,
                      Whitespace, Number.Integer)),
            (r'(on)(\s+)(error)(\s+)(resume)(\s+)(next)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword,
                      Whitespace, Keyword)),
            (r'(option)(\s+)(explicit)',
             bygroups(Keyword, Whitespace, Keyword)),
            (r'(property)(\s+)(get|let|set)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration,
                      Whitespace, Name.Property)),
            (r'rem\s.*[^\n]*', Comment.Single),
            (words(_vbscript_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (words(_vbscript_builtins.OPERATORS), Operator),
            (words(_vbscript_builtins.OPERATOR_WORDS, suffix=r'\b'),
             Operator.Word),
            (words(_vbscript_builtins.BUILTIN_CONSTANTS, suffix=r'\b'),
             Name.Constant),
            (words(_vbscript_builtins.BUILTIN_FUNCTIONS, suffix=r'\b'),
             Name.Builtin),
            (words(_vbscript_builtins.BUILTIN_VARIABLES, suffix=r'\b'),
             Name.Builtin),
            (r'[a-z_][a-z0-9_]*', Name),
            (r'\b_\n', Operator),  # line continuation
            (words(r'(),.:'), Punctuation),
            (r'.+(\n)?', Error)
        ],
        'dim_more': [
            # BUGFIX: the identifier tail was '[a-z0-9]*', which -- unlike
            # every other identifier pattern in this lexer -- omitted '_',
            # so names such as "my_var" in "Dim a, my_var" mis-lexed.
            (r'(\s*)(,)(\s*)([a-z_][a-z0-9_]*)',
             bygroups(Whitespace, Punctuation, Whitespace, Name.Variable)),
            default('#pop'),
        ],
        'string': [
            (r'[^"\n]+', String.Double),
            (r'\"\"', String.Double),  # escaped (doubled) quote
            (r'"', String.Double, '#pop'),
            (r'\n', Error, '#pop'),  # Unterminated string
        ],
    }
class BBCBasicLexer(RegexLexer):
    """
    BBC Basic was supplied on the BBC Micro, and later Acorn RISC OS.
    It is also used by BBC Basic For Windows.

    .. versionadded:: 2.4
    """
    # Keywords common to all BASIC versions.
    base_keywords = ['OTHERWISE', 'AND', 'DIV', 'EOR', 'MOD', 'OR', 'ERROR',
                     'LINE', 'OFF', 'STEP', 'SPC', 'TAB', 'ELSE', 'THEN',
                     'OPENIN', 'PTR', 'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'ABS',
                     'ACS', 'ADVAL', 'ASC', 'ASN', 'ATN', 'BGET', 'COS', 'COUNT',
                     'DEG', 'ERL', 'ERR', 'EVAL', 'EXP', 'EXT', 'FALSE', 'FN',
                     'GET', 'INKEY', 'INSTR', 'INT', 'LEN', 'LN', 'LOG', 'NOT',
                     'OPENUP', 'OPENOUT', 'PI', 'POINT', 'POS', 'RAD', 'RND',
                     'SGN', 'SIN', 'SQR', 'TAN', 'TO', 'TRUE', 'USR', 'VAL',
                     'VPOS', 'CHR$', 'GET$', 'INKEY$', 'LEFT$', 'MID$',
                     'RIGHT$', 'STR$', 'STRING$', 'EOF', 'PTR', 'PAGE', 'TIME',
                     'LOMEM', 'HIMEM', 'SOUND', 'BPUT', 'CALL', 'CHAIN',
                     'CLEAR', 'CLOSE', 'CLG', 'CLS', 'DATA', 'DEF', 'DIM',
                     'DRAW', 'END', 'ENDPROC', 'ENVELOPE', 'FOR', 'GOSUB',
                     'GOTO', 'GCOL', 'IF', 'INPUT', 'LET', 'LOCAL', 'MODE',
                     'MOVE', 'NEXT', 'ON', 'VDU', 'PLOT', 'PRINT', 'PROC',
                     'READ', 'REM', 'REPEAT', 'REPORT', 'RESTORE', 'RETURN',
                     'RUN', 'STOP', 'COLOUR', 'TRACE', 'UNTIL', 'WIDTH',
                     'OSCLI']

    # Keywords added in BASIC V (Acorn RISC OS).
    basic5_keywords = ['WHEN', 'OF', 'ENDCASE', 'ENDIF', 'ENDWHILE', 'CASE',
                       'CIRCLE', 'FILL', 'ORIGIN', 'POINT', 'RECTANGLE', 'SWAP',
                       'WHILE', 'WAIT', 'MOUSE', 'QUIT', 'SYS', 'INSTALL',
                       'LIBRARY', 'TINT', 'ELLIPSE', 'BEATS', 'TEMPO', 'VOICES',
                       'VOICE', 'STEREO', 'OVERLAY', 'APPEND', 'AUTO', 'CRUNCH',
                       'DELETE', 'EDIT', 'HELP', 'LIST', 'LOAD', 'LVAR', 'NEW',
                       'OLD', 'RENUMBER', 'SAVE', 'TEXTLOAD', 'TEXTSAVE',
                       'TWIN', 'TWINO', 'INSTALL', 'SUM', 'BEAT']

    name = 'BBC Basic'
    aliases = ['bbcbasic']
    filenames = ['*.bbc']

    tokens = {
        'root': [
            # A numbered program line; '*' lines are passed to the OS CLI.
            (r"[0-9]+", Name.Label),
            (r"(\*)([^\n]*)",
             bygroups(Keyword.Pseudo, Comment.Special)),
            default('code'),
        ],

        'code': [
            (r"(REM)([^\n]*)",
             bygroups(Keyword.Declaration, Comment.Single)),
            (r'\n', Whitespace, 'root'),
            (r'\s+', Whitespace),
            # ':' separates statements on one line.
            (r':', Comment.Preproc),

            # Some special cases to make functions come out nicer
            (r'(DEF)(\s*)(FN|PROC)([A-Za-z_@][\w@]*)',
             bygroups(Keyword.Declaration, Whitespace,
                      Keyword.Declaration, Name.Function)),
            (r'(FN|PROC)([A-Za-z_@][\w@]*)',
             bygroups(Keyword, Name.Function)),

            (r'(GOTO|GOSUB|THEN|RESTORE)(\s*)(\d+)',
             bygroups(Keyword, Whitespace, Name.Label)),

            (r'(TRUE|FALSE)', Keyword.Constant),
            (r'(PAGE|LOMEM|HIMEM|TIME|WIDTH|ERL|ERR|REPORT\$|POS|VPOS|VOICES)',
             Keyword.Pseudo),

            (words(base_keywords), Keyword),
            (words(basic5_keywords), Keyword),

            ('"', String.Double, 'string'),

            ('%[01]{1,32}', Number.Bin),
            ('&[0-9a-f]{1,8}', Number.Hex),

            (r'[+-]?[0-9]+\.[0-9]*(E[+-]?[0-9]+)?', Number.Float),
            (r'[+-]?\.[0-9]+(E[+-]?[0-9]+)?', Number.Float),
            (r'[+-]?[0-9]+E[+-]?[0-9]+', Number.Float),
            (r'[+-]?\d+', Number.Integer),

            # Variable names may end in '%' (integer) or '$' (string).
            (r'([A-Za-z_@][\w@]*[%$]?)', Name.Variable),
            (r'([+\-]=|[$!|?+\-*/%^=><();]|>=|<=|<>|<<|>>|>>>|,)', Operator),
        ],
        'string': [
            (r'[^"\n]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\n', Error, 'root'),  # Unterminated string
        ],
    }

    def analyse_text(text):
        # Program listings conventionally start with a "REM >filename" line.
        if text.startswith('10REM >') or text.startswith('REM >'):
            return 0.9
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' appears twice in this list; harmless in the
    # joined alternation but redundant.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # Verbatim string: '""' is an escaped quote.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }
class MonkeyLexer(RegexLexer):
    """
    For `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_
    source code.

    .. versionadded:: 1.6
    """

    name = 'Monkey'
    aliases = ['monkey']
    filenames = ['*.monkey']
    mimetypes = ['text/x-monkey']

    # Regex fragments reused across several states below.
    name_variable = r'[a-z_]\w*'
    name_function = r'[A-Z]\w*'
    name_constant = r'[A-Z_][A-Z0-9_]*'
    name_class = r'[A-Z]\w*'
    name_module = r'[a-z0-9_]*'

    keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)'
    # ? == Bool // % == Int // # == Float // $ == String
    keyword_type_special = r'[?%#$]'

    flags = re.MULTILINE

    tokens = {
        'root': [
            # Text
            (r'\s+', Text),
            # Comments
            (r"'.*", Comment),
            (r'(?i)^#rem\b', Comment.Multiline, 'comment'),
            # preprocessor directives
            (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b',
             Comment.Preproc),
            # preprocessor variable (any line starting with '#' that is not a directive)
            (r'^#', Comment.Preproc, 'variables'),
            # String
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\$[0-9a-fA-Z]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Native data types
            (r'\b%s\b' % keyword_type, Keyword.Type),
            # Exception handling
            (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved),
            (r'Throwable', Name.Exception),
            # Builtins
            (r'(?i)\b(?:Null|True|False)\b', Name.Builtin),
            (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo),
            (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant),
            # Keywords
            (r'(?i)^(Import)(\s+)(.*)(\n)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text)),
            (r'(?i)^Strict\b.*\n', Keyword.Reserved),
            (r'(?i)(Const|Local|Global|Field)(\s+)',
             bygroups(Keyword.Declaration, Text), 'variables'),
            (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)',
             bygroups(Keyword.Reserved, Text), 'classname'),
            (r'(?i)(Function|Method)(\s+)',
             bygroups(Keyword.Reserved, Text), 'funcname'),
            (r'(?i)(?:End|Return|Public|Private|Extern|Property|'
             r'Final|Abstract)\b', Keyword.Reserved),
            # Flow Control stuff
            # NOTE(review): 'Until' appears in both the Repeat and For
            # alternations below — redundant but harmless in an alternation.
            (r'(?i)(?:If|Then|Else|ElseIf|EndIf|'
             r'Select|Case|Default|'
             r'While|Wend|'
             r'Repeat|Until|Forever|'
             r'For|To|Until|Step|EachIn|Next|'
             r'Exit|Continue)\s+', Keyword.Reserved),
            # not used yet
            (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved),
            # Array
            (r'[\[\]]', Punctuation),
            # Other
            (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator),
            (r'(?i)(?:Not|Mod|Shl|Shr|And|Or)', Operator.Word),
            (r'[(){}!#,.:]', Punctuation),
            # catch the rest
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_function, Name.Function),
            (r'%s\b' % name_variable, Name.Variable),
        ],
        'funcname': [
            (r'(?i)%s\b' % name_function, Name.Function),
            (r':', Punctuation, 'classname'),
            (r'\s+', Text),
            (r'\(', Punctuation, 'variables'),
            (r'\)', Punctuation, '#pop')
        ],
        'classname': [
            (r'%s\.' % name_module, Name.Namespace),
            (r'%s\b' % keyword_type, Keyword.Type),
            (r'%s\b' % name_class, Name.Class),
            # array (of given size)
            (r'(\[)(\s*)(\d*)(\s*)(\])',
             bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)),
            # generics
            (r'\s+(?!<)', Text, '#pop'),
            (r'<', Punctuation, '#push'),
            (r'>', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            default('#pop')
        ],
        'variables': [
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_variable, Name.Variable),
            (r'%s' % keyword_type_special, Keyword.Type),
            (r'\s+', Text),
            (r':', Punctuation, 'classname'),
            # a comma continues a declaration list: re-enter this state
            (r',', Punctuation, '#push'),
            default('#pop')
        ],
        'string': [
            (r'[^"~]+', String.Double),
            # Monkey string escapes all start with '~'
            (r'~q|~n|~r|~t|~z|~~', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'comment': [
            # nested #rem ... #end blocks are handled via push/pop
            (r'(?i)^#rem.*?', Comment.Multiline, "#push"),
            (r'(?i)^#end.*?', Comment.Multiline, "#pop"),
            (r'\n', Comment.Multiline),
            (r'.+', Comment.Multiline),
        ],
    }
class QVToLexer(RegexLexer):
    """
    For the `QVT Operational Mapping language <http://www.omg.org/spec/QVT/1.1/>`_.

    Reference for implementing this: «Meta Object Facility (MOF) 2.0
    Query/View/Transformation Specification», Version 1.1 - January 2011
    (http://www.omg.org/spec/QVT/1.1/), see §8.4, «Concrete Syntax» in
    particular.

    Notable tokens assignments:

    - Name.Class is assigned to the identifier following any of the following
      keywords: metamodel, class, exception, primitive, enum, transformation
      or library

    - Name.Function is assigned to the names of mappings and queries

    - Name.Builtin.Pseudo is assigned to the pre-defined variables 'this',
      'self' and 'result'.
    """
    # With obvious borrowings & inspiration from the Java, Python and C lexers

    name = 'QVTO'
    aliases = ['qvto', 'qvt']
    filenames = ['*.qvto']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # line comments, optionally carrying a 'directive:' marker
            (r'(--|//)(\s*)(directive:)?(.*)$',
             bygroups(Comment, Comment, Comment.Preproc, Comment)),
            # Uncomment the following if you want to distinguish between
            # '/*' and '/**', à la javadoc
            # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'\\\n', Text),
            (r'(and|not|or|xor|##?)\b', Operator.Word),
            (r'(:{1,2}=|[-+]=)\b', Operator.Word),
            (r'(@|<<|>>)\b', Keyword),  # stereotypes
            (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(true|false|unlimited|null)\b', Keyword.Constant),
            (r'(this|self|result)\b', Name.Builtin.Pseudo),
            (r'(var)\b', Keyword.Declaration),
            (r'(from|import)\b', Keyword.Namespace, 'fromimport'),
            # declaration keyword + declared name
            (r'(metamodel|class|exception|primitive|enum|transformation|'
             r'library)(\s+)(\w+)',
             bygroups(Keyword.Word, Text, Name.Class)),
            (r'(exception)(\s+)(\w+)',
             bygroups(Keyword.Word, Text, Name.Exception)),
            (r'(main)\b', Name.Function),
            (r'(mapping|helper|query)(\s+)',
             bygroups(Keyword.Declaration, Text), 'operation'),
            (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'),
            (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b',
             Keyword.Type),
            include('keywords'),
            ('"', String, combined('stringescape', 'dqs')),
            ("'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
            # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)',
            #  bygroups(Text, Text, Text)),
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop'),
        ],
        'operation': [
            (r'::', Text),
            # qualified operation name up to the opening paren
            (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()',
             bygroups(Text, Name.Function, Text, Punctuation), '#pop')
        ],
        'assert': [
            (r'(warning|error|fatal)\b', Keyword, '#pop'),
            default('#pop'),  # all else: go back
        ],
        'keywords': [
            (words((
                'abstract', 'access', 'any', 'assert', 'blackbox', 'break',
                'case', 'collect', 'collectNested', 'collectOne',
                'collectselect', 'collectselectOne', 'composes', 'compute',
                'configuration', 'constructor', 'continue', 'datatype',
                'default', 'derived', 'disjuncts', 'do', 'elif', 'else',
                'end', 'endif', 'except', 'exists', 'extends', 'forAll',
                'forEach', 'forOne', 'from', 'if', 'implies', 'in',
                'inherits', 'init', 'inout', 'intermediate', 'invresolve',
                'invresolveIn', 'invresolveone', 'invresolveoneIn',
                'isUnique', 'iterate', 'late', 'let', 'literal', 'log',
                'map', 'merges', 'modeltype', 'new', 'object', 'one',
                'ordered', 'out', 'package', 'population', 'property',
                'raise', 'readonly', 'references', 'refines', 'reject',
                'resolve', 'resolveIn', 'resolveone', 'resolveoneIn',
                'return', 'select', 'selectOne', 'sortedBy', 'static',
                'switch', 'tag', 'then', 'try', 'typedef', 'unlimited',
                'uses', 'when', 'where', 'while', 'with', 'xcollect',
                'xmap', 'xselect'), suffix=r'\b'), Keyword),
        ],
        # There is no need to distinguish between String.Single and
        # String.Double: 'strings' is factorised for 'dqs' and 'sqs'
        'strings': [
            (r'[^\\\'"\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
        ],
        'stringescape': [
            # NOTE(review): the 'u[0-3][0-7]{2}|u[0-7]{1,2}' alternatives look
            # like octal escapes accidentally prefixed with 'u' — confirm
            # against the QVT spec before changing.
            (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape)
        ],
        'dqs': [  # double-quoted string
            (r'"', String, '#pop'),
            (r'\\\\|\\"', String.Escape),
            include('strings')
        ],
        'sqs': [  # single-quoted string
            (r"'", String, '#pop'),
            (r"\\\\|\\'", String.Escape),
            include('strings')
        ],
        'name': [
            (r'[a-zA-Z_]\w*', Name),
        ],
        # numbers: excerpt taken from the python lexer
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer)
        ],
    }
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes don't highlight inside
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
class GeneratedObjectiveCVariant(baselexer):
    """
    Implements Objective-C syntax on top of an existing C family lexer.

    Built dynamically on *baselexer* (a C-family lexer class); `inherit`
    splices in the base lexer's rules after the Objective-C specific ones.
    """

    tokens = {
        'statements': [
            (r'@"', String, 'string'),
            (r'@(YES|NO)', Number),
            (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            # @-prefixed numeric literals (boxed numbers)
            (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'@0[0-7]+[Ll]?', Number.Oct),
            (r'@\d+[Ll]?', Number.Integer),
            (r'@\(', Literal, 'literal_number'),
            (r'@\[', Literal, 'literal_array'),
            (r'@\{', Literal, 'literal_dictionary'),
            (words((
                '@selector', '@private', '@protected', '@public', '@encode',
                '@synchronized', '@try', '@throw', '@catch', '@finally',
                '@end', '@property', '@synthesize', '__bridge',
                '__bridge_transfer', '__autoreleasing', '__block', '__weak',
                '__strong', 'weak', 'strong', 'copy', 'retain', 'assign',
                'unsafe_unretained', 'atomic', 'nonatomic', 'readonly',
                'readwrite', 'setter', 'getter', 'typeof', 'in', 'out',
                'inout', 'release', 'class', '@dynamic', '@optional',
                '@required', '@autoreleasepool', '@import'), suffix=r'\b'),
             Keyword),
            (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                    'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'),
             Keyword.Type),
            # NOTE(review): the trailing '\n' here looks intentional to anchor
            # at end-of-line, but it means these literals only match right
            # before a newline — confirm before relying on it.
            (r'@(true|false|YES|NO)\n', Name.Builtin),
            (r'(YES|NO|nil|self|super)\b', Name.Builtin),
            # Carbon types
            (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type),
            # Carbon built-ins
            (r'(TRUE|FALSE)\b', Name.Builtin),
            (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
             ('#pop', 'oc_classname')),
            (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
             ('#pop', 'oc_forward_classname')),
            # @ can also prefix other expressions like @{...} or @(...)
            (r'@', Punctuation),
            inherit,
        ],
        'oc_classname': [
            # interface definition that inherits
            (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
             bygroups(Name.Class, Text, Name.Class, Text, Punctuation),
             ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
             bygroups(Name.Class, Text, Name.Class), '#pop'),
            # interface definition for a category
            (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
             bygroups(Name.Class, Text, Name.Label, Text, Punctuation),
             ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
             bygroups(Name.Class, Text, Name.Label), '#pop'),
            # simple interface / implementation
            (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
             bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
        ],
        'oc_forward_classname': [
            # comma-separated forward declarations: stay in this state
            (r'([a-zA-Z$_][\w$]*)(\s*,\s*)',
             bygroups(Name.Class, Text), 'oc_forward_classname'),
            (r'([a-zA-Z$_][\w$]*)(\s*;?)',
             bygroups(Name.Class, Text), '#pop')
        ],
        'oc_ivars': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'root': [
            # methods
            (r'^([-+])(\s*)'               # method marker
             r'(\(.*?\))?(\s*)'            # return type
             r'([a-zA-Z$_][\w$]*:?)',      # begin of method name
             bygroups(Punctuation, Text, using(this),
                      Text, Name.Function),
             'method'),
            inherit,
        ],
        'method': [
            include('whitespace'),
            # TODO unsure if ellipses are allowed elsewhere, see
            # discussion in Issue 789
            (r',', Punctuation),
            (r'\.\.\.', Punctuation),
            (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
             bygroups(using(this), Text, Name.Variable)),
            (r'[a-zA-Z$_][\w$]*:', Name.Function),
            (';', Punctuation, '#pop'),
            (r'\{', Punctuation, 'function'),
            default('#pop'),
        ],
        # NOTE(review): the 'statement' state included below comes from the
        # C-family base lexer, not from this class — verify baselexer defines it.
        'literal_number': [
            (r'\(', Punctuation, 'literal_number_inner'),
            (r'\)', Literal, '#pop'),
            include('statement'),
        ],
        'literal_number_inner': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('statement'),
        ],
        'literal_array': [
            (r'\[', Punctuation, 'literal_array_inner'),
            (r'\]', Literal, '#pop'),
            include('statement'),
        ],
        'literal_array_inner': [
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            include('statement'),
        ],
        'literal_dictionary': [
            (r'\}', Literal, '#pop'),
            include('statement'),
        ],
    }

    def analyse_text(text):
        # Heuristic confidence score for "this text is Objective-C".
        if _oc_keywords.search(text):
            return 1.0
        elif '@"' in text:  # strings
            return 0.8
        elif re.search('@[0-9]+', text):
            return 0.7
        elif _oc_message.search(text):
            return 0.8
        return 0

    def get_tokens_unprocessed(self, text):
        # Re-tag plain names that are known Cocoa interfaces/protocols/
        # primitives as builtins after the base lexer has tokenized.
        from typecode._vendor.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
            COCOA_PROTOCOLS, COCOA_PRIMITIVES

        for index, token, value in \
                baselexer.get_tokens_unprocessed(self, text):
            if token is Name or token is Name.Class:
                if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                        or value in COCOA_PRIMITIVES:
                    token = Name.Builtin.Pseudo

            yield index, token, value
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)',
             bygroups(using(this), Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)',
             bygroups(using(this), Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimeter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # a '*/' with no matching '/*' is an error
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(('asm', 'auto', 'break', 'case', 'const', 'continue',
                    'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                    'if', 'register', 'restricted', 'return', 'sizeof',
                    'static', 'struct', 'switch', 'typedef', 'union',
                    'volatile', 'while'),
                   suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'), suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__', suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;{]*)(\{)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;]*)(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            # a backslash-continued line keeps the macro state open
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            # nested #if blocks push; #else/#elif/#endif pop
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    # Type-name sets consulted in get_tokens_unprocessed() to re-tag
    # plain Name tokens as Keyword.Type.
    stdlib_types = set((
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'))
    c99_types = set((
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t'))
    linux_types = set((
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t'))

    def __init__(self, **options):
        # Options toggling the post-pass type highlighting below.
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # Post-process: promote well-known typedef names to Keyword.Type.
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
class SassLexer(ExtendedRegexLexer):
    """
    For Sass stylesheets.

    .. versionadded:: 1.3
    """

    name = 'Sass'
    aliases = ['sass']
    filenames = ['*.sass']
    mimetypes = ['text/x-sass']

    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation is a callback that tracks indentation-based nesting
            (r'[ \t]*', _indentation),
        ],
        'content': [
            (r'//[^\n]*', _starts_block(Comment.Single, 'single-comment'),
             'root'),
            (r'/\*[^\n]*', _starts_block(Comment.Multiline, 'multi-comment'),
             'root'),
            (r'@import', Keyword, 'import'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'@[\w-]+', Keyword, 'selector'),
            # old-style mixin definition (=) and inclusion (+)
            (r'=[\w-]+', Name.Function, 'value'),
            (r'\+[\w-]+', Name.Decorator, 'value'),
            (r'([!$][\w-]\w*)([ \t]*(?:(?:\|\|)?=|:))',
             bygroups(Name.Variable, Operator), 'value'),
            (r':', Name.Attribute, 'old-style-attr'),
            (r'(?=.+?[=:]([^a-z]|$))', Name.Attribute, 'new-style-attr'),
            default('selector'),
        ],
        'single-comment': [
            (r'.+', Comment.Single),
            (r'\n', Text, 'root'),
        ],
        'multi-comment': [
            (r'.+', Comment.Multiline),
            (r'\n', Text, 'root'),
        ],
        'import': [
            (r'[ \t]+', Text),
            (r'\S+', String),
            (r'\n', Text, 'root'),
        ],
        'old-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*=', Operator, 'value'),
            default('value'),
        ],
        'new-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*[=:]', Operator, 'value'),
        ],
        'inline-comment': [
            (r"(\\#|#(?=[^\n{])|\*(?=[^\n/])|[^\n#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Pull in the states shared with the SCSS lexer, then append
    # Sass-specific newline handling (newline ends a value/selector here).
    for group, common in common_sass_tokens.items():
        tokens[group] = copy.copy(common)
    tokens['value'].append((r'\n', Text, 'root'))
    tokens['selector'].append((r'\n', Text, 'root'))
class KconfigLexer(RegexLexer):
    """
    For Linux-style Kconfig files.

    .. versionadded:: 1.6
    """

    name = 'Kconfig'
    aliases = ['kconfig', 'menuconfig', 'linux-config', 'kernel-config']
    # Adjust this if new kconfig file names appear in your environment
    filenames = ['Kconfig*', '*Config.in*', 'external.in*',
                 'standard-modules.in']
    mimetypes = ['text/x-kconfig']
    # No re.MULTILINE, indentation-aware help text needs line-by-line handling
    flags = 0

    # Helpers used only while building `tokens` below (class-body scope,
    # not instance methods despite being defined with `def`).
    def call_indent(level):
        # If indentation >= {level} is detected, enter state 'indent{level}'
        return (_rx_indent(level), String.Doc, 'indent%s' % level)

    def do_indent(level):
        # Print paragraphs of indentation level >= {level} as String.Doc,
        # ignoring blank lines. Then return to 'root' state.
        return [(_rx_indent(level), String.Doc),
                (r'\s*\n', Text),
                default('#pop:2')]

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (words((
                'mainmenu', 'config', 'menuconfig', 'choice', 'endchoice',
                'comment', 'menu', 'endmenu', 'visible if', 'if', 'endif',
                'source', 'prompt', 'select', 'depends on', 'default',
                'range', 'option'), suffix=r'\b'), Keyword),
            (r'(---help---|help)[\t ]*\n', Keyword, 'help'),
            (r'(bool|tristate|string|hex|int|defconfig_list|modules|env)\b',
             Name.Builtin),
            (r'[!=&|]', Operator),
            (r'[()]', Punctuation),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Double),
            (r'\S+', Text),
        ],
        # Help text is indented, multi-line and ends when a lower indentation
        # level is detected.
        'help': [
            # Skip blank lines after help token, if any
            (r'\s*\n', Text),
            # Determine the first help line's indentation level heuristically(!).
            # Attention: this is not perfect, but works for 99% of "normal"
            # indentation schemes up to a max. indentation level of 7.
            call_indent(7),
            call_indent(6),
            call_indent(5),
            call_indent(4),
            call_indent(3),
            call_indent(2),
            call_indent(1),
            default('#pop'),  # for incomplete help sections without text
        ],
        # Handle text for indentation levels 7 to 1
        'indent7': do_indent(7),
        'indent6': do_indent(6),
        'indent5': do_indent(5),
        'indent4': do_indent(4),
        'indent3': do_indent(3),
        'indent2': do_indent(2),
        'indent1': do_indent(1),
    }
class CssLexer(RegexLexer):
    """
    For CSS (Cascading Style Sheets).
    """

    name = 'CSS'
    aliases = ['css']
    filenames = ['*.css']
    mimetypes = ['text/css']

    tokens = {
        'root': [
            include('basics'),
        ],
        # Selector-level syntax, shared by 'root' and at-rule preludes.
        'basics': [
            (r'\s+', Text),
            (r'/\*(?:.|\n)*?\*/', Comment),
            (r'\{', Punctuation, 'content'),
            (r'(\:{1,2})([\w-]+)', bygroups(Punctuation, Name.Decorator)),
            (r'(\.)([\w-]+)', bygroups(Punctuation, Name.Class)),
            (r'(\#)([\w-]+)', bygroups(Punctuation, Name.Namespace)),
            (r'(@)([\w-]+)', bygroups(Punctuation, Keyword), 'atrule'),
            (r'[\w-]+', Name.Tag),
            (r'[~^*!%&$\[\]()<>|+=@:;,./?-]', Operator),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ],
        'atrule': [
            (r'\{', Punctuation, 'atcontent'),
            (r';', Punctuation, '#pop'),
            include('basics'),
        ],
        'atcontent': [
            include('basics'),
            # closing brace ends both the at-rule body and the at-rule
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Inside a declaration block: property names before the ':'.
        'content': [
            (r'\s+', Text),
            (r'\}', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'^@.*?$', Comment.Preproc),
            (words(_vendor_prefixes,), Keyword.Pseudo),
            (r'('+r'|'.join(_css_properties)+r')(\s*)(\:)',
             bygroups(Keyword, Text, Punctuation), 'value-start'),
            # custom properties / vendor-prefixed names
            (r'([-]+[a-zA-Z_][\w-]*)(\s*)(\:)',
             bygroups(Name.Variable, Text, Punctuation), 'value-start'),
            (r'([a-zA-Z_][\w-]*)(\s*)(\:)',
             bygroups(Name, Text, Punctuation), 'value-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
        ],
        # After the ':' of a declaration, until ';' or '}'.
        'value-start': [
            (r'\s+', Text),
            (words(_vendor_prefixes,), Name.Builtin.Pseudo),
            include('urls'),
            (r'('+r'|'.join(_functional_notation_keyword_values)+r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()',
             bygroups(Name.Function, Punctuation), 'function-start'),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),
            # for transition-property etc.
            (words(_css_properties, suffix=r'\b'), Keyword),
            (r'\!important', Comment.Preproc),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[~^*!%&<>|+=@:./?-]+', Operator),
            (r'[\[\](),]+', Punctuation),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'[a-zA-Z_][\w-]*', Name),
            (r';', Punctuation, '#pop'),
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Inside a functional value like rgb(...), calc(...).
        'function-start': [
            (r'\s+', Text),
            (r'[-]+([\w+]+[-]*)+', Name.Variable),
            include('urls'),
            (words(_vendor_prefixes,), Keyword.Pseudo),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),
            # function-start may be entered recursively
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()',
             bygroups(Name.Function, Punctuation), 'function-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[*+/-]', Operator),
            (r',', Punctuation),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'[a-zA-Z_-]\w*', Name),
            (r'\)', Punctuation, '#pop'),
        ],
        'urls': [
            (r'(url)(\()(".*?")(\))',
             bygroups(Name.Builtin, Punctuation, String.Double, Punctuation)),
            (r"(url)(\()('.*?')(\))",
             bygroups(Name.Builtin, Punctuation, String.Single, Punctuation)),
            # unquoted url(...)
            (r'(url)(\()(.*?)(\))',
             bygroups(Name.Builtin, Punctuation, String.Other, Punctuation)),
        ],
        'numeric-values': [
            (r'\#[a-zA-Z0-9]{1,6}', Number.Hex),
            (r'[+\-]?[0-9]*[.][0-9]+', Number.Float, 'numeric-end'),
            (r'[+\-]?[0-9]+', Number.Integer, 'numeric-end'),
        ],
        # Unit suffix (px, em, %) immediately after a number.
        'numeric-end': [
            (words(_all_units, suffix=r'\b'), Keyword.Type),
            (r'%', Keyword.Type),
            default('#pop'),
        ],
    }
'string-single': [ (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Single), (r'#\{', String.Interpol, 'interpolation'), (r"'", String.Single, '#pop'), ], 'string-url': [ (r'(\\#|#(?=[^\n{])|[^\n#)])+', String.Other), (r'#\{', String.Interpol, 'interpolation'), (r'\)', String.Other, '#pop'), ], 'pseudo-class': [ (r'[\w-]+', Name.Decorator), (r'#\{', String.Interpol, 'interpolation'), default('#pop'), ], 'class': [ (r'[\w-]+', Name.Class), (r'#\{', String.Interpol, 'interpolation'), default('#pop'), ], 'id': [ (r'[\w-]+', Name.Namespace), (r'#\{', String.Interpol, 'interpolation'), default('#pop'), ], 'for': [
class LimboLexer(RegexLexer):
    """
    Lexer for `Limbo programming language <http://www.vitanuova.com/inferno/limbo.html>`_

    TODO:
        - maybe implement better var declaration highlighting
        - some simple syntax error highlighting

    .. versionadded:: 2.0
    """
    name = 'Limbo'
    aliases = ['limbo']
    filenames = ['*.b']
    mimetypes = ['text/limbo']

    tokens = {
        'whitespace': [
            # a label at the start of a line
            (r'^(\s*)([a-zA-Z_]\w*:(\s*)\n)', bygroups(Text, Name.Label)),
            (r'\n', Text),
            (r'\s+', Text),
            (r'#(\n|(.|\n)*?[^\\]\n)', Comment.Single),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\', String),  # stray backslash
        ],
        'statements': [
            (r'"', String, 'string'),
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])', Number.Float),
            (r'16r[0-9a-fA-F]+', Number.Hex),
            (r'8r[0-7]+', Number.Oct),
            # Limbo base-N integers: an optional 2..39 radix prefix 'Nr'
            (r'((([1-3]\d)|([2-9]))r)?(\d+)', Number.Integer),
            (r'[()\[\],.]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]|(->)|(<-)|(=>)|(::)', Operator),
            # FIX: the original alternation was missing '|' at the two
            # raw-string joins ('exit' + 'for' and 'or' + 'pick'), producing
            # the bogus alternatives 'exitfor' and 'orpick' — so the keywords
            # 'exit', 'for', 'or' and 'pick' were never highlighted.
            (r'(alt|break|case|continue|cyclic|do|else|exit|'
             r'for|hd|if|implement|import|include|len|load|or|'
             r'pick|return|spawn|tagof|tl|to|while)\b', Keyword),
            (r'(byte|int|big|real|string|array|chan|list|adt'
             r'|fn|ref|of|module|self|type)\b', Keyword.Type),
            (r'(con|iota|nil)\b', Keyword.Constant),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            default('statement'),
        ],
    }

    def analyse_text(text):
        # Any limbo module implements something
        if re.search(r'^implement \w+;', text, re.MULTILINE):
            return 0.7
def do_indent(level):
    """Build the token rules for a help-text state at indentation *level*.

    Lines indented at least *level* are emitted as String.Doc and blank
    lines are ignored; anything else pops two states (past 'help') back
    to 'root'.
    """
    doc_rule = (_rx_indent(level), String.Doc)
    blank_rule = (r'\s*\n', Text)
    return [doc_rule, blank_rule, default('#pop:2')]
class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    .. versionadded:: 0.9
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    # Configuration directive names highlighted as Keyword.
    keywords = (
        "access_log", "acl", "always_direct", "announce_host",
        "announce_period", "announce_port", "announce_to",
        "anonymize_headers", "append_domain", "as_whois_server",
        "auth_param_basic", "authenticate_children", "authenticate_program",
        "authenticate_ttl", "broken_posts", "buffered_logs",
        "cache_access_log", "cache_announce", "cache_dir",
        "cache_dns_program", "cache_effective_group", "cache_effective_user",
        "cache_host", "cache_host_acl", "cache_host_domain", "cache_log",
        "cache_mem", "cache_mem_high", "cache_mem_low", "cache_mgr",
        "cachemgr_passwd", "cache_peer", "cache_peer_access",
        # BUG FIX: squid's directive is spelled "cache_replacement_policy";
        # the misspelled "cahce_replacement_policy" entry is kept for
        # backwards compatibility, but the correct spelling is added so the
        # real directive highlights as a keyword.
        "cache_replacement_policy", "cahce_replacement_policy",
        "cache_stoplist", "cache_stoplist_pattern", "cache_store_log",
        "cache_swap", "cache_swap_high", "cache_swap_log", "cache_swap_low",
        "client_db", "client_lifetime", "client_netmask", "connect_timeout",
        "coredump_dir", "dead_peer_timeout", "debug_options", "delay_access",
        "delay_class", "delay_initial_bucket_level", "delay_parameters",
        "delay_pools", "deny_info", "dns_children", "dns_defnames",
        "dns_nameservers", "dns_testnames", "emulate_httpd_log",
        "err_html_text", "fake_user_agent", "firewall_ip", "forwarded_for",
        "forward_snmpd_port", "fqdncache_size", "ftpget_options",
        "ftpget_program", "ftp_list_width", "ftp_passive", "ftp_user",
        "half_closed_clients", "header_access", "header_replace",
        "hierarchy_stoplist", "high_response_time_warning",
        "high_page_fault_warning", "hosts_file", "htcp_port", "http_access",
        "http_anonymizer", "httpd_accel", "httpd_accel_host",
        "httpd_accel_port", "httpd_accel_uses_host_header",
        "httpd_accel_with_proxy", "http_port", "http_reply_access",
        "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout",
        "ident_lookup", "ident_lookup_access", "ident_timeout",
        "incoming_http_average", "incoming_icp_average", "inside_firewall",
        "ipcache_high", "ipcache_low", "ipcache_size", "local_domain",
        "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries",
        "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries",
        "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
        "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
        "memory_pools_limit", "memory_replacement_policy", "mime_table",
        "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops",
        "minimum_object_size", "minimum_retry_timeout", "miss_access",
        "negative_dns_ttl", "negative_ttl", "neighbor_timeout",
        "neighbor_type_domain", "netdb_high", "netdb_low",
        "netdb_ping_period", "netdb_ping_rate", "never_direct", "no_cache",
        "passthrough_proxy", "pconn_timeout", "pid_filename",
        "pinger_program", "positive_dns_ttl", "prefer_direct", "proxy_auth",
        "proxy_auth_realm", "query_icmp", "quick_abort", "quick_abort_max",
        "quick_abort_min", "quick_abort_pct", "range_offset_limit",
        "read_timeout", "redirect_children", "redirect_program",
        "redirect_rewrites_host_header", "reference_age", "refresh_pattern",
        "reload_into_ims", "request_body_max_size", "request_size",
        "request_timeout", "shutdown_lifetime", "single_parent_bypass",
        "siteselect_timeout", "snmp_access", "snmp_incoming_address",
        "snmp_port", "source_ping", "ssl_proxy", "store_avg_object_size",
        "store_objects_per_bucket", "strip_query_terms", "swap_level1_dirs",
        "swap_level2_dirs", "tcp_incoming_address", "tcp_outgoing_address",
        "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
        "udp_hit_obj_size", "udp_incoming_address", "udp_outgoing_address",
        "unique_hostname", "unlinkd_program", "uri_whitespace",
        "useragent_log", "visible_hostname", "wais_relay", "wais_relay_host",
        "wais_relay_port",
    )

    # Option words that may follow directives (highlighted as Name.Constant).
    opts = (
        "proxy-only", "weight", "ttl", "no-query", "default", "round-robin",
        "multicast-responder", "on", "off", "all", "deny", "allow", "via",
        "parent", "no-digest", "heap", "lru", "realm", "children", "q1",
        "q2", "credentialsttl", "none", "disable", "offline_toggle", "diskd",
    )

    # cachemgr action names; words() escapes each entry, so the '.' in
    # 'squid.conf' matches literally.
    actions = (
        "shutdown", "info", "parameter", "server_list", "client_list",
        r'squid.conf',
    )

    # Actions appearing with a "stats/" prefix.
    actions_stats = (
        "objects", "vm_objects", "utilization", "ipcache", "fqdncache",
        "dns", "redirector", "io", "reply_headers", "filedescriptors",
        "netdb",
    )

    # Actions appearing with a "log/" prefix and an '=' suffix.
    actions_log = ("status", "enable", "disable", "clear")

    # ACL type names.
    acls = (
        "url_regex", "urlpath_regex", "referer_regex", "port", "proto",
        "req_mime_type", "rep_mime_type", "method", "browser", "user",
        "src", "dst", "time", "dstdomain", "ident", "snmp_community",
    )

    # Matches a dotted-quad IPv4 address (decimal, hex or octal octets)
    # or an IPv6 address, including '::' abbreviation and an embedded
    # IPv4 tail.
    ip_re = (
        r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|'
        r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|'
        r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|'
        r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}'
        r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|'
        r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|'
        r'[1-9]?\d)){3}))'
    )

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#', Comment, 'comment'),
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(opts, prefix=r'\b', suffix=r'\b'), Name.Constant),
            # Actions
            (words(actions, prefix=r'\b', suffix=r'\b'), String),
            (words(actions_stats, prefix=r'stats/', suffix=r'\b'), String),
            (words(actions_log, prefix=r'log/', suffix=r'='), String),
            (words(acls, prefix=r'\b', suffix=r'\b'), Keyword),
            # An IP address, optionally followed by /netmask or /prefix-len.
            (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float),
            (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number),
            (r'\S+', Text),
        ],
        'comment': [
            # "# TAG: ..." lines in the default squid.conf are documentation.
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.+', Comment, '#pop'),
            default('#pop'),
        ],
    }