class DuelLexer(RegexLexer): """ Lexer for Duel Views Engine (formerly JBST) markup with JavaScript code blocks. See http://duelengine.org/. See http://jsonml.org/jbst/. .. versionadded:: 1.4 """ name = 'Duel' aliases = ['duel', 'jbst', 'jsonml+bst'] filenames = ['*.duel', '*.jbst'] mimetypes = ['text/x-duel', 'text/x-jbst'] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#!:]?)(.*?)(%>)', bygroups(Name.Tag, using(JavascriptLexer), Name.Tag)), (r'(<%\$)(.*?)(:)(.*?)(%>)', bygroups(Name.Tag, Name.Function, Punctuation, String, Name.Tag)), (r'(<%--)(.*?)(--%>)', bygroups(Name.Tag, Comment.Multiline, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(HtmlLexer), using(JavascriptLexer), using(HtmlLexer))), (r'(.+?)(?=<)', using(HtmlLexer)), (r'.+', using(HtmlLexer)), ], }
class BBCodeLexer(RegexLexer): """ A lexer that highlights BBCode(-like) syntax. .. versionadded:: 0.6 """ name = 'BBCode' aliases = ['bbcode'] mimetypes = ['text/x-bbcode'] tokens = { 'root': [ (r'[^[]+', Text), # tag/end tag begin (r'\[/?\w+', Keyword, 'tag'), # stray bracket (r'\[', Text), ], 'tag': [ (r'\s+', Text), # attribute with value (r'(\w+)(=)("?[^\s"\]]+"?)', bygroups(Name.Attribute, Operator, String)), # tag argument (a la [color=green]) (r'(=)("?[^\s"\]]+"?)', bygroups(Operator, String)), # tag end (r'\]', Keyword, '#pop'), ], }
class TypoScriptCssDataLexer(RegexLexer): """ Lexer that highlights markers, constants and registers within css blocks. .. versionadded:: 2.2 """ name = 'TypoScriptCssData' aliases = ['typoscriptcssdata'] tokens = { 'root': [ # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace (r'\s+', Text), # comments (r'/\*(?:(?!\*/).)*\*/', Comment), (r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)', Comment), # other (r'[<>,:=.*%+|]', String), (r'[\w"\-!/&;(){}]+', String), ] }
class TypoScriptHtmlDataLexer(RegexLexer): """ Lexer that highlights markers, constants and registers within html tags. .. versionadded:: 2.2 """ name = 'TypoScriptHtmlData' aliases = ['typoscripthtmldata'] tokens = { 'root': [ # INCLUDE_TYPOSCRIPT (r'(INCLUDE_TYPOSCRIPT)', Name.Class), # Language label or extension resource FILE:... or LLL:... or EXT:... (r'(EXT|FILE|LLL):[^}\n"]*', String), # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace (r'\s+', Text), # other (r'[<>,:=.*%+|]', String), (r'[\w"\-!/&;(){}#]+', String), ] }
class GettextLexer(RegexLexer): """ Lexer for Gettext catalog files. .. versionadded:: 0.9 """ name = 'Gettext Catalog' aliases = ['pot', 'po'] filenames = ['*.pot', '*.po'] mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext'] tokens = { 'root': [ (r'^#,\s.*?$', Keyword.Type), (r'^#:\s.*?$', Keyword.Declaration), # (r'^#$', Comment), (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single), (r'^(")([A-Za-z-]+:)(.*")$', bygroups(String, Name.Property, String)), (r'^".*"$', String), (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$', bygroups(Name.Variable, Text, String)), (r'^(msgstr\[)(\d)(\])(\s+)(".*")$', bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)), ] }
class ProtoBufLexer(RegexLexer): """ Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_ definition files. .. versionadded:: 1.4 """ name = 'Protocol Buffer' aliases = ['protobuf', 'proto'] filenames = ['*.proto'] tokens = { 'root': [ (r'[ \t]+', Text), (r'[,;{}\[\]()<>]', Punctuation), (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline), (words(('import', 'option', 'optional', 'required', 'repeated', 'reserved', 'default', 'packed', 'ctype', 'extensions', 'to', 'max', 'rpc', 'returns', 'oneof'), prefix=r'\b', suffix=r'\b'), Keyword), (words(('int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64', 'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'), Keyword.Type), (r'(true|false)\b', Keyword.Constant), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'), (r'(message|extend)(\s+)', bygroups(Keyword.Declaration, Text), 'message'), (r'(enum|group|service)(\s+)', bygroups(Keyword.Declaration, Text), 'type'), (r'\".*?\"', String), (r'\'.*?\'', String), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'(\-?(inf|nan))\b', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'[+-=]', Operator), (r'([a-zA-Z_][\w.]*)([ \t]*)(=)', bygroups(Name.Attribute, Text, Operator)), (r'[a-zA-Z_][\w.]*', Name), ], 'package': [ (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'), default('#pop'), ], 'message': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop'), default('#pop'), ], 'type': [ (r'[a-zA-Z_]\w*', Name, '#pop'), default('#pop'), ], }
class AlloyLexer(RegexLexer): """ For `Alloy <http://alloy.mit.edu>`_ source code. .. versionadded:: 2.0 """ name = 'Alloy' aliases = ['alloy'] filenames = ['*.als'] mimetypes = ['text/x-alloy'] flags = re.MULTILINE | re.DOTALL iden_rex = r'[a-zA-Z_][\w\']*' text_tuple = (r'[^\S\n]+', Text) tokens = { 'sig': [ (r'(extends)\b', Keyword, '#pop'), (iden_rex, Name), text_tuple, (r',', Punctuation), (r'\{', Operator, '#pop'), ], 'module': [ text_tuple, (iden_rex, Name, '#pop'), ], 'fun': [ text_tuple, (r'\{', Operator, '#pop'), (iden_rex, Name, '#pop'), ], 'root': [ (r'--.*?$', Comment.Single), (r'//.*?$', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), text_tuple, (r'(module|open)(\s+)', bygroups(Keyword.Namespace, Text), 'module'), (r'(sig|enum)(\s+)', bygroups(Keyword.Declaration, Text), 'sig'), (r'(iden|univ|none)\b', Keyword.Constant), (r'(int|Int)\b', Keyword.Type), (r'(this|abstract|extends|set|seq|one|lone|let)\b', Keyword), (r'(all|some|no|sum|disj|when|else)\b', Keyword), (r'(run|check|for|but|exactly|expect|as)\b', Keyword), (r'(and|or|implies|iff|in)\b', Operator.Word), (r'(fun|pred|fact|assert)(\s+)', bygroups(Keyword, Text), 'fun'), (r'!|#|&&|\+\+|<<|>>|>=|<=>|<=|\.|->', Operator), (r'[-+/*%=<>&!^|~{}\[\]().]', Operator), (iden_rex, Name), (r'[:,]', Punctuation), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String), (r'\n', Text), ] }
class CppLexer(CFamilyLexer): """ For C++ source code with preprocessor directives. """ name = 'C++' aliases = ['cpp', 'c++'] filenames = [ '*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP' ] mimetypes = ['text/x-c++hdr', 'text/x-c++src'] priority = 0.1 tokens = { 'statements': [ (words( ('catch', 'const_cast', 'delete', 'dynamic_cast', 'explicit', 'export', 'friend', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', 'reinterpret_cast', 'restrict', 'static_cast', 'template', 'this', 'throw', 'throws', 'try', 'typeid', 'typename', 'using', 'virtual', 'constexpr', 'nullptr', 'decltype', 'thread_local', 'alignas', 'alignof', 'static_assert', 'noexcept', 'override', 'final'), suffix=r'\b'), Keyword), (r'char(16_t|32_t)\b', Keyword.Type), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # C++11 raw strings (r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")', bygroups(String.Affix, String, String.Delimiter, String.Delimiter, String, String.Delimiter, String)), # C++11 UTF-8/16/32 strings (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'), inherit, ], 'root': [ inherit, # C++ Microsoft-isms (words(('virtual_inheritance', 'uuidof', 'super', 'single_inheritance', 'multiple_inheritance', 'interface', 'event'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), # Offload C++ extensions, http://offload.codeplay.com/ (r'__(offload|blockingoffload|outer)\b', Keyword.Pseudo), ], 'classname': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), ], } def analyse_text(text): if re.search('#include <[a-z_]+>', text): return 0.2 if re.search('using namespace ', text): return 0.4
class BaseMakefileLexer(RegexLexer): """ Lexer for simple Makefiles (no preprocessing). .. versionadded:: 0.10 """ name = 'Base Makefile' aliases = ['basemake'] filenames = [] mimetypes = [] tokens = { 'root': [ # recipes (need to allow spaces because of expandtabs) (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), # special variables (r'\$[<@$+%?|*]', Keyword), (r'\s+', Text), (r'#.*?\n', Comment), (r'(export)(\s+)(?=[\w${}\t -]+\n)', bygroups(Keyword, Text), 'export'), (r'export\s+', Keyword), # assignment (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), # targets (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), 'block-header'), # expansions (r'\$\(', Keyword, 'expansion'), ], 'expansion': [ (r'[^\w$().-]+', Text), (r'[\w.-]+', Name.Variable), (r'\$', Keyword), (r'\(', Keyword, '#push'), (r'\)', Keyword, '#pop'), ], 'export': [ (r'[\w${}-]+', Name.Variable), (r'\n', Text, '#pop'), (r'\s+', Text), ], 'block-header': [ (r'[,|]', Punctuation), (r'#.*?\n', Comment, '#pop'), (r'\\\n', Text), # line continuation (r'\$\(', Keyword, 'expansion'), (r'[a-zA-Z_]+', Name), (r'\n', Text, '#pop'), (r'.', Text), ], }
class BooLexer(RegexLexer): """ For `Boo <http://boo.codehaus.org/>`_ source code. """ name = 'Boo' aliases = ['boo'] filenames = ['*.boo'] mimetypes = ['text/x-boo'] tokens = { 'root': [ (r'\s+', Text), (r'(#|//).*$', Comment.Single), (r'/[*]', Comment.Multiline, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'/(\\\\|\\/|[^/\s])/', String.Regex), (r'@/(\\\\|\\/|[^/])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' r'partial|private|protected|public|return|set|static|' r'struct|transient|virtual|yield|super|and|break|cast|' r'continue|elif|else|ensure|except|for|given|goto|if|in|' r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|' r'while|from|as)\b', Keyword), (r'def(?=\s+\(.*?\))', Keyword), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|' r'assert|checked|enumerate|filter|getter|len|lock|map|' r'matrix|max|min|normalArrayIndexing|print|property|range|' r'rawArrayIndexing|required|typeof|unchecked|using|' r'yieldAll|zip)\b', Name.Builtin), (r'"""(\\\\|\\"|.*?)"""', String.Double), (r'"(\\\\|\\"|[^"]*?)"', String.Double), (r"'(\\\\|\\'|[^']*?)'", String.Single), (r'[a-zA-Z_]\w*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'[0-9][0-9.]*(ms?|d|h|s)', Number), (r'0\d+', Number.Oct), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer), ], 'comment': [('/[*]', Comment.Multiline, '#push'), ('[*]/', Comment.Multiline, '#pop'), ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline)], 'funcname': [(r'[a-zA-Z_]\w*', Name.Function, '#pop')], 'classname': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')], 'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')] }
class DarcsPatchLexer(RegexLexer): """ DarcsPatchLexer is a lexer for the various versions of the darcs patch format. Examples of this format are derived by commands such as ``darcs annotate --patch`` and ``darcs send``. .. versionadded:: 0.10 """ name = 'Darcs Patch' aliases = ['dpatch'] filenames = ['*.dpatch', '*.darcspatch'] DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move', 'replace') tokens = { 'root': [ (r'<', Operator), (r'>', Operator), (r'\{', Operator), (r'\}', Operator), (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])', bygroups(Operator, Keyword, Name, Text, Name, Operator, Literal.Date, Text, Operator)), (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)', bygroups(Operator, Keyword, Name, Text, Name, Operator, Literal.Date, Text), 'comment'), (r'New patches:', Generic.Heading), (r'Context:', Generic.Heading), (r'Patch bundle hash:', Generic.Heading), (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS), bygroups(Text, Keyword, Text)), (r'\+', Generic.Inserted, "insert"), (r'-', Generic.Deleted, "delete"), (r'.*\n', Text), ], 'comment': [ (r'[^\]].*\n', Comment), (r'\]', Operator, "#pop"), ], 'specialText': [ # darcs add [_CODE_] special operators for clarity (r'\n', Text, "#pop"), # line-based (r'\[_[^_]*_]', Operator), ], 'insert': [ include('specialText'), (r'\[', Generic.Inserted), (r'[^\n\[]+', Generic.Inserted), ], 'delete': [ include('specialText'), (r'\[', Generic.Deleted), (r'[^\n\[]+', Generic.Deleted), ], }
class PostgresLexer(PostgresBase, RegexLexer): """ Lexer for the PostgreSQL dialect of SQL. .. versionadded:: 1.5 """ name = 'PostgreSQL SQL dialect' aliases = ['postgresql', 'postgres'] mimetypes = ['text/x-postgresql'] flags = re.IGNORECASE tokens = { 'root': [ (r'\s+', Text), (r'--.*\n?', Comment.Single), (r'/\*', Comment.Multiline, 'multiline-comments'), (r'(' + '|'.join( s.replace(" ", r"\s+") for s in DATATYPES + PSEUDO_TYPES) + r')\b', Name.Builtin), (words(KEYWORDS, suffix=r'\b'), Keyword), (r'[+*/<>=~!@#%^&|`?-]+', Operator), (r'::', Operator), # cast (r'\$\d+', Name.Variable), (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), (r'[0-9]+', Number.Integer), (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'), # quoted identifier (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'), (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback), (r'[a-z_]\w*', Name), # psql variable in SQL (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable), (r'[;:()\[\]{},.]', Punctuation), ], 'multiline-comments': [(r'/\*', Comment.Multiline, 'multiline-comments'), (r'\*/', Comment.Multiline, '#pop'), (r'[^/*]+', Comment.Multiline), (r'[/*]', Comment.Multiline)], 'string': [ (r"[^']+", String.Single), (r"''", String.Single), (r"'", String.Single, '#pop'), ], 'quoted-ident': [ (r'[^"]+', String.Name), (r'""', String.Name), (r'"', String.Name, '#pop'), ], }
class DebianControlLexer(RegexLexer): """ Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs. .. versionadded:: 0.9 """ name = 'Debian Control file' aliases = ['control', 'debcontrol'] filenames = ['control'] tokens = { 'root': [ (r'^(Description)', Keyword, 'description'), (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), (r'^((Build-)?Depends)', Keyword, 'depends'), (r'^((?:Python-)?Version)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^((?:Installed-)?Size)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', bygroups(Keyword, Whitespace, String)), ], 'maintainer': [ (r'<[^>]+>', Generic.Strong), (r'<[^>]+>$', Generic.Strong, '#pop'), (r',\n?', Text), (r'.', Text), ], 'description': [ (r'(.*)(Homepage)(: )(\S+)', bygroups(Text, String, Name, Name.Class)), (r':.*\n', Generic.Strong), (r' .*\n', Text), default('#pop'), ], 'depends': [ (r':\s*', Text), (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)), (r'\(', Text, 'depend_vers'), (r',', Text), (r'\|', Operator), (r'[\s]+', Text), (r'[})]\s*$', Text, '#pop'), (r'\}', Text), (r'[^,]$', Name.Function, '#pop'), (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), (r'\[.*?\]', Name.Entity), ], 'depend_vers': [(r'\),', Text, '#pop'), (r'\)[^,]', Text, '#pop:2'), (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number))] }
def gen_elixir_sigil_rules(): # all valid sigil terminators (excluding heredocs) terminators = [ (r'\{', r'\}', 'cb'), (r'\[', r'\]', 'sb'), (r'\(', r'\)', 'pa'), (r'<', r'>', 'ab'), (r'/', r'/', 'slas'), (r'\|', r'\|', 'pipe'), ('"', '"', 'quot'), ("'", "'", 'apos'), ] # heredocs have slightly different rules triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] token = String.Other states = {'sigils': []} for term, name in triquotes: states['sigils'] += [ (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-intp')), (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-no-intp')), ] states[name + '-end'] = [ (r'[a-zA-Z]+', token, '#pop'), default('#pop'), ] states[name + '-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_interpol'), ] states[name + '-no-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_no_interpol'), ] for lterm, rterm, name in terminators: states['sigils'] += [ (r'~[a-z]' + lterm, token, name + '-intp'), (r'~[A-Z]' + lterm, token, name + '-no-intp'), ] states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) states[name + '-no-intp'] = \ gen_elixir_sigstr_rules(rterm, token, interpol=False) return states
class ScssLexer(RegexLexer): """ For SCSS stylesheets. """ name = 'SCSS' aliases = ['scss'] filenames = ['*.scss'] mimetypes = ['text/x-scss'] flags = re.IGNORECASE | re.DOTALL tokens = { 'root': [ (r'\s+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'@import', Keyword, 'value'), (r'@for', Keyword, 'for'), (r'@(debug|warn|if|while)', Keyword, 'value'), (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'), (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'), (r'@extend', Keyword, 'selector'), (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'), (r'@[\w-]+', Keyword, 'selector'), (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), # TODO: broken, and prone to infinite loops. # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), default('selector'), ], 'attr': [ (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*:', Operator, 'value'), default('#pop'), ], 'inline-comment': [ (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline), (r'#\{', String.Interpol, 'interpolation'), (r"\*/", Comment, '#pop'), ], } for group, common in iteritems(common_sass_tokens): tokens[group] = copy.copy(common) tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
class CharmciLexer(CppLexer): """ For `Charm++ <https://charm.cs.illinois.edu>`_ interface files (.ci). .. versionadded:: 2.4 """ name = 'Charmci' aliases = ['charmci'] filenames = ['*.ci'] mimetypes = [] tokens = { 'statements': [ (r'(module)(\s+)', bygroups(Keyword, Text), 'classname'), (words(('mainmodule', 'mainchare', 'chare', 'array', 'group', 'nodegroup', 'message', 'conditional')), Keyword), (words(('entry', 'aggregate', 'threaded', 'sync', 'exclusive', 'nokeep', 'notrace', 'immediate', 'expedited', 'inline', 'local', 'python', 'accel', 'readwrite', 'writeonly', 'accelblock', 'memcritical', 'packed', 'varsize', 'initproc', 'initnode', 'initcall', 'stacksize', 'createhere', 'createhome', 'reductiontarget', 'iget', 'nocopy', 'mutable', 'migratable', 'readonly')), Keyword), inherit, ], }
class VGLLexer(RegexLexer): """ For `SampleManager VGL <http://www.thermoscientific.com/samplemanager>`_ source code. .. versionadded:: 1.6 """ name = 'VGL' aliases = ['vgl'] filenames = ['*.rpf'] flags = re.MULTILINE | re.DOTALL | re.IGNORECASE tokens = { 'root': [(r'\{[^}]*\}', Comment.Multiline), (r'declare', Keyword.Constant), (r'(if|then|else|endif|while|do|endwhile|and|or|prompt|object' r'|create|on|line|with|global|routine|value|endroutine|constant' r'|global|set|join|library|compile_option|file|exists|create|copy' r'|delete|enable|windows|name|notprotected)(?! *[=<>.,()])', Keyword), (r'(true|false|null|empty|error|locked)', Keyword.Constant), (r'[~^*#!%&\[\]()<>|+=:;,./?-]', Operator), (r'"[^"]*"', String), (r'(\.)([a-z_$][\w$]*)', bygroups(Operator, Name.Attribute)), (r'[0-9][0-9]*(\.[0-9]+(e[+\-]?[0-9]+)?)?', Number), (r'[a-z_$][\w$]*', Name), (r'[\r\n]+', Text), (r'\s+', Text)] }
class NewspeakLexer(RegexLexer): """ For `Newspeak <http://newspeaklanguage.org/>` syntax. .. versionadded:: 1.1 """ name = 'Newspeak' filenames = ['*.ns2'] aliases = [ 'newspeak', ] mimetypes = ['text/x-newspeak'] tokens = { 'root': [(r'\b(Newsqueak2)\b', Keyword.Declaration), (r"'[^']*'", String), (r'\b(class)(\s+)(\w+)(\s*)', bygroups(Keyword.Declaration, Text, Name.Class, Text)), (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b', Keyword), (r'(\w+\:)(\s*)([a-zA-Z_]\w+)', bygroups(Name.Function, Text, Name.Variable)), (r'(\w+)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)), (r'<\w+>', Comment.Special), include('expressionstat'), include('whitespace')], 'expressionstat': [ (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'\d+', Number.Integer), (r':\w+', Name.Variable), (r'(\w+)(::)', bygroups(Name.Variable, Operator)), (r'\w+:', Name.Function), (r'\w+', Name.Variable), (r'\(|\)', Punctuation), (r'\[|\]', Punctuation), (r'\{|\}', Punctuation), (r'(\^|\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-|:)', Operator), (r'\.|;', Punctuation), include('whitespace'), include('literals'), ], 'literals': [(r'\$.', String), (r"'[^']*'", String), (r"#'[^']*'", String.Symbol), (r"#\w+:?", String.Symbol), (r"#(\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-)+", String.Symbol)], 'whitespace': [(r'\s+', Text), (r'"[^"]*"', Comment)], }
class SnobolLexer(RegexLexer): """ Lexer for the SNOBOL4 programming language. Recognizes the common ASCII equivalents of the original SNOBOL4 operators. Does not require spaces around binary operators. .. versionadded:: 1.5 """ name = "Snobol" aliases = ["snobol"] filenames = ['*.snobol'] mimetypes = ['text/x-snobol'] tokens = { # root state, start of line # comments, continuation lines, and directives start in column 1 # as do labels 'root': [ (r'\*.*\n', Comment), (r'[+.] ', Punctuation, 'statement'), (r'-.*\n', Comment), (r'END\s*\n', Name.Label, 'heredoc'), (r'[A-Za-z$][\w$]*', Name.Label, 'statement'), (r'\s+', Text, 'statement'), ], # statement state, line after continuation or label 'statement': [ (r'\s*\n', Text, '#pop'), (r'\s+', Text), (r'(?<=[^\w.])(LT|LE|EQ|NE|GE|GT|INTEGER|IDENT|DIFFER|LGT|SIZE|' r'REPLACE|TRIM|DUPL|REMDR|DATE|TIME|EVAL|APPLY|OPSYN|LOAD|UNLOAD|' r'LEN|SPAN|BREAK|ANY|NOTANY|TAB|RTAB|REM|POS|RPOS|FAIL|FENCE|' r'ABORT|ARB|ARBNO|BAL|SUCCEED|INPUT|OUTPUT|TERMINAL)(?=[^\w.])', Name.Builtin), (r'[A-Za-z][\w.]*', Name), # ASCII equivalents of original operators # | for the EBCDIC equivalent, ! likewise # \ for EBCDIC negation (r'\*\*|[?$.!%*/#+\-@|&\\=]', Operator), (r'"[^"]*"', String), (r"'[^']*'", String), # Accept SPITBOL syntax for real numbers # as well as Macro SNOBOL4 (r'[0-9]+(?=[^.EeDd])', Number.Integer), (r'[0-9]+(\.[0-9]*)?([EDed][-+]?[0-9]+)?', Number.Float), # Goto (r':', Punctuation, 'goto'), (r'[()<>,;]', Punctuation), ], # Goto block 'goto': [(r'\s*\n', Text, "#pop:2"), (r'\s+', Text), (r'F|S', Keyword), (r'(\()([A-Za-z][\w.]*)(\))', bygroups(Punctuation, Name.Label, Punctuation))], # everything after the END statement is basically one # big heredoc. 'heredoc': [(r'.*\n', String.Heredoc)] }
class RslLexer(RegexLexer): """ `RSL <http://en.wikipedia.org/wiki/RAISE>`_ is the formal specification language used in RAISE (Rigorous Approach to Industrial Software Engineering) method. .. versionadded:: 2.0 """ name = 'RSL' aliases = ['rsl'] filenames = ['*.rsl'] mimetypes = ['text/rsl'] flags = re.MULTILINE | re.DOTALL tokens = { 'root': [ (words( ('Bool', 'Char', 'Int', 'Nat', 'Real', 'Text', 'Unit', 'abs', 'all', 'always', 'any', 'as', 'axiom', 'card', 'case', 'channel', 'chaos', 'class', 'devt_relation', 'dom', 'elems', 'else', 'elif', 'end', 'exists', 'extend', 'false', 'for', 'hd', 'hide', 'if', 'in', 'is', 'inds', 'initialise', 'int', 'inter', 'isin', 'len', 'let', 'local', 'ltl_assertion', 'object', 'of', 'out', 'post', 'pre', 'read', 'real', 'rng', 'scheme', 'skip', 'stop', 'swap', 'then', 'theory', 'test_case', 'tl', 'transition_system', 'true', 'type', 'union', 'until', 'use', 'value', 'variable', 'while', 'with', 'write', '~isin', '-inflist', '-infset', '-list', '-set'), prefix=r'\b', suffix=r'\b'), Keyword), (r'(variable|value)\b', Keyword.Declaration), (r'--.*?\n', Comment), (r'<:.*?:>', Comment), (r'\{!.*?!\}', Comment), (r'/\*.*?\*/', Comment), (r'^[ \t]*([\w]+)[ \t]*:[^:]', Name.Function), (r'(^[ \t]*)([\w]+)([ \t]*\([\w\s,]*\)[ \t]*)(is|as)', bygroups(Text, Name.Function, Text, Keyword)), (r'\b[A-Z]\w*\b', Keyword.Type), (r'(true|false)\b', Keyword.Constant), (r'".*"', String), (r'\'.\'', String.Char), (r'(><|->|-m->|/\\|<=|<<=|<\.|\|\||\|\^\||-~->|-~m->|\\/|>=|>>|' r'\.>|\+\+|-\\|<->|=>|:-|~=|\*\*|<<|>>=|\+>|!!|\|=\||#)', Operator), (r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'.', Text), ], } def analyse_text(text): """ Check for the most common text in the beginning of a RSL file. """ if re.search(r'scheme\s*.*?=\s*class\s*type', text, re.I) is not None: return 1.0
def _objdump_lexer_tokens(asm_lexer): """ Common objdump lexer tokens to wrap an ASM lexer. """ hex_re = r'[0-9A-Za-z]' return { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('(' + hex_re + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('(' + hex_re + '+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(asm_lexer))), # Code line with ascii ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes (r'\t\.\.\.$', Text), # Relocation line # (With offset) (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex_re + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), (r'[^\n]+\n', Other) ] }
def _make_call_state(compound, _label=_label, _label_compound=_label_compound): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(:?)(%s)' % (_label_compound if compound else _label), bygroups(Punctuation, Name.Label), '#pop')) return state
class XorgLexer(RegexLexer): """Lexer for xorg.conf file.""" name = 'Xorg' aliases = ['xorg.conf'] filenames = ['xorg.conf'] mimetypes = [] tokens = { 'root': [ (r'\s+', Text), (r'#.*$', Comment), (r'((?:Sub)?Section)(\s+)("\w+")', bygroups(String.Escape, Text, String.Escape)), (r'(End(|Sub)Section)', String.Escape), (r'(\w+)(\s+)([^\n#]+)', bygroups(Name.Builtin, Text, Name.Constant)), ], }
def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _ws=_ws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' % (_nl, _ws, _nl, _ws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ]
def gen_elixir_string_rules(name, symbol, token): states = {} states['string_' + name] = [ (r'[^#%s\\]+' % (symbol,), token), include('escapes'), (r'\\.', token), (r'(%s)' % (symbol,), bygroups(token), "#pop"), include('interpol') ] return states
class ScdocLexer(RegexLexer): """ `scdoc` is a simple man page generator for POSIX systems written in C99. https://git.sr.ht/~sircmpwn/scdoc .. versionadded:: 2.5 """ name = 'scdoc' aliases = ['scdoc', 'scd'] filenames = ['*.scd', '*.scdoc'] flags = re.MULTILINE tokens = { 'root': [ # comment (r'^(;.+\n)', bygroups(Comment)), # heading with pound prefix (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)), # bulleted lists (r'^(\s*)([*-])(\s)(.+\n)', bygroups(Text, Keyword, Text, using(this, state='inline'))), # numbered lists (r'^(\s*)(\.+\.)( .+\n)', bygroups(Text, Keyword, using(this, state='inline'))), # quote (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), # text block (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), include('inline'), ], 'inline': [ # escape (r'\\.', Text), # underlines (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)), # bold (r'(\s)(\*[^\*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)), # inline code (r'`[^`]+`', String.Backtick), # general text, must come last! (r'[^\\\s]+', Text), (r'.', Text), ], }
class ResourceLexer(RegexLexer): """Lexer for `ICU Resource bundles <http://userguide.icu-project.org/locale/resources>`_. .. versionadded:: 2.0 """ name = 'ResourceBundle' aliases = ['resource', 'resourcebundle'] filenames = [] _types = (':table', ':array', ':string', ':bin', ':import', ':intvector', ':int', ':alias') flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'//.*?$', Comment), (r'"', String, 'string'), (r'-?\d+', Number.Integer), (r'[,{}]', Operator), (r'([^\s{:]+)(\s*)(%s?)' % '|'.join(_types), bygroups(Name, Text, Keyword)), (r'\s+', Text), (words(_types), Keyword), ], 'string': [(r'(\\x[0-9a-f]{2}|\\u[0-9a-f]{4}|\\U00[0-9a-f]{6}|' r'\\[0-7]{1,3}|\\c.|\\[abtnvfre\'"?\\]|\\\{|[^"{\\])+', String), (r'\{', String.Escape, 'msgname'), (r'"', String, '#pop')], 'msgname': [(r'([^{},]+)(\s*)', bygroups(Name, String.Escape), ('#pop', 'message'))], 'message': [(r'\{', String.Escape, 'msgname'), (r'\}', String.Escape, '#pop'), (r'(,)(\s*)([a-z]+)(\s*\})', bygroups(Operator, String.Escape, Keyword, String.Escape), '#pop'), (r'(,)(\s*)([a-z]+)(\s*)(,)(\s*)(offset)(\s*)(:)(\s*)(-?\d+)(\s*)', bygroups(Operator, String.Escape, Keyword, String.Escape, Operator, String.Escape, Operator.Word, String.Escape, Operator, String.Escape, Number.Integer, String.Escape), 'choice'), (r'(,)(\s*)([a-z]+)(\s*)(,)(\s*)', bygroups(Operator, String.Escape, Keyword, String.Escape, Operator, String.Escape), 'choice'), (r'\s+', String.Escape)], 'choice': [(r'(=|<|>|<=|>=|!=)(-?\d+)(\s*\{)', bygroups(Operator, Number.Integer, String.Escape), 'message'), (r'([a-z]+)(\s*\{)', bygroups(Keyword.Type, String.Escape), 'str'), (r'\}', String.Escape, ('#pop', '#pop')), (r'\s+', String.Escape)], 'str': [(r'\}', String.Escape, '#pop'), (r'\{', String.Escape, 'msgname'), (r'[^{}]+', String)] } def analyse_text(text): if text.startswith('root:table'): return 1.0
class HspecLexer(HaskellLexer): """ A Haskell lexer with support for Hspec constructs. .. versionadded:: 2.4.0 """ name = 'Hspec' aliases = ['hspec'] filenames = [] mimetypes = [] tokens = { 'root': [ (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)), (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)), (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)), inherit, ], }
class GenericAspxLexer(RegexLexer): """ Lexer for ASP.NET pages. """ name = 'aspx-gen' filenames = [] mimetypes = [] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(XmlLexer), Other, using(XmlLexer))), (r'(.+?)(?=<)', using(XmlLexer)), (r'.+', using(XmlLexer)), ], }
class GroffLexer(RegexLexer): """ Lexer for the (g)roff typesetting language, supporting groff extensions. Mainly useful for highlighting manpage sources. .. versionadded:: 0.6 """ name = 'Groff' aliases = ['groff', 'nroff', 'man'] filenames = ['*.[1234567]', '*.man'] mimetypes = ['application/x-troff', 'text/troff'] tokens = { 'root': [ (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), (r'\.', Punctuation, 'request'), # Regular characters, slurp till we find a backslash or newline (r'[^\\\n]+', Text, 'textline'), default('textline'), ], 'textline': [ include('escapes'), (r'[^\\\n]+', Text), (r'\n', Text, '#pop'), ], 'escapes': [ # groff has many ways to write escapes. (r'\\"[^\n]*', Comment), (r'\\[fn]\w', String.Escape), (r'\\\(.{2}', String.Escape), (r'\\.\[.*\]', String.Escape), (r'\\.', String.Escape), (r'\\\n', Text, 'request'), ], 'request': [ (r'\n', Text, '#pop'), include('escapes'), (r'"[^\n"]+"', String.Double), (r'\d+', Number), (r'\S+', String), (r'\s+', Text), ], } def analyse_text(text): if text[:1] != '.': return False if text[:3] == '.\\"': return True if text[:4] == '.TH ': return True if text[1:3].isalnum() and text[3].isspace(): return 0.9