class ProtoBufLexer(RegexLexer): """ Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_ definition files. .. versionadded:: 1.4 """ name = 'Protocol Buffer' aliases = ['protobuf', 'proto'] filenames = ['*.proto'] tokens = { 'root': [ (r'[ \t]+', Text), (r'[,;{}\[\]()<>]', Punctuation), (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline), (words(('import', 'option', 'optional', 'required', 'repeated', 'reserved', 'default', 'packed', 'ctype', 'extensions', 'to', 'max', 'rpc', 'returns', 'oneof'), prefix=r'\b', suffix=r'\b'), Keyword), (words(('int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64', 'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'), Keyword.Type), (r'(true|false)\b', Keyword.Constant), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'), (r'(message|extend)(\s+)', bygroups(Keyword.Declaration, Text), 'message'), (r'(enum|group|service)(\s+)', bygroups(Keyword.Declaration, Text), 'type'), (r'\".*?\"', String), (r'\'.*?\'', String), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'(\-?(inf|nan))\b', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'[+-=]', Operator), (r'([a-zA-Z_][\w.]*)([ \t]*)(=)', bygroups(Name.Attribute, Text, Operator)), (r'[a-zA-Z_][\w.]*', Name), ], 'package': [ (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'), default('#pop'), ], 'message': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop'), default('#pop'), ], 'type': [ (r'[a-zA-Z_]\w*', Name, '#pop'), default('#pop'), ], }
class ScssLexer(RegexLexer): """ For SCSS stylesheets. """ name = 'SCSS' aliases = ['scss'] filenames = ['*.scss'] mimetypes = ['text/x-scss'] flags = re.IGNORECASE | re.DOTALL tokens = { 'root': [ (r'\s+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'@import', Keyword, 'value'), (r'@for', Keyword, 'for'), (r'@(debug|warn|if|while)', Keyword, 'value'), (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'), (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'), (r'@extend', Keyword, 'selector'), (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'), (r'@[\w-]+', Keyword, 'selector'), (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), # TODO: broken, and prone to infinite loops. # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), default('selector'), ], 'attr': [ (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*:', Operator, 'value'), default('#pop'), ], 'inline-comment': [ (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline), (r'#\{', String.Interpol, 'interpolation'), (r"\*/", Comment, '#pop'), ], } for group, common in iteritems(common_sass_tokens): tokens[group] = copy.copy(common) tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
class ZephirLexer(RegexLexer): """ For `Zephir language <http://zephir-lang.com/>`_ source code. Zephir is a compiled high level language aimed to the creation of C-extensions for PHP. .. versionadded:: 2.0 """ name = 'Zephir' aliases = ['zephir'] filenames = ['*.zep'] zephir_keywords = ['fetch', 'echo', 'isset', 'empty'] zephir_type = ['bit', 'bits', 'string'] flags = re.DOTALL | re.MULTILINE tokens = { 'commentsandwhitespace': [(r'\s+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline)], 'slashstartsregex': [ include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), default('#pop') ], 'badregex': [(r'\n', Text, '#pop')], 'root': [ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|' r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|' r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|' r'empty)\b', Keyword, 'slashstartsregex'), (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|' r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|' r'float|unsigned|private|protected|public|short|static|self|throws|reverse|' r'transient|volatile)\b', Keyword.Reserved), (r'(true|false|null|undefined)\b', Keyword.Constant), (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|' r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|' r'window)\b', Name.Builtin), (r'[$a-zA-Z_][\w\\]*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), ] }
class TexLexer(RegexLexer): """ Lexer for the TeX and LaTeX typesetting languages. """ name = 'TeX' aliases = ['tex', 'latex'] filenames = ['*.tex', '*.aux', '*.toc'] mimetypes = ['text/x-tex', 'text/x-latex'] tokens = { 'general': [ (r'%.*?\n', Comment), (r'[{}]', Name.Builtin), (r'[&_^]', Name.Builtin), ], 'root': [ (r'\\\[', String.Backtick, 'displaymath'), (r'\\\(', String, 'inlinemath'), (r'\$\$', String.Backtick, 'displaymath'), (r'\$', String, 'inlinemath'), (r'\\([a-zA-Z]+|.)', Keyword, 'command'), (r'\\$', Keyword), include('general'), (r'[^\\$%&_^{}]+', Text), ], 'math': [ (r'\\([a-zA-Z]+|.)', Name.Variable), include('general'), (r'[0-9]+', Number), (r'[-=!+*/()\[\]]', Operator), (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), ], 'inlinemath': [ (r'\\\)', String, '#pop'), (r'\$', String, '#pop'), include('math'), ], 'displaymath': [ (r'\\\]', String, '#pop'), (r'\$\$', String, '#pop'), (r'\$', Name.Builtin), include('math'), ], 'command': [ (r'\[.*?\]', Name.Attribute), (r'\*', Keyword), default('#pop'), ], } def analyse_text(text): for start in ("\\documentclass", "\\input", "\\documentstyle", "\\relax"): if text[:len(start)] == start: return True
class DebianControlLexer(RegexLexer): """ Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs. .. versionadded:: 0.9 """ name = 'Debian Control file' aliases = ['control', 'debcontrol'] filenames = ['control'] tokens = { 'root': [ (r'^(Description)', Keyword, 'description'), (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), (r'^((Build-)?Depends)', Keyword, 'depends'), (r'^((?:Python-)?Version)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^((?:Installed-)?Size)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', bygroups(Keyword, Whitespace, String)), ], 'maintainer': [ (r'<[^>]+>', Generic.Strong), (r'<[^>]+>$', Generic.Strong, '#pop'), (r',\n?', Text), (r'.', Text), ], 'description': [ (r'(.*)(Homepage)(: )(\S+)', bygroups(Text, String, Name, Name.Class)), (r':.*\n', Generic.Strong), (r' .*\n', Text), default('#pop'), ], 'depends': [ (r':\s*', Text), (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)), (r'\(', Text, 'depend_vers'), (r',', Text), (r'\|', Operator), (r'[\s]+', Text), (r'[})]\s*$', Text, '#pop'), (r'\}', Text), (r'[^,]$', Name.Function, '#pop'), (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), (r'\[.*?\]', Name.Entity), ], 'depend_vers': [(r'\),', Text, '#pop'), (r'\)[^,]', Text, '#pop:2'), (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number))] }
class GroffLexer(RegexLexer): """ Lexer for the (g)roff typesetting language, supporting groff extensions. Mainly useful for highlighting manpage sources. .. versionadded:: 0.6 """ name = 'Groff' aliases = ['groff', 'nroff', 'man'] filenames = ['*.[1234567]', '*.man'] mimetypes = ['application/x-troff', 'text/troff'] tokens = { 'root': [ (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), (r'\.', Punctuation, 'request'), # Regular characters, slurp till we find a backslash or newline (r'[^\\\n]+', Text, 'textline'), default('textline'), ], 'textline': [ include('escapes'), (r'[^\\\n]+', Text), (r'\n', Text, '#pop'), ], 'escapes': [ # groff has many ways to write escapes. (r'\\"[^\n]*', Comment), (r'\\[fn]\w', String.Escape), (r'\\\(.{2}', String.Escape), (r'\\.\[.*\]', String.Escape), (r'\\.', String.Escape), (r'\\\n', Text, 'request'), ], 'request': [ (r'\n', Text, '#pop'), include('escapes'), (r'"[^\n"]+"', String.Double), (r'\d+', Number), (r'\S+', String), (r'\s+', Text), ], } def analyse_text(text): if text[:1] != '.': return False if text[:3] == '.\\"': return True if text[:4] == '.TH ': return True if text[1:3].isalnum() and text[3].isspace(): return 0.9
class AwkLexer(RegexLexer): """ For Awk scripts. .. versionadded:: 1.5 """ name = 'Awk' aliases = ['awk', 'gawk', 'mawk', 'nawk'] filenames = ['*.awk'] mimetypes = ['application/x-awk'] tokens = { 'commentsandwhitespace': [ (r'\s+', Text), (r'#.*$', Comment.Single) ], 'slashstartsregex': [ include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'\B', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') ], 'root': [ (r'^(?=\s|/)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'\+\+|--|\|\||&&|in\b|\$|!?~|' r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(break|continue|do|while|exit|for|if|else|' r'return)\b', Keyword, 'slashstartsregex'), (r'function\b', Keyword.Declaration, 'slashstartsregex'), (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|' r'length|match|split|sprintf|sub|substr|tolower|toupper|close|' r'fflush|getline|next|nextfile|print|printf|strftime|systime|' r'delete|system)\b', Keyword.Reserved), (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|' r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|' r'RSTART|RT|SUBSEP)\b', Name.Builtin), (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), ] }
class CapnProtoLexer(RegexLexer): """ For `Cap'n Proto <https://capnproto.org>`_ source. .. versionadded:: 2.2 """ name = 'Cap\'n Proto' filenames = ['*.capnp'] aliases = ['capnp'] flags = re.MULTILINE | re.UNICODE tokens = { 'root': [ (r'#.*?$', Comment.Single), (r'@[0-9a-zA-Z]*', Name.Decorator), (r'=', Literal, 'expression'), (r':', Name.Class, 'type'), (r'\$', Name.Attribute, 'annotation'), (r'(struct|enum|interface|union|import|using|const|annotation|' r'extends|in|of|on|as|with|from|fixed)\b', Keyword), (r'[\w.]+', Name), (r'[^#@=:$\w]+', Text), ], 'type': [ (r'[^][=;,(){}$]+', Name.Class), (r'[\[(]', Name.Class, 'parentype'), default('#pop'), ], 'parentype': [ (r'[^][;()]+', Name.Class), (r'[\[(]', Name.Class, '#push'), (r'[])]', Name.Class, '#pop'), default('#pop'), ], 'expression': [ (r'[^][;,(){}$]+', Literal), (r'[\[(]', Literal, 'parenexp'), default('#pop'), ], 'parenexp': [ (r'[^][;()]+', Literal), (r'[\[(]', Literal, '#push'), (r'[])]', Literal, '#pop'), default('#pop'), ], 'annotation': [ (r'[^][;,(){}=:]+', Name.Attribute), (r'[\[(]', Name.Attribute, 'annexp'), default('#pop'), ], 'annexp': [ (r'[^][;()]+', Name.Attribute), (r'[\[(]', Name.Attribute, '#push'), (r'[])]', Name.Attribute, '#pop'), default('#pop'), ], }
def gen_elixir_sigil_rules(): # all valid sigil terminators (excluding heredocs) terminators = [ (r'\{', r'\}', 'cb'), (r'\[', r'\]', 'sb'), (r'\(', r'\)', 'pa'), (r'<', r'>', 'ab'), (r'/', r'/', 'slas'), (r'\|', r'\|', 'pipe'), ('"', '"', 'quot'), ("'", "'", 'apos'), ] # heredocs have slightly different rules triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] token = String.Other states = {'sigils': []} for term, name in triquotes: states['sigils'] += [ (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-intp')), (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-no-intp')), ] states[name + '-end'] = [ (r'[a-zA-Z]+', token, '#pop'), default('#pop'), ] states[name + '-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_interpol'), ] states[name + '-no-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_no_interpol'), ] for lterm, rterm, name in terminators: states['sigils'] += [ (r'~[a-z]' + lterm, token, name + '-intp'), (r'~[A-Z]' + lterm, token, name + '-no-intp'), ] states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) states[name + '-no-intp'] = \ gen_elixir_sigstr_rules(rterm, token, interpol=False) return states
class CirruLexer(RegexLexer): r""" Syntax rules of Cirru can be found at: http://cirru.org/ * using ``()`` for expressions, but restricted in a same line * using ``""`` for strings, with ``\`` for escaping chars * using ``$`` as folding operator * using ``,`` as unfolding operator * using indentations for nested blocks .. versionadded:: 2.0 """ name = 'Cirru' aliases = ['cirru'] filenames = ['*.cirru'] mimetypes = ['text/x-cirru'] flags = re.MULTILINE tokens = { 'string': [ (r'[^"\\\n]', String), (r'\\', String.Escape, 'escape'), (r'"', String, '#pop'), ], 'escape': [ (r'.', String.Escape, '#pop'), ], 'function': [ (r'\,', Operator, '#pop'), (r'[^\s"()]+', Name.Function, '#pop'), (r'\)', Operator, '#pop'), (r'(?=\n)', Text, '#pop'), (r'\(', Operator, '#push'), (r'"', String, ('#pop', 'string')), (r'[ ]+', Text.Whitespace), ], 'line': [(r'(?<!\w)\$(?!\w)', Operator, 'function'), (r'\(', Operator, 'function'), (r'\)', Operator), (r'\n', Text, '#pop'), (r'"', String, 'string'), (r'[ ]+', Text.Whitespace), (r'[+-]?[\d.]+\b', Number), (r'[^\s"()]+', Name.Variable)], 'root': [ (r'^\n+', Text.Whitespace), default(('line', 'function')), ] }
class BSTLexer(RegexLexer): """ A lexer for BibTeX bibliography styles. .. versionadded:: 2.2 """ name = 'BST' aliases = ['bst', 'bst-pybtex'] filenames = ['*.bst'] flags = re.IGNORECASE | re.MULTILINE tokens = { 'root': [ include('whitespace'), (words(['read', 'sort']), Keyword), (words(['execute', 'integers', 'iterate', 'reverse', 'strings']), Keyword, ('group')), (words(['function', 'macro']), Keyword, ('group', 'group')), (words(['entry']), Keyword, ('group', 'group', 'group')), ], 'group': [ include('whitespace'), (r'\{', Punctuation, ('#pop', 'group-end', 'body')), ], 'group-end': [ include('whitespace'), (r'\}', Punctuation, '#pop'), ], 'body': [ include('whitespace'), (r"\'[^#\"\{\}\s]+", Name.Function), (r'[^#\"\{\}\s]+\$', Name.Builtin), (r'[^#\"\{\}\s]+', Name.Variable), (r'"[^\"]*"', String), (r'#-?\d+', Number), (r'\{', Punctuation, ('group-end', 'body')), default('#pop'), ], 'whitespace': [ (r'\s+', Text), ('%.*?$', Comment.SingleLine), ], }
class FortranFixedLexer(RegexLexer): """ Lexer for fixed format Fortran. .. versionadded:: 2.1 """ name = 'FortranFixed' aliases = ['fortranfixed'] filenames = ['*.f', '*.F'] flags = re.IGNORECASE def _lex_fortran(self, match, ctx=None): """Lex a line just as free form fortran without line break.""" lexer = FortranLexer() text = match.group(0) + "\n" for index, token, value in lexer.get_tokens_unprocessed(text): value = value.replace('\n', '') if value != '': yield index, token, value tokens = { 'root': [ (r'[C*].*\n', Comment), (r'#.*\n', Comment.Preproc), (r' {0,4}!.*\n', Comment), (r'(.{5})', Name.Label, 'cont-char'), (r'.*\n', using(FortranLexer)), ], 'cont-char': [ (' ', Text, 'code'), ('0', Comment, 'code'), ('.', Generic.Strong, 'code'), ], 'code': [ (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text), 'root'), (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), default('root'), ] }
class FantomLexer(RegexLexer): """ For Fantom source code. .. versionadded:: 1.5 """ name = 'Fantom' aliases = ['fan'] filenames = ['*.fan'] mimetypes = ['application/x-fantom'] # often used regexes def s(str): return Template(str).substitute( dict( pod=r'[\"\w\.]+', eos=r'\n|;', id=r'[a-zA-Z_]\w*', # all chars which can be part of type definition. Starts with # either letter, or [ (maps), or | (funcs) type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?', )) tokens = { 'comments': [ (r'(?s)/\*.*?\*/', Comment.Multiline), # Multiline (r'//.*?\n', Comment.Single), # Single line # TODO: highlight references in fandocs (r'\*\*.*?\n', Comment.Special), # Fandoc (r'#.*\n', Comment.Single) # Shell-style ], 'literals': [ (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration with dot (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float), # Float/Decimal (r'\b-?0x[0-9a-fA-F_]+', Number.Hex), # Hex (r'\b-?[\d_]+', Number.Integer), # Int (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), # Char (r'"', Punctuation, 'insideStr'), # Opening quote (r'`', Punctuation, 'insideUri'), # Opening accent (r'\b(true|false|null)\b', Keyword.Constant), # Bool & null ( r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)', # DSL bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation, String, Punctuation)), ( r'(?:(\w+)(::))?(\w+)?(#)(\w+)?', # Type/slot literal bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation, Name.Function)), (r'\[,\]', Literal), # Empty list ( s(r'($type)(\[,\])'), # Typed empty list bygroups(using(this, state='inType'), Literal)), (r'\[:\]', Literal), # Empty Map (s(r'($type)(\[:\])'), bygroups(using(this, state='inType'), Literal)), ], 'insideStr': [ (r'\\\\', String.Escape), # Escaped backslash (r'\\"', String.Escape), # Escaped " (r'\\`', String.Escape), # Escaped ` (r'\$\w+', String.Interpol), # Subst var (r'\$\{.*?\}', String.Interpol), # Subst expr (r'"', Punctuation, '#pop'), # Closing quot (r'.', String) # String content ], 'insideUri': [ # TODO: remove copy/paste str/uri (r'\\\\', String.Escape), # Escaped backslash (r'\\"', String.Escape), # Escaped " (r'\\`', String.Escape), # Escaped ` (r'\$\w+', String.Interpol), # Subst var (r'\$\{.*?\}', String.Interpol), # Subst expr (r'`', Punctuation, '#pop'), # Closing tick (r'.', String.Backtick) # URI content ], 'protectionKeywords': [ (r'\b(public|protected|private|internal)\b', Keyword), ], 'typeKeywords': [ (r'\b(abstract|final|const|native|facet|enum)\b', Keyword), ], 'methodKeywords': [ (r'\b(abstract|native|once|override|static|virtual|final)\b', Keyword), ], 'fieldKeywords': [(r'\b(abstract|const|final|native|override|static|virtual|' r'readonly)\b', Keyword)], 'otherKeywords': [ (words(('try', 'catch', 'throw', 'finally', 'for', 'if', 'else', 'while', 'as', 'is', 'isnot', 'switch', 'case', 'default', 'continue', 'break', 'do', 'return', 'get', 'set'), prefix=r'\b', suffix=r'\b'), Keyword), (r'\b(it|this|super)\b', Name.Builtin.Pseudo), ], 'operators': [(r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator)], 'inType': [ (r'[\[\]|\->:?]', Punctuation), (s(r'$id'), Name.Class), default('#pop'), ], 'root': [ include('comments'), include('protectionKeywords'), include('typeKeywords'), include('methodKeywords'), include('fieldKeywords'), include('literals'), include('otherKeywords'), include('operators'), (r'using\b', Keyword.Namespace, 'using'), # Using stmt (r'@\w+', Name.Decorator, 'facet'), # Symbol (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class), 'inheritance'), # Inheritance list # Type var := val (s(r'($type)([ \t]+)($id)(\s*)(:=)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Operator)), # var := val (s(r'($id)(\s*)(:=)'), bygroups(Name.Variable, Text, Operator)), # .someId( or ->someId( ### (s(r'(\.|(?:\->))($id)(\s*)(\()'), bygroups(Operator, Name.Function, Text, Punctuation), 'insideParen'), # .someId or ->someId (s(r'(\.|(?:\->))($id)'), bygroups(Operator, Name.Function)), # new makeXXX ( (r'(new)(\s+)(make\w*)(\s*)(\()', bygroups(Keyword, Text, Name.Function, Text, Punctuation), 'insideMethodDeclArgs'), # Type name ( ( s(r'($type)([ \t]+)' # Return type and whitespace r'($id)(\s*)(\()'), # method name + open brace bygroups(using(this, state='inType'), Text, Name.Function, Text, Punctuation), 'insideMethodDeclArgs'), # ArgType argName, (s(r'($type)(\s+)($id)(\s*)(,)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), # ArgType argName) # Covered in 'insideParen' state # ArgType argName -> ArgType| (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation, Text, using(this, state='inType'), Punctuation)), # ArgType argName| (s(r'($type)(\s+)($id)(\s*)(\|)'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), # Type var (s(r'($type)([ \t]+)($id)'), bygroups(using(this, state='inType'), Text, Name.Variable)), (r'\(', Punctuation, 'insideParen'), (r'\{', Punctuation, 'insideBrace'), (r'.', Text) ], 'insideParen': [ (r'\)', Punctuation, '#pop'), include('root'), ], 'insideMethodDeclArgs': [ (r'\)', Punctuation, '#pop'), (s(r'($type)(\s+)($id)(\s*)(\))'), bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation), '#pop'), include('root'), ], 'insideBrace': [ (r'\}', Punctuation, '#pop'), include('root'), ], 'inheritance': [ (r'\s+', Text), # Whitespace (r':|,', Punctuation), (r'(?:(\w+)(::))?(\w+)', bygroups(Name.Namespace, Punctuation, Name.Class)), (r'\{', Punctuation, '#pop') ], 'using': [ (r'[ \t]+', Text), # consume whitespaces (r'(\[)(\w+)(\])', bygroups(Punctuation, Comment.Special, Punctuation)), # ffi (r'(\")?([\w.]+)(\")?', bygroups(Punctuation, Name.Namespace, Punctuation)), # podname (r'::', Punctuation, 'usingClass'), default('#pop') ], 'usingClass': [ (r'[ \t]+', Text), # consume whitespaces (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Name.Class), '#pop:2'), (r'[\w$]+', Name.Class), default('#pop:2') # jump out to root state ], 'facet': [(r'\s+', Text), (r'\{', Punctuation, 'facetFields'), default('#pop')], 'facetFields': [ include('comments'), include('literals'), include('operators'), (r'\s+', Text), (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)), (r'\}', Punctuation, '#pop'), (r'.', Text) ], }
class ValaLexer(RegexLexer): """ For Vala source code with preprocessor directives. .. versionadded:: 1.1 """ name = 'Vala' aliases = ['vala', 'vapi'] filenames = ['*.vala', '*.vapi'] mimetypes = ['text/x-vala'] tokens = { 'whitespace': [ (r'^\s*#if\s+0', Comment.Preproc, 'if0'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ (r'[L@]?"', String, 'string'), (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'(?s)""".*?"""', String), # verbatim strings (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])', bygroups(Punctuation, Name.Decorator, Punctuation)), # TODO: "correctly" parse complex code attributes (r'(\[)(CCode|(?:Integer|Floating)Type)', bygroups(Punctuation, Name.Decorator)), (r'[()\[\],.]', Punctuation), (words( ('as', 'base', 'break', 'case', 'catch', 'construct', 'continue', 'default', 'delete', 'do', 'else', 'enum', 'finally', 'for', 'foreach', 'get', 'if', 'in', 'is', 'lock', 'new', 'out', 'params', 'return', 'set', 'sizeof', 'switch', 'this', 'throw', 'try', 'typeof', 'while', 'yield'), suffix=r'\b'), Keyword), (words(('abstract', 'const', 'delegate', 'dynamic', 'ensures', 'extern', 'inline', 'internal', 'override', 'owned', 'private', 'protected', 'public', 'ref', 'requires', 'signal', 'static', 'throws', 'unowned', 'var', 'virtual', 'volatile', 'weak', 'yields'), suffix=r'\b'), Keyword.Declaration), (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace, Text), 'namespace'), (r'(class|errordomain|interface|struct)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), # void is an actual keyword, others are in glib-2.0.vapi (words( ('void', 'bool', 'char', 'double', 'float', 'int', 'int8', 'int16', 'int32', 'int64', 'long', 'short', 'size_t', 'ssize_t', 'string', 'time_t', 'uchar', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'), suffix=r'\b'), Keyword.Type), (r'(true|false|null)\b', Name.Builtin), (r'[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ], 'class': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')], 'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')], }
class FSharpLexer(RegexLexer): """ For the `F# language <https://fsharp.org/>`_ (version 3.0). .. versionadded:: 1.5 """ name = 'F#' aliases = ['fsharp', 'f#'] filenames = ['*.fs', '*.fsi'] mimetypes = ['text/x-fsharp'] keywords = [ 'abstract', 'as', 'assert', 'base', 'begin', 'class', 'default', 'delegate', 'do!', 'do', 'done', 'downcast', 'downto', 'elif', 'else', 'end', 'exception', 'extern', 'false', 'finally', 'for', 'function', 'fun', 'global', 'if', 'inherit', 'inline', 'interface', 'internal', 'in', 'lazy', 'let!', 'let', 'match', 'member', 'module', 'mutable', 'namespace', 'new', 'null', 'of', 'open', 'override', 'private', 'public', 'rec', 'return!', 'return', 'select', 'static', 'struct', 'then', 'to', 'true', 'try', 'type', 'upcast', 'use!', 'use', 'val', 'void', 'when', 'while', 'with', 'yield!', 'yield', ] # Reserved words; cannot hurt to color them as keywords too. keywords += [ 'atomic', 'break', 'checked', 'component', 'const', 'constraint', 'constructor', 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include', 'method', 'mixin', 'object', 'parallel', 'process', 'protected', 'pure', 'sealed', 'tailcall', 'trait', 'virtual', 'volatile', ] keyopts = [ '!=', '#', '&&', '&', r'\(', r'\)', r'\*', r'\+', ',', r'-\.', '->', '-', r'\.\.', r'\.', '::', ':=', ':>', ':', ';;', ';', '<-', r'<\]', '<', r'>\]', '>', r'\?\?', r'\?', r'\[<', r'\[\|', r'\[', r'\]', '_', '`', r'\{', r'\|\]', r'\|', r'\}', '~', '<@@', '<@', '=', '@>', '@@>', ] operators = r'[!$%&*+\./:<=>?@^|~-]' word_operators = ['and', 'or', 'not'] prefix_syms = r'[!?~]' infix_syms = r'[=<>@^|&+\*/$%-]' primitives = [ 'sbyte', 'byte', 'char', 'nativeint', 'unativeint', 'float32', 'single', 'float', 'double', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64', 'decimal', 'unit', 'bool', 'string', 'list', 'exn', 'obj', 'enum', ] # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or # http://fsharp.org/about/files/spec.pdf for reference. Good luck. tokens = { 'escape-sequence': [ (r'\\[\\"\'ntbrafv]', String.Escape), (r'\\[0-9]{3}', String.Escape), (r'\\u[0-9a-fA-F]{4}', String.Escape), (r'\\U[0-9a-fA-F]{8}', String.Escape), ], 'root': [ (r'\s+', Text), (r'\(\)|\[\]', Name.Builtin.Pseudo), (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'), (r'\b([A-Z][\w\']*)', Name), (r'///.*?\n', String.Doc), (r'//.*?\n', Comment.Single), (r'\(\*(?!\))', Comment, 'comment'), (r'@"', String, 'lstring'), (r'"""', String, 'tqs'), (r'"', String, 'string'), (r'\b(open|module)(\s+)([\w.]+)', bygroups(Keyword, Text, Name.Namespace)), (r'\b(let!?)(\s+)(\w+)', bygroups(Keyword, Text, Name.Variable)), (r'\b(type)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class)), (r'\b(member|override)(\s+)(\w+)(\.)(\w+)', bygroups(Keyword, Text, Name, Punctuation, Name.Function)), (r'\b(%s)\b' % '|'.join(keywords), Keyword), (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name), (r'(%s)' % '|'.join(keyopts), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n', Comment.Preproc), (r"[^\W\d][\w']*", Name), (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer), (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex), (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct), (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin), (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)[fFmM]?', Number.Float), (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?", String.Char), (r"'.'", String.Char), (r"'", Keyword), # a stray quote is another syntax element (r'@?"', String.Double, 'string'), (r'[~?][a-z][\w\']*:', Name.Variable), ], 'dotted': [ (r'\s+', Text), (r'\.', Punctuation), (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), (r'[A-Z][\w\']*', Name, '#pop'), (r'[a-z_][\w\']*', Name, '#pop'), # e.g. dictionary index access default('#pop'), ], 'comment': [ (r'[^(*)@"]+', Comment), (r'\(\*', Comment, '#push'), (r'\*\)', Comment, '#pop'), # comments cannot be closed within strings in comments (r'@"', String, 'lstring'), (r'"""', String, 'tqs'), (r'"', String, 'string'), (r'[(*)@]', Comment), ], 'string': [ (r'[^\\"]+', String), include('escape-sequence'), (r'\\\n', String), (r'\n', String), # newlines are allowed in any string (r'"B?', String, '#pop'), ], 'lstring': [ (r'[^"]+', String), (r'\n', String), (r'""', String), (r'"B?', String, '#pop'), ], 'tqs': [ (r'[^"]+', String), (r'\n', String), (r'"""B?', String, '#pop'), (r'"', String), ], }
class VbNetLexer(RegexLexer): """ For `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_ source code. """ name = 'VB.net' aliases = ['vb.net', 'vbnet'] filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \ '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*' flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^\s*<.*?>', Name.Attribute), (r'\s+', Text), (r'\n', Text), (r'rem\b.*?\n', Comment), (r"'.*?\n", Comment), (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#Else|#End\s+If|#Const|' r'#ExternalSource.*?\n|#End\s+ExternalSource|' r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc), (r'[(){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), (words( ('AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case', 'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl', 'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng', 'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare', 'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else', 'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False', 'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo', 'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let', 'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase', 'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing', 'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator', 'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides', 'ParamArray', 'Partial', 'Private', 'Protected', 'Public', 'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume', 'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single', 'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To', 'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While', 'Widening', 'With', 'WithEvents', 'WriteOnly'), prefix=r'(?<!\.)', suffix=r'\b'), Keyword), (r'(?<!\.)End\b', Keyword, 'end'), (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'), (r'(?<!\.)(Function|Sub|Property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(?<!\.)(Class|Structure|Enum)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(?<!\.)(Module|Namespace|Imports)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|' r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|' r'UShort)\b', Keyword.Type), (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|' r'Or|OrElse|TypeOf|Xor)\b', Operator.Word), (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|' r'<=|>=|<>|[-&*/\\^+=<>\[\]]', Operator), ('"', String, 'string'), (r'_\n', Text), # Line continuation (must be before Name) (uni_name + '[%&@!#$]?', Name), ('#.*?#', Literal.Date), (r'(\d+\.\d*|\d*\.\d+)(F[+-]?[0-9]+)?', Number.Float), (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer), ], 'string': [ (r'""', String), (r'"C?', String, '#pop'), (r'[^"]+', String), ], 'dim': [ (uni_name, Name.Variable, '#pop'), default('#pop'), # any other syntax ], 'funcname': [ (uni_name, Name.Function, '#pop'), ], 'classname': [ (uni_name, Name.Class, '#pop'), ], 'namespace': [ (uni_name, Name.Namespace), (r'\.', Name.Namespace), default('#pop'), ], 'end': [ (r'\s+', Text), (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b', Keyword, '#pop'), default('#pop'), ] } def analyse_text(text): if re.search(r'^\s*(#If|Module|Namespace)', text, re.MULTILINE): return 0.5
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. .. versionadded:: 0.8 """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': r'@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in iteritems(levels): tokens[levelname] = { 'root': [ # method names ( r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*].*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|async|await|base|break|by|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|let|lock|new|null|on|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value|alias|ascending|' r'descending|from|group|into|orderby|select|thenby|where|' r'join|equals)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|' r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [ (cs_ident, Name.Class, '#pop'), default('#pop'), ], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'), ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
class SassLexer(ExtendedRegexLexer): """ For Sass stylesheets. .. versionadded:: 1.3 """ name = 'Sass' aliases = ['sass'] filenames = ['*.sass'] mimetypes = ['text/x-sass'] flags = re.IGNORECASE | re.MULTILINE tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'content': [ (r'//[^\n]*', _starts_block(Comment.Single, 'single-comment'), 'root'), (r'/\*[^\n]*', _starts_block(Comment.Multiline, 'multi-comment'), 'root'), (r'@import', Keyword, 'import'), (r'@for', Keyword, 'for'), (r'@(debug|warn|if|while)', Keyword, 'value'), (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'), (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'), (r'@extend', Keyword, 'selector'), (r'@[\w-]+', Keyword, 'selector'), (r'=[\w-]+', Name.Function, 'value'), (r'\+[\w-]+', Name.Decorator, 'value'), (r'([!$][\w-]\w*)([ \t]*(?:(?:\|\|)?=|:))', bygroups(Name.Variable, Operator), 'value'), (r':', Name.Attribute, 'old-style-attr'), (r'(?=.+?[=:]([^a-z]|$))', Name.Attribute, 'new-style-attr'), default('selector'), ], 'single-comment': [ (r'.+', Comment.Single), (r'\n', Text, 'root'), ], 'multi-comment': [ (r'.+', Comment.Multiline), (r'\n', Text, 'root'), ], 'import': [ (r'[ \t]+', Text), (r'\S+', String), (r'\n', Text, 'root'), ], 'old-style-attr': [ (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*=', Operator, 'value'), default('value'), ], 'new-style-attr': [ (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*[=:]', Operator, 'value'), ], 'inline-comment': [ (r"(\\#|#(?=[^\n{])|\*(?=[^\n/])|[^\n#*])+", Comment.Multiline), (r'#\{', String.Interpol, 'interpolation'), (r"\*/", Comment, '#pop'), ], } for group, common in iteritems(common_sass_tokens): tokens[group] = copy.copy(common) tokens['value'].append((r'\n', Text, 'root')) tokens['selector'].append((r'\n', Text, 'root'))
class NotmuchLexer(RegexLexer): """ For `Notmuch <https://notmuchmail.org/>`_ email text format. .. versionadded:: 2.5 Additional options accepted: `body_lexer` If given, highlight the contents of the message body with the specified lexer, else guess it according to the body content (default: ``None``). """ name = 'Notmuch' aliases = ['notmuch'] def _highlight_code(self, match): code = match.group(1) try: if self.body_lexer: lexer = get_lexer_by_name(self.body_lexer) else: lexer = guess_lexer(code.strip()) except ClassNotFound: lexer = get_lexer_by_name('text') for item in lexer.get_tokens_unprocessed(code): yield item tokens = { 'root': [ (r'\fmessage{\s*', Keyword, ('message', 'message-attr')), ], 'message-attr': [ (r'(\s*id:\s*)([^\s]+)', bygroups(Name.Attribute, String)), (r'(\s*(?:depth|match|excluded):\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)), (r'(\s*filename:\s*)(.+\n)', bygroups(Name.Attribute, String)), default('#pop'), ], 'message': [ (r'\fmessage}\n', Keyword, '#pop'), (r'\fheader{\n', Keyword, 'header'), (r'\fbody{\n', Keyword, 'body'), ], 'header': [ (r'\fheader}\n', Keyword, '#pop'), (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)', bygroups(Name.Attribute, String)), (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)', bygroups(Generic.Strong, Literal, Name.Tag)), ], 'body': [ (r'\fpart{\n', Keyword, 'part'), (r'\f(part|attachment){\s*', Keyword, ('part', 'part-attr')), (r'\fbody}\n', Keyword, '#pop'), ], 'part-attr': [ (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)), (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)', bygroups(Punctuation, Name.Attribute, String)), (r'(,\s*)(Content-type:\s*)(.+\n)', bygroups(Punctuation, Name.Attribute, String)), default('#pop'), ], 'part': [ (r'\f(?:part|attachment)}\n', Keyword, '#pop'), (r'\f(?:part|attachment){\s*', Keyword, ('#push', 'part-attr')), (r'^Non-text part: .*\n', Comment), (r'(?s)(.*?(?=\f(?:part|attachment)}\n))', _highlight_code), ], } def analyse_text(text): return 1.0 if text.startswith('\fmessage{') else 0.0 def __init__(self, **options): self.body_lexer = options.get('body_lexer', None) RegexLexer.__init__(self, **options)
class GeneratedObjectiveCVariant(baselexer): """ Implements Objective-C syntax on top of an existing C family lexer. """ tokens = { 'statements': [ (r'@"', String, 'string'), (r'@(YES|NO)', Number), (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'@0[0-7]+[Ll]?', Number.Oct), (r'@\d+[Ll]?', Number.Integer), (r'@\(', Literal, 'literal_number'), (r'@\[', Literal, 'literal_array'), (r'@\{', Literal, 'literal_dictionary'), (words(( '@selector', '@private', '@protected', '@public', '@encode', '@synchronized', '@try', '@throw', '@catch', '@finally', '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer', '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong', 'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic', 'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in', 'out', 'inout', 'release', 'class', '@dynamic', '@optional', '@required', '@autoreleasepool', '@import'), suffix=r'\b'), Keyword), (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL', 'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'), Keyword.Type), (r'@(true|false|YES|NO)\n', Name.Builtin), (r'(YES|NO|nil|self|super)\b', Name.Builtin), # Carbon types (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type), # Carbon built-ins (r'(TRUE|FALSE)\b', Name.Builtin), (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_classname')), (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_forward_classname')), # @ can also prefix other expressions like @{...} or @(...) (r'@', Punctuation), inherit, ], 'oc_classname': [ # interface definition that inherits (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)', bygroups(Name.Class, Text, Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)', bygroups(Name.Class, Text, Name.Label, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation (r'([a-zA-Z$_][\w$]*)(\s*)(\{)', bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'oc_forward_classname': [ (r'([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text), 'oc_forward_classname'), (r'([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class, Text), '#pop') ], 'oc_ivars': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'root': [ # methods (r'^([-+])(\s*)' # method marker r'(\(.*?\))?(\s*)' # return type r'([a-zA-Z$_][\w$]*:?)', # begin of method name bygroups(Punctuation, Text, using(this), Text, Name.Function), 'method'), inherit, ], 'method': [ include('whitespace'), # TODO unsure if ellipses are allowed elsewhere, see # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)', bygroups(using(this), Text, Name.Variable)), (r'[a-zA-Z$_][\w$]*:', Name.Function), (';', Punctuation, '#pop'), (r'\{', Punctuation, 'function'), default('#pop'), ], 'literal_number': [ (r'\(', Punctuation, 'literal_number_inner'), (r'\)', Literal, '#pop'), include('statement'), ], 'literal_number_inner': [ (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop'), include('statement'), ], 'literal_array': [ (r'\[', Punctuation, 'literal_array_inner'), (r'\]', Literal, '#pop'), include('statement'), ], 'literal_array_inner': [ (r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), include('statement'), ], 'literal_dictionary': [ (r'\}', Literal, '#pop'), include('statement'), ], } def analyse_text(text): if _oc_keywords.search(text): return 1.0 elif '@"' in text: # strings return 0.8 elif re.search('@[0-9]+', text): return 0.7 elif _oc_message.search(text): return 0.8 return 0 def get_tokens_unprocessed(self, text): from testflows._core.contrib.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \ COCOA_PROTOCOLS, COCOA_PRIMITIVES for index, token, value in \ baselexer.get_tokens_unprocessed(self, text): if token is Name or token is Name.Class: if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \ or value in COCOA_PRIMITIVES: token = Name.Builtin.Pseudo yield index, token, value
class ActionScript3Lexer(RegexLexer): """ For ActionScript 3 source code. .. versionadded:: 0.11 """ name = 'ActionScript 3' aliases = ['as3', 'actionscript3'] filenames = ['*.as'] mimetypes = ['application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3'] identifier = r'[$a-zA-Z_]\w*' typeidentifier = identifier + r'(?:\.<\w+>)?' flags = re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'\s+', Text), (r'(function\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword.Declaration, Name.Function, Text, Operator), 'funcparams'), (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + typeidentifier + r')', bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text, Keyword.Type)), (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)', bygroups(Keyword, Text, Name.Namespace, Text)), (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()', bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex), (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' r'switch|import|include|as|is)\b', Keyword), (r'(class|public|final|internal|native|override|private|protected|' r'static|import|extends|implements|interface|intrinsic|return|super|' r'dynamic|function|const|get|namespace|package|set)\b', Keyword.Declaration), (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b', Keyword.Constant), (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|' r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|' r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|' r'unescape)\b', Name.Function), (identifier, Name), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'[~^*!%&<>|+=:;,/?\\{}\[\]().-]+', Operator), ], 'funcparams': [ (r'\s+', Text), (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' + typeidentifier + r'|\*)(\s*)', bygroups(Text, Punctuation, Name, Text, Operator, Text, Keyword.Type, Text), 'defval'), (r'\)', Operator, 'type') ], 'type': [ (r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)', bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'), (r'\s+', Text, '#pop:2'), default('#pop:2') ], 'defval': [ (r'(=)(\s*)([^(),]+)(\s*)(,?)', bygroups(Operator, Text, using(this), Text, Operator), '#pop'), (r',', Operator, '#pop'), default('#pop') ] } def analyse_text(text): if re.match(r'\w+\s*:\s*\w', text): return 0.3 return 0
class BibTeXLexer(ExtendedRegexLexer): """ A lexer for BibTeX bibliography data format. .. versionadded:: 2.2 """ name = 'BibTeX' aliases = ['bib', 'bibtex'] filenames = ['*.bib'] mimetypes = ["text/x-bibtex"] flags = re.IGNORECASE ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~' IDENTIFIER = '[{}][{}]*'.format('a-z_' + ALLOWED_CHARS, r'\w' + ALLOWED_CHARS) def open_brace_callback(self, match, ctx): opening_brace = match.group() ctx.opening_brace = opening_brace yield match.start(), Punctuation, opening_brace ctx.pos = match.end() def close_brace_callback(self, match, ctx): closing_brace = match.group() if (ctx.opening_brace == '{' and closing_brace != '}' or ctx.opening_brace == '(' and closing_brace != ')'): yield match.start(), Error, closing_brace else: yield match.start(), Punctuation, closing_brace del ctx.opening_brace ctx.pos = match.end() tokens = { 'root': [ include('whitespace'), ('@comment', Comment), ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')), ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')), ('@' + IDENTIFIER, Name.Class, ('closing-brace', 'command-body', 'opening-brace')), ('.+', Comment), ], 'opening-brace': [ include('whitespace'), (r'[{(]', open_brace_callback, '#pop'), ], 'closing-brace': [ include('whitespace'), (r'[})]', close_brace_callback, '#pop'), ], 'command-body': [ include('whitespace'), (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')), ], 'fields': [ include('whitespace'), (',', Punctuation, 'field'), default('#pop'), ], 'field': [ include('whitespace'), (IDENTIFIER, Name.Attribute, ('value', '=')), default('#pop'), ], '=': [ include('whitespace'), ('=', Punctuation, '#pop'), ], 'value': [ include('whitespace'), (IDENTIFIER, Name.Variable), ('"', String, 'quoted-string'), (r'\{', String, 'braced-string'), (r'[\d]+', Number), ('#', Punctuation), default('#pop'), ], 'quoted-string': [ (r'\{', String, 'braced-string'), ('"', String, '#pop'), (r'[^\{\"]+', String), ], 'braced-string': [ (r'\{', String, '#push'), (r'\}', String, '#pop'), (r'[^\{\}]+', String), ], 'whitespace': [ (r'\s+', Text), ], }
class AdlLexer(AtomsLexer): """ Lexer for ADL syntax. .. versionadded:: 2.1 """ name = 'ADL' aliases = ['adl'] filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx'] tokens = { 'whitespace': [ # blank line ends (r'\s*\n', Text), # comment-only line (r'^[ \t]*--.*$', Comment), ], 'odin_section': [ # repeating the following two rules from the root state enable multi-line # strings that start in the first column to be dealt with (r'^(language|description|ontology|terminology|annotations|' r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading), (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)), (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)), # template overlay delimiter (r'^----------*\n', Text, '#pop'), (r'^.*\n', String), default('#pop'), ], 'cadl_section': [ (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)), default('#pop'), ], 'rules_section': [ (r'^[ \t]+.*\n', using(CadlLexer)), default('#pop'), ], 'metadata': [ (r'\)', Punctuation, '#pop'), (r';', Punctuation), (r'([Tt]rue|[Ff]alse)', Literal), # numbers and version ids (r'\d+(\.\d+)*', Literal), # Guids (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal), (r'\w+', Name.Class), (r'"', String, 'string'), (r'=', Operator), (r'[ \t]+', Text), default('#pop'), ], 'root': [ (r'^(archetype|template_overlay|operational_template|template|' r'speciali[sz]e)', Generic.Heading), (r'^(language|description|ontology|terminology|annotations|' r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading, 'odin_section'), (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'), include('archetype_id'), (r'[ \t]*\(', Punctuation, 'metadata'), include('whitespace'), ], }
class HaxeLexer(ExtendedRegexLexer): """ For Haxe source code (http://haxe.org/). .. versionadded:: 1.3 """ name = 'Haxe' aliases = ['hx', 'haxe', 'hxsl'] filenames = ['*.hx', '*.hxsl'] mimetypes = ['text/haxe', 'text/x-haxe', 'text/x-hx'] # keywords extracted from lexer.mll in the haxe compiler source keyword = (r'(?:function|class|static|var|if|else|while|do|for|' r'break|return|continue|extends|implements|import|' r'switch|case|default|public|private|try|untyped|' r'catch|new|this|throw|extern|enum|in|interface|' r'cast|override|dynamic|typedef|package|' r'inline|using|null|true|false|abstract)\b') # idtype in lexer.mll typeid = r'_*[A-Z]\w*' # combined ident and dollar and idtype ident = r'(?:_*[a-z]\w*|_+[0-9]\w*|' + typeid + r'|_+|\$\w+)' binop = (r'(?:%=|&=|\|=|\^=|\+=|\-=|\*=|/=|<<=|>\s*>\s*=|>\s*>\s*>\s*=|==|' r'!=|<=|>\s*=|&&|\|\||<<|>>>|>\s*>|\.\.\.|<|>|%|&|\||\^|\+|\*|' r'/|\-|=>|=)') # ident except keywords ident_no_keyword = r'(?!' + keyword + ')' + ident flags = re.DOTALL | re.MULTILINE preproc_stack = [] def preproc_callback(self, match, ctx): proc = match.group(2) if proc == 'if': # store the current stack self.preproc_stack.append(ctx.stack[:]) elif proc in ['else', 'elseif']: # restore the stack back to right before #if if self.preproc_stack: ctx.stack = self.preproc_stack[-1][:] elif proc == 'end': # remove the saved stack of previous #if if self.preproc_stack: self.preproc_stack.pop() # #if and #elseif should follow by an expr if proc in ['if', 'elseif']: ctx.stack.append('preproc-expr') # #error can be optionally follow by the error msg if proc in ['error']: ctx.stack.append('preproc-error') yield match.start(), Comment.Preproc, u'#' + proc ctx.pos = match.end() tokens = { 'root': [ include('spaces'), include('meta'), (r'(?:package)\b', Keyword.Namespace, ('semicolon', 'package')), (r'(?:import)\b', Keyword.Namespace, ('semicolon', 'import')), (r'(?:using)\b', Keyword.Namespace, ('semicolon', 'using')), (r'(?:extern|private)\b', Keyword.Declaration), (r'(?:abstract)\b', Keyword.Declaration, 'abstract'), (r'(?:class|interface)\b', Keyword.Declaration, 'class'), (r'(?:enum)\b', Keyword.Declaration, 'enum'), (r'(?:typedef)\b', Keyword.Declaration, 'typedef'), # top-level expression # although it is not supported in haxe, but it is common to write # expression in web pages the positive lookahead here is to prevent # an infinite loop at the EOF (r'(?=.)', Text, 'expr-statement'), ], # space/tab/comment/preproc 'spaces': [ (r'\s+', Text), (r'//[^\n\r]*', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), (r'(#)(if|elseif|else|end|error)\b', preproc_callback), ], 'string-single-interpol': [ (r'\$\{', String.Interpol, ('string-interpol-close', 'expr')), (r'\$\$', String.Escape), (r'\$(?=' + ident + ')', String.Interpol, 'ident'), include('string-single'), ], 'string-single': [ (r"'", String.Single, '#pop'), (r'\\.', String.Escape), (r'.', String.Single), ], 'string-double': [ (r'"', String.Double, '#pop'), (r'\\.', String.Escape), (r'.', String.Double), ], 'string-interpol-close': [ (r'\$'+ident, String.Interpol), (r'\}', String.Interpol, '#pop'), ], 'package': [ include('spaces'), (ident, Name.Namespace), (r'\.', Punctuation, 'import-ident'), default('#pop'), ], 'import': [ include('spaces'), (ident, Name.Namespace), (r'\*', Keyword), # wildcard import (r'\.', Punctuation, 'import-ident'), (r'in', Keyword.Namespace, 'ident'), default('#pop'), ], 'import-ident': [ include('spaces'), (r'\*', Keyword, '#pop'), # wildcard import (ident, Name.Namespace, '#pop'), ], 'using': [ include('spaces'), (ident, Name.Namespace), (r'\.', Punctuation, 'import-ident'), default('#pop'), ], 'preproc-error': [ (r'\s+', Comment.Preproc), (r"'", String.Single, ('#pop', 'string-single')), (r'"', String.Double, ('#pop', 'string-double')), default('#pop'), ], 'preproc-expr': [ (r'\s+', Comment.Preproc), (r'\!', Comment.Preproc), (r'\(', Comment.Preproc, ('#pop', 'preproc-parenthesis')), (ident, Comment.Preproc, '#pop'), # Float (r'\.[0-9]+', Number.Float), (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float), (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float), (r'[0-9]+\.[0-9]+', Number.Float), (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float), # Int (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), # String (r"'", String.Single, ('#pop', 'string-single')), (r'"', String.Double, ('#pop', 'string-double')), ], 'preproc-parenthesis': [ (r'\s+', Comment.Preproc), (r'\)', Comment.Preproc, '#pop'), default('preproc-expr-in-parenthesis'), ], 'preproc-expr-chain': [ (r'\s+', Comment.Preproc), (binop, Comment.Preproc, ('#pop', 'preproc-expr-in-parenthesis')), default('#pop'), ], # same as 'preproc-expr' but able to chain 'preproc-expr-chain' 'preproc-expr-in-parenthesis': [ (r'\s+', Comment.Preproc), (r'\!', Comment.Preproc), (r'\(', Comment.Preproc, ('#pop', 'preproc-expr-chain', 'preproc-parenthesis')), (ident, Comment.Preproc, ('#pop', 'preproc-expr-chain')), # Float (r'\.[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')), (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')), (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')), (r'[0-9]+\.[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')), (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, ('#pop', 'preproc-expr-chain')), # Int (r'0x[0-9a-fA-F]+', Number.Hex, ('#pop', 'preproc-expr-chain')), (r'[0-9]+', Number.Integer, ('#pop', 'preproc-expr-chain')), # String (r"'", String.Single, ('#pop', 'preproc-expr-chain', 'string-single')), (r'"', String.Double, ('#pop', 'preproc-expr-chain', 'string-double')), ], 'abstract': [ include('spaces'), default(('#pop', 'abstract-body', 'abstract-relation', 'abstract-opaque', 'type-param-constraint', 'type-name')), ], 'abstract-body': [ include('spaces'), (r'\{', Punctuation, ('#pop', 'class-body')), ], 'abstract-opaque': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'type')), default('#pop'), ], 'abstract-relation': [ include('spaces'), (r'(?:to|from)', Keyword.Declaration, 'type'), (r',', Punctuation), default('#pop'), ], 'meta': [ include('spaces'), (r'@', Name.Decorator, ('meta-body', 'meta-ident', 'meta-colon')), ], # optional colon 'meta-colon': [ include('spaces'), (r':', Name.Decorator, '#pop'), default('#pop'), ], # same as 'ident' but set token as Name.Decorator instead of Name 'meta-ident': [ include('spaces'), (ident, Name.Decorator, '#pop'), ], 'meta-body': [ include('spaces'), (r'\(', Name.Decorator, ('#pop', 'meta-call')), default('#pop'), ], 'meta-call': [ include('spaces'), (r'\)', Name.Decorator, '#pop'), default(('#pop', 'meta-call-sep', 'expr')), ], 'meta-call-sep': [ include('spaces'), (r'\)', Name.Decorator, '#pop'), (r',', Punctuation, ('#pop', 'meta-call')), ], 'typedef': [ include('spaces'), default(('#pop', 'typedef-body', 'type-param-constraint', 'type-name')), ], 'typedef-body': [ include('spaces'), (r'=', Operator, ('#pop', 'optional-semicolon', 'type')), ], 'enum': [ include('spaces'), default(('#pop', 'enum-body', 'bracket-open', 'type-param-constraint', 'type-name')), ], 'enum-body': [ include('spaces'), include('meta'), (r'\}', Punctuation, '#pop'), (ident_no_keyword, Name, ('enum-member', 'type-param-constraint')), ], 'enum-member': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'semicolon', 'flag', 'function-param')), default(('#pop', 'semicolon', 'flag')), ], 'class': [ include('spaces'), default(('#pop', 'class-body', 'bracket-open', 'extends', 'type-param-constraint', 'type-name')), ], 'extends': [ include('spaces'), (r'(?:extends|implements)\b', Keyword.Declaration, 'type'), (r',', Punctuation), # the comma is made optional here, since haxe2 # requires the comma but haxe3 does not allow it default('#pop'), ], 'bracket-open': [ include('spaces'), (r'\{', Punctuation, '#pop'), ], 'bracket-close': [ include('spaces'), (r'\}', Punctuation, '#pop'), ], 'class-body': [ include('spaces'), include('meta'), (r'\}', Punctuation, '#pop'), (r'(?:static|public|private|override|dynamic|inline|macro)\b', Keyword.Declaration), default('class-member'), ], 'class-member': [ include('spaces'), (r'(var)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'var')), (r'(function)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'class-method')), ], # local function, anonymous or not 'function-local': [ include('spaces'), (ident_no_keyword, Name.Function, ('#pop', 'optional-expr', 'flag', 'function-param', 'parenthesis-open', 'type-param-constraint')), default(('#pop', 'optional-expr', 'flag', 'function-param', 'parenthesis-open', 'type-param-constraint')), ], 'optional-expr': [ include('spaces'), include('expr'), default('#pop'), ], 'class-method': [ include('spaces'), (ident, Name.Function, ('#pop', 'optional-expr', 'flag', 'function-param', 'parenthesis-open', 'type-param-constraint')), ], # function arguments 'function-param': [ include('spaces'), (r'\)', Punctuation, '#pop'), (r'\?', Punctuation), (ident_no_keyword, Name, ('#pop', 'function-param-sep', 'assign', 'flag')), ], 'function-param-sep': [ include('spaces'), (r'\)', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'function-param')), ], 'prop-get-set': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'prop-get-set-opt', 'comma', 'prop-get-set-opt')), default('#pop'), ], 'prop-get-set-opt': [ include('spaces'), (r'(?:default|null|never|dynamic|get|set)\b', Keyword, '#pop'), (ident_no_keyword, Text, '#pop'), # custom getter/setter ], 'expr-statement': [ include('spaces'), # makes semicolon optional here, just to avoid checking the last # one is bracket or not. default(('#pop', 'optional-semicolon', 'expr')), ], 'expr': [ include('spaces'), (r'@', Name.Decorator, ('#pop', 'optional-expr', 'meta-body', 'meta-ident', 'meta-colon')), (r'(?:\+\+|\-\-|~(?!/)|!|\-)', Operator), (r'\(', Punctuation, ('#pop', 'expr-chain', 'parenthesis')), (r'(?:static|public|private|override|dynamic|inline)\b', Keyword.Declaration), (r'(?:function)\b', Keyword.Declaration, ('#pop', 'expr-chain', 'function-local')), (r'\{', Punctuation, ('#pop', 'expr-chain', 'bracket')), (r'(?:true|false|null)\b', Keyword.Constant, ('#pop', 'expr-chain')), (r'(?:this)\b', Keyword, ('#pop', 'expr-chain')), (r'(?:cast)\b', Keyword, ('#pop', 'expr-chain', 'cast')), (r'(?:try)\b', Keyword, ('#pop', 'catch', 'expr')), (r'(?:var)\b', Keyword.Declaration, ('#pop', 'var')), (r'(?:new)\b', Keyword, ('#pop', 'expr-chain', 'new')), (r'(?:switch)\b', Keyword, ('#pop', 'switch')), (r'(?:if)\b', Keyword, ('#pop', 'if')), (r'(?:do)\b', Keyword, ('#pop', 'do')), (r'(?:while)\b', Keyword, ('#pop', 'while')), (r'(?:for)\b', Keyword, ('#pop', 'for')), (r'(?:untyped|throw)\b', Keyword), (r'(?:return)\b', Keyword, ('#pop', 'optional-expr')), (r'(?:macro)\b', Keyword, ('#pop', 'macro')), (r'(?:continue|break)\b', Keyword, '#pop'), (r'(?:\$\s*[a-z]\b|\$(?!'+ident+'))', Name, ('#pop', 'dollar')), (ident_no_keyword, Name, ('#pop', 'expr-chain')), # Float (r'\.[0-9]+', Number.Float, ('#pop', 'expr-chain')), (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'expr-chain')), (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'expr-chain')), (r'[0-9]+\.[0-9]+', Number.Float, ('#pop', 'expr-chain')), (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, ('#pop', 'expr-chain')), # Int (r'0x[0-9a-fA-F]+', Number.Hex, ('#pop', 'expr-chain')), (r'[0-9]+', Number.Integer, ('#pop', 'expr-chain')), # String (r"'", String.Single, ('#pop', 'expr-chain', 'string-single-interpol')), (r'"', String.Double, ('#pop', 'expr-chain', 'string-double')), # EReg (r'~/(\\\\|\\/|[^/\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')), # Array (r'\[', Punctuation, ('#pop', 'expr-chain', 'array-decl')), ], 'expr-chain': [ include('spaces'), (r'(?:\+\+|\-\-)', Operator), (binop, Operator, ('#pop', 'expr')), (r'(?:in)\b', Keyword, ('#pop', 'expr')), (r'\?', Operator, ('#pop', 'expr', 'ternary', 'expr')), (r'(\.)(' + ident_no_keyword + ')', bygroups(Punctuation, Name)), (r'\[', Punctuation, 'array-access'), (r'\(', Punctuation, 'call'), default('#pop'), ], # macro reification 'macro': [ include('spaces'), include('meta'), (r':', Punctuation, ('#pop', 'type')), (r'(?:extern|private)\b', Keyword.Declaration), (r'(?:abstract)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'abstract')), (r'(?:class|interface)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'macro-class')), (r'(?:enum)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'enum')), (r'(?:typedef)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'typedef')), default(('#pop', 'expr')), ], 'macro-class': [ (r'\{', Punctuation, ('#pop', 'class-body')), include('class') ], # cast can be written as "cast expr" or "cast(expr, type)" 'cast': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'cast-type', 'expr')), default(('#pop', 'expr')), ], # optionally give a type as the 2nd argument of cast() 'cast-type': [ include('spaces'), (r',', Punctuation, ('#pop', 'type')), default('#pop'), ], 'catch': [ include('spaces'), (r'(?:catch)\b', Keyword, ('expr', 'function-param', 'parenthesis-open')), default('#pop'), ], # do-while loop 'do': [ include('spaces'), default(('#pop', 'do-while', 'expr')), ], # the while after do 'do-while': [ include('spaces'), (r'(?:while)\b', Keyword, ('#pop', 'parenthesis', 'parenthesis-open')), ], 'while': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'expr', 'parenthesis')), ], 'for': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'expr', 'parenthesis')), ], 'if': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'else', 'optional-semicolon', 'expr', 'parenthesis')), ], 'else': [ include('spaces'), (r'(?:else)\b', Keyword, ('#pop', 'expr')), default('#pop'), ], 'switch': [ include('spaces'), default(('#pop', 'switch-body', 'bracket-open', 'expr')), ], 'switch-body': [ include('spaces'), (r'(?:case|default)\b', Keyword, ('case-block', 'case')), (r'\}', Punctuation, '#pop'), ], 'case': [ include('spaces'), (r':', Punctuation, '#pop'), default(('#pop', 'case-sep', 'case-guard', 'expr')), ], 'case-sep': [ include('spaces'), (r':', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'case')), ], 'case-guard': [ include('spaces'), (r'(?:if)\b', Keyword, ('#pop', 'parenthesis', 'parenthesis-open')), default('#pop'), ], # optional multiple expr under a case 'case-block': [ include('spaces'), (r'(?!(?:case|default)\b|\})', Keyword, 'expr-statement'), default('#pop'), ], 'new': [ include('spaces'), default(('#pop', 'call', 'parenthesis-open', 'type')), ], 'array-decl': [ include('spaces'), (r'\]', Punctuation, '#pop'), default(('#pop', 'array-decl-sep', 'expr')), ], 'array-decl-sep': [ include('spaces'), (r'\]', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'array-decl')), ], 'array-access': [ include('spaces'), default(('#pop', 'array-access-close', 'expr')), ], 'array-access-close': [ include('spaces'), (r'\]', Punctuation, '#pop'), ], 'comma': [ include('spaces'), (r',', Punctuation, '#pop'), ], 'colon': [ include('spaces'), (r':', Punctuation, '#pop'), ], 'semicolon': [ include('spaces'), (r';', Punctuation, '#pop'), ], 'optional-semicolon': [ include('spaces'), (r';', Punctuation, '#pop'), default('#pop'), ], # identity that CAN be a Haxe keyword 'ident': [ include('spaces'), (ident, Name, '#pop'), ], 'dollar': [ include('spaces'), (r'\{', Punctuation, ('#pop', 'expr-chain', 'bracket-close', 'expr')), default(('#pop', 'expr-chain')), ], 'type-name': [ include('spaces'), (typeid, Name, '#pop'), ], 'type-full-name': [ include('spaces'), (r'\.', Punctuation, 'ident'), default('#pop'), ], 'type': [ include('spaces'), (r'\?', Punctuation), (ident, Name, ('#pop', 'type-check', 'type-full-name')), (r'\{', Punctuation, ('#pop', 'type-check', 'type-struct')), (r'\(', Punctuation, ('#pop', 'type-check', 'type-parenthesis')), ], 'type-parenthesis': [ include('spaces'), default(('#pop', 'parenthesis-close', 'type')), ], 'type-check': [ include('spaces'), (r'->', Punctuation, ('#pop', 'type')), (r'<(?!=)', Punctuation, 'type-param'), default('#pop'), ], 'type-struct': [ include('spaces'), (r'\}', Punctuation, '#pop'), (r'\?', Punctuation), (r'>', Punctuation, ('comma', 'type')), (ident_no_keyword, Name, ('#pop', 'type-struct-sep', 'type', 'colon')), include('class-body'), ], 'type-struct-sep': [ include('spaces'), (r'\}', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'type-struct')), ], # type-param can be a normal type or a constant literal... 'type-param-type': [ # Float (r'\.[0-9]+', Number.Float, '#pop'), (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, '#pop'), (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, '#pop'), (r'[0-9]+\.[0-9]+', Number.Float, '#pop'), (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, '#pop'), # Int (r'0x[0-9a-fA-F]+', Number.Hex, '#pop'), (r'[0-9]+', Number.Integer, '#pop'), # String (r"'", String.Single, ('#pop', 'string-single')), (r'"', String.Double, ('#pop', 'string-double')), # EReg (r'~/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex, '#pop'), # Array (r'\[', Operator, ('#pop', 'array-decl')), include('type'), ], # type-param part of a type # ie. the <A,B> path in Map<A,B> 'type-param': [ include('spaces'), default(('#pop', 'type-param-sep', 'type-param-type')), ], 'type-param-sep': [ include('spaces'), (r'>', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'type-param')), ], # optional type-param that may include constraint # ie. <T:Constraint, T2:(ConstraintA,ConstraintB)> 'type-param-constraint': [ include('spaces'), (r'<(?!=)', Punctuation, ('#pop', 'type-param-constraint-sep', 'type-param-constraint-flag', 'type-name')), default('#pop'), ], 'type-param-constraint-sep': [ include('spaces'), (r'>', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'type-param-constraint-sep', 'type-param-constraint-flag', 'type-name')), ], # the optional constraint inside type-param 'type-param-constraint-flag': [ include('spaces'), (r':', Punctuation, ('#pop', 'type-param-constraint-flag-type')), default('#pop'), ], 'type-param-constraint-flag-type': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'type-param-constraint-flag-type-sep', 'type')), default(('#pop', 'type')), ], 'type-param-constraint-flag-type-sep': [ include('spaces'), (r'\)', Punctuation, '#pop'), (r',', Punctuation, 'type'), ], # a parenthesis expr that contain exactly one expr 'parenthesis': [ include('spaces'), default(('#pop', 'parenthesis-close', 'flag', 'expr')), ], 'parenthesis-open': [ include('spaces'), (r'\(', Punctuation, '#pop'), ], 'parenthesis-close': [ include('spaces'), (r'\)', Punctuation, '#pop'), ], 'var': [ include('spaces'), (ident_no_keyword, Text, ('#pop', 'var-sep', 'assign', 'flag', 'prop-get-set')), ], # optional more var decl. 'var-sep': [ include('spaces'), (r',', Punctuation, ('#pop', 'var')), default('#pop'), ], # optional assignment 'assign': [ include('spaces'), (r'=', Operator, ('#pop', 'expr')), default('#pop'), ], # optional type flag 'flag': [ include('spaces'), (r':', Punctuation, ('#pop', 'type')), default('#pop'), ], # colon as part of a ternary operator (?:) 'ternary': [ include('spaces'), (r':', Operator, '#pop'), ], # function call 'call': [ include('spaces'), (r'\)', Punctuation, '#pop'), default(('#pop', 'call-sep', 'expr')), ], # after a call param 'call-sep': [ include('spaces'), (r'\)', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'call')), ], # bracket can be block or object 'bracket': [ include('spaces'), (r'(?!(?:\$\s*[a-z]\b|\$(?!'+ident+')))' + ident_no_keyword, Name, ('#pop', 'bracket-check')), (r"'", String.Single, ('#pop', 'bracket-check', 'string-single')), (r'"', String.Double, ('#pop', 'bracket-check', 'string-double')), default(('#pop', 'block')), ], 'bracket-check': [ include('spaces'), (r':', Punctuation, ('#pop', 'object-sep', 'expr')), # is object default(('#pop', 'block', 'optional-semicolon', 'expr-chain')), # is block ], # code block 'block': [ include('spaces'), (r'\}', Punctuation, '#pop'), default('expr-statement'), ], # object in key-value pairs 'object': [ include('spaces'), (r'\}', Punctuation, '#pop'), default(('#pop', 'object-sep', 'expr', 'colon', 'ident-or-string')) ], # a key of an object 'ident-or-string': [ include('spaces'), (ident_no_keyword, Name, '#pop'), (r"'", String.Single, ('#pop', 'string-single')), (r'"', String.Double, ('#pop', 'string-double')), ], # after a key-value pair in object 'object-sep': [ include('spaces'), (r'\}', Punctuation, '#pop'), (r',', Punctuation, ('#pop', 'object')), ], } def analyse_text(text): if re.match(r'\w+\s*:\s*\w', text): return 0.3
class CoqLexer(RegexLexer): """ For the `Coq <http://coq.inria.fr/>`_ theorem prover. .. versionadded:: 1.5 """ name = 'Coq' aliases = ['coq'] filenames = ['*.v'] mimetypes = ['text/x-coq'] keywords1 = ( # Vernacular commands 'Section', 'Module', 'End', 'Require', 'Import', 'Export', 'Variable', 'Variables', 'Parameter', 'Parameters', 'Axiom', 'Hypothesis', 'Hypotheses', 'Notation', 'Local', 'Tactic', 'Reserved', 'Scope', 'Open', 'Close', 'Bind', 'Delimit', 'Definition', 'Let', 'Ltac', 'Fixpoint', 'CoFixpoint', 'Morphism', 'Relation', 'Implicit', 'Arguments', 'Set', 'Unset', 'Contextual', 'Strict', 'Prenex', 'Implicits', 'Inductive', 'CoInductive', 'Record', 'Structure', 'Canonical', 'Coercion', 'Theorem', 'Lemma', 'Corollary', 'Proposition', 'Fact', 'Remark', 'Example', 'Proof', 'Goal', 'Save', 'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite', 'View', 'Search', 'Show', 'Print', 'Printing', 'All', 'Graph', 'Projections', 'inside', 'outside', 'Check', 'Global', 'Instance', 'Class', 'Existing', 'Universe', 'Polymorphic', 'Monomorphic', 'Context') keywords2 = ( # Gallina 'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct', 'match', 'end', 'in', 'return', 'let', 'if', 'is', 'then', 'else', 'for', 'of', 'nosimpl', 'with', 'as', ) keywords3 = ( # Sorts 'Type', 'Prop', ) keywords4 = ( # Tactics 'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf', 'intro', 'intros', 'generalize', 'rename', 'pattern', 'after', 'destruct', 'induction', 'using', 'refine', 'inversion', 'injection', 'rewrite', 'congr', 'unlock', 'compute', 'ring', 'field', 'replace', 'fold', 'unfold', 'change', 'cutrewrite', 'simpl', 'have', 'suff', 'wlog', 'suffices', 'without', 'loss', 'nat_norm', 'assert', 'cut', 'trivial', 'revert', 'bool_congr', 'nat_congr', 'symmetry', 'transitivity', 'auto', 'split', 'left', 'right', 'autorewrite', 'tauto', 'setoid_rewrite', 'intuition', 'eauto', 'eapply', 'econstructor', 'etransitivity', 'constructor', 'erewrite', 'red', 'cbv', 'lazy', 'vm_compute', 'native_compute', 'subst', ) keywords5 = ( # Terminators 'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega', 'assumption', 'solve', 'contradiction', 'discriminate', 'congruence', ) keywords6 = ( # Control 'do', 'last', 'first', 'try', 'idtac', 'repeat', ) # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', # 'downto', 'else', 'end', 'exception', 'external', 'false', # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include', # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method', # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private', # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try', # 'type', 'val', 'virtual', 'when', 'while', 'with' keyopts = ( '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-', r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<', '<-', '<->', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>', r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~', '=>', r'/\\', r'\\/', r'\{\|', r'\|\}', u'Π', u'λ', ) operators = r'[!$%&*+\./:<=>?@^|~-]' prefix_syms = r'[!?~]' infix_syms = r'[=<>@^|&+\*/$%-]' tokens = { 'root': [ (r'\s+', Text), (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), (r'\(\*', Comment, 'comment'), (words(keywords1, prefix=r'\b', suffix=r'\b'), Keyword.Namespace), (words(keywords2, prefix=r'\b', suffix=r'\b'), Keyword), (words(keywords3, prefix=r'\b', suffix=r'\b'), Keyword.Type), (words(keywords4, prefix=r'\b', suffix=r'\b'), Keyword), (words(keywords5, prefix=r'\b', suffix=r'\b'), Keyword.Pseudo), (words(keywords6, prefix=r'\b', suffix=r'\b'), Keyword.Reserved), # (r'\b([A-Z][\w\']*)(\.)', Name.Namespace, 'dotted'), (r'\b([A-Z][\w\']*)', Name), (r'(%s)' % '|'.join(keyopts[::-1]), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r"[^\W\d][\w']*", Name), (r'\d[\d_]*', Number.Integer), (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), (r'0[oO][0-7][0-7_]*', Number.Oct), (r'0[bB][01][01_]*', Number.Bin), (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", String.Char), (r"'.'", String.Char), (r"'", Keyword), # a stray quote is another syntax element (r'"', String.Double, 'string'), (r'[~?][a-z][\w\']*:', Name), ], 'comment': [ (r'[^(*)]+', Comment), (r'\(\*', Comment, '#push'), (r'\*\)', Comment, '#pop'), (r'[(*)]', Comment), ], 'string': [ (r'[^"]+', String.Double), (r'""', String.Double), (r'"', String.Double, '#pop'), ], 'dotted': [(r'\s+', Text), (r'\.', Punctuation), (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), (r'[A-Z][\w\']*', Name.Class, '#pop'), (r'[a-z][a-z0-9_\']*', Name, '#pop'), default('#pop')], } def analyse_text(text): if text.startswith('(*'): return True
class PostScriptLexer(RegexLexer): """ Lexer for PostScript files. The PostScript Language Reference published by Adobe at <http://partners.adobe.com/public/developer/en/ps/PLRM.pdf> is the authority for this. .. versionadded:: 1.4 """ name = 'PostScript' aliases = ['postscript', 'postscr'] filenames = ['*.ps', '*.eps'] mimetypes = ['application/postscript'] delimiter = r'()<>\[\]{}/%\s' delimiter_end = r'(?=[%s])' % delimiter valid_name_chars = r'[^%s]' % delimiter valid_name = r"%s+%s" % (valid_name_chars, delimiter_end) tokens = { 'root': [ # All comment types (r'^%!.+\n', Comment.Preproc), (r'%%.*\n', Comment.Special), (r'(^%.*\n){2,}', Comment.Multiline), (r'%.*\n', Comment.Single), # String literals are awkward; enter separate state. (r'\(', String, 'stringliteral'), (r'[{}<>\[\]]', Punctuation), # Numbers (r'<[0-9A-Fa-f]+>' + delimiter_end, Number.Hex), # Slight abuse: use Oct to signify any explicit base system (r'[0-9]+\#(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)' r'((e|E)[0-9]+)?' + delimiter_end, Number.Oct), (r'(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)((e|E)[0-9]+)?' + delimiter_end, Number.Float), (r'(\-|\+)?[0-9]+' + delimiter_end, Number.Integer), # References (r'\/%s' % valid_name, Name.Variable), # Names (valid_name, Name.Function), # Anything else is executed # These keywords taken from # <http://www.math.ubc.ca/~cass/graphics/manual/pdf/a1.pdf> # Is there an authoritative list anywhere that doesn't involve # trawling documentation? (r'(false|true)' + delimiter_end, Keyword.Constant), # Conditionals / flow control (r'(eq|ne|g[et]|l[et]|and|or|not|if(?:else)?|for(?:all)?)' + delimiter_end, Keyword.Reserved), (words( ('abs', 'add', 'aload', 'arc', 'arcn', 'array', 'atan', 'begin', 'bind', 'ceiling', 'charpath', 'clip', 'closepath', 'concat', 'concatmatrix', 'copy', 'cos', 'currentlinewidth', 'currentmatrix', 'currentpoint', 'curveto', 'cvi', 'cvs', 'def', 'defaultmatrix', 'dict', 'dictstackoverflow', 'div', 'dtransform', 'dup', 'end', 'exch', 'exec', 'exit', 'exp', 'fill', 'findfont', 'floor', 'get', 'getinterval', 'grestore', 'gsave', 'gt', 'identmatrix', 'idiv', 'idtransform', 'index', 'invertmatrix', 'itransform', 'length', 'lineto', 'ln', 'load', 'log', 'loop', 'matrix', 'mod', 'moveto', 'mul', 'neg', 'newpath', 'pathforall', 'pathbbox', 'pop', 'print', 'pstack', 'put', 'quit', 'rand', 'rangecheck', 'rcurveto', 'repeat', 'restore', 'rlineto', 'rmoveto', 'roll', 'rotate', 'round', 'run', 'save', 'scale', 'scalefont', 'setdash', 'setfont', 'setgray', 'setlinecap', 'setlinejoin', 'setlinewidth', 'setmatrix', 'setrgbcolor', 'shfill', 'show', 'showpage', 'sin', 'sqrt', 'stack', 'stringwidth', 'stroke', 'strokepath', 'sub', 'syntaxerror', 'transform', 'translate', 'truncate', 'typecheck', 'undefined', 'undefinedfilename', 'undefinedresult'), suffix=delimiter_end), Name.Builtin), (r'\s+', Text), ], 'stringliteral': [ (r'[^()\\]+', String), (r'\\', String.Escape, 'escape'), (r'\(', String, '#push'), (r'\)', String, '#pop'), ], 'escape': [ (r'[0-8]{3}|n|r|t|b|f|\\|\(|\)', String.Escape, '#pop'), default('#pop'), ], }
class CrystalLexer(ExtendedRegexLexer): """ For `Crystal <http://crystal-lang.org>`_ source code. .. versionadded:: 2.2 """ name = 'Crystal' aliases = ['cr', 'crystal'] filenames = ['*.cr'] mimetypes = ['text/x-crystal'] flags = re.DOTALL | re.MULTILINE def heredoc_callback(self, match, ctx): # okay, this is the hardest part of parsing Crystal... # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line start = match.start(1) yield start, Operator, match.group(1) # <<-? yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` yield match.start(3), String.Delimiter, match.group(3) # heredoc name yield match.start(4), String.Heredoc, match.group(4) # quote again heredocstack = ctx.__dict__.setdefault('heredocstack', []) outermost = not bool(heredocstack) heredocstack.append((match.group(1) == '<<-', match.group(3))) ctx.pos = match.start(5) ctx.end = match.end(5) # this may find other heredocs for i, t, v in self.get_tokens_unprocessed(context=ctx): yield i, t, v ctx.pos = match.end() if outermost: # this is the outer heredoc again, now we can process them all for tolerant, hdname in heredocstack: lines = [] for match in line_re.finditer(ctx.text, ctx.pos): if tolerant: check = match.group().strip() else: check = match.group().rstrip() if check == hdname: for amatch in lines: yield amatch.start(), String.Heredoc, amatch.group( ) yield match.start(), String.Delimiter, match.group() ctx.pos = match.end() break else: lines.append(match) else: # end of heredoc not found -- error! for amatch in lines: yield amatch.start(), Error, amatch.group() ctx.end = len(ctx.text) del heredocstack[:] def gen_crystalstrings_rules(): def intp_regex_callback(self, match, ctx): yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ['interpolated-regex']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Regex, match.group(4) # end[imsx]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): yield match.start(1), String.Other, match.group(1) nctx = LexerContext(match.group(3), 0, ['interpolated-string']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states['strings'] = [ (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol), (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), # This allows arbitrary text after '\ for simplicity (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char), (r':"', String.Symbol, 'simple-sym'), # Crystal doesn't have "symbol:"s but this simplifies function args (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), ] # double-quoted string and symbol for name, ttype, end in ('string', String.Double, '"'), \ ('sym', String.Symbol, '"'), \ ('backtick', String.Backtick, '`'): states['simple-' + name] = [ include('string-escaped' if name == 'sym' else 'string-intp-escaped'), (r'[^\\%s#]+' % end, ttype), (r'[\\#]', ttype), (end, ttype, '#pop'), ] # braced quoted strings for lbrace, rbrace, bracecc, name in \ ('\\{', '\\}', '{}', 'cb'), \ ('\\[', '\\]', '\\[\\]', 'sb'), \ ('\\(', '\\)', '()', 'pa'), \ ('<', '>', '<>', 'ab'): states[name + '-intp-string'] = [ (r'\\[' + lbrace + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), include('string-intp-escaped'), (r'[\\#' + bracecc + ']', String.Other), (r'[^\\#' + bracecc + ']+', String.Other), ] states['strings'].append( (r'%' + lbrace, String.Other, name + '-intp-string')) states[name + '-string'] = [ (r'\\[\\' + bracecc + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), (r'[\\#' + bracecc + ']', String.Other), (r'[^\\#' + bracecc + ']+', String.Other), ] # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html states['strings'].append( (r'%[wi]' + lbrace, String.Other, name + '-string')) states[name + '-regex'] = [ (r'\\[\\' + bracecc + ']', String.Regex), (lbrace, String.Regex, '#push'), (rbrace + '[imsx]*', String.Regex, '#pop'), include('string-intp'), (r'[\\#' + bracecc + ']', String.Regex), (r'[^\\#' + bracecc + ']+', String.Regex), ] states['strings'].append( (r'%r' + lbrace, String.Regex, name + '-regex')) # these must come after %<brace>! states['strings'] += [ # %r regex (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)', intp_regex_callback ), # regular fancy strings with qsw (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # and because of fixed width lookbehinds the whole thing a # second time for line startings... (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # all regular fancy strings without qsw (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), ] return states tokens = { 'root': [ (r'#.*?$', Comment.Single), # keywords (words(''' abstract asm as begin break case do else elsif end ensure extend ifdef if include instance_sizeof next of pointerof private protected rescue return require sizeof super then typeof unless until when while with yield '''.split(), suffix=r'\b'), Keyword), (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant), # start of function, class and module names (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)', bygroups(Keyword, Text, Name.Namespace)), (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)', bygroups(Keyword, Text, Name.Namespace), 'funcname'), (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'), (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)', bygroups(Keyword, Text, Name.Namespace), 'classname'), (r'(self|out|uninitialized)\b|(is_a|responds_to)\?', Keyword.Pseudo), # macros (words(''' debugger record pp assert_responds_to spawn parallel getter setter property delegate def_hash def_equals def_equals_and_hash forward_missing_to '''.split(), suffix=r'\b'), Name.Builtin.Pseudo), (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b', Name.Builtin.Pseudo), # builtins # http://crystal-lang.org/api/toplevel.html (words(''' Object Value Struct Reference Proc Class Nil Symbol Enum Void Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float Float32 Float64 Char String Pointer Slice Range Exception Regex Mutex StaticArray Array Hash Set Tuple Deque Box Process File Dir Time Channel Concurrent Scheduler abort at_exit caller delay exit fork future get_stack_top gets lazy loop main p print printf puts raise rand read_line sleep sprintf system with_color '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin), # normal heredocs (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)', heredoc_callback), # empty string heredocs (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback), (r'__END__', Comment.Preproc, 'end-part'), # multiline regex (after keywords or assignments) (r'(?:^|(?<=[=<>~!:])|' r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' r'(?<=(?:\s|;)and\s)|' r'(?<=\.index\s)|' r'(?<=\.scan\s)|' r'(?<=\.sub\s)|' r'(?<=\.sub!\s)|' r'(?<=\.gsub\s)|' r'(?<=\.gsub!\s)|' r'(?<=\.match\s)|' r'(?<=(?:\s|;)if\s)|' r'(?<=(?:\s|;)elsif\s)|' r'(?<=^when\s)|' r'(?<=^index\s)|' r'(?<=^scan\s)|' r'(?<=^sub\s)|' r'(?<=^gsub\s)|' r'(?<=^sub!\s)|' r'(?<=^gsub!\s)|' r'(?<=^match\s)|' r'(?<=^if\s)|' r'(?<=^elsif\s)' r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'), # multiline regex (in method calls or subscripts) (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'), # multiline regex (this time the funny no whitespace rule) (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex), 'multiline-regex'), # lex numbers and ignore following regular expressions which # are division operators in fact (grrrr. i hate that. any # better ideas?) # since pygments 0.7 we also eat a "?" operator after numbers # so that the char operator does not work. Chars are not allowed # there so that you can use the ternary operator. # stupid example: # x>=0?n[x]:"" (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?', bygroups(Number.Oct, Text, Operator)), (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?', bygroups(Number.Hex, Text, Operator)), (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?', bygroups(Number.Bin, Text, Operator)), # 3 separate expressions for floats because any of the 3 optional # parts makes it a float (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?' r'(?:_?f[0-9]+)?)(\s*)([/?])?', bygroups(Number.Float, Text, Operator)), (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)' r'(?:_?f[0-9]+)?)(\s*)([/?])?', bygroups(Number.Float, Text, Operator)), (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?' r'(?:_?f[0-9]+))(\s*)([/?])?', bygroups(Number.Float, Text, Operator)), (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?', bygroups(Number.Integer, Text, Operator)), # Names (r'@@[a-zA-Z_]\w*', Name.Variable.Class), (r'@[a-zA-Z_]\w*', Name.Variable.Instance), (r'\$\w+', Name.Variable.Global), (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global), (r'\$-[0adFiIlpvw]', Name.Variable.Global), (r'::', Operator), include('strings'), # chars ( r'\?(\\[MC]-)*' # modifiers r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)' r'(?!\w)', String.Char), (r'[A-Z][A-Z_]+\b', Name.Constant), # macro expansion (r'\{%', String.Interpol, 'in-macro-control'), (r'\{\{', String.Interpol, 'in-macro-expr'), # attributes (r'(@\[)(\s*)([A-Z]\w*)', bygroups(Operator, Text, Name.Decorator), 'in-attr'), # this is needed because Crystal attributes can look # like keywords (class) or like this: ` ?!? (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'), bygroups(Operator, Name.Operator)), (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])', bygroups(Operator, Name)), # Names can end with [!?] unless it's "!=" (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name), (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|' r'!~|&&?|\|\||\.{1,3})', Operator), (r'[-+/*%=<>&!^|~]=?', Operator), (r'[(){};,/?:\\]', Punctuation), (r'\s+', Text) ], 'funcname': [(r'(?:([a-zA-Z_]\w*)(\.))?' r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), default('#pop')], 'classname': [(r'[A-Z_]\w*', Name.Class), (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))', bygroups(Punctuation, Text, Name.Class, Text, Punctuation)), default('#pop')], 'in-intp': [ (r'\{', String.Interpol, '#push'), (r'\}', String.Interpol, '#pop'), include('root'), ], 'string-intp': [ (r'#\{', String.Interpol, 'in-intp'), ], 'string-escaped': [(r'\\([\\befnstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)], 'string-intp-escaped': [ include('string-intp'), include('string-escaped'), ], 'interpolated-regex': [ include('string-intp'), (r'[\\#]', String.Regex), (r'[^\\#]+', String.Regex), ], 'interpolated-string': [ include('string-intp'), (r'[\\#]', String.Other), (r'[^\\#]+', String.Other), ], 'multiline-regex': [ include('string-intp'), (r'\\\\', String.Regex), (r'\\/', String.Regex), (r'[\\#]', String.Regex), (r'[^\\/#]+', String.Regex), (r'/[imsx]*', String.Regex, '#pop'), ], 'end-part': [(r'.+', Comment.Preproc, '#pop')], 'in-macro-control': [ (r'\{%', String.Interpol, '#push'), (r'%\}', String.Interpol, '#pop'), (r'for\b|in\b', Keyword), include('root'), ], 'in-macro-expr': [ (r'\{\{', String.Interpol, '#push'), (r'\}\}', String.Interpol, '#pop'), include('root'), ], 'in-attr': [ (r'\[', Operator, '#push'), (r'\]', Operator, '#pop'), include('root'), ], } tokens.update(gen_crystalstrings_rules())
class AsymptoteLexer(RegexLexer): """ For `Asymptote <http://asymptote.sf.net/>`_ source code. .. versionadded:: 1.2 """ name = 'Asymptote' aliases = ['asy', 'asymptote'] filenames = ['*.asy'] mimetypes = ['text/x-asymptote'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+' tokens = { 'whitespace': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment), (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment), ], 'statements': [ # simple string (TeX friendly) (r'"(\\\\|\\"|[^"])*"', String), # C style string (with character escapes) (r"'", String, 'string'), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)), (r'(and|controls|tension|atleast|curl|if|else|while|for|do|' r'return|break|continue|struct|typedef|new|access|import|' r'unravel|from|include|quote|static|public|private|restricted|' r'this|explicit|true|false|null|cycle|newframe|operator)\b', Keyword), # Since an asy-type-name can be also an asy-function-name, # in the following we test if the string " [a-zA-Z]" follows # the Keyword.Type. # Of course it is not perfect ! (r'(Braid|FitResult|Label|Legend|TreeNode|abscissa|arc|arrowhead|' r'binarytree|binarytreeNode|block|bool|bool3|bounds|bqe|circle|' r'conic|coord|coordsys|cputime|ellipse|file|filltype|frame|grid3|' r'guide|horner|hsv|hyperbola|indexedTransform|int|inversion|key|' r'light|line|linefit|marginT|marker|mass|object|pair|parabola|path|' r'path3|pen|picture|point|position|projection|real|revolution|' r'scaleT|scientific|segment|side|slice|splitface|string|surface|' r'tensionSpecifier|ticklocate|ticksgridT|tickvalues|transform|' r'transformation|tree|triangle|trilinear|triple|vector|' r'vertex|void)(?=\s+[a-zA-Z])', Keyword.Type), # Now the asy-type-name which are not asy-function-name # except yours ! # Perhaps useless (r'(Braid|FitResult|TreeNode|abscissa|arrowhead|block|bool|bool3|' r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|' r'picture|position|real|revolution|slice|splitface|ticksgridT|' r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type), (r'[a-zA-Z_]\w*:(?!:)', Name.Label), (r'[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|\*))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|\*))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r"'", String, '#pop'), (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'\n', String), (r"[^\\'\n]+", String), # all other characters (r'\\\n', String), (r'\\n', String), # line continuation (r'\\', String), # stray backslash ], } def get_tokens_unprocessed(self, text): from testflows._core.contrib.pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name and value in ASYFUNCNAME: token = Name.Function elif token is Name and value in ASYVARNAME: token = Name.Variable yield index, token, value
class Perl6Lexer(ExtendedRegexLexer): """ For `Perl 6 <http://www.perl6.org>`_ source code. .. versionadded:: 2.0 """ name = 'Perl6' aliases = ['perl6', 'pl6'] filenames = [ '*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t' ] mimetypes = ['text/x-perl6', 'application/x-perl6'] flags = re.MULTILINE | re.DOTALL | re.UNICODE PERL6_IDENTIFIER_RANGE = r"['\w:-]" PERL6_KEYWORDS = ( 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', 'KEEP', 'LAST', 'LEAVE', 'NEXT', 'POST', 'PRE', 'START', 'TEMP', 'UNDO', 'as', 'assoc', 'async', 'augment', 'binary', 'break', 'but', 'cached', 'category', 'class', 'constant', 'contend', 'continue', 'copy', 'deep', 'default', 'defequiv', 'defer', 'die', 'do', 'else', 'elsif', 'enum', 'equiv', 'exit', 'export', 'fail', 'fatal', 'for', 'gather', 'given', 'goto', 'grammar', 'handles', 'has', 'if', 'inline', 'irs', 'is', 'last', 'leave', 'let', 'lift', 'loop', 'looser', 'macro', 'make', 'maybe', 'method', 'module', 'multi', 'my', 'next', 'of', 'ofs', 'only', 'oo', 'ors', 'our', 'package', 'parsed', 'prec', 'proto', 'readonly', 'redo', 'ref', 'regex', 'reparsed', 'repeat', 'require', 'required', 'return', 'returns', 'role', 'rule', 'rw', 'self', 'slang', 'state', 'sub', 'submethod', 'subset', 'supersede', 'take', 'temp', 'tighter', 'token', 'trusts', 'try', 'unary', 'unless', 'until', 'use', 'warn', 'when', 'where', 'while', 'will', ) PERL6_BUILTINS = ( 'ACCEPTS', 'HOW', 'REJECTS', 'VAR', 'WHAT', 'WHENCE', 'WHERE', 'WHICH', 'WHO', 'abs', 'acos', 'acosec', 'acosech', 'acosh', 'acotan', 'acotanh', 'all', 'any', 'approx', 'arity', 'asec', 'asech', 'asin', 'asinh', 'assuming', 'atan', 'atan2', 'atanh', 'attr', 'bless', 'body', 'by', 'bytes', 'caller', 'callsame', 'callwith', 'can', 'capitalize', 'cat', 'ceiling', 'chars', 'chmod', 'chomp', 'chop', 'chr', 'chroot', 'circumfix', 'cis', 'classify', 'clone', 'close', 'cmp_ok', 'codes', 'comb', 'connect', 'contains', 'context', 'cos', 'cosec', 'cosech', 'cosh', 'cotan', 'cotanh', 'count', 'defined', 'delete', 'diag', 'dies_ok', 'does', 'e', 'each', 'eager', 'elems', 'end', 'eof', 'eval', 'eval_dies_ok', 'eval_elsewhere', 'eval_lives_ok', 'evalfile', 'exists', 'exp', 'first', 'flip', 'floor', 'flunk', 'flush', 'fmt', 'force_todo', 'fork', 'from', 'getc', 'gethost', 'getlogin', 'getpeername', 'getpw', 'gmtime', 'graphs', 'grep', 'hints', 'hyper', 'im', 'index', 'infix', 'invert', 'is_approx', 'is_deeply', 'isa', 'isa_ok', 'isnt', 'iterator', 'join', 'key', 'keys', 'kill', 'kv', 'lastcall', 'lazy', 'lc', 'lcfirst', 'like', 'lines', 'link', 'lives_ok', 'localtime', 'log', 'log10', 'map', 'max', 'min', 'minmax', 'name', 'new', 'nextsame', 'nextwith', 'nfc', 'nfd', 'nfkc', 'nfkd', 'nok_error', 'nonce', 'none', 'normalize', 'not', 'nothing', 'ok', 'once', 'one', 'open', 'opendir', 'operator', 'ord', 'p5chomp', 'p5chop', 'pack', 'pair', 'pairs', 'pass', 'perl', 'pi', 'pick', 'plan', 'plan_ok', 'polar', 'pop', 'pos', 'postcircumfix', 'postfix', 'pred', 'prefix', 'print', 'printf', 'push', 'quasi', 'quotemeta', 'rand', 're', 'read', 'readdir', 'readline', 'reduce', 'reverse', 'rewind', 'rewinddir', 'rindex', 'roots', 'round', 'roundrobin', 'run', 'runinstead', 'sameaccent', 'samecase', 'say', 'sec', 'sech', 'sech', 'seek', 'shape', 'shift', 'sign', 'signature', 'sin', 'sinh', 'skip', 'skip_rest', 'sleep', 'slurp', 'sort', 'splice', 'split', 'sprintf', 'sqrt', 'srand', 'strand', 'subst', 'substr', 'succ', 'sum', 'symlink', 'tan', 'tanh', 'throws_ok', 'time', 'times', 'to', 'todo', 'trim', 'trim_end', 'trim_start', 'true', 'truncate', 'uc', 'ucfirst', 'undef', 'undefine', 'uniq', 'unlike', 'unlink', 'unpack', 'unpolar', 'unshift', 'unwrap', 'use_ok', 'value', 'values', 'vec', 'version_lt', 'void', 'wait', 'want', 'wrap', 'write', 'zip', ) PERL6_BUILTIN_CLASSES = ( 'Abstraction', 'Any', 'AnyChar', 'Array', 'Associative', 'Bag', 'Bit', 'Blob', 'Block', 'Bool', 'Buf', 'Byte', 'Callable', 'Capture', 'Char', 'Class', 'Code', 'Codepoint', 'Comparator', 'Complex', 'Decreasing', 'Exception', 'Failure', 'False', 'Grammar', 'Grapheme', 'Hash', 'IO', 'Increasing', 'Int', 'Junction', 'KeyBag', 'KeyExtractor', 'KeyHash', 'KeySet', 'KitchenSink', 'List', 'Macro', 'Mapping', 'Match', 'Matcher', 'Method', 'Module', 'Num', 'Object', 'Ordered', 'Ordering', 'OrderingPair', 'Package', 'Pair', 'Positional', 'Proxy', 'Range', 'Rat', 'Regex', 'Role', 'Routine', 'Scalar', 'Seq', 'Set', 'Signature', 'Str', 'StrLen', 'StrPos', 'Sub', 'Submethod', 'True', 'UInt', 'Undef', 'Version', 'Void', 'Whatever', 'bit', 'bool', 'buf', 'buf1', 'buf16', 'buf2', 'buf32', 'buf4', 'buf64', 'buf8', 'complex', 'int', 'int1', 'int16', 'int2', 'int32', 'int4', 'int64', 'int8', 'num', 'rat', 'rat1', 'rat16', 'rat2', 'rat32', 'rat4', 'rat64', 'rat8', 'uint', 'uint1', 'uint16', 'uint2', 'uint32', 'uint4', 'uint64', 'uint8', 'utf16', 'utf32', 'utf8', ) PERL6_OPERATORS = ( 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^', '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', 'not', '<==', '==>', '<<==', '==>>', ) # Perl 6 has a *lot* of possible bracketing characters # this list was lifted from STD.pm6 (https://github.com/perl6/std) PERL6_BRACKETS = { u'\u0028': u'\u0029', u'\u003c': u'\u003e', u'\u005b': u'\u005d', u'\u007b': u'\u007d', u'\u00ab': u'\u00bb', u'\u0f3a': u'\u0f3b', u'\u0f3c': u'\u0f3d', u'\u169b': u'\u169c', u'\u2018': u'\u2019', u'\u201a': u'\u2019', u'\u201b': u'\u2019', u'\u201c': u'\u201d', u'\u201e': u'\u201d', u'\u201f': u'\u201d', u'\u2039': u'\u203a', u'\u2045': u'\u2046', u'\u207d': u'\u207e', u'\u208d': u'\u208e', u'\u2208': u'\u220b', u'\u2209': u'\u220c', u'\u220a': u'\u220d', u'\u2215': u'\u29f5', u'\u223c': u'\u223d', u'\u2243': u'\u22cd', u'\u2252': u'\u2253', u'\u2254': u'\u2255', u'\u2264': u'\u2265', u'\u2266': u'\u2267', u'\u2268': u'\u2269', u'\u226a': u'\u226b', u'\u226e': u'\u226f', u'\u2270': u'\u2271', u'\u2272': u'\u2273', u'\u2274': u'\u2275', u'\u2276': u'\u2277', u'\u2278': u'\u2279', u'\u227a': u'\u227b', u'\u227c': u'\u227d', u'\u227e': u'\u227f', u'\u2280': u'\u2281', u'\u2282': u'\u2283', u'\u2284': u'\u2285', u'\u2286': u'\u2287', u'\u2288': u'\u2289', u'\u228a': u'\u228b', u'\u228f': u'\u2290', u'\u2291': u'\u2292', u'\u2298': u'\u29b8', u'\u22a2': u'\u22a3', u'\u22a6': u'\u2ade', u'\u22a8': u'\u2ae4', u'\u22a9': u'\u2ae3', u'\u22ab': u'\u2ae5', u'\u22b0': u'\u22b1', u'\u22b2': u'\u22b3', u'\u22b4': u'\u22b5', u'\u22b6': u'\u22b7', u'\u22c9': u'\u22ca', u'\u22cb': u'\u22cc', u'\u22d0': u'\u22d1', u'\u22d6': u'\u22d7', u'\u22d8': u'\u22d9', u'\u22da': u'\u22db', u'\u22dc': u'\u22dd', u'\u22de': u'\u22df', u'\u22e0': u'\u22e1', u'\u22e2': u'\u22e3', u'\u22e4': u'\u22e5', u'\u22e6': u'\u22e7', u'\u22e8': u'\u22e9', u'\u22ea': u'\u22eb', u'\u22ec': u'\u22ed', u'\u22f0': u'\u22f1', u'\u22f2': u'\u22fa', u'\u22f3': u'\u22fb', u'\u22f4': u'\u22fc', u'\u22f6': u'\u22fd', u'\u22f7': u'\u22fe', u'\u2308': u'\u2309', u'\u230a': u'\u230b', u'\u2329': u'\u232a', u'\u23b4': u'\u23b5', u'\u2768': u'\u2769', u'\u276a': u'\u276b', u'\u276c': u'\u276d', u'\u276e': u'\u276f', u'\u2770': u'\u2771', u'\u2772': u'\u2773', u'\u2774': u'\u2775', u'\u27c3': u'\u27c4', u'\u27c5': u'\u27c6', u'\u27d5': u'\u27d6', u'\u27dd': u'\u27de', u'\u27e2': u'\u27e3', u'\u27e4': u'\u27e5', u'\u27e6': u'\u27e7', u'\u27e8': u'\u27e9', u'\u27ea': u'\u27eb', u'\u2983': u'\u2984', u'\u2985': u'\u2986', u'\u2987': u'\u2988', u'\u2989': u'\u298a', u'\u298b': u'\u298c', u'\u298d': u'\u298e', u'\u298f': u'\u2990', u'\u2991': u'\u2992', u'\u2993': u'\u2994', u'\u2995': u'\u2996', u'\u2997': u'\u2998', u'\u29c0': u'\u29c1', u'\u29c4': u'\u29c5', u'\u29cf': u'\u29d0', u'\u29d1': u'\u29d2', u'\u29d4': u'\u29d5', u'\u29d8': u'\u29d9', u'\u29da': u'\u29db', u'\u29f8': u'\u29f9', u'\u29fc': u'\u29fd', u'\u2a2b': u'\u2a2c', u'\u2a2d': u'\u2a2e', u'\u2a34': u'\u2a35', u'\u2a3c': u'\u2a3d', u'\u2a64': u'\u2a65', u'\u2a79': u'\u2a7a', u'\u2a7d': u'\u2a7e', u'\u2a7f': u'\u2a80', u'\u2a81': u'\u2a82', u'\u2a83': u'\u2a84', u'\u2a8b': u'\u2a8c', u'\u2a91': u'\u2a92', u'\u2a93': u'\u2a94', u'\u2a95': u'\u2a96', u'\u2a97': u'\u2a98', u'\u2a99': u'\u2a9a', u'\u2a9b': u'\u2a9c', u'\u2aa1': u'\u2aa2', u'\u2aa6': u'\u2aa7', u'\u2aa8': u'\u2aa9', u'\u2aaa': u'\u2aab', u'\u2aac': u'\u2aad', u'\u2aaf': u'\u2ab0', u'\u2ab3': u'\u2ab4', u'\u2abb': u'\u2abc', u'\u2abd': u'\u2abe', u'\u2abf': u'\u2ac0', u'\u2ac1': u'\u2ac2', u'\u2ac3': u'\u2ac4', u'\u2ac5': u'\u2ac6', u'\u2acd': u'\u2ace', u'\u2acf': u'\u2ad0', u'\u2ad1': u'\u2ad2', u'\u2ad3': u'\u2ad4', u'\u2ad5': u'\u2ad6', u'\u2aec': u'\u2aed', u'\u2af7': u'\u2af8', u'\u2af9': u'\u2afa', u'\u2e02': u'\u2e03', u'\u2e04': u'\u2e05', u'\u2e09': u'\u2e0a', u'\u2e0c': u'\u2e0d', u'\u2e1c': u'\u2e1d', u'\u2e20': u'\u2e21', u'\u3008': u'\u3009', u'\u300a': u'\u300b', u'\u300c': u'\u300d', u'\u300e': u'\u300f', u'\u3010': u'\u3011', u'\u3014': u'\u3015', u'\u3016': u'\u3017', u'\u3018': u'\u3019', u'\u301a': u'\u301b', u'\u301d': u'\u301e', u'\ufd3e': u'\ufd3f', u'\ufe17': u'\ufe18', u'\ufe35': u'\ufe36', u'\ufe37': u'\ufe38', u'\ufe39': u'\ufe3a', u'\ufe3b': u'\ufe3c', u'\ufe3d': u'\ufe3e', u'\ufe3f': u'\ufe40', u'\ufe41': u'\ufe42', u'\ufe43': u'\ufe44', u'\ufe47': u'\ufe48', u'\ufe59': u'\ufe5a', u'\ufe5b': u'\ufe5c', u'\ufe5d': u'\ufe5e', u'\uff08': u'\uff09', u'\uff1c': u'\uff1e', u'\uff3b': u'\uff3d', u'\uff5b': u'\uff5d', u'\uff5f': u'\uff60', u'\uff62': u'\uff63', } def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''): if boundary_regex_fragment is None: return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \ suffix + r')\b' else: return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \ r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \ boundary_regex_fragment + r')' def brackets_callback(token_class): def callback(lexer, match, context): groups = match.groupdict() opening_chars = groups['delimiter'] n_chars = len(opening_chars) adverbs = groups.get('adverbs') closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0]) text = context.text if closer is None: # it's not a mirrored character, which means we # just need to look for the next occurrence end_pos = text.find(opening_chars, match.start('delimiter') + n_chars) else: # we need to look for the corresponding closing character, # keep nesting in mind closing_chars = closer * n_chars nesting_level = 1 search_pos = match.start('delimiter') while nesting_level > 0: next_open_pos = text.find(opening_chars, search_pos + n_chars) next_close_pos = text.find(closing_chars, search_pos + n_chars) if next_close_pos == -1: next_close_pos = len(text) nesting_level = 0 elif next_open_pos != -1 and next_open_pos < next_close_pos: nesting_level += 1 search_pos = next_open_pos else: # next_close_pos < next_open_pos nesting_level -= 1 search_pos = next_close_pos end_pos = next_close_pos if end_pos < 0: # if we didn't find a closer, just highlight the # rest of the text in this class end_pos = len(text) if adverbs is not None and re.search(r':to\b', adverbs): heredoc_terminator = text[match.start('delimiter') + n_chars:end_pos] end_heredoc = re.search( r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[end_pos:], re.MULTILINE) if end_heredoc: end_pos += end_heredoc.end() else: end_pos = len(text) yield match.start(), token_class, text[match.start():end_pos + n_chars] context.pos = end_pos + n_chars return callback def opening_brace_callback(lexer, match, context): stack = context.stack yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() # if we encounter an opening brace and we're one level # below a token state, it means we need to increment # the nesting level for braces so we know later when # we should return to the token rules. if len(stack) > 2 and stack[-2] == 'token': context.perl6_token_nesting_level += 1 def closing_brace_callback(lexer, match, context): stack = context.stack yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() # if we encounter a free closing brace and we're one level # below a token state, it means we need to check the nesting # level to see if we need to return to the token state. if len(stack) > 2 and stack[-2] == 'token': context.perl6_token_nesting_level -= 1 if context.perl6_token_nesting_level == 0: stack.pop() def embedded_perl6_callback(lexer, match, context): context.perl6_token_nesting_level = 1 yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() context.stack.append('root') # If you're modifying these rules, be careful if you need to process '{' or '}' # characters. We have special logic for processing these characters (due to the fact # that you can nest Perl 6 code in regex blocks), so if you need to process one of # them, make sure you also process the corresponding one! tokens = { 'common': [ (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), (r'#[^\n]*$', Comment.Single), (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), (r'^=.*?\n\s*?\n', Comment.Multiline), (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), # deal with a special case in the Perl 6 grammar (role q { ... }) (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword), (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'), Name.Builtin), (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), # copied from PerlLexer (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'::\?\w+', Name.Variable.Global), (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'\$(?:<.*?>)+', Name.Variable), (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])' r'(?P=first_char)*)', brackets_callback(String)), # copied from PerlLexer (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), (r'0b[01]+(_[01]+)*', Number.Bin), (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), (r'\d+(_\d+)*', Number.Integer), (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'm\w+(?=\()', Name), (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])' r'(?P=first_char)*)', brackets_callback(String.Regex)), (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), (r'<[^\s=].*?\S>', String), (_build_word_match(PERL6_OPERATORS), Operator), (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name), (r"'(\\\\|\\[^\\]|[^'\\])*'", String), (r'"(\\\\|\\[^\\]|[^"\\])*"', String), ], 'root': [ include('common'), (r'\{', opening_brace_callback), (r'\}', closing_brace_callback), (r'.+?', Text), ], 'pre-token': [ include('common'), (r'\{', Text, ('#pop', 'token')), (r'.+?', Text), ], 'token-sym-brackets': [ (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')), default(('#pop', 'pre-token')), ], 'token': [ (r'\}', Text, '#pop'), (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)), # make sure that quotes in character classes aren't treated as strings (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex), # make sure that '#' characters in quotes aren't treated as comments (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex), (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex), (r'#.*?$', Comment.Single), (r'\{', embedded_perl6_callback), ('.+?', String.Regex), ], } def analyse_text(text): def strip_pod(lines): in_pod = False stripped_lines = [] for line in lines: if re.match(r'^=(?:end|cut)', line): in_pod = False elif re.match(r'^=\w+', line): in_pod = True elif not in_pod: stripped_lines.append(line) return stripped_lines # XXX handle block comments lines = text.splitlines() lines = strip_pod(lines) text = '\n'.join(lines) if shebang_matches(text, r'perl6|rakudo|niecza|pugs'): return True saw_perl_decl = False rating = False # check for my/our/has declarations if re.search( r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE + r"+\s+)?[$@%&(]", text): rating = 0.8 saw_perl_decl = True for line in lines: line = re.sub('#.*', '', line) if re.match(r'^\s*$', line): continue # match v6; use v6; use v6.0; use v6.0.0; if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line): return True # match class, module, role, enum, grammar declarations class_decl = re.match( r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line) if class_decl: if saw_perl_decl or class_decl.group('scope') is not None: return True rating = 0.05 continue break return rating def __init__(self, **options): super(Perl6Lexer, self).__init__(**options) self.encoding = options.get('encoding', 'utf-8')
class GnuplotLexer(RegexLexer): """ For `Gnuplot <http://gnuplot.info/>`_ plotting scripts. .. versionadded:: 0.11 """ name = 'Gnuplot' aliases = ['gnuplot'] filenames = ['*.plot', '*.plt'] mimetypes = ['text/x-gnuplot'] tokens = { 'root': [ include('whitespace'), (_shortened('bi$nd'), Keyword, 'bind'), (_shortened_many('ex$it', 'q$uit'), Keyword, 'quit'), (_shortened('f$it'), Keyword, 'fit'), (r'(if)(\s*)(\()', bygroups(Keyword, Text, Punctuation), 'if'), (r'else\b', Keyword), (_shortened('pa$use'), Keyword, 'pause'), (_shortened_many('p$lot', 'rep$lot', 'sp$lot'), Keyword, 'plot'), (_shortened('sa$ve'), Keyword, 'save'), (_shortened('se$t'), Keyword, ('genericargs', 'optionarg')), (_shortened_many('sh$ow', 'uns$et'), Keyword, ('noargs', 'optionarg')), (_shortened_many('low$er', 'ra$ise', 'ca$ll', 'cd$', 'cl$ear', 'h$elp', '\\?$', 'hi$story', 'l$oad', 'pr$int', 'pwd$', 're$read', 'res$et', 'scr$eendump', 'she$ll', 'sy$stem', 'up$date'), Keyword, 'genericargs'), (_shortened_many('pwd$', 're$read', 'res$et', 'scr$eendump', 'she$ll', 'test$'), Keyword, 'noargs'), (r'([a-zA-Z_]\w*)(\s*)(=)', bygroups(Name.Variable, Text, Operator), 'genericargs'), (r'([a-zA-Z_]\w*)(\s*\(.*?\)\s*)(=)', bygroups(Name.Function, Text, Operator), 'genericargs'), (r'@[a-zA-Z_]\w*', Name.Constant), # macros (r';', Keyword), ], 'comment': [ (r'[^\\\n]', Comment), (r'\\\n', Comment), (r'\\', Comment), # don't add the newline to the Comment token default('#pop'), ], 'whitespace': [ ('#', Comment, 'comment'), (r'[ \t\v\f]+', Text), ], 'noargs': [ include('whitespace'), # semicolon and newline end the argument list (r';', Punctuation, '#pop'), (r'\n', Text, '#pop'), ], 'dqstring': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash (r'\n', String, '#pop'), # newline ends the string too ], 'sqstring': [ (r"''", String), # escaped single quote (r"'", String, '#pop'), (r"[^\\'\n]+", String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # normal backslash (r'\n', String, '#pop'), # newline ends the string too ], 'genericargs': [ include('noargs'), (r'"', String, 'dqstring'), (r"'", String, 'sqstring'), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float), (r'(\d+\.\d*|\.\d+)', Number.Float), (r'-?\d+', Number.Integer), ('[,.~!%^&*+=|?:<>/-]', Operator), (r'[{}()\[\]]', Punctuation), (r'(eq|ne)\b', Operator.Word), (r'([a-zA-Z_]\w*)(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[a-zA-Z_]\w*', Name), (r'@[a-zA-Z_]\w*', Name.Constant), # macros (r'\\\n', Text), ], 'optionarg': [ include('whitespace'), (_shortened_many( "a$ll", "an$gles", "ar$row", "au$toscale", "b$ars", "bor$der", "box$width", "cl$abel", "c$lip", "cn$trparam", "co$ntour", "da$ta", "data$file", "dg$rid3d", "du$mmy", "enc$oding", "dec$imalsign", "fit$", "font$path", "fo$rmat", "fu$nction", "fu$nctions", "g$rid", "hid$den3d", "his$torysize", "is$osamples", "k$ey", "keyt$itle", "la$bel", "li$nestyle", "ls$", "loa$dpath", "loc$ale", "log$scale", "mac$ros", "map$ping", "map$ping3d", "mar$gin", "lmar$gin", "rmar$gin", "tmar$gin", "bmar$gin", "mo$use", "multi$plot", "mxt$ics", "nomxt$ics", "mx2t$ics", "nomx2t$ics", "myt$ics", "nomyt$ics", "my2t$ics", "nomy2t$ics", "mzt$ics", "nomzt$ics", "mcbt$ics", "nomcbt$ics", "of$fsets", "or$igin", "o$utput", "pa$rametric", "pm$3d", "pal$ette", "colorb$ox", "p$lot", "poi$ntsize", "pol$ar", "pr$int", "obj$ect", "sa$mples", "si$ze", "st$yle", "su$rface", "table$", "t$erminal", "termo$ptions", "ti$cs", "ticsc$ale", "ticsl$evel", "timef$mt", "tim$estamp", "tit$le", "v$ariables", "ve$rsion", "vi$ew", "xyp$lane", "xda$ta", "x2da$ta", "yda$ta", "y2da$ta", "zda$ta", "cbda$ta", "xl$abel", "x2l$abel", "yl$abel", "y2l$abel", "zl$abel", "cbl$abel", "xti$cs", "noxti$cs", "x2ti$cs", "nox2ti$cs", "yti$cs", "noyti$cs", "y2ti$cs", "noy2ti$cs", "zti$cs", "nozti$cs", "cbti$cs", "nocbti$cs", "xdti$cs", "noxdti$cs", "x2dti$cs", "nox2dti$cs", "ydti$cs", "noydti$cs", "y2dti$cs", "noy2dti$cs", "zdti$cs", "nozdti$cs", "cbdti$cs", "nocbdti$cs", "xmti$cs", "noxmti$cs", "x2mti$cs", "nox2mti$cs", "ymti$cs", "noymti$cs", "y2mti$cs", "noy2mti$cs", "zmti$cs", "nozmti$cs", "cbmti$cs", "nocbmti$cs", "xr$ange", "x2r$ange", "yr$ange", "y2r$ange", "zr$ange", "cbr$ange", "rr$ange", "tr$ange", "ur$ange", "vr$ange", "xzeroa$xis", "x2zeroa$xis", "yzeroa$xis", "y2zeroa$xis", "zzeroa$xis", "zeroa$xis", "z$ero"), Name.Builtin, '#pop'), ], 'bind': [ ('!', Keyword, '#pop'), (_shortened('all$windows'), Name.Builtin), include('genericargs'), ], 'quit': [ (r'gnuplot\b', Keyword), include('noargs'), ], 'fit': [ (r'via\b', Name.Builtin), include('plot'), ], 'if': [ (r'\)', Punctuation, '#pop'), include('genericargs'), ], 'pause': [ (r'(mouse|any|button1|button2|button3)\b', Name.Builtin), (_shortened('key$press'), Name.Builtin), include('genericargs'), ], 'plot': [ (_shortened_many('ax$es', 'axi$s', 'bin$ary', 'ev$ery', 'i$ndex', 'mat$rix', 's$mooth', 'thru$', 't$itle', 'not$itle', 'u$sing', 'w$ith'), Name.Builtin), include('genericargs'), ], 'save': [ (_shortened_many('f$unctions', 's$et', 't$erminal', 'v$ariables'), Name.Builtin), include('genericargs'), ], }