Beispiel #1
0
class ProtoBufLexer(RegexLexer):
    """
    Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_
    definition files.

    Rules inside a state are tried in order and the first match wins, so
    the specific keyword, literal and number rules in ``root`` must stay
    ahead of the generic operator and name rules at the end of the list.

    The ``package``, ``message`` and ``type`` states each consume one
    identifier following the introducing keyword and then return to
    ``root``; if no identifier follows, they return without consuming
    anything.

    .. versionadded:: 1.4
    """

    name = 'Protocol Buffer'
    aliases = ['protobuf', 'proto']
    filenames = ['*.proto']

    tokens = {
        'root': [
            (r'[ \t]+', Text),
            (r'[,;{}\[\]()<>]', Punctuation),
            # C-style comments; a backslash-newline may split the opener.
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
            (words((
                'import', 'option', 'optional', 'required', 'repeated', 'default',
                'packed', 'ctype', 'extensions', 'to', 'max', 'rpc', 'returns',
                'oneof'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words((
                'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64',
                'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'),
             Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            # Keywords that introduce a name switch to a dedicated state so
            # that the following identifier gets a more specific token.
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'),
            (r'(message|extend)(\s+)',
             bygroups(Keyword.Declaration, Text), 'message'),
            (r'(enum|group|service)(\s+)',
             bygroups(Keyword.Declaration, Text), 'type'),
            (r'\".*?\"', String),
            (r'\'.*?\'', String),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'(\-?(inf|nan))\b', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # This rule used to be written ``[+-=]``, which is a character
            # *range* from '+' to '=' rather than the three characters it
            # appears to list.  Of the characters in that range, ',', ';',
            # '<' and the digits are always consumed by earlier rules, so
            # the explicit set below covers exactly the characters the old
            # pattern could ever match here and keeps tokenization unchanged.
            (r'[-+=./:]', Operator),
            (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
             bygroups(Name.Attribute, Text, Operator)),
            (r'[a-zA-Z_][\w.]*', Name),
        ],
        'package': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'message': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'type': [
            (r'[a-zA-Z_]\w*', Name, '#pop'),
            default('#pop'),
        ],
    }
Beispiel #2
0
class ScssLexer(RegexLexer):
    """
    For SCSS stylesheets.

    Only the SCSS-specific states are spelled out in ``tokens``; every
    state found in ``common_sass_tokens`` (defined elsewhere in this
    module) is copied in after the class-level dict has been built.
    """

    name = 'SCSS'
    aliases = ['scss']
    filenames = ['*.scss']
    mimetypes = ['text/x-scss']

    # Patterns are matched case-insensitively and ``.`` also matches newlines.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # At-rules: each one hands over to the state that lexes what
            # follows the keyword.  'value', 'selector' and 'for' are not
            # defined in this dict; presumably they come from
            # common_sass_tokens -- verify against that table.
            (r'@import', Keyword, 'value'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'),
            # Any other at-rule is treated like a selector.
            (r'@[\w-]+', Keyword, 'selector'),
            # Variable assignment: ``$name:`` followed by a value.
            (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'),
            # TODO: broken, and prone to infinite loops.
            # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
            # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
            # Nothing above matched: lex the input as a selector.
            default('selector'),
        ],

        # NOTE(review): with the two rules above commented out, no rule in
        # this dict enters 'attr'; it may still be reached from states
        # supplied by common_sass_tokens -- confirm.
        'attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*:', Operator, 'value'),
            default('#pop'),
        ],

        # Body of a ``/* ... */`` comment that may contain ``#{...}``
        # interpolation; left again at the closing ``*/``.
        'inline-comment': [
            (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Shallow-copy each shared rule list so that the extend() calls below
    # modify this lexer's own lists rather than the shared ones.
    for group, common in common_sass_tokens.items():
        tokens[group] = copy.copy(common)
    # In SCSS a value or selector may continue over a newline and is
    # terminated by ';', '{' or '}'.
    tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
    tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
Beispiel #3
0
class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    Each log line is lexed in a severity state (``debug``, ``info``,
    ``warn`` or ``error``) which decides the token used for the message
    text.  The severity comes either from an explicit level prefix in front
    of the bracketed timestamp or, if there is none, from keywords found in
    the line.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            # Lines carrying a "<something>:<level> : " prefix directly in
            # front of the '[' of the timestamp.  The lookahead leaves the
            # timestamp itself for the 'base' rules.
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            # 'notice' is shown like a warning, 'crit' like an error.
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            # No level prefix: the line starts with the timestamp.
            (r'^(?=\[)', Text, 'unknown'),
        ],
        # Guess the severity from words appearing anywhere in the line.
        # NOTE(review): none of the rules here pops 'unknown', so the '#pop'
        # in the severity states returns to 'unknown' rather than 'root'
        # when a line was entered this way -- confirm this is intended.
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        # Rules shared by all severity states.
        'base': [
            # Bracketed timestamp followed by a space.
            (r'\[[0-9. ]+\] ', Number),
            # Text right after the timestamp up to and including the first
            # colon.
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        # The remainder of the line gets the severity-specific token, then
        # the state is left again.
        'debug': [include('base'), (r'.+\n', Comment, '#pop')],
        'info': [include('base'), (r'.+\n', Text, '#pop')],
        'warn': [include('base'), (r'.+\n', Generic.Strong, '#pop')],
        'error': [include('base'), (r'.+\n', Generic.Error, '#pop')]
    }
Beispiel #4
0
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    The ``slashstartsregex`` state is entered after every token that may be
    followed by a regular expression literal, so that a following ``/`` can
    be told apart from the division operator.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    # NOTE(review): these two lists are not referenced by any rule in this
    # class; they may be used elsewhere -- verify before removing.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    # ``.`` also matches newlines; ``^`` and ``$`` match at line boundaries.
    flags = re.DOTALL | re.MULTILINE

    tokens = {
        # Whitespace plus ``//`` and ``/* */`` comments.
        'commentsandwhitespace': [(r'\s+', Text), (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        # Entered where a regex literal may start.  Always left again after
        # at most one regex or operator token.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'), (r'/', Operator, '#pop'),
            default('#pop')
        ],
        # NOTE(review): no rule in this class enters 'badregex'.
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            # At the start of a line a regex literal is possible.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            # Operators; a regex literal may follow.
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            # Closing punctuation and '.' are followed by an operand, not a
            # regex, so the state is not changed here.
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            # Identifiers may start with '$' and contain backslashes.
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
Beispiel #5
0
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    ``root`` recognises one field per line.  A few fields get dedicated
    states (``description``, ``maintainer``, ``depends``) because their
    values have an inner structure; all other fields are lexed as a plain
    ``key: value`` pair.

    .. versionadded:: 0.9
    """
    name = 'Debian Control file'
    aliases = ['control', 'debcontrol']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            # Generic fallback for every other field.
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            # Rules are tried in order and the first match wins, so the
            # end-of-line form has to come before the general form;
            # otherwise the general form always matches first and the
            # rule that leaves this state can never fire.
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r'<[^>]+>', Generic.Strong),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )(\S+)',
             bygroups(Text, String, Name, Name.Class)),
            # Short description on the field line itself.
            (r':.*\n', Generic.Strong),
            # Continuation lines start with a space.
            (r' .*\n', Text),
            default('#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            # Substitution variable such as ``${shlibs:Depends}``.
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[})]\s*$', Text, '#pop'),
            (r'\}', Text),
            # Last character of the field when it is not a comma.
            (r'[^,]$', Name.Function, '#pop'),
            # Package names are consumed one character at a time so that
            # the end-of-line rule above gets a chance at the last one.
            (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
            (r'\[.*?\]', Name.Entity),
        ],
        'depend_vers': [
            # ``),`` -- more dependencies follow, go back to 'depends'.
            (r'\),', Text, '#pop'),
            # ``)`` not followed by a comma ends the whole field.
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number))
        ]
    }
Beispiel #6
0
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.

    Text mode is handled by ``root``.  Math mode has two flavours,
    ``inlinemath`` and ``displaymath``, which differ only in their
    terminators and share the ``math`` rules.  After a control sequence
    the ``command`` state picks up an optional star and bracketed
    arguments.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # Rules valid in both text and math mode.
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # Math mode openers.
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # Control word or control symbol.
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            # Lone backslash at the end of a line.
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            # A single dollar sign does not end display math.
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return True when *text* begins with a known preamble command.

        Nothing is returned explicitly (i.e. ``None``) in every other case.
        """
        preamble_commands = (
            "\\documentclass",
            "\\input",
            "\\documentstyle",
            "\\relax",
        )
        if text.startswith(preamble_commands):
            return True
Beispiel #7
0
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    A line starting with ``.`` is a request and is lexed in the
    ``request`` state; every other line is lexed in ``textline``.  Both
    states are left again at the end of the line.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # Backslash-newline: the following line is lexed as a request.
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Estimate how likely *text* is (g)roff source.

        Returns ``False`` when the text does not start with a dot, ``True``
        when it starts with a roff comment or a ``.TH`` request, ``0.9``
        when the dot is followed by two alphanumeric characters and
        whitespace, and nothing (``None``) otherwise.
        """
        if text[:1] != '.':
            return False
        # Leading comment line: dot, backslash, double quote.
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice instead of indexing so that a text of only three characters
        # (e.g. ".ab") yields an empty string rather than an IndexError.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
Beispiel #8
0
class CirruLexer(RegexLexer):
    r"""
    Syntax rules of Cirru can be found at:
    http://cirru.org/

    * using ``()`` for expressions, but restricted in a same line
    * using ``""`` for strings, with ``\`` for escaping chars
    * using ``$`` as folding operator
    * using ``,`` as unfolding operator
    * using indentations for nested blocks

    Every line is lexed in the ``line`` state.  The ``function`` state is
    entered at the start of a line and after ``(`` or ``$`` so that the
    next word is highlighted as a function name.

    .. versionadded:: 2.0
    """

    name = 'Cirru'
    aliases = ['cirru']
    filenames = ['*.cirru']
    mimetypes = ['text/x-cirru']
    flags = re.MULTILINE

    tokens = {
        # Inside a double-quoted string; ends at the closing quote.
        'string': [
            (r'[^"\\\n]', String),
            (r'\\', String.Escape, 'escape'),
            (r'"', String, '#pop'),
        ],
        # The single character following a backslash inside a string.
        'escape': [
            (r'.', String.Escape, '#pop'),
        ],
        # Expecting the head of an expression.
        'function': [
            (r'\,', Operator, '#pop'),
            (r'[^\s"()]+', Name.Function, '#pop'),
            (r'\)', Operator, '#pop'),
            # End of line: leave without consuming the newline.
            (r'(?=\n)', Text, '#pop'),
            # Nested '(' -- another head is expected afterwards.
            (r'\(', Operator, '#push'),
            # A string in head position replaces this state.
            (r'"', String, ('#pop', 'string')),
            (r'[ ]+', Text.Whitespace),
        ],
        'line': [
            # A stand-alone '$' (not adjacent to word characters).
            (r'(?<!\w)\$(?!\w)', Operator, 'function'),
            (r'\(', Operator, 'function'),
            (r'\)', Operator),
            (r'\n', Text, '#pop'),
            (r'"', String, 'string'),
            (r'[ ]+', Text.Whitespace),
            (r'[+-]?[\d.]+\b', Number),
            (r'[^\s"()]+', Name.Variable)
        ],
        'root': [
            (r'^\n+', Text.Whitespace),
            # Push 'line' and then 'function', so 'function' is on top and
            # handles the first token of the line.
            default(('line', 'function')),
        ]
    }
Beispiel #9
0
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    The ``slashstartsregex`` state is entered after every token that may
    be followed by a regular expression literal, so that a following ``/``
    can be told apart from the division operator.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered where a regex literal may start.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A '/' that does not start a well-formed regex literal:
            # switch to 'badregex' without consuming it.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        # Only a newline is matched here, which leaves the state.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # At the start of a line a regex literal is possible.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            # Operators; a regex literal may follow.
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            # Closing punctuation and '.' do not change the state.
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Function and statement names, lexed as reserved words.
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Upper-case names lexed as builtins, including BEGIN and END.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
Beispiel #10
0
class CapnProtoLexer(RegexLexer):
    """
    For `Cap'n Proto <https://capnproto.org>`_ source.

    ``root`` switches into one of three context states depending on the
    introducing character: ``type`` after ``:``, ``expression`` after ``=``
    and ``annotation`` after ``$``.  Each of them has a companion state
    (``parentype``, ``parenexp``, ``annexp``) that tracks nested brackets
    and parentheses.

    .. versionadded:: 2.2
    """
    name = 'Cap\'n Proto'
    filenames = ['*.capnp']
    aliases = ['capnp']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # '@' followed by optional alphanumerics.
            (r'@[0-9a-zA-Z]*', Name.Decorator),
            (r'=', Literal, 'expression'),
            (r':', Name.Class, 'type'),
            (r'\$', Name.Attribute, 'annotation'),
            (r'(struct|enum|interface|union|import|using|const|annotation|'
             r'extends|in|of|on|as|with|from|fixed)\b',
             Keyword),
            (r'[\w.]+', Name),
            (r'[^#@=:$\w]+', Text),
        ],
        # Everything up to a delimiter is part of the type name.
        'type': [
            (r'[^][=;,(){}$]+', Name.Class),
            (r'[\[(]', Name.Class, 'parentype'),
            default('#pop'),
        ],
        # Inside brackets/parentheses of a type; '#push' handles nesting.
        'parentype': [
            (r'[^][;()]+', Name.Class),
            (r'[\[(]', Name.Class, '#push'),
            (r'[])]', Name.Class, '#pop'),
            default('#pop'),
        ],
        # Right-hand side of '='.
        'expression': [
            (r'[^][;,(){}$]+', Literal),
            (r'[\[(]', Literal, 'parenexp'),
            default('#pop'),
        ],
        'parenexp': [
            (r'[^][;()]+', Literal),
            (r'[\[(]', Literal, '#push'),
            (r'[])]', Literal, '#pop'),
            default('#pop'),
        ],
        # Text following '$'.
        'annotation': [
            (r'[^][;,(){}=:]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, 'annexp'),
            default('#pop'),
        ],
        'annexp': [
            (r'[^][;()]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, '#push'),
            (r'[])]', Name.Attribute, '#pop'),
            default('#pop'),
        ],
    }
Beispiel #11
0
    def gen_elixir_sigil_rules():
        """Build the lexer states needed for Elixir sigils.

        The returned dict holds the ``'sigils'`` entry state plus, for each
        supported delimiter, an ``-intp`` state (entered for sigils with a
        lower-case letter) and a ``-no-intp`` state (entered for sigils
        with an upper-case letter).  Heredoc delimiters additionally get an
        ``-end`` state that picks up letters following the closing quotes.
        """
        sigil_token = String.Other
        rules = {'sigils': []}
        entry_rules = rules['sigils']

        # Heredocs are registered first; their rules differ slightly from
        # those of the single-character delimiters.
        heredoc_delims = [(r'"""', 'triquot'), (r"'''", 'triapos')]
        for quotes, state in heredoc_delims:
            end_state = state + '-end'
            intp_state = state + '-intp'
            raw_state = state + '-no-intp'
            closing = r'^\s*' + quotes

            # The '-end' state is pushed below the body state, so it is
            # what remains once the heredoc body has been popped.
            entry_rules.append(
                (r'(~[a-z])(%s)' % (quotes, ),
                 bygroups(sigil_token, String.Heredoc),
                 (end_state, intp_state)))
            entry_rules.append(
                (r'(~[A-Z])(%s)' % (quotes, ),
                 bygroups(sigil_token, String.Heredoc),
                 (end_state, raw_state)))

            rules[end_state] = [
                (r'[a-zA-Z]+', sigil_token, '#pop'),
                default('#pop'),
            ]
            rules[intp_state] = [
                (closing, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            rules[raw_state] = [
                (closing, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        # All valid single-character sigil delimiters, as
        # (opening regex, closing regex, closing char for a class, state prefix).
        delimiters = [
            (r'\{', r'\}', '}', 'cb'),
            (r'\[', r'\]', r'\]', 'sb'),
            (r'\(', r'\)', ')', 'pa'),
            ('<', '>', '>', 'ab'),
            ('/', '/', '/', 'slas'),
            (r'\|', r'\|', '|', 'pipe'),
            ('"', '"', '"', 'quot'),
            ("'", "'", "'", 'apos'),
        ]
        for opener, closer, closer_class, state in delimiters:
            intp_state = state + '-intp'
            raw_state = state + '-no-intp'

            entry_rules.append((r'~[a-z]' + opener, sigil_token, intp_state))
            entry_rules.append((r'~[A-Z]' + opener, sigil_token, raw_state))

            rules[intp_state] = gen_elixir_sigstr_rules(
                closer, closer_class, sigil_token)
            rules[raw_state] = gen_elixir_sigstr_rules(
                closer, closer_class, sigil_token, interpol=False)

        return rules
Beispiel #12
0
class BSTLexer(RegexLexer):
    """
    A lexer for BibTeX bibliography styles.

    Top-level commands are followed by one, two or three brace-delimited
    groups.  The matching rule pushes one ``group`` state per expected
    group; each ``group`` replaces itself with ``group-end`` and ``body``
    when it sees the opening brace.

    .. versionadded:: 2.2
    """

    name = 'BST'
    aliases = ['bst', 'bst-pybtex']
    filenames = ['*.bst']
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            include('whitespace'),
            # Commands without a group argument.
            (words(['read', 'sort']), Keyword),
            # Commands taking one, two and three groups respectively.
            (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
             Keyword, ('group')),
            (words(['function', 'macro']), Keyword, ('group', 'group')),
            (words(['entry']), Keyword, ('group', 'group', 'group')),
        ],
        'group': [
            include('whitespace'),
            (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
        ],
        'group-end': [
            include('whitespace'),
            (r'\}', Punctuation, '#pop'),
        ],
        'body': [
            include('whitespace'),
            # Quoted function name.
            (r"\'[^#\"\{\}\s]+", Name.Function),
            # Names ending in '$' are lexed as builtins.
            (r'[^#\"\{\}\s]+\$', Name.Builtin),
            (r'[^#\"\{\}\s]+', Name.Variable),
            (r'"[^\"]*"', String),
            (r'#-?\d+', Number),
            # Nested group.
            (r'\{', Punctuation, ('group-end', 'body')),
            default('#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
            # Use the standard single-line comment token.  The previous
            # ``Comment.SingleLine`` is not one of the standard token
            # types, unlike ``Comment.Single`` used by the other lexers.
            ('%.*?$', Comment.Single),
        ],
    }
Beispiel #13
0
class FortranFixedLexer(RegexLexer):
    """
    Lexer for fixed format Fortran.

    Each source line is split into the label field (first five
    characters), the continuation character, the code part (lexed with
    ``FortranLexer``) and, if the code part is longer than 66 characters,
    a trailing comment.

    .. versionadded:: 2.1
    """
    name = 'FortranFixed'
    aliases = ['fortranfixed']
    filenames = ['*.f', '*.F']

    flags = re.IGNORECASE

    def _lex_fortran(self, match, ctx=None):
        """Lex a line just as free form fortran without line break.

        Used as a ``bygroups`` callback: *match* covers one group of the
        surrounding match.  A newline is appended before delegating to
        ``FortranLexer`` and stripped from the resulting token values
        again; tokens that end up empty are dropped.

        Yields ``(index, token, value)`` tuples whose index is the position
        in the whole input, i.e. the delegate lexer's index shifted by the
        start of *match*.
        """
        lexer = FortranLexer()
        # The delegate lexer counts from zero; shift its indices by where
        # this group starts so they refer to the full text.
        offset = match.start()
        text = match.group(0) + "\n"
        for index, token, value in lexer.get_tokens_unprocessed(text):
            value = value.replace('\n', '')
            if value != '':
                yield offset + index, token, value

    # NOTE(review): 'code' returns to 'root' by pushing it rather than
    # popping, so the state stack grows while lexing -- confirm whether
    # this is intended.
    tokens = {
        'root': [
            # Comment lines: 'C' or '*' at the line start, preprocessor
            # lines, and '!' within the first five characters.
            (r'[C*].*\n', Comment),
            (r'#.*\n', Comment.Preproc),
            (r' {0,4}!.*\n', Comment),
            # Label field: exactly five characters.
            (r'(.{5})', Name.Label, 'cont-char'),
            # Lines shorter than five characters.
            (r'.*\n', using(FortranLexer)),
        ],
        # Sixth character: blank or '0' for a normal line, anything else
        # is highlighted as Generic.Strong.
        'cont-char': [
            (' ', Text, 'code'),
            ('0', Comment, 'code'),
            ('.', Generic.Strong, 'code'),
        ],
        'code': [
            # Anything beyond the first 66 characters of the code part is
            # lexed as a comment.
            (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment,
                                          Text), 'root'),
            (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'),
            default('root'),
        ]
    }
Beispiel #14
0
class RegeditLexer(RegexLexer):
    """
    Lexer for `Windows Registry
    <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced
    by regedit.

    ``root`` handles the header line, comments, key paths in square
    brackets and the name part of an assignment; the ``value`` state
    handles what follows the ``=``.

    .. versionadded:: 1.6
    """

    name = 'reg'
    aliases = ['registry']
    filenames = ['*.reg']
    mimetypes = ['text/x-windows-registry']

    tokens = {
        'root': [
            (r'Windows Registry Editor.*', Text),
            (r'\s+', Text),
            (r'[;#].*', Comment.Single),
            # Key path in brackets; an optional '-' follows the opening
            # bracket.
            (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$',
             bygroups(Keyword, Operator, Name.Builtin, Keyword)),
            # String keys, which obey somewhat normal escaping
            (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text), 'value'),
            # Bare keys (includes @)
            (r'(.*?)([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text), 'value'),
        ],
        # Every rule here leaves the state again, so at most one value
        # token is produced per assignment.
        'value': [
            (r'-', Operator, '#pop'),  # delete value
            # Typed values: ``dword:...``, ``hex:...`` or ``hex(N):...``.
            (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)',
             bygroups(Name.Variable, Punctuation, Number), '#pop'),
            # As far as I know, .reg files do not support line continuation.
            (r'.+', String, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        """Return whether *text* starts with the regedit header line."""
        return text.startswith('Windows Registry Editor')
Beispiel #15
0
class GDScriptLexer(RegexLexer):
    """
    For `GDScript source code <https://www.godotengine.org>`_.

    String literals are lexed in dedicated states (``dqs``, ``sqs``,
    ``tdqs``, ``tsqs``).  For strings without an ``r`` prefix these states
    are combined with ``stringescape`` so that backslash escapes are
    highlighted.
    """

    name = "GDScript"
    aliases = ["gdscript", "gd"]
    filenames = ["*.gd"]
    mimetypes = ["text/x-gdscript", "application/x-gdscript"]

    def innerstring_rules(ttype):
        """Return the rules for the inside of a string of token type *ttype*.

        Printf-style format specifiers are highlighted as
        ``String.Interpol``; everything else gets *ttype*.
        """
        return [
            # the old style '%s' % (...) string formatting
            (
                r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?"
                "[hlL]?[E-GXc-giorsux%]",
                String.Interpol,
            ),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r"%", ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        "root": [
            (r"\n", Text),
            # Triple-quoted strings at the start of a line are lexed as
            # documentation strings.
            (
                r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
                bygroups(Text, String.Affix, String.Doc),
            ),
            (
                r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
                bygroups(Text, String.Affix, String.Doc),
            ),
            (r"[^\S\n]+", Text),
            (r"#.*$", Comment.Single),
            (r"[]{}:(),;[]", Punctuation),
            (r"\\\n", Text),
            (r"\\", Text),
            (r"(in|and|or|not)\b", Operator.Word),
            (
                r"!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|$]",
                Operator,
            ),
            # These two rules must be tried before the generic keyword
            # rule: "func" and "class" are also in the keyword list, and
            # rules are tried in order, so with the keywords first the
            # "funcname" and "classname" states could never be entered.
            # Both patterns require whitespace after the keyword, so
            # "class_name" and "funcref" are not affected.
            (r"(func)((?:\s|\\\s)+)", bygroups(Keyword, Text), "funcname"),
            (r"(class)((?:\s|\\\s)+)", bygroups(Keyword, Text), "classname"),
            include("keywords"),
            include("builtins"),
            # Strings with an "r" prefix: no "stringescape" rules.
            (
                '([rR]|[uUbB][rR]|[rR][uUbB])(""")',
                bygroups(String.Affix, String.Double),
                "tdqs",
            ),
            (
                "([rR]|[uUbB][rR]|[rR][uUbB])(''')",
                bygroups(String.Affix, String.Single),
                "tsqs",
            ),
            (
                '([rR]|[uUbB][rR]|[rR][uUbB])(")',
                bygroups(String.Affix, String.Double),
                "dqs",
            ),
            (
                "([rR]|[uUbB][rR]|[rR][uUbB])(')",
                bygroups(String.Affix, String.Single),
                "sqs",
            ),
            # All other strings: escapes are highlighted as well.
            (
                '([uUbB]?)(""")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "tdqs"),
            ),
            (
                "([uUbB]?)(''')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "tsqs"),
            ),
            (
                '([uUbB]?)(")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "dqs"),
            ),
            (
                "([uUbB]?)(')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "sqs"),
            ),
            include("name"),
            include("numbers"),
        ],
        "keywords": [
            (
                words(
                    (
                        "and", "in", "not", "or", "as", "breakpoint",
                        "class", "class_name", "extends", "is", "func",
                        "setget", "signal", "tool", "const", "enum",
                        "export", "onready", "static", "var", "break",
                        "continue", "if", "elif", "else", "for", "pass",
                        "return", "match", "while", "remote", "master",
                        "puppet", "remotesync", "mastersync", "puppetsync",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
        ],
        "builtins": [
            # Built-in functions; not matched directly after a '.'.
            (
                words(
                    (
                        "Color8", "ColorN", "abs", "acos", "asin", "assert",
                        "atan", "atan2", "bytes2var", "ceil", "char",
                        "clamp", "convert", "cos", "cosh", "db2linear",
                        "decimals", "dectime", "deg2rad", "dict2inst",
                        "ease", "exp", "floor", "fmod", "fposmod",
                        "funcref", "hash", "inst2dict", "instance_from_id",
                        "is_inf", "is_nan", "lerp", "linear2db", "load",
                        "log", "max", "min", "nearest_po2", "pow",
                        "preload", "print", "print_stack", "printerr",
                        "printraw", "prints", "printt", "rad2deg",
                        "rand_range", "rand_seed", "randf", "randi",
                        "randomize", "range", "round", "seed", "sign",
                        "sin", "sinh", "sqrt", "stepify", "str", "str2var",
                        "tan", "tan", "tanh", "type_exist", "typeof",
                        "var2bytes", "var2str", "weakref", "yield",
                    ),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Builtin,
            ),
            (r"((?<!\.)(self|false|true)|(PI|TAU|NAN|INF)"
             r")\b", Name.Builtin.Pseudo),
            # Built-in types; not matched directly after a '.'.
            (
                words(
                    (
                        "bool", "int", "float", "String", "NodePath",
                        "Vector2", "Rect2", "Transform2D", "Vector3",
                        "Rect3", "Plane", "Quat", "Basis", "Transform",
                        "Color", "RID", "Object", "NodePath", "Dictionary",
                        "Array", "PackedByteArray", "PackedInt32Array",
                        "PackedInt64Array", "PackedFloat32Array",
                        "PackedFloat64Array", "PackedStringArray",
                        "PackedVector2Array", "PackedVector3Array",
                        "PackedColorArray", "null",
                    ),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Builtin.Type,
            ),
        ],
        "numbers": [
            (r"(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?", Number.Float),
            (r"\d+[eE][+-]?[0-9]+j?", Number.Float),
            (r"0[xX][a-fA-F0-9]+", Number.Hex),
            (r"\d+j?", Number.Integer),
        ],
        "name": [(r"[a-zA-Z_]\w*", Name)],
        # The identifier right after "func" / "class".  If something else
        # follows, fall back to the previous state without consuming input.
        "funcname": [(r"[a-zA-Z_]\w*", Name.Function, "#pop"),
                     default("#pop")],
        "classname": [(r"[a-zA-Z_]\w*", Name.Class, "#pop"),
                      default("#pop")],
        "stringescape": [(
            r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
            r"U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})",
            String.Escape,
        )],
        "strings-single":
        innerstring_rules(String.Single),
        "strings-double":
        innerstring_rules(String.Double),
        "dqs": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("strings-double"),
        ],
        "sqs": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("strings-single"),
        ],
        "tdqs": [
            (r'"""', String.Double, "#pop"),
            include("strings-double"),
            (r"\n", String.Double),
        ],
        "tsqs": [
            (r"'''", String.Single, "#pop"),
            include("strings-single"),
            (r"\n", String.Single),
        ],
    }

    def analyse_text(text):
        """Score how likely *text* is GDScript, as a float of at most 1.0.

        Three independent regex checks each add a fixed amount to the
        score: engine callback definitions (0.8), GDScript-specific
        constructs (0.4) and common declaration keywords (0.2).
        """
        score = 0.0

        if re.search(r"func (_ready|_init|_input|_process|_unhandled_input)",
                     text):
            score += 0.8

        if re.search(
                r"(extends |class_name |onready |preload|load|setget|func [^_])",
                text):
            score += 0.4

        if re.search(r"(var|const|enum|export|signal|tool)", text):
            score += 0.2

        return min(score, 1.0)
Beispiel #16
0
class VBScriptLexer(RegexLexer):
    """
    VBScript is a scripting language that is modeled on Visual Basic.

    .. versionadded:: 2.4
    """
    name = 'VBScript'
    aliases = ['vbscript']
    filenames = ['*.vbs', '*.VBS']
    # Every rule below is compiled case-insensitively.
    flags = re.IGNORECASE

    # Rules within a state are tried in order; the first match wins.
    tokens = {
        'root': [
            (r"'[^\n]*", Comment.Single),
            (r'\s+', Whitespace),
            ('"', String.Double, 'string'),
            ('&h[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Float variant 2, for example: .1, .1e2
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Float variant 3, for example: 123e45
            (r'[0-9]+', Number.Integer),
            ('#.+#', String),  # date or time value
            # `dim` enters 'dim_more' so that further comma-separated names
            # are also tagged as variables.
            (r'(dim)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Variable), 'dim_more'),
            (r'(function|sub)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Function)),
            (r'(class)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Class)),
            (r'(const)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Constant)),
            (r'(end)(\s+)(class|function|if|property|sub|with)',
             bygroups(Keyword, Whitespace, Keyword)),
            (r'(on)(\s+)(error)(\s+)(goto)(\s+)(0)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Number.Integer)),
            (r'(on)(\s+)(error)(\s+)(resume)(\s+)(next)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Keyword)),
            (r'(option)(\s+)(explicit)', bygroups(Keyword, Whitespace, Keyword)),
            (r'(property)(\s+)(get|let|set)(\s+)([a-z_][a-z0-9_]*)',
             bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace, Name.Property)),
            (r'rem\s.*[^\n]*', Comment.Single),
            (words(_vbscript_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (words(_vbscript_builtins.OPERATORS), Operator),
            (words(_vbscript_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_vbscript_builtins.BUILTIN_CONSTANTS, suffix=r'\b'), Name.Constant),
            (words(_vbscript_builtins.BUILTIN_FUNCTIONS, suffix=r'\b'), Name.Builtin),
            (words(_vbscript_builtins.BUILTIN_VARIABLES, suffix=r'\b'), Name.Builtin),
            # Line continuation: a lone underscore directly before a newline.
            # This must precede the generic Name rule, which would otherwise
            # consume the underscore and make this rule unreachable.
            (r'\b_\n', Operator),
            (r'[a-z_][a-z0-9_]*', Name),
            # words() iterates the string, i.e. one alternative per character.
            (words(r'(),.:'), Punctuation),
            # Anything not recognised above: flag the rest of the line.
            (r'.+(\n)?', Error)
        ],
        'dim_more': [
            # Additional names in a `dim a, b, c` list.  The identifier class
            # matches the one used by the `dim` rule in 'root' (underscores
            # are allowed after the first character).
            (r'(\s*)(,)(\s*)([a-z_][a-z0-9_]*)',
             bygroups(Whitespace, Punctuation, Whitespace, Name.Variable)),
            default('#pop'),
        ],
        'string': [
            (r'[^"\n]+', String.Double),
            (r'\"\"', String.Double),  # doubled quote stays inside the string
            (r'"', String.Double, '#pop'),
            (r'\n', Error, '#pop'),  # Unterminated string
        ],
    }
Beispiel #17
0
class BBCBasicLexer(RegexLexer):
    """
    BBC Basic was supplied on the BBC Micro, and later Acorn RISC OS.
    It is also used by BBC Basic For Windows.

    .. versionadded:: 2.4
    """
    base_keywords = ['OTHERWISE', 'AND', 'DIV', 'EOR', 'MOD', 'OR', 'ERROR',
                     'LINE', 'OFF', 'STEP', 'SPC', 'TAB', 'ELSE', 'THEN',
                     'OPENIN', 'PTR', 'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'ABS',
                     'ACS', 'ADVAL', 'ASC', 'ASN', 'ATN', 'BGET', 'COS', 'COUNT',
                     'DEG', 'ERL', 'ERR', 'EVAL', 'EXP', 'EXT', 'FALSE', 'FN',
                     'GET', 'INKEY', 'INSTR', 'INT', 'LEN', 'LN', 'LOG', 'NOT',
                     'OPENUP', 'OPENOUT', 'PI', 'POINT', 'POS', 'RAD', 'RND',
                     'SGN', 'SIN', 'SQR', 'TAN', 'TO', 'TRUE', 'USR', 'VAL',
                     'VPOS', 'CHR$', 'GET$', 'INKEY$', 'LEFT$', 'MID$',
                     'RIGHT$', 'STR$', 'STRING$', 'EOF', 'PTR', 'PAGE', 'TIME',
                     'LOMEM', 'HIMEM', 'SOUND', 'BPUT', 'CALL', 'CHAIN', 'CLEAR',
                     'CLOSE', 'CLG', 'CLS', 'DATA', 'DEF', 'DIM', 'DRAW', 'END',
                     'ENDPROC', 'ENVELOPE', 'FOR', 'GOSUB', 'GOTO', 'GCOL', 'IF',
                     'INPUT', 'LET', 'LOCAL', 'MODE', 'MOVE', 'NEXT', 'ON',
                     'VDU', 'PLOT', 'PRINT', 'PROC', 'READ', 'REM', 'REPEAT',
                     'REPORT', 'RESTORE', 'RETURN', 'RUN', 'STOP', 'COLOUR',
                     'TRACE', 'UNTIL', 'WIDTH', 'OSCLI']

    basic5_keywords = ['WHEN', 'OF', 'ENDCASE', 'ENDIF', 'ENDWHILE', 'CASE',
                       'CIRCLE', 'FILL', 'ORIGIN', 'POINT', 'RECTANGLE', 'SWAP',
                       'WHILE', 'WAIT', 'MOUSE', 'QUIT', 'SYS', 'INSTALL',
                       'LIBRARY', 'TINT', 'ELLIPSE', 'BEATS', 'TEMPO', 'VOICES',
                       'VOICE', 'STEREO', 'OVERLAY', 'APPEND', 'AUTO', 'CRUNCH',
                       'DELETE', 'EDIT', 'HELP', 'LIST', 'LOAD', 'LVAR', 'NEW',
                       'OLD', 'RENUMBER', 'SAVE', 'TEXTLOAD', 'TEXTSAVE',
                       'TWIN', 'TWINO', 'INSTALL', 'SUM', 'BEAT']


    name = 'BBC Basic'
    aliases = ['bbcbasic']
    filenames = ['*.bbc']

    tokens = {
        # 'root' handles the start of a line: an optional line number or a
        # `*` command line; anything else falls through to 'code'.
        'root': [
            (r"[0-9]+", Name.Label),
            (r"(\*)([^\n]*)",
             bygroups(Keyword.Pseudo, Comment.Special)),
            default('code'),
        ],

        'code': [
            (r"(REM)([^\n]*)",
             bygroups(Keyword.Declaration, Comment.Single)),
            # A newline returns to line-start handling.
            (r'\n', Whitespace, 'root'),
            (r'\s+', Whitespace),
            (r':', Comment.Preproc),

            # Some special cases to make functions come out nicer
            (r'(DEF)(\s*)(FN|PROC)([A-Za-z_@][\w@]*)',
             bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Name.Function)),
            (r'(FN|PROC)([A-Za-z_@][\w@]*)',
             bygroups(Keyword, Name.Function)),

            (r'(GOTO|GOSUB|THEN|RESTORE)(\s*)(\d+)',
             bygroups(Keyword, Whitespace, Name.Label)),

            (r'(TRUE|FALSE)', Keyword.Constant),
            (r'(PAGE|LOMEM|HIMEM|TIME|WIDTH|ERL|ERR|REPORT\$|POS|VPOS|VOICES)', Keyword.Pseudo),

            (words(base_keywords), Keyword),
            (words(basic5_keywords), Keyword),

            ('"', String.Double, 'string'),

            ('%[01]{1,32}', Number.Bin),
            # This lexer is case-sensitive, so both digit cases are listed
            # explicitly; otherwise `&FF` would not be recognised as hex.
            ('&[0-9a-fA-F]{1,8}', Number.Hex),

            (r'[+-]?[0-9]+\.[0-9]*(E[+-]?[0-9]+)?', Number.Float),
            (r'[+-]?\.[0-9]+(E[+-]?[0-9]+)?', Number.Float),
            (r'[+-]?[0-9]+E[+-]?[0-9]+', Number.Float),
            (r'[+-]?\d+', Number.Integer),

            (r'([A-Za-z_@][\w@]*[%$]?)', Name.Variable),
            # Multi-character operators come first: regex alternation takes
            # the first alternative that matches, so listing the
            # single-character class earlier would split e.g. `>=` in two.
            (r'(>>>|<<|>>|>=|<=|<>|[+\-]=|[$!|?+\-*/%^=><();]|,)', Operator),
        ],
        'string': [
            (r'[^"\n]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\n', Error, 'root'),  # Unterminated string
        ],
    }

    def analyse_text(text):
        """Return 0.9 when ``text`` starts with a ``REM >`` header line
        (optionally numbered ``10``), otherwise 0.0."""
        if text.startswith(('10REM >', 'REM >')):
            return 0.9
        return 0.0
Beispiel #18
0
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # The lists below are joined with '|' into regex alternations, so order
    # matters whenever one entry is a prefix of another (see `builtin`).
    keywords = [
        'infix',
        'infixr',
        'infixl',
        'type',
        'cotype',
        'rectype',
        'alias',
        'struct',
        'con',
        'fun',
        'function',
        'val',
        'var',
        'external',
        'if',
        'then',
        'else',
        'elif',
        'return',
        'match',
        'private',
        'public',
        'module',
        'import',
        'as',
        'include',
        'inline',
        'rec',
        'try',
        'yield',
        'enum',
        'interface',
        'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type',
        'cotype',
        'rectype',
        'alias',
        'struct',
        'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall',
        'exists',
        'some',
        'with',
    ]

    # builtin names and special names
    builtin = [
        'for',
        'while',
        'repeat',
        # 'foreach-indexed' must precede 'foreach': `boundary` allows a
        # following '-', so the shorter name would otherwise always win and
        # the longer one could never match as a whole.
        'foreach-indexed',
        'foreach',
        'error',
        'catch',
        'finally',
        'cs',
        'js',
        'file',
        'ref',
        'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text,
                                                  tokenTypeDef), 'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?',
             bygroups(Keyword, Text, tokenTypeDef), 'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) + r'(\s+)([a-z]\w*)?',
             bygroups(Keyword, Text, tokenTypeDef), 'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [(r'=', Keyword), include('type')],

        # type started by struct
        'struct-type': [(r'(?=\((?!,*\)))', Punctuation, '#pop'),
                        include('type')],

        # type started by colon
        'type': [(r'[(\[<]', tokenType, 'type-nested'),
                 include('type-content')],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            # Zero-width lookahead: leave the type state without consuming
            # the keyword so that 'root' can lex it.
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))', Keyword,
             '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)', bygroups(Name.Namespace,
                                                     tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)', bygroups(Name.Namespace,
                                                     tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [(r'\n\s*#.*$', Comment.Preproc), (r'\s+', Text),
                       (r'/\*', Comment.Multiline, 'comment'),
                       (r'//.*$', Comment.Single)],
        # block comments nest: '/*' pushes another level, '*/' pops one
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }
Beispiel #19
0
class MonkeyLexer(RegexLexer):
    """
    For
    `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_
    source code.

    .. versionadded:: 1.6
    """

    name = 'Monkey'
    aliases = ['monkey']
    filenames = ['*.monkey']
    mimetypes = ['text/x-monkey']

    # Regex fragments interpolated into the rules below.
    name_variable = r'[a-z_]\w*'
    name_function = r'[A-Z]\w*'
    name_constant = r'[A-Z_][A-Z0-9_]*'
    name_class = r'[A-Z]\w*'
    name_module = r'[a-z0-9_]*'

    keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)'
    # ? == Bool // % == Int // # == Float // $ == String
    keyword_type_special = r'[?%#$]'

    # '^' in the rules below must match at the start of every line.
    flags = re.MULTILINE

    tokens = {
        'root': [
            # Text
            (r'\s+', Text),
            # Comments
            (r"'.*", Comment),
            (r'(?i)^#rem\b', Comment.Multiline, 'comment'),
            # preprocessor directives
            (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b', Comment.Preproc),
            # preprocessor variable (any line starting with '#' that is not a directive)
            (r'^#', Comment.Preproc, 'variables'),
            # String
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            # Hex digits only (0-9, a-f, A-F).
            (r'\$[0-9a-fA-F]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Native data types
            (r'\b%s\b' % keyword_type, Keyword.Type),
            # Exception handling
            (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved),
            (r'Throwable', Name.Exception),
            # Builtins
            (r'(?i)\b(?:Null|True|False)\b', Name.Builtin),
            (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo),
            (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant),
            # Keywords
            (r'(?i)^(Import)(\s+)(.*)(\n)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text)),
            (r'(?i)^Strict\b.*\n', Keyword.Reserved),
            (r'(?i)(Const|Local|Global|Field)(\s+)',
             bygroups(Keyword.Declaration, Text), 'variables'),
            (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)',
             bygroups(Keyword.Reserved, Text), 'classname'),
            (r'(?i)(Function|Method)(\s+)',
             bygroups(Keyword.Reserved, Text), 'funcname'),
            (r'(?i)(?:End|Return|Public|Private|Extern|Property|'
             r'Final|Abstract)\b', Keyword.Reserved),
            # Flow Control stuff
            (r'(?i)(?:If|Then|Else|ElseIf|EndIf|'
             r'Select|Case|Default|'
             r'While|Wend|'
             r'Repeat|Until|Forever|'
             r'For|To|Until|Step|EachIn|Next|'
             r'Exit|Continue)\s+', Keyword.Reserved),
            # not used yet
            (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved),
            # Array
            (r'[\[\]]', Punctuation),
            # Other
            (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator),
            # Word boundaries keep identifiers that merely start with an
            # operator word (e.g. `Order`, `Notify`) from being split.
            (r'(?i)\b(?:Not|Mod|Shl|Shr|And|Or)\b', Operator.Word),
            (r'[(){}!#,.:]', Punctuation),
            # catch the rest
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_function, Name.Function),
            (r'%s\b' % name_variable, Name.Variable),
        ],
        'funcname': [
            (r'(?i)%s\b' % name_function, Name.Function),
            (r':', Punctuation, 'classname'),
            (r'\s+', Text),
            (r'\(', Punctuation, 'variables'),
            (r'\)', Punctuation, '#pop')
        ],
        'classname': [
            (r'%s\.' % name_module, Name.Namespace),
            (r'%s\b' % keyword_type, Keyword.Type),
            (r'%s\b' % name_class, Name.Class),
            # array (of given size)
            (r'(\[)(\s*)(\d*)(\s*)(\])',
             bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)),
            # generics
            (r'\s+(?!<)', Text, '#pop'),
            (r'<', Punctuation, '#push'),
            (r'>', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            default('#pop')
        ],
        'variables': [
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_variable, Name.Variable),
            (r'%s' % keyword_type_special, Keyword.Type),
            (r'\s+', Text),
            (r':', Punctuation, 'classname'),
            (r',', Punctuation, '#push'),
            default('#pop')
        ],
        'string': [
            (r'[^"~]+', String.Double),
            (r'~q|~n|~r|~t|~z|~~', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        # '#rem' blocks nest: another '#rem' pushes, '#end' pops one level
        'comment': [
            (r'(?i)^#rem.*?', Comment.Multiline, "#push"),
            (r'(?i)^#end.*?', Comment.Multiline, "#pop"),
            (r'\n', Comment.Multiline),
            (r'.+', Comment.Multiline),
        ],
    }
Beispiel #20
0
class QVToLexer(RegexLexer):
    """
    For the `QVT Operational Mapping language <http://www.omg.org/spec/QVT/1.1/>`_.

    Reference for implementing this: «Meta Object Facility (MOF) 2.0
    Query/View/Transformation Specification», Version 1.1 - January 2011
    (http://www.omg.org/spec/QVT/1.1/), see §8.4, «Concrete Syntax» in
    particular.

    Notable tokens assignments:

    - Name.Class is assigned to the identifier following any of the following
      keywords: metamodel, class, exception, primitive, enum, transformation
      or library

    - Name.Function is assigned to the names of mappings and queries

    - Name.Builtin.Pseudo is assigned to the pre-defined variables 'this',
      'self' and 'result'.
    """
    # With obvious borrowings & inspiration from the Java, Python and C lexers

    name = 'QVTO'
    aliases = ['qvto', 'qvt']
    filenames = ['*.qvto']

    # State table.  Rules within a state are tried in order; the optional
    # third tuple element names the state to enter after a match.
    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # Line comments; an optional leading 'directive:' is tagged
            # separately as Comment.Preproc.
            (r'(--|//)(\s*)(directive:)?(.*)$',
             bygroups(Comment, Comment, Comment.Preproc, Comment)),
            # Uncomment the following if you want to distinguish between
            # '/*' and '/**', à la javadoc
            # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'\\\n', Text),
            (r'(and|not|or|xor|##?)\b', Operator.Word),
            # NOTE(review): the trailing \b in the next two rules only matches
            # when a word character directly follows the symbol -- confirm
            # that this is intended.
            (r'(:{1,2}=|[-+]=)\b', Operator.Word),
            (r'(@|<<|>>)\b', Keyword),  # stereotypes
            (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(true|false|unlimited|null)\b', Keyword.Constant),
            (r'(this|self|result)\b', Name.Builtin.Pseudo),
            (r'(var)\b', Keyword.Declaration),
            (r'(from|import)\b', Keyword.Namespace, 'fromimport'),
            (r'(metamodel|class|exception|primitive|enum|transformation|'
             r'library)(\s+)(\w+)',
             bygroups(Keyword.Word, Text, Name.Class)),
            # NOTE(review): 'exception' is also an alternative of the rule
            # directly above, which is tried first, so this rule looks
            # unreachable -- confirm which token type is intended.
            (r'(exception)(\s+)(\w+)',
             bygroups(Keyword.Word, Text, Name.Exception)),
            (r'(main)\b', Name.Function),
            (r'(mapping|helper|query)(\s+)',
             bygroups(Keyword.Declaration, Text), 'operation'),
            (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'),
            (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b',
             Keyword.Type),
            include('keywords'),
            # Strings are lexed in an anonymous state that merges
            # 'stringescape' with the matching quote state.
            ('"', String, combined('stringescape', 'dqs')),
            ("'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
            # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)',
            # bygroups(Text, Text, Text)),
        ],

        # Dotted module path following 'from' / 'import'.
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop'),
        ],

        # Entered after 'mapping', 'helper' or 'query'.  Both rules require a
        # '::' in the input; the state has no default() fallback.
        'operation': [
            (r'::', Text),
            (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()',
             bygroups(Text, Name.Function, Text, Punctuation), '#pop')
        ],

        # Optional severity word following 'assert'.
        'assert': [
            (r'(warning|error|fatal)\b', Keyword, '#pop'),
            default('#pop'),  # all else: go back
        ],

        'keywords': [
            (words((
                'abstract', 'access', 'any', 'assert', 'blackbox', 'break',
                'case', 'collect', 'collectNested', 'collectOne', 'collectselect',
                'collectselectOne', 'composes', 'compute', 'configuration',
                'constructor', 'continue', 'datatype', 'default', 'derived',
                'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except',
                'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if',
                'implies', 'in', 'inherits', 'init', 'inout', 'intermediate',
                'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn',
                'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map',
                'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out',
                'package', 'population', 'property', 'raise', 'readonly',
                'references', 'refines', 'reject', 'resolve', 'resolveIn',
                'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne',
                'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef',
                'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect',
                'xmap', 'xselect'), suffix=r'\b'), Keyword),
        ],

        # There is no need to distinguish between String.Single and
        # String.Double: 'strings' is factorised for 'dqs' and 'sqs'
        'strings': [
            (r'[^\\\'"\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
        ],
        'stringescape': [
            (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape)
        ],
        'dqs': [  # double-quoted string
            (r'"', String, '#pop'),
            (r'\\\\|\\"', String.Escape),
            include('strings')
        ],
        'sqs': [  # single-quoted string
            (r"'", String, '#pop'),
            (r"\\\\|\\'", String.Escape),
            include('strings')
        ],
        'name': [
            (r'[a-zA-Z_]\w*', Name),
        ],
        # numbers: excerpt taken from the python lexer
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer)
        ],
    }
Beispiel #21
0
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    # Rules within a state are tried in order; the first match wins.
    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            # ('---' is only looked ahead at, so 'root' still sees it)
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            # (each pushes a state that lexes the value after the colon)
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes whose contents are not highlighted:
            # only the key is tagged; no state is pushed for the value
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        # Value states: lex the remainder of a simple attribute, then return
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
Beispiel #22
0
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.
        """

        # States named like those of ``baselexer`` extend them: the
        # ``inherit`` marker splices the parent's rules in at that position.
        tokens = {
            'statements': [
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
                 String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                (words(('@selector', '@private', '@protected', '@public',
                        '@encode', '@synchronized', '@try', '@throw', '@catch',
                        '@finally', '@end', '@property', '@synthesize',
                        '__bridge', '__bridge_transfer', '__autoreleasing',
                        '__block', '__weak', '__strong', 'weak', 'strong',
                        'copy', 'retain', 'assign', 'unsafe_unretained',
                        'atomic', 'nonatomic', 'readonly', 'readwrite',
                        'setter', 'getter', 'typeof', 'in', 'out', 'inout',
                        'release', 'class', '@dynamic', '@optional',
                        '@required', '@autoreleasepool', '@import'),
                       suffix=r'\b'), Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'),
                       suffix=r'\b'), Keyword.Type),
                # Boxed boolean literals.  The pattern ends in a word boundary
                # (like the sibling rules) so it matches anywhere, not only
                # directly before a newline, and does not consume the newline.
                # The YES/NO alternatives are already claimed by the
                # `@(YES|NO)` rule near the top of this state.
                (r'@(true|false|YES|NO)\b', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b',
                 Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                inherit,
            ],
            'oc_classname': [
                # interface definition that inherits
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            # comma-separated list of forward-declared class/protocol names
            'oc_forward_classname':
            [(r'([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text),
              'oc_forward_classname'),
             (r'([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class,
                                                     Text), '#pop')],
            # instance-variable block; nested braces push/pop this state
            'oc_ivars': [
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (
                    r'^([-+])(\s*)'  # method marker
                    r'(\(.*?\))?(\s*)'  # return type
                    r'([a-zA-Z$_][\w$]*:?)',  # begin of method name
                    bygroups(Punctuation, Text, using(this), Text,
                             Name.Function),
                    'method'),
                inherit,
            ],
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            # @( ... ), @[ ... ] literals: the outer closing delimiter is
            # tagged Literal; nested delimiters are plain Punctuation.
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            """Return a confidence score for ``text`` being Objective-C.

            The checks are tried in order and the first hit decides the
            score; 0 is returned when none of them matches.
            """
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            """Yield ``(index, token, value)`` triples from ``baselexer``,
            retagging ``Name`` / ``Name.Class`` tokens whose value is a known
            Cocoa interface, protocol or primitive as ``Name.Builtin.Pseudo``.
            """
            # Imported here rather than at module level, so the builtin
            # tables are only loaded when tokens are actually requested.
            from typecode._vendor.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
Beispiel #23
0
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.

    Three boolean options, each defaulting to ``True``, decide whether plain
    ``Name`` tokens found in the corresponding type table are re-tagged as
    ``Keyword.Type``: ``stdlibhighlighting`` (``stdlib_types``),
    ``c99highlighting`` (``c99_types``) and ``platformhighlighting``
    (``linux_types``).
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('asm', 'auto', 'break', 'case', 'const', 'continue',
                 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                 'if', 'register', 'restricted', 'return', 'sizeof', 'static',
                 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'),
                suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    # Type names consulted when ``stdlibhighlighting`` is enabled.
    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t',
    }
    # Type names consulted when ``c99highlighting`` is enabled.
    c99_types = {
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t',
    }
    # Type names consulted when ``platformhighlighting`` is enabled.
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t',
    }

    def __init__(self, **options):
        """Read the three highlighting switches, then defer to RegexLexer."""
        # Each switch is stored on the instance under its option name and
        # defaults to True when the caller does not supply it.
        for switch in ('stdlibhighlighting', 'c99highlighting',
                       'platformhighlighting'):
            setattr(self, switch, get_bool_opt(options, switch, True))
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield RegexLexer's tokens, promoting known type names.

        A token tagged exactly ``Name`` becomes ``Keyword.Type`` when its text
        is in a type table whose switch is enabled; all other tokens are
        yielded untouched.
        """
        stream = RegexLexer.get_tokens_unprocessed(self, text)
        for index, token, value in stream:
            if token is Name and (
                    (self.stdlibhighlighting and value in self.stdlib_types)
                    or (self.c99highlighting and value in self.c99_types)
                    or (self.platformhighlighting
                        and value in self.linux_types)):
                token = Keyword.Type
            yield index, token, value
Beispiel #24
0
class SassLexer(ExtendedRegexLexer):
    """
    For Sass stylesheets.

    The state table below holds the Sass-specific states; the remaining
    states are copied in from ``common_sass_tokens`` at the end of the class
    body.

    .. versionadded:: 1.3
    """

    name = 'Sass'
    aliases = ['sass']
    filenames = ['*.sass']
    mimetypes = ['text/x-sass']

    # Patterns are matched case-insensitively; ^ and $ match at line bounds.
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            # Whitespace-only lines are plain text.
            (r'[ \t]*\n', Text),
            # Leading whitespace of any other line is handed to the
            # ``_indentation`` callback (defined elsewhere in this module).
            (r'[ \t]*', _indentation),
        ],

        'content': [
            # Comment openers are wrapped with ``_starts_block`` (defined
            # elsewhere in this module) and name the comment state to use.
            (r'//[^\n]*', _starts_block(Comment.Single, 'single-comment'),
             'root'),
            (r'/\*[^\n]*', _starts_block(Comment.Multiline, 'multi-comment'),
             'root'),
            # At-rules: the specific ones first, the generic catch-all last.
            (r'@import', Keyword, 'import'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword, Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'@[\w-]+', Keyword, 'selector'),
            # '=name' and '+name' shorthand forms.
            (r'=[\w-]+', Name.Function, 'value'),
            (r'\+[\w-]+', Name.Decorator, 'value'),
            # Variable assignment: '!name' or '$name' followed by '=', '||='
            # or ':'.
            (r'([!$][\w-]\w*)([ \t]*(?:(?:\|\|)?=|:))',
             bygroups(Name.Variable, Operator), 'value'),
            # A leading ':' introduces an old-style attribute.
            (r':', Name.Attribute, 'old-style-attr'),
            # Zero-width lookahead: enter 'new-style-attr' when the line
            # contains '=' or ':' followed by a non-letter or end of line.
            (r'(?=.+?[=:]([^a-z]|$))', Name.Attribute, 'new-style-attr'),
            # Anything else is treated as a selector.
            default('selector'),
        ],

        'single-comment': [
            (r'.+', Comment.Single),
            (r'\n', Text, 'root'),
        ],

        'multi-comment': [
            (r'.+', Comment.Multiline),
            (r'\n', Text, 'root'),
        ],

        'import': [
            (r'[ \t]+', Text),
            (r'\S+', String),
            (r'\n', Text, 'root'),
        ],

        'old-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*=', Operator, 'value'),
            # No '=' needed: fall through to the value.
            default('value'),
        ],

        'new-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*[=:]', Operator, 'value'),
        ],

        'inline-comment': [
            # Comment text up to (but not including) '#{' or '*/'.
            (r"(\\#|#(?=[^\n{])|\*(?=[^\n/])|[^\n#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Add the shared states.  Each rule list is shallow-copied so that the
    # appends below extend this lexer's own list rather than the list stored
    # in ``common_sass_tokens``.
    for group, common in common_sass_tokens.items():
        tokens[group] = copy.copy(common)
    # In this lexer a newline ends a value or selector and returns to 'root'.
    tokens['value'].append((r'\n', Text, 'root'))
    tokens['selector'].append((r'\n', Text, 'root'))
Beispiel #25
0
class KconfigLexer(RegexLexer):
    """
    For Linux-style Kconfig files.

    Help text is recognised by its indentation: the ``help`` state picks an
    ``indentN`` state (N from 7 down to 1) for the first help line, and that
    state keeps emitting ``String.Doc`` until a line no longer matches.

    .. versionadded:: 1.6
    """

    name = 'Kconfig'
    aliases = ['kconfig', 'menuconfig', 'linux-config', 'kernel-config']
    # Adjust this if new kconfig file names appear in your environment
    filenames = [
        'Kconfig*', '*Config.in*', 'external.in*', 'standard-modules.in'
    ]
    mimetypes = ['text/x-kconfig']
    # No re.MULTILINE, indentation-aware help text needs line-by-line handling
    flags = 0

    def call_indent(level):
        # Build the rule that enters state 'indent{level}' when a line with
        # indentation >= {level} is detected.
        pattern = _rx_indent(level)
        target_state = 'indent%s' % level
        return (pattern, String.Doc, target_state)

    def do_indent(level):
        # Build the rule list for state 'indent{level}': paragraphs indented
        # by >= {level} are String.Doc, blank lines are skipped, and anything
        # else pops two states to get back to 'root'.
        doc_line = (_rx_indent(level), String.Doc)
        blank_line = (r'\s*\n', Text)
        leave = default('#pop:2')
        return [doc_line, blank_line, leave]

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (words(('mainmenu', 'config', 'menuconfig', 'choice', 'endchoice',
                    'comment', 'menu', 'endmenu', 'visible if', 'if', 'endif',
                    'source', 'prompt', 'select', 'depends on', 'default',
                    'range', 'option'),
                   suffix=r'\b'), Keyword),
            (r'(---help---|help)[\t ]*\n', Keyword, 'help'),
            (r'(bool|tristate|string|hex|int|defconfig_list|modules|env)\b',
             Name.Builtin),
            (r'[!=&|]', Operator),
            (r'[()]', Punctuation),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Double),
            (r'\S+', Text),
        ],
        # Help text is indented, multi-line and ends when a lower indentation
        # level is detected.
        'help': [
            # Skip blank lines after help token, if any
            (r'\s*\n', Text),
            # Determine the first help line's indentation level heuristically(!).
            # Attention: this is not perfect, but works for 99% of "normal"
            # indentation schemes up to a max. indentation level of 7.
            call_indent(7),
            call_indent(6),
            call_indent(5),
            call_indent(4),
            call_indent(3),
            call_indent(2),
            call_indent(1),
            default('#pop'),  # for incomplete help sections without text
        ],
        # Handle text for indentation levels 7 to 1
        'indent7': do_indent(7),
        'indent6': do_indent(6),
        'indent5': do_indent(5),
        'indent4': do_indent(4),
        'indent3': do_indent(3),
        'indent2': do_indent(2),
        'indent1': do_indent(1),
    }
Beispiel #26
0
class CssLexer(RegexLexer):
    """
    For CSS (Cascading Style Sheets).

    Selectors and at-rules are handled by ``basics``; declaration blocks by
    ``content``; property values by ``value-start`` and, inside parentheses
    of a function call, ``function-start``.
    """

    name = 'CSS'
    aliases = ['css']
    filenames = ['*.css']
    mimetypes = ['text/css']

    tokens = {
        'root': [
            include('basics'),
        ],
        # Everything outside a declaration block: selectors, at-rules,
        # comments and strings.
        'basics': [
            (r'\s+', Text),
            (r'/\*(?:.|\n)*?\*/', Comment),
            (r'\{', Punctuation, 'content'),
            # ':name' / '::name', '.name', '#name' and '@name' selector parts.
            (r'(\:{1,2})([\w-]+)', bygroups(Punctuation, Name.Decorator)),
            (r'(\.)([\w-]+)', bygroups(Punctuation, Name.Class)),
            (r'(\#)([\w-]+)', bygroups(Punctuation, Name.Namespace)),
            (r'(@)([\w-]+)', bygroups(Punctuation, Keyword), 'atrule'),
            (r'[\w-]+', Name.Tag),
            (r'[~^*!%&$\[\]()<>|+=@:;,./?-]', Operator),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ],
        # After '@name': ends at ';' or continues into a '{ ... }' body.
        'atrule': [
            (r'\{', Punctuation, 'atcontent'),
            (r';', Punctuation, '#pop'),
            include('basics'),
        ],
        'atcontent': [
            include('basics'),
            # Closing brace leaves both 'atcontent' and 'atrule'.
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Inside '{ ... }' of a rule: a sequence of 'property: value' pairs.
        'content': [
            (r'\s+', Text),
            (r'\}', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'^@.*?$', Comment.Preproc),

            (words(_vendor_prefixes,), Keyword.Pseudo),
            # Property names from ``_css_properties``, then names starting
            # with '-', then any other identifier; each enters 'value-start'
            # after the colon.
            (r'('+r'|'.join(_css_properties)+r')(\s*)(\:)',
             bygroups(Keyword, Text, Punctuation), 'value-start'),
            (r'([-]+[a-zA-Z_][\w-]*)(\s*)(\:)', bygroups(Name.Variable, Text, Punctuation),
             'value-start'),
            (r'([a-zA-Z_][\w-]*)(\s*)(\:)', bygroups(Name, Text, Punctuation),
             'value-start'),

            (r'/\*(?:.|\n)*?\*/', Comment),
        ],
        # The value part of a declaration, up to ';' or '}'.
        'value-start': [
            (r'\s+', Text),
            (words(_vendor_prefixes,), Name.Builtin.Pseudo),
            include('urls'),
            # Function call: names from the functional-notation list first,
            # then any other identifier followed by '('.
            (r'('+r'|'.join(_functional_notation_keyword_values)+r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()',
             bygroups(Name.Function, Punctuation), 'function-start'),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),
            # for transition-property etc.
            (words(_css_properties, suffix=r'\b'), Keyword),
            (r'\!important', Comment.Preproc),
            (r'/\*(?:.|\n)*?\*/', Comment),

            include('numeric-values'),

            (r'[~^*!%&<>|+=@:./?-]+', Operator),
            (r'[\[\](),]+', Punctuation),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'[a-zA-Z_][\w-]*', Name),
            # ';' ends the value; '}' ends the value and the enclosing block.
            (r';', Punctuation, '#pop'),
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Inside the parentheses of a function call, up to the closing ')'.
        'function-start': [
            (r'\s+', Text),
            (r'[-]+([\w+]+[-]*)+', Name.Variable),
            include('urls'),
            (words(_vendor_prefixes,), Keyword.Pseudo),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),

            # function-start may be entered recursively
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()',
             bygroups(Name.Function, Punctuation), 'function-start'),

            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[*+/-]', Operator),
            (r',', Punctuation),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'[a-zA-Z_-]\w*', Name),
            (r'\)', Punctuation, '#pop'),
        ],
        # url(...) with a double-quoted, single-quoted or bare argument.
        'urls': [
            (r'(url)(\()(".*?")(\))', bygroups(Name.Builtin, Punctuation,
                                               String.Double, Punctuation)),
            (r"(url)(\()('.*?')(\))", bygroups(Name.Builtin, Punctuation,
                                               String.Single, Punctuation)),
            (r'(url)(\()(.*?)(\))', bygroups(Name.Builtin, Punctuation,
                                             String.Other, Punctuation)),
        ],
        # '#'-prefixed hex values, floats and integers; the latter two go on
        # to 'numeric-end' to pick up an optional unit.
        'numeric-values': [
            (r'\#[a-zA-Z0-9]{1,6}', Number.Hex),
            (r'[+\-]?[0-9]*[.][0-9]+', Number.Float, 'numeric-end'),
            (r'[+\-]?[0-9]+', Number.Integer, 'numeric-end'),
        ],
        # Optional unit (from ``_all_units``) or '%' directly after a number.
        'numeric-end': [
            (words(_all_units, suffix=r'\b'), Keyword.Type),
            (r'%', Keyword.Type),
            default('#pop'),
        ],
    }
Beispiel #27
0
    'string-single': [
        (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Single),
        (r'#\{', String.Interpol, 'interpolation'),
        (r"'", String.Single, '#pop'),
    ],

    'string-url': [
        (r'(\\#|#(?=[^\n{])|[^\n#)])+', String.Other),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'\)', String.Other, '#pop'),
    ],

    'pseudo-class': [
        (r'[\w-]+', Name.Decorator),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'class': [
        (r'[\w-]+', Name.Class),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'id': [
        (r'[\w-]+', Name.Namespace),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'for': [
Beispiel #28
0
class LimboLexer(RegexLexer):
    """
    Lexer for `Limbo programming language <http://www.vitanuova.com/inferno/limbo.html>`_

    TODO:
        - maybe implement better var declaration highlighting
        - some simple syntax error highlighting

    .. versionadded:: 2.0
    """
    name = 'Limbo'
    aliases = ['limbo']
    filenames = ['*.b']
    mimetypes = ['text/limbo']

    tokens = {
        'whitespace': [
            # A label: an identifier followed by ':' alone on its line.
            (r'^(\s*)([a-zA-Z_]\w*:(\s*)\n)', bygroups(Text, Name.Label)),
            (r'\n', Text),
            (r'\s+', Text),
            (r'#(\n|(.|\n)*?[^\\]\n)', Comment.Single),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\', String),  # stray backslash
        ],
        'statements': [
            (r'"', String, 'string'),
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])', Number.Float),
            (r'16r[0-9a-fA-F]+', Number.Hex),
            (r'8r[0-7]+', Number.Oct),
            (r'((([1-3]\d)|([2-9]))r)?(\d+)', Number.Integer),
            (r'[()\[\],.]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]|(->)|(<-)|(=>)|(::)', Operator),
            # Each continued line must end with '|': adjacent string literals
            # are concatenated, so a missing separator would fuse two
            # keywords into one alternative (e.g. 'exit' + 'for' ->
            # 'exitfor') and neither would be recognised.
            (r'(alt|break|case|continue|cyclic|do|else|exit|'
             r'for|hd|if|implement|import|include|len|load|or|'
             r'pick|return|spawn|tagof|tl|to|while)\b', Keyword),
            (r'(byte|int|big|real|string|array|chan|list|adt'
             r'|fn|ref|of|module|self|type)\b', Keyword.Type),
            (r'(con|iota|nil)\b', Keyword.Constant),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            default('statement'),
        ],
    }

    def analyse_text(text):
        # Any limbo module implements something
        # Returns 0.7 when a line starts with 'implement <name>;'; otherwise
        # falls off the end and returns None.
        if re.search(r'^implement \w+;', text, re.MULTILINE):
            return 0.7
Beispiel #29
0
 def do_indent(level):
     # Build the rule list for one help-text indentation state: lines
     # indented by >= {level} are String.Doc, blank lines are plain Text,
     # and anything else pops two states to get back to 'root'.
     doc_line = (_rx_indent(level), String.Doc)
     blank_line = (r'\s*\n', Text)
     leave = default('#pop:2')
     return [doc_line, blank_line, leave]
Beispiel #30
0
class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    The word tables below (``keywords``, ``opts``, ``actions``,
    ``actions_stats``, ``actions_log`` and ``acls``) are turned into rules of
    the ``root`` state; matching is case-insensitive.

    .. versionadded:: 0.9
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    # Configuration directive names, highlighted as Keyword.
    keywords = (
        "access_log",
        "acl",
        "always_direct",
        "announce_host",
        "announce_period",
        "announce_port",
        "announce_to",
        "anonymize_headers",
        "append_domain",
        "as_whois_server",
        "auth_param_basic",
        "authenticate_children",
        "authenticate_program",
        "authenticate_ttl",
        "broken_posts",
        "buffered_logs",
        "cache_access_log",
        "cache_announce",
        "cache_dir",
        "cache_dns_program",
        "cache_effective_group",
        "cache_effective_user",
        "cache_host",
        "cache_host_acl",
        "cache_host_domain",
        "cache_log",
        "cache_mem",
        "cache_mem_high",
        "cache_mem_low",
        "cache_mgr",
        "cachemgr_passwd",
        "cache_peer",
        "cache_peer_access",
        "cache_replacement_policy",
        "cache_stoplist",
        "cache_stoplist_pattern",
        "cache_store_log",
        "cache_swap",
        "cache_swap_high",
        "cache_swap_log",
        "cache_swap_low",
        "client_db",
        "client_lifetime",
        "client_netmask",
        "connect_timeout",
        "coredump_dir",
        "dead_peer_timeout",
        "debug_options",
        "delay_access",
        "delay_class",
        "delay_initial_bucket_level",
        "delay_parameters",
        "delay_pools",
        "deny_info",
        "dns_children",
        "dns_defnames",
        "dns_nameservers",
        "dns_testnames",
        "emulate_httpd_log",
        "err_html_text",
        "fake_user_agent",
        "firewall_ip",
        "forwarded_for",
        "forward_snmpd_port",
        "fqdncache_size",
        "ftpget_options",
        "ftpget_program",
        "ftp_list_width",
        "ftp_passive",
        "ftp_user",
        "half_closed_clients",
        "header_access",
        "header_replace",
        "hierarchy_stoplist",
        "high_response_time_warning",
        "high_page_fault_warning",
        "hosts_file",
        "htcp_port",
        "http_access",
        "http_anonymizer",
        "httpd_accel",
        "httpd_accel_host",
        "httpd_accel_port",
        "httpd_accel_uses_host_header",
        "httpd_accel_with_proxy",
        "http_port",
        "http_reply_access",
        "icp_access",
        "icp_hit_stale",
        "icp_port",
        "icp_query_timeout",
        "ident_lookup",
        "ident_lookup_access",
        "ident_timeout",
        "incoming_http_average",
        "incoming_icp_average",
        "inside_firewall",
        "ipcache_high",
        "ipcache_low",
        "ipcache_size",
        "local_domain",
        "local_ip",
        "logfile_rotate",
        "log_fqdn",
        "log_icp_queries",
        "log_mime_hdrs",
        "maximum_object_size",
        "maximum_single_addr_tries",
        "mcast_groups",
        "mcast_icp_query_timeout",
        "mcast_miss_addr",
        "mcast_miss_encode_key",
        "mcast_miss_port",
        "memory_pools",
        "memory_pools_limit",
        "memory_replacement_policy",
        "mime_table",
        "min_http_poll_cnt",
        "min_icp_poll_cnt",
        "minimum_direct_hops",
        "minimum_object_size",
        "minimum_retry_timeout",
        "miss_access",
        "negative_dns_ttl",
        "negative_ttl",
        "neighbor_timeout",
        "neighbor_type_domain",
        "netdb_high",
        "netdb_low",
        "netdb_ping_period",
        "netdb_ping_rate",
        "never_direct",
        "no_cache",
        "passthrough_proxy",
        "pconn_timeout",
        "pid_filename",
        "pinger_program",
        "positive_dns_ttl",
        "prefer_direct",
        "proxy_auth",
        "proxy_auth_realm",
        "query_icmp",
        "quick_abort",
        "quick_abort_max",
        "quick_abort_min",
        "quick_abort_pct",
        "range_offset_limit",
        "read_timeout",
        "redirect_children",
        "redirect_program",
        "redirect_rewrites_host_header",
        "reference_age",
        "refresh_pattern",
        "reload_into_ims",
        "request_body_max_size",
        "request_size",
        "request_timeout",
        "shutdown_lifetime",
        "single_parent_bypass",
        "siteselect_timeout",
        "snmp_access",
        "snmp_incoming_address",
        "snmp_port",
        "source_ping",
        "ssl_proxy",
        "store_avg_object_size",
        "store_objects_per_bucket",
        "strip_query_terms",
        "swap_level1_dirs",
        "swap_level2_dirs",
        "tcp_incoming_address",
        "tcp_outgoing_address",
        "tcp_recv_bufsize",
        "test_reachability",
        "udp_hit_obj",
        "udp_hit_obj_size",
        "udp_incoming_address",
        "udp_outgoing_address",
        "unique_hostname",
        "unlinkd_program",
        "uri_whitespace",
        "useragent_log",
        "visible_hostname",
        "wais_relay",
        "wais_relay_host",
        "wais_relay_port",
    )

    # Option words, highlighted as Name.Constant.
    opts = (
        "proxy-only",
        "weight",
        "ttl",
        "no-query",
        "default",
        "round-robin",
        "multicast-responder",
        "on",
        "off",
        "all",
        "deny",
        "allow",
        "via",
        "parent",
        "no-digest",
        "heap",
        "lru",
        "realm",
        "children",
        "q1",
        "q2",
        "credentialsttl",
        "none",
        "disable",
        "offline_toggle",
        "diskd",
    )

    # Action names, highlighted as String.
    actions = (
        "shutdown",
        "info",
        "parameter",
        "server_list",
        "client_list",
        r'squid.conf',
    )

    # Matched with a 'stats/' prefix, highlighted as String.
    actions_stats = (
        "objects",
        "vm_objects",
        "utilization",
        "ipcache",
        "fqdncache",
        "dns",
        "redirector",
        "io",
        "reply_headers",
        "filedescriptors",
        "netdb",
    )

    # Matched with a 'log/' prefix and a trailing '=', highlighted as String.
    actions_log = ("status", "enable", "disable", "clear")

    # ACL type names, highlighted as Keyword.
    acls = (
        "url_regex",
        "urlpath_regex",
        "referer_regex",
        "port",
        "proto",
        "req_mime_type",
        "rep_mime_type",
        "method",
        "browser",
        "user",
        "src",
        "dst",
        "time",
        "dstdomain",
        "ident",
        "snmp_community",
    )

    # Pattern for an IP address; used below with an optional '/mask' or
    # '/prefix-length' suffix.
    ip_re = (
        r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|'
        r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|'
        r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|'
        r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}'
        r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|'
        r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|'
        r'[1-9]?\d)){3}))')

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#', Comment, 'comment'),
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(opts, prefix=r'\b', suffix=r'\b'), Name.Constant),
            # Actions
            (words(actions, prefix=r'\b', suffix=r'\b'), String),
            (words(actions_stats, prefix=r'stats/', suffix=r'\b'), String),
            (words(actions_log, prefix=r'log/', suffix=r'='), String),
            (words(acls, prefix=r'\b', suffix=r'\b'), Keyword),
            # An address, optionally followed by '/address' or '/number'.
            (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float),
            # A number, optionally a 'N-M' range or a percentage.
            (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number),
            (r'\S+', Text),
        ],
        # Entered after '#'; always pops after at most one rule.
        'comment': [
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.+', Comment, '#pop'),
            default('#pop'),
        ],
    }