Example #1
0
class ProtoBufLexer(RegexLexer):
    """
    Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_
    definition files.

    The ``root`` state handles whitespace, punctuation, comments, keywords,
    literals and names.  The ``package``, ``message`` and ``type`` states are
    entered after the matching declaration keyword and pop again as soon as
    one identifier has been consumed (or immediately, if none follows).

    .. versionadded:: 1.4
    """

    name = 'Protocol Buffer'
    aliases = ['protobuf', 'proto']
    filenames = ['*.proto']

    tokens = {
        'root': [
            (r'[ \t]+', Text),
            (r'[,;{}\[\]()<>]', Punctuation),
            # Comments.  The optional ``(\\\n)`` groups accept a
            # backslash-newline between the two characters of a comment
            # delimiter.
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
            # Field labels, options and other reserved words.
            (words(('import', 'option', 'optional', 'required', 'repeated',
                    'reserved', 'default', 'packed', 'ctype', 'extensions',
                    'to', 'max', 'rpc', 'returns', 'oneof'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword),
            # Scalar value types.
            (words(('int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                    'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'float',
                    'double', 'bool', 'string', 'bytes'),
                   suffix=r'\b'), Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            # Declaration keywords: the following identifier is classified by
            # the state that gets pushed here.
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'),
            (r'(message|extend)(\s+)', bygroups(Keyword.Declaration,
                                                Text), 'message'),
            (r'(enum|group|service)(\s+)', bygroups(Keyword.Declaration,
                                                    Text), 'type'),
            # String literals (non-greedy, no escape handling).
            (r'\".*?\"', String),
            (r'\'.*?\'', String),
            # Numeric literals; float forms are tried before integer forms.
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'(\-?(inf|nan))\b', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # NOTE(review): inside a character class ``+-=`` is a range from
            # '+' to '=', so this rule also matches characters such as ':',
            # '.' and '/' that no earlier rule consumed.  Escape the hyphen
            # only if those characters get a rule of their own.
            (r'[+-=]', Operator),
            # An identifier directly followed by '=' is tagged as an
            # attribute; any other identifier is a plain name.
            (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
             bygroups(Name.Attribute, Text, Operator)),
            (r'[a-zA-Z_][\w.]*', Name),
        ],
        # One-shot states: consume a single identifier, then return.
        'package': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'message': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'type': [
            (r'[a-zA-Z_]\w*', Name, '#pop'),
            default('#pop'),
        ],
    }
Example #2
0
class ScssLexer(RegexLexer):
    """
    For SCSS stylesheets.

    Only the SCSS-specific states are spelled out in the ``tokens`` literal;
    the remaining states (including ``value`` and ``selector``, which are
    extended below) are copied in from ``common_sass_tokens``.
    """

    name = 'SCSS'
    aliases = ['scss']
    filenames = ['*.scss']
    mimetypes = ['text/x-scss']

    # Case-insensitive matching; DOTALL lets '.' span newlines, which the
    # multi-line comment rule relies on.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # At-rules: each pushes the state that lexes its arguments.
            (r'@import', Keyword, 'value'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword,
                                              Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'),
            # Any other at-rule.
            (r'@[\w-]+', Keyword, 'selector'),
            # Variable assignment: ``$name:``.
            (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable,
                                                Operator), 'value'),
            # TODO: broken, and prone to infinite loops.
            # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
            # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
            # Anything else is lexed as a selector.
            default('selector'),
        ],
        # Within this literal 'attr' is referenced only by the disabled
        # rules above.
        'attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*:', Operator, 'value'),
            default('#pop'),
        ],
        'inline-comment': [
            # Comment text up to either an interpolation opener ('#{') or
            # the closing '*/'.
            (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Merge in the shared Sass/SCSS states.  Each rule list is shallow-copied
    # so that the extend() calls below do not mutate the shared lists.
    for group, common in iteritems(common_sass_tokens):
        tokens[group] = copy.copy(common)
    # In SCSS a newline is plain text, while ';', '{' and '}' end a value or
    # selector and return to the previous state.
    tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
    tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
Example #3
0
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    Rules that can be followed by a regex literal push the
    ``slashstartsregex`` state, which tries to read ``/.../flags`` and then
    pops back.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    # NOTE(review): these two lists are not referenced by the rule table
    # below; the keywords are spelled out in the regexes instead.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    # DOTALL lets '.' span newlines (needed by the multi-line comment rule);
    # MULTILINE makes '^' match at the start of every line.
    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            # A regex literal: body made of escapes, ordinary characters or
            # bracketed character classes, followed by optional g/i/m flags.
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            # No regex literal here: return without consuming anything.
            default('#pop')
        ],
        # NOTE(review): no rule in this table pushes 'badregex'.
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            # At the start of a line a '/' may begin a regex literal.
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            # Statement keywords ('fetch' appears twice in the alternation;
            # the duplicate has no effect).
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            # Type names and modifiers.  'extends' is already consumed by the
            # Keyword rule above, so it never reaches this rule.
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            # Built-in names.  'this' is already consumed by the Keyword rule
            # above, so it never reaches this rule.
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            # Identifiers; backslashes are accepted after the first character.
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
Example #4
0
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.

    Text mode is lexed in ``root``; ``inlinemath`` and ``displaymath`` share
    the rules of ``math`` and differ only in their closing delimiters.
    ``command`` picks up optional ``[...]`` arguments and a ``*`` directly
    after a control sequence.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # Rules shared by text mode and math mode.
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # Math openers come first so that '\[' and '\(' are not taken
            # for ordinary control sequences.
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # Control word or single-character control symbol.
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            # Backslash at the very end of a line.
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            # A lone '$' inside display math does not close it.
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return True when *text* opens with a typical TeX/LaTeX command."""
        markers = (
            "\\documentclass",
            "\\input",
            "\\documentstyle",
            "\\relax",
        )
        for marker in markers:
            head = text[:len(marker)]
            if head == marker:
                return True
Example #5
0
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    ``root`` recognises one field per line.  A few fields get dedicated
    states: ``description`` (multi-line text), ``maintainer`` (name and
    ``<address>``) and ``depends`` (package lists with optional version
    constraints lexed in ``depend_vers``).

    .. versionadded:: 0.9
    """
    name = 'Debian Control file'
    aliases = ['control', 'debcontrol']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            # Fields whose value is highlighted as a number.
            (r'^((?:Python-)?Version)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            # Any other ``Field: value`` line.
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            # The end-of-line variant must be tried first: the unanchored
            # rule matches the same text, so listing it first would make the
            # '#pop' below unreachable.
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r'<[^>]+>', Generic.Strong),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )(\S+)',
             bygroups(Text, String, Name, Name.Class)),
            # Remainder of the ``Description:`` line.
            (r':.*\n', Generic.Strong),
            # Continuation lines start with a space.
            (r' .*\n', Text),
            default('#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            # Substitution variables such as ``${shlibs:Depends}``.
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text,
                                                  Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[})]\s*$', Text, '#pop'),
            (r'\}', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
            (r'\[.*?\]', Name.Entity),
        ],
        'depend_vers': [
            # '),' continues the dependency list; ')' followed by anything
            # else leaves both this state and 'depends'.
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number)),
        ],
    }
Example #6
0
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    A line starting with ``.`` is lexed as a request (``request`` state);
    every other line is lexed as text (``textline`` state).  Both states
    share the backslash escapes defined in ``escapes``.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # Backslash-newline: the following line is lexed as a request.
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is roff source from its first characters.

        Returns ``False`` when the text does not start with ``.``, ``True``
        for a leading ``.\\"`` comment or ``.TH `` request, ``0.9`` for a dot
        followed by two alphanumeric characters and whitespace, and ``None``
        otherwise.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice rather than index: ``text[3]`` raises IndexError for inputs
        # shorter than four characters (e.g. ".ab"), whereas an empty slice
        # simply fails the isspace() test.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
Example #7
0
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    Rules after which a ``/`` starts a regex literal rather than a division
    push the ``slashstartsregex`` state, which reads the literal (or gives up)
    and pops back.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            # A complete regex literal.
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A '/' that does not form a valid literal: swap to 'badregex',
            # which only waits for the end of the line.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # At the start of a line a '/' may begin a regex literal.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Built-in functions and statements.
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Built-in variables and the BEGIN/END patterns.  The output
            # record separator is ``ORS``; awk has no ``ORFS`` variable.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
Example #8
0
class CapnProtoLexer(RegexLexer):
    """
    For `Cap'n Proto <https://capnproto.org>`_ source.

    After ``:``, ``=`` and ``$`` the lexer enters the ``type``,
    ``expression`` and ``annotation`` states respectively.  Each of them has
    a companion state (``parentype``, ``parenexp``, ``annexp``) that tracks
    nested ``(...)`` / ``[...]`` groups via ``#push`` / ``#pop``.

    .. versionadded:: 2.2
    """
    name = 'Cap\'n Proto'
    filenames = ['*.capnp']
    aliases = ['capnp']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # Ordinals and ids such as ``@0`` or ``@0xdeadbeef``.
            (r'@[0-9a-zA-Z]*', Name.Decorator),
            (r'=', Literal, 'expression'),
            (r':', Name.Class, 'type'),
            (r'\$', Name.Attribute, 'annotation'),
            (r'(struct|enum|interface|union|import|using|const|annotation|'
             r'extends|in|of|on|as|with|from|fixed)\b', Keyword),
            (r'[\w.]+', Name),
            # Everything that cannot start one of the rules above.
            (r'[^#@=:$\w]+', Text),
        ],
        'type': [
            # In ``[^][...]`` the leading ']' is a literal member of the
            # negated character class.
            (r'[^][=;,(){}$]+', Name.Class),
            (r'[\[(]', Name.Class, 'parentype'),
            default('#pop'),
        ],
        'parentype': [
            (r'[^][;()]+', Name.Class),
            (r'[\[(]', Name.Class, '#push'),
            (r'[])]', Name.Class, '#pop'),
            default('#pop'),
        ],
        'expression': [
            (r'[^][;,(){}$]+', Literal),
            (r'[\[(]', Literal, 'parenexp'),
            default('#pop'),
        ],
        'parenexp': [
            (r'[^][;()]+', Literal),
            (r'[\[(]', Literal, '#push'),
            (r'[])]', Literal, '#pop'),
            default('#pop'),
        ],
        'annotation': [
            (r'[^][;,(){}=:]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, 'annexp'),
            default('#pop'),
        ],
        'annexp': [
            (r'[^][;()]+', Name.Attribute),
            (r'[\[(]', Name.Attribute, '#push'),
            (r'[])]', Name.Attribute, '#pop'),
            default('#pop'),
        ],
    }
Example #9
0
    def gen_elixir_sigil_rules():
        """Build the ``sigils`` state and one set of states per delimiter.

        Returns a dict mapping state names to rule lists.  Lower-case sigils
        lead to the interpolating ``<name>-intp`` state, upper-case sigils to
        the ``<name>-no-intp`` state.  Heredoc sigils additionally stack a
        ``<name>-end`` state that picks up trailing modifier letters.
        """
        token = String.Other

        # heredocs have slightly different rules
        heredoc_delims = (
            (r'"""', 'triquot'),
            (r"'''", 'triapos'),
        )

        # all valid sigil terminators (excluding heredocs)
        paired_delims = (
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'<', r'>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        )

        sigil_rules = []
        states = {'sigils': sigil_rules}

        # Heredoc rules are registered first so that a triple quote is not
        # consumed by the single-quote rules added afterwards.
        for term, name in heredoc_delims:
            end_state = name + '-end'
            intp_state = name + '-intp'
            raw_state = name + '-no-intp'

            sigil_rules.append(
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (end_state, intp_state)))
            sigil_rules.append(
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (end_state, raw_state)))

            states[end_state] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[intp_state] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            states[raw_state] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, name in paired_delims:
            intp_state = name + '-intp'
            raw_state = name + '-no-intp'

            sigil_rules.append((r'~[a-z]' + lterm, token, intp_state))
            sigil_rules.append((r'~[A-Z]' + lterm, token, raw_state))

            states[intp_state] = gen_elixir_sigstr_rules(rterm, token)
            states[raw_state] = gen_elixir_sigstr_rules(
                rterm, token, interpol=False)

        return states
Example #10
0
class CirruLexer(RegexLexer):
    r"""
    Syntax rules of Cirru can be found at:
    http://cirru.org/

    * using ``()`` for expressions, but restricted in a same line
    * using ``""`` for strings, with ``\`` for escaping chars
    * using ``$`` as folding operator
    * using ``,`` as unfolding operator
    * using indentations for nested blocks

    Every line is lexed in the ``line`` state; the first token of a line and
    of each ``(...)`` / ``$`` expression is lexed in the ``function`` state.

    .. versionadded:: 2.0
    """

    name = 'Cirru'
    aliases = ['cirru']
    filenames = ['*.cirru']
    mimetypes = ['text/x-cirru']
    flags = re.MULTILINE

    tokens = {
        'string': [
            (r'[^"\\\n]', String),
            (r'\\', String.Escape, 'escape'),
            (r'"', String, '#pop'),
        ],
        # Exactly one character after a backslash.
        'escape': [
            (r'.', String.Escape, '#pop'),
        ],
        # Head position of an expression: whatever token comes first ends
        # this state, except leading spaces and a nested '('.
        'function': [
            (r'\,', Operator, '#pop'),
            (r'[^\s"()]+', Name.Function, '#pop'),
            (r'\)', Operator, '#pop'),
            # End of line: leave without consuming the newline.
            (r'(?=\n)', Text, '#pop'),
            (r'\(', Operator, '#push'),
            # A string in head position replaces this state with 'string'.
            (r'"', String, ('#pop', 'string')),
            (r'[ ]+', Text.Whitespace),
        ],
        # '$' counts as the folding operator only when it is not adjacent to
        # a word character.
        'line': [(r'(?<!\w)\$(?!\w)', Operator, 'function'),
                 (r'\(', Operator, 'function'), (r'\)', Operator),
                 (r'\n', Text, '#pop'), (r'"', String, 'string'),
                 (r'[ ]+', Text.Whitespace), (r'[+-]?[\d.]+\b', Number),
                 (r'[^\s"()]+', Name.Variable)],
        'root': [
            (r'^\n+', Text.Whitespace),
            # Start a new line: push 'line', then 'function' on top of it.
            default(('line', 'function')),
        ]
    }
Example #11
0
class BSTLexer(RegexLexer):
    """
    A lexer for BibTeX bibliography styles.

    Each top-level command is followed by a fixed number of ``{...}``
    groups; the command rules push one ``group`` state per expected group.
    Inside a group, ``body`` lexes the contents and ``group-end`` consumes
    the closing brace.

    .. versionadded:: 2.2
    """

    name = 'BST'
    aliases = ['bst', 'bst-pybtex']
    filenames = ['*.bst']
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            include('whitespace'),
            # Commands without arguments.
            (words(['read', 'sort']), Keyword),
            # Commands with one, two and three brace groups respectively.
            (words(['execute', 'integers', 'iterate', 'reverse',
                    'strings']), Keyword, 'group'),
            (words(['function', 'macro']), Keyword, ('group', 'group')),
            (words(['entry']), Keyword, ('group', 'group', 'group')),
        ],
        'group': [
            include('whitespace'),
            # Replace this state with 'group-end' and lex the contents in
            # 'body' first.
            (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
        ],
        'group-end': [
            include('whitespace'),
            (r'\}', Punctuation, '#pop'),
        ],
        'body': [
            include('whitespace'),
            # 'name  -- quoted function name
            (r"\'[^#\"\{\}\s]+", Name.Function),
            # name$ -- built-in function
            (r'[^#\"\{\}\s]+\$', Name.Builtin),
            (r'[^#\"\{\}\s]+', Name.Variable),
            (r'"[^\"]*"', String),
            (r'#-?\d+', Number),
            # Nested group.
            (r'\{', Punctuation, ('group-end', 'body')),
            default('#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
            # ``Comment.Single`` is the standard token for line comments;
            # ``Comment.SingleLine`` is not one of the built-in token types.
            ('%.*?$', Comment.Single),
        ],
    }
Example #12
0
class FortranFixedLexer(RegexLexer):
    """
    Lexer for fixed format Fortran.

    Each source line is split into the label field (first five characters),
    the continuation character and the statement field.  The statement field
    is delegated to ``FortranLexer``; anything after its first 66 characters
    is treated as a comment.

    .. versionadded:: 2.1
    """
    name = 'FortranFixed'
    aliases = ['fortranfixed']
    filenames = ['*.f', '*.F']

    flags = re.IGNORECASE

    def _lex_fortran(self, match, ctx=None):
        """Lex a line just as free form fortran without line break.

        Used as a ``bygroups`` callback.  Yields ``(index, token, value)``
        triples with ``index`` relative to the whole input text.
        """
        lexer = FortranLexer()
        # The sub-lexer reports positions relative to the text it was given;
        # shift them by the start of the matched group.
        offset = match.start()
        text = match.group(0) + "\n"
        for index, token, value in lexer.get_tokens_unprocessed(text):
            # Drop the newline that was appended only for the sub-lexer.
            value = value.replace('\n', '')
            if value != '':
                yield offset + index, token, value

    tokens = {
        'root': [
            # Comment lines: 'C' or '*' in the first column.
            (r'[C*].*\n', Comment),
            (r'#.*\n', Comment.Preproc),
            (r' {0,4}!.*\n', Comment),
            # Label field, followed by the continuation column.
            (r'(.{5})', Name.Label, 'cont-char'),
            # Lines shorter than five characters.
            (r'.*\n', using(FortranLexer)),
        ],
        'cont-char': [
            (' ', Text, 'code'),
            ('0', Comment, 'code'),
            ('.', Generic.Strong, 'code'),
        ],
        # 'code' is only reachable via root -> cont-char -> code, so popping
        # two states returns to 'root'.  Pushing 'root' instead would grow
        # the state stack by three entries for every source line.
        'code': [
            (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment,
                                          Text), '#pop:2'),
            (r'(.*)(\n)', bygroups(_lex_fortran, Text), '#pop:2'),
            default('#pop:2'),
        ]
    }
Example #13
0
class FantomLexer(RegexLexer):
    """
    For Fantom source code.

    Several rules are built with the helper ``s``, which expands the
    placeholders ``$pod``, ``$eos``, ``$id`` and ``$type`` to regex
    fragments.  Type expressions captured by ``$type`` are re-lexed in the
    ``inType`` state.

    .. versionadded:: 1.5
    """
    name = 'Fantom'
    aliases = ['fan']
    filenames = ['*.fan']
    mimetypes = ['application/x-fantom']

    # often used regexes
    def s(str):
        """Expand ``$pod``, ``$eos``, ``$id`` and ``$type`` in *str*."""
        return Template(str).substitute(
            dict(
                pod=r'[\"\w\.]+',
                eos=r'\n|;',
                id=r'[a-zA-Z_]\w*',
                # all chars which can be part of type definition. Starts with
                # either letter, or [ (maps), or | (funcs)
                type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?',
            ))

    tokens = {
        'comments': [
            (r'(?s)/\*.*?\*/', Comment.Multiline),  # Multiline
            (r'//.*?\n', Comment.Single),  # Single line
            # TODO: highlight references in fandocs
            (r'\*\*.*?\n', Comment.Special),  # Fandoc
            (r'#.*\n', Comment.Single)  # Shell-style
        ],
        'literals': [
            (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number),  # Duration
            (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)',
             Number),  # Duration with dot
            (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float),  # Float/Decimal
            (r'\b-?0x[0-9a-fA-F_]+', Number.Hex),  # Hex
            (r'\b-?[\d_]+', Number.Integer),  # Int
            (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),  # Char
            (r'"', Punctuation, 'insideStr'),  # Opening quote
            (r'`', Punctuation, 'insideUri'),  # Opening accent
            (r'\b(true|false|null)\b', Keyword.Constant),  # Bool & null
            (
                r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)',  # DSL
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         String, Punctuation)),
            (
                r'(?:(\w+)(::))?(\w+)?(#)(\w+)?',  # Type/slot literal
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         Name.Function)),
            (r'\[,\]', Literal),  # Empty list
            (
                s(r'($type)(\[,\])'),  # Typed empty list
                bygroups(using(this, state='inType'), Literal)),
            (r'\[:\]', Literal),  # Empty Map
            (s(r'($type)(\[:\])'),  # Typed empty map
             bygroups(using(this, state='inType'), Literal)),
        ],
        'insideStr': [
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'"', Punctuation, '#pop'),  # Closing quot
            (r'.', String)  # String content
        ],
        'insideUri': [  # TODO: remove copy/paste str/uri
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'`', Punctuation, '#pop'),  # Closing tick
            (r'.', String.Backtick)  # URI content
        ],
        # The keyword groups below overlap; all of them map to ``Keyword``.
        'protectionKeywords': [
            (r'\b(public|protected|private|internal)\b', Keyword),
        ],
        'typeKeywords': [
            (r'\b(abstract|final|const|native|facet|enum)\b', Keyword),
        ],
        'methodKeywords': [
            (r'\b(abstract|native|once|override|static|virtual|final)\b',
             Keyword),
        ],
        'fieldKeywords':
        [(r'\b(abstract|const|final|native|override|static|virtual|'
          r'readonly)\b', Keyword)],
        'otherKeywords': [
            (words(('try', 'catch', 'throw', 'finally', 'for', 'if', 'else',
                    'while', 'as', 'is', 'isnot', 'switch', 'case', 'default',
                    'continue', 'break', 'do', 'return', 'get', 'set'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword),
            (r'\b(it|this|super)\b', Name.Builtin.Pseudo),
        ],
        'operators':
        [(r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator)],
        # Used through ``using(this, state='inType')`` to highlight the
        # pieces of a type expression.
        'inType': [
            (r'[\[\]|\->:?]', Punctuation),
            (s(r'$id'), Name.Class),
            default('#pop'),
        ],
        'root': [
            include('comments'),
            include('protectionKeywords'),
            include('typeKeywords'),
            include('methodKeywords'),
            include('fieldKeywords'),
            include('literals'),
            include('otherKeywords'),
            include('operators'),
            (r'using\b', Keyword.Namespace, 'using'),  # Using stmt
            (r'@\w+', Name.Decorator, 'facet'),  # Symbol
            (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class),
             'inheritance'),  # Inheritance list

            # Type var := val
            (s(r'($type)([ \t]+)($id)(\s*)(:=)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Operator)),

            # var := val
            (s(r'($id)(\s*)(:=)'), bygroups(Name.Variable, Text, Operator)),

            # .someId( or ->someId( ###
            (s(r'(\.|(?:\->))($id)(\s*)(\()'),
             bygroups(Operator, Name.Function, Text,
                      Punctuation), 'insideParen'),

            # .someId  or ->someId
            (s(r'(\.|(?:\->))($id)'), bygroups(Operator, Name.Function)),

            # new makeXXX (
            (r'(new)(\s+)(make\w*)(\s*)(\()',
             bygroups(Keyword, Text, Name.Function, Text,
                      Punctuation), 'insideMethodDeclArgs'),

            # Type name (
            (
                s(r'($type)([ \t]+)'  # Return type and whitespace
                  r'($id)(\s*)(\()'),  # method name + open brace
                bygroups(using(this, state='inType'), Text, Name.Function,
                         Text, Punctuation),
                'insideMethodDeclArgs'),

            # ArgType argName,
            (s(r'($type)(\s+)($id)(\s*)(,)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # ArgType argName)
            # Covered in 'insideParen' state

            # ArgType argName -> ArgType|
            (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'),
             bygroups(using(this, state='inType'), Text,
                      Name.Variable, Text, Punctuation, Text,
                      using(this, state='inType'), Punctuation)),

            # ArgType argName|
            (s(r'($type)(\s+)($id)(\s*)(\|)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # Type var
            (s(r'($type)([ \t]+)($id)'),
             bygroups(using(this, state='inType'), Text, Name.Variable)),
            (r'\(', Punctuation, 'insideParen'),
            (r'\{', Punctuation, 'insideBrace'),
            # Fallback: any single character no other rule matched.
            (r'.', Text)
        ],
        # The bracket states reuse the 'root' rules and pop on their closing
        # delimiter.
        'insideParen': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'insideMethodDeclArgs': [
            (r'\)', Punctuation, '#pop'),
            # Last argument: ArgType argName)
            (s(r'($type)(\s+)($id)(\s*)(\))'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation), '#pop'),
            include('root'),
        ],
        'insideBrace': [
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],
        # Base type list after ``class Name`` / ``mixin Name``; ends at '{'.
        'inheritance': [
            (r'\s+', Text),  # Whitespace
            (r':|,', Punctuation),
            (r'(?:(\w+)(::))?(\w+)',
             bygroups(Name.Namespace, Punctuation, Name.Class)),
            (r'\{', Punctuation, '#pop')
        ],
        'using': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(\[)(\w+)(\])',
             bygroups(Punctuation, Comment.Special, Punctuation)),  # ffi
            (r'(\")?([\w.]+)(\")?',
             bygroups(Punctuation, Name.Namespace, Punctuation)),  # podname
            (r'::', Punctuation, 'usingClass'),
            default('#pop')
        ],
        'usingClass': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text,
                                         Name.Class), '#pop:2'),
            (r'[\w$]+', Name.Class),
            default('#pop:2')  # jump out to root state
        ],
        # After ``@Name``: an optional ``{ field = value ... }`` block.
        'facet': [(r'\s+', Text), (r'\{', Punctuation, 'facetFields'),
                  default('#pop')],
        'facetFields': [
            include('comments'),
            include('literals'),
            include('operators'), (r'\s+', Text),
            (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)),
            (r'\}', Punctuation, '#pop'), (r'.', Text)
        ],
    }
Example #14
0
class ValaLexer(RegexLexer):
    """
    For Vala source code with preprocessor directives.

    Rules inside a state are tried in the order they are listed, so more
    specific patterns are placed before the general ones that would
    otherwise claim the same leading characters.

    .. versionadded:: 1.1
    """
    name = 'Vala'
    aliases = ['vala', 'vapi']
    filenames = ['*.vala', '*.vapi']
    mimetypes = ['text/x-vala']

    tokens = {
        # Blank space, comments and the opening of a disabled ``#if 0`` region.
        'whitespace': [
            (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        # Everything that can appear inside a statement.
        'statements': [
            # Verbatim strings have to be tried before the plain string rule
            # below: that rule matches a single '"' and would otherwise
            # consume the first quote of every '"""' opener, so a verbatim
            # string spanning several lines would never be recognised.
            (r'(?s)""".*?"""', String),  # verbatim strings
            (r'[L@]?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])',
             bygroups(Punctuation, Name.Decorator, Punctuation)),
            # TODO: "correctly" parse complex code attributes
            (r'(\[)(CCode|(?:Integer|Floating)Type)',
             bygroups(Punctuation, Name.Decorator)),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('as', 'base', 'break', 'case', 'catch', 'construct',
                 'continue', 'default', 'delete', 'do', 'else', 'enum',
                 'finally', 'for', 'foreach', 'get', 'if', 'in', 'is', 'lock',
                 'new', 'out', 'params', 'return', 'set', 'sizeof', 'switch',
                 'this', 'throw', 'try', 'typeof', 'while', 'yield'),
                suffix=r'\b'), Keyword),
            (words(('abstract', 'const', 'delegate', 'dynamic', 'ensures',
                    'extern', 'inline', 'internal', 'override', 'owned',
                    'private', 'protected', 'public', 'ref', 'requires',
                    'signal', 'static', 'throws', 'unowned', 'var', 'virtual',
                    'volatile', 'weak', 'yields'),
                   suffix=r'\b'), Keyword.Declaration),
            # The identifier after these keywords is lexed by a dedicated
            # one-rule state ('namespace' / 'class') that pops right away.
            (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace,
                                                 Text), 'namespace'),
            (r'(class|errordomain|interface|struct)(\s+)',
             bygroups(Keyword.Declaration, Text), 'class'),
            (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)),
            # void is an actual keyword, others are in glib-2.0.vapi
            (words(
                ('void', 'bool', 'char', 'double', 'float', 'int', 'int8',
                 'int16', 'int32', 'int64', 'long', 'short', 'size_t',
                 'ssize_t', 'string', 'time_t', 'uchar', 'uint', 'uint8',
                 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'),
                suffix=r'\b'), Keyword.Type),
            (r'(true|false|null)\b', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Entry state: skip whitespace, then lex one statement at a time.
        'root': [
            include('whitespace'),
            default('statement'),
        ],
        # A single statement; the terminating ';' returns to 'root'.
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        # Body of a regular (non-verbatim) string literal.
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        # Inside ``#if 0``: nested #if pushes, #else/#elif/#endif pops, and
        # every other line is emitted as a comment.
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'class': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')],
        'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')],
    }
Example #15
0
class FSharpLexer(RegexLexer):
    """
    For the `F# language <https://fsharp.org/>`_ (version 3.0).

    The word lists below are joined into regex alternations, so their order
    matters: a longer alternative must come before any shorter one that is
    its prefix (``'do!'`` before ``'do'``, ``'->'`` before ``'-'``, ...).

    .. versionadded:: 1.5
    """

    name = 'F#'
    aliases = ['fsharp', 'f#']
    filenames = ['*.fs', '*.fsi']
    mimetypes = ['text/x-fsharp']

    keywords = [
        'abstract', 'as', 'assert', 'base', 'begin', 'class', 'default',
        'delegate', 'do!', 'do', 'done', 'downcast', 'downto', 'elif', 'else',
        'end', 'exception', 'extern', 'false', 'finally', 'for', 'function',
        'fun', 'global', 'if', 'inherit', 'inline', 'interface', 'internal',
        'in', 'lazy', 'let!', 'let', 'match', 'member', 'module', 'mutable',
        'namespace', 'new', 'null', 'of', 'open', 'override', 'private',
        'public', 'rec', 'return!', 'return', 'select', 'static', 'struct',
        'then', 'to', 'true', 'try', 'type', 'upcast', 'use!', 'use', 'val',
        'void', 'when', 'while', 'with', 'yield!', 'yield',
    ]
    # Reserved words; cannot hurt to color them as keywords too.
    keywords += [
        'atomic', 'break', 'checked', 'component', 'const', 'constraint',
        'constructor', 'continue', 'eager', 'event', 'external', 'fixed',
        'functor', 'include', 'method', 'mixin', 'object', 'parallel',
        'process', 'protected', 'pure', 'sealed', 'tailcall', 'trait',
        'virtual', 'volatile',
    ]
    # Symbolic keywords, already written as regex fragments.
    keyopts = [
        '!=', '#', '&&', '&', r'\(', r'\)', r'\*', r'\+', ',', r'-\.', '->',
        '-', r'\.\.', r'\.', '::', ':=', ':>', ':', ';;', ';', '<-', r'<\]',
        '<', r'>\]', '>', r'\?\?', r'\?', r'\[<', r'\[\|', r'\[', r'\]', '_',
        '`', r'\{', r'\|\]', r'\|', r'\}', '~', '<@@', '<@', '=', '@>', '@@>',
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'or', 'not']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = [
        'sbyte', 'byte', 'char', 'nativeint', 'unativeint', 'float32',
        'single', 'float', 'double', 'int8', 'uint8', 'int16', 'uint16',
        'int32', 'uint32', 'int64', 'uint64', 'decimal', 'unit', 'bool',
        'string', 'list', 'exn', 'obj', 'enum',
    ]

    # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or
    # http://fsharp.org/about/files/spec.pdf for reference.  Good luck.

    tokens = {
        # Escapes shared by the regular string state.
        'escape-sequence': [
            (r'\\[\\"\'ntbrafv]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\U[0-9a-fA-F]{8}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'///.*?\n', String.Doc),
            (r'//.*?\n', Comment.Single),
            (r'\(\*(?!\))', Comment, 'comment'),
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'\b(open|module)(\s+)([\w.]+)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'\b(let!?)(\s+)(\w+)', bygroups(Keyword, Text, Name.Variable)),
            (r'\b(type)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class)),
            (r'\b(member|override)(\s+)(\w+)(\.)(\w+)',
             bygroups(Keyword, Text, Name, Punctuation, Name.Function)),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name),
            # Preprocessor lines must be tried before the operator rules:
            # '#' is one of the keyopts, so placed after them this rule
            # could never match and '#if' was split into '#' and 'if'.
            (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n',
             Comment.Preproc),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            # Prefixed and exponent forms come before the plain integer
            # rule; the integer rule matches the leading digit of all of
            # them and would otherwise leave 'x1F' / 'e10' to be lexed as
            # names.
            (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex),
            (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct),
            (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin),
            # The decimal point is escaped so that only a literal '.' can
            # separate the integral and fractional digits.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)[fFmM]?',
             Number.Float),
            (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            # Kept for compatibility; the '@"' and '"' rules above already
            # match every position where this pattern could.
            (r'@?"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # After a capitalised name followed by '.': walk the dotted path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            # e.g. dictionary index access
            default('#pop'),
        ],
        # (* ... *) comments nest, and may contain string literals.
        'comment': [
            (r'[^(*)@"]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            # comments cannot be closed within strings in comments
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'[(*)@]', Comment),
        ],
        # Regular string; an optional 'B' suffix follows the closing quote.
        'string': [
            (r'[^\\"]+', String),
            include('escape-sequence'),
            (r'\\\n', String),
            (r'\n', String),  # newlines are allowed in any string
            (r'"B?', String, '#pop'),
        ],
        # Verbatim @"..." string; a doubled quote stays inside the string.
        'lstring': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'""', String),
            (r'"B?', String, '#pop'),
        ],
        # Triple-quoted string; single quotes inside it are plain content.
        'tqs': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'"""B?', String, '#pop'),
            (r'"', String),
        ],
    }
Example #16
0
class VbNetLexer(RegexLexer):
    """
    For
    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.

    All patterns are compiled with ``re.MULTILINE | re.IGNORECASE`` (see
    ``flags``), so keyword spellings in the tables below match regardless of
    case.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba']  # (?)

    # Identifier pattern: one start character (underscore or one of the
    # listed Unicode categories) followed by any number of continuation
    # characters.  Presumably ``uni.combine`` returns the character-class
    # contents for the named categories -- confirm against the ``uni`` module.
    uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \
               '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                 'Cf', 'Mn', 'Mc') + ']*'

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            # Attribute block such as <Serializable()> at the start of a line.
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            (r'\n', Text),
            # Both comment forms run to the end of the line.
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#Else|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc),
            (r'[(){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            # General keywords; the lookbehind keeps member names written
            # after a '.' from being treated as keywords.
            (words(
                ('AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case',
                 'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl',
                 'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort',
                 'CSng', 'CStr', 'CType', 'CUInt', 'CULng', 'CUShort',
                 'Declare', 'Default', 'Delegate', 'DirectCast', 'Do', 'Each',
                 'Else', 'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit',
                 'False', 'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub',
                 'GoTo', 'Handles', 'If', 'Implements', 'Inherits',
                 'Interface', 'Let', 'Lib', 'Loop', 'Me', 'MustInherit',
                 'MustOverride', 'MyBase', 'MyClass', 'Narrowing', 'New',
                 'Next', 'Not', 'Nothing', 'NotInheritable', 'NotOverridable',
                 'Of', 'On', 'Operator', 'Option', 'Optional', 'Overloads',
                 'Overridable', 'Overrides', 'ParamArray', 'Partial',
                 'Private', 'Protected', 'Public', 'RaiseEvent', 'ReadOnly',
                 'ReDim', 'RemoveHandler', 'Resume', 'Return', 'Select', 'Set',
                 'Shadows', 'Shared', 'Single', 'Static', 'Step', 'Stop',
                 'SyncLock', 'Then', 'Throw', 'To', 'True', 'Try', 'TryCast',
                 'Wend', 'Using', 'When', 'While', 'Widening', 'With',
                 'WithEvents', 'WriteOnly'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Keyword),
            # These keywords switch to a helper state that classifies the
            # token that follows them.
            (r'(?<!\.)End\b', Keyword, 'end'),
            (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
            (r'(?<!\.)(Function|Sub|Property)(\s+)', bygroups(Keyword, Text),
             'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)', bygroups(Keyword, Text),
             'classname'),
            (r'(?<!\.)(Module|Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>\[\]]', Operator),
            ('"', String, 'string'),
            (r'_\n', Text),  # Line continuation  (must be before Name)
            # Identifier with an optional trailing type character.
            (uni_name + '[%&@!#$]?', Name),
            # NOTE(review): '#' is also in the Punctuation class listed
            # earlier in this state, and '&' is in the Operator rule above;
            # if rules are tried in listed order, the date rule and the
            # &H / &O rules below may never get a chance to match -- confirm.
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)(F[+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer),
        ],
        # Inside a string literal: '""' is an embedded quote, and the closing
        # quote may carry a 'C' suffix.
        'string': [
            (r'""', String),
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        # After Dim/Const: an identifier here is a variable name.
        'dim': [
            (uni_name, Name.Variable, '#pop'),
            default('#pop'),  # any other syntax
        ],
        # After Function/Sub/Property: the identifier is the routine name.
        'funcname': [
            (uni_name, Name.Function, '#pop'),
        ],
        # After Class/Structure/Enum: the identifier is the type name.
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        # After Module/Namespace/Imports: a dotted name, one piece at a time.
        'namespace': [
            (uni_name, Name.Namespace),
            (r'\.', Name.Namespace),
            default('#pop'),
        ],
        # After End: an optional block keyword (End Sub, End Class, ...).
        'end': [
            (r'\s+', Text),
            (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b',
             Keyword, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        """
        Return 0.5 when some line of *text* starts (after optional
        whitespace) with ``#If``, ``Module`` or ``Namespace``.

        The search passes only ``re.MULTILINE``, so unlike the token rules it
        is case-sensitive.  When nothing matches, the function falls off the
        end and returns ``None``.
        """
        if re.search(r'^\s*(#If|Module|Namespace)', text, re.MULTILINE):
            return 0.5
Example #17
0
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      .. versionadded:: 0.8
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Identifier regex for each supported ``unicodelevel``.
    levels = {
        'none':
        r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    # One complete token table per level; ``__init__`` selects among them.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                # The hexadecimal alternative is listed first: the decimal
                # one matches the leading '0' of '0x..' and would otherwise
                # always win.  The decimal part takes a whole run of digits,
                # and the exponent sign is optional so '1e5' is one token.
                (r"0[xX][0-9a-fA-F]+[Ll]?|"
                 r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|async|await|base|break|by|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|let|lock|new|null|on|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
                 r'descending|from|group|into|orderby|select|thenby|where|'
                 r'join|equals)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
                 r'sbyte|short|string|uint|ulong|ushort|var)\b\??',
                 Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            # After 'class' / 'struct': the next identifier is the type name.
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            # After 'namespace' / 'using': a dotted name, unless a '('
            # follows (the ``using (resource)`` statement form).
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'),
            ]
        }

    def __init__(self, **options):
        """
        Select the token table for the requested ``unicodelevel`` option
        (default ``'basic'``) and finish the regular lexer initialisation.

        A table that is not yet present in ``_all_tokens`` is built through
        ``process_tokendef`` first.
        """
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
Example #18
0
class SassLexer(ExtendedRegexLexer):
    """
    For Sass stylesheets.

    The states defined here cover the line-oriented parts of the syntax;
    further token groups are copied in from ``common_sass_tokens`` after the
    table literal, and the ``value`` and ``selector`` groups among them are
    extended so that a newline returns to ``root``.

    .. versionadded:: 1.3
    """

    name = 'Sass'
    aliases = ['sass']
    filenames = ['*.sass']
    mimetypes = ['text/x-sass']

    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        # Start of every line: blank lines are plain text, anything else has
        # its leading blanks handed to the ``_indentation`` callback
        # (defined elsewhere in this file).
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        # First token of a line's content decides which state handles the
        # rest of the line.
        'content': [
            # ``_starts_block`` is defined elsewhere; presumably it records
            # the comment's indentation so following lines join it -- confirm.
            (r'//[^\n]*', _starts_block(Comment.Single,
                                        'single-comment'), 'root'),
            (r'/\*[^\n]*', _starts_block(Comment.Multiline,
                                         'multi-comment'), 'root'),
            (r'@import', Keyword, 'import'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword,
                                              Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'@[\w-]+', Keyword, 'selector'),
            # '=name' and '+name' are tokenised like @mixin / @include names.
            (r'=[\w-]+', Name.Function, 'value'),
            (r'\+[\w-]+', Name.Decorator, 'value'),
            # Variable assignment: !name or $name followed by '=', '||=' or ':'.
            (r'([!$][\w-]\w*)([ \t]*(?:(?:\|\|)?=|:))',
             bygroups(Name.Variable, Operator), 'value'),
            # ':prop value' form.
            (r':', Name.Attribute, 'old-style-attr'),
            # 'prop: value' form, detected with a lookahead only.
            (r'(?=.+?[=:]([^a-z]|$))', Name.Attribute, 'new-style-attr'),
            default('selector'),
        ],
        # Continuation lines of a '//' comment.
        'single-comment': [
            (r'.+', Comment.Single),
            (r'\n', Text, 'root'),
        ],
        # Continuation lines of a '/*' comment.
        'multi-comment': [
            (r'.+', Comment.Multiline),
            (r'\n', Text, 'root'),
        ],
        # Arguments of @import, up to the end of the line.
        'import': [
            (r'[ \t]+', Text),
            (r'\S+', String),
            (r'\n', Text, 'root'),
        ],
        # Property name after a leading ':'; anything else starts the value.
        'old-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*=', Operator, 'value'),
            default('value'),
        ],
        # Property name before ':' or '='.
        'new-style-attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*[=:]', Operator, 'value'),
        ],
        # Body of an inline /* ... */ comment, which may hold #{...}.
        'inline-comment': [
            (r"(\\#|#(?=[^\n{])|\*(?=[^\n/])|[^\n#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Add the token groups from ``common_sass_tokens``.  Each group is
    # copied with ``copy.copy`` so the appends below act on this class's
    # copy rather than on the object stored in ``common_sass_tokens``.
    for group, common in iteritems(common_sass_tokens):
        tokens[group] = copy.copy(common)
    # A newline ends a value or a selector and returns to 'root'.
    tokens['value'].append((r'\n', Text, 'root'))
    tokens['selector'].append((r'\n', Text, 'root'))
Example #19
0
class NotmuchLexer(RegexLexer):
    """
    For `Notmuch <https://notmuchmail.org/>`_ email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        """
        Token callback that lexes a message part with a nested lexer.

        The lexer named by ``self.body_lexer`` is used when that option is
        set; otherwise one is guessed from the stripped part text.  If
        either lookup raises ``ClassNotFound`` the plain ``'text'`` lexer is
        used instead.

        Yields ``(index, tokentype, value)`` tuples whose index is relative
        to the whole input, like the tokens of every other rule.
        """
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        # The nested lexer counts positions from the start of ``code``;
        # shift them by where the captured group begins in the full text.
        offset = match.start(1)
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield offset + index, token, value

    # Sections are delimited by form-feed-prefixed markers such as
    # "\fmessage{" ... "\fmessage}".
    tokens = {
        'root': [
            (r'\fmessage{\s*', Keyword, ('message', 'message-attr')),
        ],
        # Attributes on the "\fmessage{" line itself.
        'message-attr': [
            (r'(\s*id:\s*)([^\s]+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)', bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage}\n', Keyword, '#pop'),
            (r'\fheader{\n', Keyword, 'header'),
            (r'\fbody{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            # Summary line: free text followed by two parenthesised groups.
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart{\n', Keyword, 'part'),
            (r'\f(part|attachment){\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody}\n', Keyword, '#pop'),
        ],
        # Attributes on a "\fpart{" / "\fattachment{" line.
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)}\n', Keyword, '#pop'),
            # Parts nest: re-enter this state for the inner part.
            (r'\f(?:part|attachment){\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            # Everything up to the closing marker goes to the body lexer.
            (r'(?s)(.*?(?=\f(?:part|attachment)}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        """Return 1.0 when *text* starts with the message opener, else 0.0."""
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        """Store the optional ``body_lexer`` option, then initialise the lexer."""
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
Example #20
0
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.

        The ``inherit`` marker at the end of the ``statements`` and ``root``
        lists pulls in the corresponding rules of ``baselexer``, so the rules
        listed here take precedence over the inherited ones.
        """

        tokens = {
            'statements': [
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                (words((
                    '@selector', '@private', '@protected', '@public', '@encode',
                    '@synchronized', '@try', '@throw', '@catch', '@finally',
                    '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer',
                    '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong',
                    'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic',
                    'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in',
                    'out', 'inout', 'release', 'class', '@dynamic', '@optional',
                    '@required', '@autoreleasepool', '@import'), suffix=r'\b'),
                 Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'),
                 Keyword.Type),
                # Boxed boolean literals end at a word boundary, like the
                # unboxed names in the next rule.  (The pattern used to
                # demand a newline right after the name, so '@true;' was
                # not recognised and a match swallowed the line break.)
                (r'@(true|false|YES|NO)\b', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                inherit,
            ],
            # Name part of an @interface / @implementation line.  The forms
            # ending in '{' continue into the instance-variable block.
            'oc_classname': [
                # interface definition that inherits
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            # Comma-separated names after @class / @protocol.
            'oc_forward_classname': [
                (r'([a-zA-Z$_][\w$]*)(\s*,\s*)',
                 bygroups(Name.Class, Text), 'oc_forward_classname'),
                (r'([a-zA-Z$_][\w$]*)(\s*;?)',
                 bygroups(Name.Class, Text), '#pop')
            ],
            # Instance-variable block; nested braces are tracked by depth.
            'oc_ivars': [
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (r'^([-+])(\s*)'                         # method marker
                 r'(\(.*?\))?(\s*)'                      # return type
                 r'([a-zA-Z$_][\w$]*:?)',        # begin of method name
                 bygroups(Punctuation, Text, using(this),
                          Text, Name.Function),
                 'method'),
                inherit,
            ],
            # Remainder of a method signature, up to ';' or the body.
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            # @( ... ): the closing bracket of the literal itself is a
            # Literal token; nested brackets are plain punctuation.
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            # @[ ... ]
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            # @{ ... }
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            """
            Score how likely *text* is Objective-C.

            The checks are tried in order and the first hit decides:
            1.0 for a ``_oc_keywords`` match, 0.8 for an ``@"`` string,
            0.7 for ``@`` followed by digits, 0.8 for a ``_oc_message``
            match, and 0 otherwise.
            """
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            """
            Yield the base lexer's tokens, re-tagging known Cocoa names.

            A ``Name`` or ``Name.Class`` token whose value appears in the
            Cocoa interface, protocol or primitive tables is emitted as
            ``Name.Builtin.Pseudo``; all other tokens pass through unchanged.
            """
            # Imported here rather than at module level, so the tables are
            # only loaded once this method is first used.
            from testflows._core.contrib.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
Example #21
0
class ActionScript3Lexer(RegexLexer):
    """
    For ActionScript 3 source code.

    .. versionadded:: 0.11
    """

    name = 'ActionScript 3'
    aliases = ['as3', 'actionscript3']
    filenames = ['*.as']
    mimetypes = ['application/x-actionscript3', 'text/x-actionscript3',
                 'text/actionscript3']

    # Regex fragments spliced into the rules below.
    identifier = r'[$a-zA-Z_]\w*'
    # An identifier optionally followed by a ``.<T>`` type parameter.
    typeidentifier = identifier + r'(?:\.<\w+>)?'

    # DOTALL lets ``.`` cross newlines (needed by the ``/* ... */`` rule);
    # MULTILINE makes ``^``/``$`` line-anchored.
    flags = re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'\s+', Text),
            # ``function name(`` -- the parameter list is lexed in 'funcparams'.
            (r'(function\s+)(' + identifier + r')(\s*)(\()',
             bygroups(Keyword.Declaration, Name.Function, Text, Operator),
             'funcparams'),
            # ``var name : Type`` / ``const name : Type``
            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' +
             typeidentifier + r')',
             bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text,
                      Keyword.Type)),
            # ``import a.b.c`` / ``package a.b.c``
            (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)',
             bygroups(Keyword, Text, Name.Namespace, Text)),
            # ``new Type(``
            (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()',
             bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # Regex literal; none of the alternatives can match a newline.
            (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex),
            (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)),
            (r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
             r'throw|try|catch|with|new|typeof|arguments|instanceof|this|'
             r'switch|import|include|as|is)\b',
             Keyword),
            (r'(class|public|final|internal|native|override|private|protected|'
             r'static|import|extends|implements|interface|intrinsic|return|super|'
             r'dynamic|function|const|get|namespace|package|set)\b',
             Keyword.Declaration),
            (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b',
             Keyword.Constant),
            # Top-level builtin functions.
            (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|'
             r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|'
             r'parseFloat|parseInt|setInterval|trace|updateAfterEvent|'
             r'unescape)\b', Name.Function),
            (identifier, Name),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            # Accept both cases for hex digits: the lexer is case-sensitive,
            # and literals such as 0xFF0000 would otherwise be split into an
            # Integer ``0`` followed by a Name.
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[~^*!%&<>|+=:;,/?\\{}\[\]().-]+', Operator),
        ],
        # Inside the parentheses of a function declaration.
        'funcparams': [
            (r'\s+', Text),
            # ``[...]name : Type`` (or ``*``); an optional default value is
            # handled in 'defval'.
            (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' +
             typeidentifier + r'|\*)(\s*)',
             bygroups(Text, Punctuation, Name, Text, Operator, Text,
                      Keyword.Type, Text), 'defval'),
            (r'\)', Operator, 'type')
        ],
        # Optional ``: ReturnType`` after the closing parenthesis.  Every rule
        # pops two levels, leaving both 'type' and 'funcparams'.
        'type': [
            (r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)',
             bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'),
            (r'\s+', Text, '#pop:2'),
            default('#pop:2')
        ],
        # Optional ``= value`` after a parameter; the value text is re-lexed
        # with this same lexer via ``using(this)``.
        'defval': [
            (r'(=)(\s*)([^(),]+)(\s*)(,?)',
             bygroups(Operator, Text, using(this), Text, Operator), '#pop'),
            (r',', Operator, '#pop'),
            default('#pop')
        ]
    }

    def analyse_text(text):
        """Return 0.3 if *text* starts with ``word : word``, otherwise 0.

        ``re.match`` anchors at the beginning of the text, so only the very
        first characters are inspected.
        """
        if re.match(r'\w+\s*:\s*\w', text):
            return 0.3
        return 0
Example #22
0
class BibTeXLexer(ExtendedRegexLexer):
    """
    A lexer for BibTeX bibliography data format.

    .. versionadded:: 2.2
    """

    name = 'BibTeX'
    aliases = ['bib', 'bibtex']
    filenames = ['*.bib']
    mimetypes = ["text/x-bibtex"]
    flags = re.IGNORECASE

    # Punctuation accepted inside identifiers (written for use in a character
    # class).
    ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
    # First character: letter, underscore or allowed punctuation; the
    # remaining characters may additionally be any ``\w`` character.
    IDENTIFIER = ('[' + 'a-z_' + ALLOWED_CHARS + ']'
                  + '[' + r'\w' + ALLOWED_CHARS + ']*')

    def open_brace_callback(self, match, ctx):
        """Emit the opening delimiter and remember it on the context.

        The matched ``{`` or ``(`` is stored as ``ctx.opening_brace`` so that
        ``close_brace_callback`` can check the closing delimiter against it.
        """
        brace = match.group()
        ctx.opening_brace = brace
        yield match.start(), Punctuation, brace
        ctx.pos = match.end()

    def close_brace_callback(self, match, ctx):
        """Emit the closing delimiter, as Error if it does not pair up.

        ``{`` must be closed by ``}`` and ``(`` by ``)``.  The remembered
        opening delimiter is removed from the context afterwards.
        """
        brace = match.group()
        opener = ctx.opening_brace
        mismatched = ((opener == '{' and brace != '}')
                      or (opener == '(' and brace != ')'))
        token = Error if mismatched else Punctuation
        yield match.start(), token, brace
        del ctx.opening_brace
        ctx.pos = match.end()

    # State tuples are pushed left to right, so the LAST name in a tuple is
    # the state that becomes active first.
    tokens = {
        'root': [
            include('whitespace'),
            ('@comment', Comment),
            # @preamble{ value }
            ('@preamble', Name.Class,
             ('closing-brace', 'value', 'opening-brace')),
            # @string{ name = value }
            ('@string', Name.Class,
             ('closing-brace', 'field', 'opening-brace')),
            # Any other @entry{ label, field = value, ... }
            ('@' + IDENTIFIER, Name.Class,
             ('closing-brace', 'command-body', 'opening-brace')),
            # Remaining text at this level is lexed as a comment.
            ('.+', Comment),
        ],

        'opening-brace': [
            include('whitespace'),
            (r'[{(]', open_brace_callback, '#pop'),
        ],

        'closing-brace': [
            include('whitespace'),
            (r'[})]', close_brace_callback, '#pop'),
        ],

        # The label right after the opening delimiter, then the field list.
        'command-body': [
            include('whitespace'),
            (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
        ],

        # Zero or more ``, field`` items.
        'fields': [
            include('whitespace'),
            (',', Punctuation, 'field'),
            default('#pop'),
        ],

        # ``name = value``; '=' is entered first, then 'value'.
        'field': [
            include('whitespace'),
            (IDENTIFIER, Name.Attribute, ('value', '=')),
            default('#pop'),
        ],

        '=': [
            include('whitespace'),
            ('=', Punctuation, '#pop'),
        ],

        # Variables, quoted/braced strings and numbers, joined with ``#``.
        'value': [
            include('whitespace'),
            (IDENTIFIER, Name.Variable),
            ('"', String, 'quoted-string'),
            (r'\{', String, 'braced-string'),
            (r'[\d]+', Number),
            ('#', Punctuation),
            default('#pop'),
        ],

        'quoted-string': [
            (r'\{', String, 'braced-string'),
            ('"', String, '#pop'),
            (r'[^\{\"]+', String),
        ],

        # Braces nest: each ``{`` pushes another level of this state.
        'braced-string': [
            (r'\{', String, '#push'),
            (r'\}', String, '#pop'),
            (r'[^\{\}]+', String),
        ],

        'whitespace': [
            (r'\s+', Text),
        ],
    }
Example #23
0
class AdlLexer(AtomsLexer):
    """
    Lexer for ADL syntax.

    .. versionadded:: 2.1
    """

    name = 'ADL'
    aliases = ['adl']
    filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx']

    # NOTE(review): the 'archetype_id' and 'string' states used below are not
    # defined in this class; presumably they are inherited from AtomsLexer.
    tokens = {
        'whitespace': [
            # blank line ends
            (r'\s*\n', Text),
            # comment-only line
            (r'^[ \t]*--.*$', Comment),
        ],
        'odin_section': [
            # Repeating the following two rules from the root state enables
            # multi-line strings that start in the first column to be dealt
            # with.
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            # blank or indented lines are delegated to the ODIN lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)),
            (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)),
            # template overlay delimiter
            (r'^----------*\n', Text, '#pop'),
            (r'^.*\n', String),
            default('#pop'),
        ],
        'cadl_section': [
            # blank or indented lines are delegated to the cADL lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)),
            default('#pop'),
        ],
        'rules_section': [
            # only indented lines belong to the rules section
            (r'^[ \t]+.*\n', using(CadlLexer)),
            default('#pop'),
        ],
        # The parenthesised list after the archetype keyword; entered from
        # 'root' on ``(``.
        'metadata': [
            (r'\)', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'([Tt]rue|[Ff]alse)', Literal),
            # Guids.  Tried before the number rule: otherwise the leading
            # digits of a Guid are consumed as a number, and when the whole
            # first group is digits the following ``-`` matches no rule and
            # the state is popped in the middle of the Guid.  The pattern
            # needs at least three ``-group`` repetitions, so plain numbers
            # and dotted version ids still fall through to the next rule.
            (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal),
            # numbers and version ids
            (r'\d+(\.\d+)*', Literal),
            (r'\w+', Name.Class),
            (r'"', String, 'string'),
            (r'=', Operator),
            (r'[ \t]+', Text),
            default('#pop'),
        ],
        'root': [
            (r'^(archetype|template_overlay|operational_template|template|'
             r'speciali[sz]e)', Generic.Heading),
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading, 'odin_section'),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'),
            include('archetype_id'),
            (r'[ \t]*\(', Punctuation, 'metadata'),
            include('whitespace'),
        ],
    }
Example #24
0
class HaxeLexer(ExtendedRegexLexer):
    """
    For Haxe source code (http://haxe.org/).

    .. versionadded:: 1.3
    """

    name = 'Haxe'
    aliases = ['hx', 'haxe', 'hxsl']
    filenames = ['*.hx', '*.hxsl']
    mimetypes = ['text/haxe', 'text/x-haxe', 'text/x-hx']

    # keywords extracted from lexer.mll in the haxe compiler source
    keyword = (r'(?:function|class|static|var|if|else|while|do|for|'
               r'break|return|continue|extends|implements|import|'
               r'switch|case|default|public|private|try|untyped|'
               r'catch|new|this|throw|extern|enum|in|interface|'
               r'cast|override|dynamic|typedef|package|'
               r'inline|using|null|true|false|abstract)\b')

    # idtype in lexer.mll
    typeid = r'_*[A-Z]\w*'

    # combined ident and dollar and idtype
    ident = r'(?:_*[a-z]\w*|_+[0-9]\w*|' + typeid + r'|_+|\$\w+)'

    binop = (r'(?:%=|&=|\|=|\^=|\+=|\-=|\*=|/=|<<=|>\s*>\s*=|>\s*>\s*>\s*=|==|'
             r'!=|<=|>\s*=|&&|\|\||<<|>>>|>\s*>|\.\.\.|<|>|%|&|\||\^|\+|\*|'
             r'/|\-|=>|=)')

    # ident except keywords
    ident_no_keyword = r'(?!' + keyword + ')' + ident

    flags = re.DOTALL | re.MULTILINE

    preproc_stack = []

    def preproc_callback(self, match, ctx):
        """Handle a ``#if`` / ``#elseif`` / ``#else`` / ``#end`` / ``#error``.

        Group 2 of *match* is the directive name.  ``#if`` saves a copy of the
        current state stack, ``#else`` / ``#elseif`` restore the most recently
        saved copy and ``#end`` discards it.  Afterwards the state needed for
        the directive's argument is pushed and one ``Comment.Preproc`` token
        is emitted for the directive itself.
        """
        directive = match.group(2)

        if directive == 'if':
            # remember what the stack looked like right before this #if
            self.preproc_stack.append(ctx.stack[:])
        elif directive in ('else', 'elseif') and self.preproc_stack:
            # rewind to the stack saved by the matching #if
            ctx.stack = self.preproc_stack[-1][:]
        elif directive == 'end' and self.preproc_stack:
            # the conditional block is over; drop its saved stack
            self.preproc_stack.pop()

        if directive in ('if', 'elseif'):
            # #if and #elseif are followed by an expression
            ctx.stack.append('preproc-expr')
        elif directive == 'error':
            # #error may optionally be followed by a message
            ctx.stack.append('preproc-error')

        yield match.start(), Comment.Preproc, u'#' + directive
        ctx.pos = match.end()

    tokens = {
        'root': [
            include('spaces'),
            include('meta'),
            (r'(?:package)\b', Keyword.Namespace, ('semicolon', 'package')),
            (r'(?:import)\b', Keyword.Namespace, ('semicolon', 'import')),
            (r'(?:using)\b', Keyword.Namespace, ('semicolon', 'using')),
            (r'(?:extern|private)\b', Keyword.Declaration),
            (r'(?:abstract)\b', Keyword.Declaration, 'abstract'),
            (r'(?:class|interface)\b', Keyword.Declaration, 'class'),
            (r'(?:enum)\b', Keyword.Declaration, 'enum'),
            (r'(?:typedef)\b', Keyword.Declaration, 'typedef'),

            # top-level expression
            # although it is not supported in haxe, but it is common to write
            # expression in web pages the positive lookahead here is to prevent
            # an infinite loop at the EOF
            (r'(?=.)', Text, 'expr-statement'),
        ],

        # space/tab/comment/preproc
        'spaces': [
            (r'\s+', Text),
            (r'//[^\n\r]*', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(#)(if|elseif|else|end|error)\b', preproc_callback),
        ],

        'string-single-interpol': [
            (r'\$\{', String.Interpol, ('string-interpol-close', 'expr')),
            (r'\$\$', String.Escape),
            (r'\$(?=' + ident + ')', String.Interpol, 'ident'),
            include('string-single'),
        ],

        'string-single': [
            (r"'", String.Single, '#pop'),
            (r'\\.', String.Escape),
            (r'.', String.Single),
        ],

        'string-double': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'.', String.Double),
        ],

        'string-interpol-close': [
            (r'\$'+ident, String.Interpol),
            (r'\}', String.Interpol, '#pop'),
        ],

        'package': [
            include('spaces'),
            (ident, Name.Namespace),
            (r'\.', Punctuation, 'import-ident'),
            default('#pop'),
        ],

        'import': [
            include('spaces'),
            (ident, Name.Namespace),
            (r'\*', Keyword),  # wildcard import
            (r'\.', Punctuation, 'import-ident'),
            (r'in', Keyword.Namespace, 'ident'),
            default('#pop'),
        ],

        'import-ident': [
            include('spaces'),
            (r'\*', Keyword, '#pop'),  # wildcard import
            (ident, Name.Namespace, '#pop'),
        ],

        'using': [
            include('spaces'),
            (ident, Name.Namespace),
            (r'\.', Punctuation, 'import-ident'),
            default('#pop'),
        ],

        'preproc-error': [
            (r'\s+', Comment.Preproc),
            (r"'", String.Single, ('#pop', 'string-single')),
            (r'"', String.Double, ('#pop', 'string-double')),
            default('#pop'),
        ],

        'preproc-expr': [
            (r'\s+', Comment.Preproc),
            (r'\!', Comment.Preproc),
            (r'\(', Comment.Preproc, ('#pop', 'preproc-parenthesis')),

            (ident, Comment.Preproc, '#pop'),

            # Float
            (r'\.[0-9]+', Number.Float),
            (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float),
            (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float),
            (r'[0-9]+\.[0-9]+', Number.Float),
            (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float),

            # Int
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            # String
            (r"'", String.Single, ('#pop', 'string-single')),
            (r'"', String.Double, ('#pop', 'string-double')),
        ],

        'preproc-parenthesis': [
            (r'\s+', Comment.Preproc),
            (r'\)', Comment.Preproc, '#pop'),
            default('preproc-expr-in-parenthesis'),
        ],

        'preproc-expr-chain': [
            (r'\s+', Comment.Preproc),
            (binop, Comment.Preproc, ('#pop', 'preproc-expr-in-parenthesis')),
            default('#pop'),
        ],

        # same as 'preproc-expr' but able to chain 'preproc-expr-chain'
        'preproc-expr-in-parenthesis': [
            (r'\s+', Comment.Preproc),
            (r'\!', Comment.Preproc),
            (r'\(', Comment.Preproc,
             ('#pop', 'preproc-expr-chain', 'preproc-parenthesis')),

            (ident, Comment.Preproc, ('#pop', 'preproc-expr-chain')),

            # Float
            (r'\.[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')),
            (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')),
            (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')),
            (r'[0-9]+\.[0-9]+', Number.Float, ('#pop', 'preproc-expr-chain')),
            (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, ('#pop', 'preproc-expr-chain')),

            # Int
            (r'0x[0-9a-fA-F]+', Number.Hex, ('#pop', 'preproc-expr-chain')),
            (r'[0-9]+', Number.Integer, ('#pop', 'preproc-expr-chain')),

            # String
            (r"'", String.Single,
             ('#pop', 'preproc-expr-chain', 'string-single')),
            (r'"', String.Double,
             ('#pop', 'preproc-expr-chain', 'string-double')),
        ],

        'abstract': [
            include('spaces'),
            default(('#pop', 'abstract-body', 'abstract-relation',
                    'abstract-opaque', 'type-param-constraint', 'type-name')),
        ],

        'abstract-body': [
            include('spaces'),
            (r'\{', Punctuation, ('#pop', 'class-body')),
        ],

        'abstract-opaque': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'type')),
            default('#pop'),
        ],

        'abstract-relation': [
            include('spaces'),
            (r'(?:to|from)', Keyword.Declaration, 'type'),
            (r',', Punctuation),
            default('#pop'),
        ],

        'meta': [
            include('spaces'),
            (r'@', Name.Decorator, ('meta-body', 'meta-ident', 'meta-colon')),
        ],

        # optional colon
        'meta-colon': [
            include('spaces'),
            (r':', Name.Decorator, '#pop'),
            default('#pop'),
        ],

        # same as 'ident' but set token as Name.Decorator instead of Name
        'meta-ident': [
            include('spaces'),
            (ident, Name.Decorator, '#pop'),
        ],

        'meta-body': [
            include('spaces'),
            (r'\(', Name.Decorator, ('#pop', 'meta-call')),
            default('#pop'),
        ],

        'meta-call': [
            include('spaces'),
            (r'\)', Name.Decorator, '#pop'),
            default(('#pop', 'meta-call-sep', 'expr')),
        ],

        'meta-call-sep': [
            include('spaces'),
            (r'\)', Name.Decorator, '#pop'),
            (r',', Punctuation, ('#pop', 'meta-call')),
        ],

        'typedef': [
            include('spaces'),
            default(('#pop', 'typedef-body', 'type-param-constraint',
                     'type-name')),
        ],

        'typedef-body': [
            include('spaces'),
            (r'=', Operator, ('#pop', 'optional-semicolon', 'type')),
        ],

        'enum': [
            include('spaces'),
            default(('#pop', 'enum-body', 'bracket-open',
                     'type-param-constraint', 'type-name')),
        ],

        'enum-body': [
            include('spaces'),
            include('meta'),
            (r'\}', Punctuation, '#pop'),
            (ident_no_keyword, Name, ('enum-member', 'type-param-constraint')),
        ],

        'enum-member': [
            include('spaces'),
            (r'\(', Punctuation,
             ('#pop', 'semicolon', 'flag', 'function-param')),
            default(('#pop', 'semicolon', 'flag')),
        ],

        'class': [
            include('spaces'),
            default(('#pop', 'class-body', 'bracket-open', 'extends',
                     'type-param-constraint', 'type-name')),
        ],

        'extends': [
            include('spaces'),
            (r'(?:extends|implements)\b', Keyword.Declaration, 'type'),
            (r',', Punctuation),  # the comma is made optional here, since haxe2
                                  # requires the comma but haxe3 does not allow it
            default('#pop'),
        ],

        'bracket-open': [
            include('spaces'),
            (r'\{', Punctuation, '#pop'),
        ],

        'bracket-close': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
        ],

        'class-body': [
            include('spaces'),
            include('meta'),
            (r'\}', Punctuation, '#pop'),
            (r'(?:static|public|private|override|dynamic|inline|macro)\b',
             Keyword.Declaration),
            default('class-member'),
        ],

        'class-member': [
            include('spaces'),
            (r'(var)\b', Keyword.Declaration,
             ('#pop', 'optional-semicolon', 'var')),
            (r'(function)\b', Keyword.Declaration,
             ('#pop', 'optional-semicolon', 'class-method')),
        ],

        # local function, anonymous or not
        'function-local': [
            include('spaces'),
            (ident_no_keyword, Name.Function,
             ('#pop', 'optional-expr', 'flag', 'function-param',
              'parenthesis-open', 'type-param-constraint')),
            default(('#pop', 'optional-expr', 'flag', 'function-param',
                     'parenthesis-open', 'type-param-constraint')),
        ],

        'optional-expr': [
            include('spaces'),
            include('expr'),
            default('#pop'),
        ],

        'class-method': [
            include('spaces'),
            (ident, Name.Function, ('#pop', 'optional-expr', 'flag',
                                    'function-param', 'parenthesis-open',
                                    'type-param-constraint')),
        ],

        # function arguments
        'function-param': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
            (r'\?', Punctuation),
            (ident_no_keyword, Name,
             ('#pop', 'function-param-sep', 'assign', 'flag')),
        ],

        'function-param-sep': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'function-param')),
        ],

        'prop-get-set': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'parenthesis-close',
                                  'prop-get-set-opt', 'comma', 'prop-get-set-opt')),
            default('#pop'),
        ],

        'prop-get-set-opt': [
            include('spaces'),
            (r'(?:default|null|never|dynamic|get|set)\b', Keyword, '#pop'),
            (ident_no_keyword, Text, '#pop'),  # custom getter/setter
        ],

        'expr-statement': [
            include('spaces'),
            # makes semicolon optional here, just to avoid checking the last
            # one is bracket or not.
            default(('#pop', 'optional-semicolon', 'expr')),
        ],

        'expr': [
            include('spaces'),
            (r'@', Name.Decorator, ('#pop', 'optional-expr', 'meta-body',
                                    'meta-ident', 'meta-colon')),
            (r'(?:\+\+|\-\-|~(?!/)|!|\-)', Operator),
            (r'\(', Punctuation, ('#pop', 'expr-chain', 'parenthesis')),
            (r'(?:static|public|private|override|dynamic|inline)\b',
             Keyword.Declaration),
            (r'(?:function)\b', Keyword.Declaration, ('#pop', 'expr-chain',
                                                      'function-local')),
            (r'\{', Punctuation, ('#pop', 'expr-chain', 'bracket')),
            (r'(?:true|false|null)\b', Keyword.Constant, ('#pop', 'expr-chain')),
            (r'(?:this)\b', Keyword, ('#pop', 'expr-chain')),
            (r'(?:cast)\b', Keyword, ('#pop', 'expr-chain', 'cast')),
            (r'(?:try)\b', Keyword, ('#pop', 'catch', 'expr')),
            (r'(?:var)\b', Keyword.Declaration, ('#pop', 'var')),
            (r'(?:new)\b', Keyword, ('#pop', 'expr-chain', 'new')),
            (r'(?:switch)\b', Keyword, ('#pop', 'switch')),
            (r'(?:if)\b', Keyword, ('#pop', 'if')),
            (r'(?:do)\b', Keyword, ('#pop', 'do')),
            (r'(?:while)\b', Keyword, ('#pop', 'while')),
            (r'(?:for)\b', Keyword, ('#pop', 'for')),
            (r'(?:untyped|throw)\b', Keyword),
            (r'(?:return)\b', Keyword, ('#pop', 'optional-expr')),
            (r'(?:macro)\b', Keyword, ('#pop', 'macro')),
            (r'(?:continue|break)\b', Keyword, '#pop'),
            (r'(?:\$\s*[a-z]\b|\$(?!'+ident+'))', Name, ('#pop', 'dollar')),
            (ident_no_keyword, Name, ('#pop', 'expr-chain')),

            # Float
            (r'\.[0-9]+', Number.Float, ('#pop', 'expr-chain')),
            (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'expr-chain')),
            (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, ('#pop', 'expr-chain')),
            (r'[0-9]+\.[0-9]+', Number.Float, ('#pop', 'expr-chain')),
            (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, ('#pop', 'expr-chain')),

            # Int
            (r'0x[0-9a-fA-F]+', Number.Hex, ('#pop', 'expr-chain')),
            (r'[0-9]+', Number.Integer, ('#pop', 'expr-chain')),

            # String
            (r"'", String.Single, ('#pop', 'expr-chain', 'string-single-interpol')),
            (r'"', String.Double, ('#pop', 'expr-chain', 'string-double')),

            # EReg
            (r'~/(\\\\|\\/|[^/\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')),

            # Array
            (r'\[', Punctuation, ('#pop', 'expr-chain', 'array-decl')),
        ],

        'expr-chain': [
            include('spaces'),
            (r'(?:\+\+|\-\-)', Operator),
            (binop, Operator, ('#pop', 'expr')),
            (r'(?:in)\b', Keyword, ('#pop', 'expr')),
            (r'\?', Operator, ('#pop', 'expr', 'ternary', 'expr')),
            (r'(\.)(' + ident_no_keyword + ')', bygroups(Punctuation, Name)),
            (r'\[', Punctuation, 'array-access'),
            (r'\(', Punctuation, 'call'),
            default('#pop'),
        ],

        # macro reification
        'macro': [
            include('spaces'),
            include('meta'),
            (r':', Punctuation, ('#pop', 'type')),

            (r'(?:extern|private)\b', Keyword.Declaration),
            (r'(?:abstract)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'abstract')),
            (r'(?:class|interface)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'macro-class')),
            (r'(?:enum)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'enum')),
            (r'(?:typedef)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'typedef')),

            default(('#pop', 'expr')),
        ],

        'macro-class': [
            (r'\{', Punctuation, ('#pop', 'class-body')),
            include('class')
        ],

        # cast can be written as "cast expr" or "cast(expr, type)"
        'cast': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'parenthesis-close',
                                  'cast-type', 'expr')),
            default(('#pop', 'expr')),
        ],

        # optionally give a type as the 2nd argument of cast()
        'cast-type': [
            include('spaces'),
            (r',', Punctuation, ('#pop', 'type')),
            default('#pop'),
        ],

        'catch': [
            include('spaces'),
            (r'(?:catch)\b', Keyword, ('expr', 'function-param',
                                       'parenthesis-open')),
            default('#pop'),
        ],

        # do-while loop
        'do': [
            include('spaces'),
            default(('#pop', 'do-while', 'expr')),
        ],

        # the while after do
        'do-while': [
            include('spaces'),
            (r'(?:while)\b', Keyword, ('#pop', 'parenthesis',
                                       'parenthesis-open')),
        ],

        'while': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'expr', 'parenthesis')),
        ],

        'for': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'expr', 'parenthesis')),
        ],

        'if': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'else', 'optional-semicolon', 'expr',
                                  'parenthesis')),
        ],

        'else': [
            include('spaces'),
            (r'(?:else)\b', Keyword, ('#pop', 'expr')),
            default('#pop'),
        ],

        'switch': [
            include('spaces'),
            default(('#pop', 'switch-body', 'bracket-open', 'expr')),
        ],

        'switch-body': [
            include('spaces'),
            (r'(?:case|default)\b', Keyword, ('case-block', 'case')),
            (r'\}', Punctuation, '#pop'),
        ],

        'case': [
            include('spaces'),
            (r':', Punctuation, '#pop'),
            default(('#pop', 'case-sep', 'case-guard', 'expr')),
        ],

        'case-sep': [
            include('spaces'),
            (r':', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'case')),
        ],

        'case-guard': [
            include('spaces'),
            (r'(?:if)\b', Keyword, ('#pop', 'parenthesis', 'parenthesis-open')),
            default('#pop'),
        ],

        # optional multiple expr under a case
        'case-block': [
            include('spaces'),
            (r'(?!(?:case|default)\b|\})', Keyword, 'expr-statement'),
            default('#pop'),
        ],

        'new': [
            include('spaces'),
            default(('#pop', 'call', 'parenthesis-open', 'type')),
        ],

        'array-decl': [
            include('spaces'),
            (r'\]', Punctuation, '#pop'),
            default(('#pop', 'array-decl-sep', 'expr')),
        ],

        'array-decl-sep': [
            include('spaces'),
            (r'\]', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'array-decl')),
        ],

        'array-access': [
            include('spaces'),
            default(('#pop', 'array-access-close', 'expr')),
        ],

        'array-access-close': [
            include('spaces'),
            (r'\]', Punctuation, '#pop'),
        ],

        'comma': [
            include('spaces'),
            (r',', Punctuation, '#pop'),
        ],

        'colon': [
            include('spaces'),
            (r':', Punctuation, '#pop'),
        ],

        'semicolon': [
            include('spaces'),
            (r';', Punctuation, '#pop'),
        ],

        'optional-semicolon': [
            include('spaces'),
            (r';', Punctuation, '#pop'),
            default('#pop'),
        ],

        # identity that CAN be a Haxe keyword
        'ident': [
            include('spaces'),
            (ident, Name, '#pop'),
        ],

        'dollar': [
            include('spaces'),
            (r'\{', Punctuation, ('#pop', 'expr-chain', 'bracket-close', 'expr')),
            default(('#pop', 'expr-chain')),
        ],

        'type-name': [
            include('spaces'),
            (typeid, Name, '#pop'),
        ],

        'type-full-name': [
            include('spaces'),
            (r'\.', Punctuation, 'ident'),
            default('#pop'),
        ],

        'type': [
            include('spaces'),
            (r'\?', Punctuation),
            (ident, Name, ('#pop', 'type-check', 'type-full-name')),
            (r'\{', Punctuation, ('#pop', 'type-check', 'type-struct')),
            (r'\(', Punctuation, ('#pop', 'type-check', 'type-parenthesis')),
        ],

        'type-parenthesis': [
            include('spaces'),
            default(('#pop', 'parenthesis-close', 'type')),
        ],

        'type-check': [
            include('spaces'),
            (r'->', Punctuation, ('#pop', 'type')),
            (r'<(?!=)', Punctuation, 'type-param'),
            default('#pop'),
        ],

        'type-struct': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
            (r'\?', Punctuation),
            (r'>', Punctuation, ('comma', 'type')),
            (ident_no_keyword, Name, ('#pop', 'type-struct-sep', 'type', 'colon')),
            include('class-body'),
        ],

        'type-struct-sep': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'type-struct')),
        ],

        # type-param can be a normal type or a constant literal...
        'type-param-type': [
            # Float
            (r'\.[0-9]+', Number.Float, '#pop'),
            (r'[0-9]+[eE][+\-]?[0-9]+', Number.Float, '#pop'),
            (r'[0-9]+\.[0-9]*[eE][+\-]?[0-9]+', Number.Float, '#pop'),
            (r'[0-9]+\.[0-9]+', Number.Float, '#pop'),
            (r'[0-9]+\.(?!' + ident + r'|\.\.)', Number.Float, '#pop'),

            # Int
            (r'0x[0-9a-fA-F]+', Number.Hex, '#pop'),
            (r'[0-9]+', Number.Integer, '#pop'),

            # String
            (r"'", String.Single, ('#pop', 'string-single')),
            (r'"', String.Double, ('#pop', 'string-double')),

            # EReg
            (r'~/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex, '#pop'),

            # Array
            (r'\[', Operator, ('#pop', 'array-decl')),

            include('type'),
        ],

        # type-param part of a type
        # ie. the <A,B> path in Map<A,B>
        'type-param': [
            include('spaces'),
            default(('#pop', 'type-param-sep', 'type-param-type')),
        ],

        'type-param-sep': [
            include('spaces'),
            (r'>', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'type-param')),
        ],

        # optional type-param that may include constraint
        # ie. <T:Constraint, T2:(ConstraintA,ConstraintB)>
        'type-param-constraint': [
            include('spaces'),
            (r'<(?!=)', Punctuation, ('#pop', 'type-param-constraint-sep',
                                      'type-param-constraint-flag', 'type-name')),
            default('#pop'),
        ],

        'type-param-constraint-sep': [
            include('spaces'),
            (r'>', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'type-param-constraint-sep',
                                 'type-param-constraint-flag', 'type-name')),
        ],

        # the optional constraint inside type-param
        'type-param-constraint-flag': [
            include('spaces'),
            (r':', Punctuation, ('#pop', 'type-param-constraint-flag-type')),
            default('#pop'),
        ],

        'type-param-constraint-flag-type': [
            include('spaces'),
            (r'\(', Punctuation, ('#pop', 'type-param-constraint-flag-type-sep',
                                  'type')),
            default(('#pop', 'type')),
        ],

        'type-param-constraint-flag-type-sep': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
            (r',', Punctuation, 'type'),
        ],

        # a parenthesis expr that contain exactly one expr
        'parenthesis': [
            include('spaces'),
            default(('#pop', 'parenthesis-close', 'flag', 'expr')),
        ],

        'parenthesis-open': [
            include('spaces'),
            (r'\(', Punctuation, '#pop'),
        ],

        'parenthesis-close': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
        ],

        'var': [
            include('spaces'),
            (ident_no_keyword, Text, ('#pop', 'var-sep', 'assign', 'flag', 'prop-get-set')),
        ],

        # optional more var decl.
        'var-sep': [
            include('spaces'),
            (r',', Punctuation, ('#pop', 'var')),
            default('#pop'),
        ],

        # optional assignment
        'assign': [
            include('spaces'),
            (r'=', Operator, ('#pop', 'expr')),
            default('#pop'),
        ],

        # optional type flag
        'flag': [
            include('spaces'),
            (r':', Punctuation, ('#pop', 'type')),
            default('#pop'),
        ],

        # colon as part of a ternary operator (?:)
        'ternary': [
            include('spaces'),
            (r':', Operator, '#pop'),
        ],

        # function call
        'call': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
            default(('#pop', 'call-sep', 'expr')),
        ],

        # after a call param
        'call-sep': [
            include('spaces'),
            (r'\)', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'call')),
        ],

        # bracket can be block or object
        'bracket': [
            include('spaces'),
            (r'(?!(?:\$\s*[a-z]\b|\$(?!'+ident+')))' + ident_no_keyword, Name,
             ('#pop', 'bracket-check')),
            (r"'", String.Single, ('#pop', 'bracket-check', 'string-single')),
            (r'"', String.Double, ('#pop', 'bracket-check', 'string-double')),
            default(('#pop', 'block')),
        ],

        'bracket-check': [
            include('spaces'),
            (r':', Punctuation, ('#pop', 'object-sep', 'expr')),  # is object
            default(('#pop', 'block', 'optional-semicolon', 'expr-chain')),  # is block
        ],

        # code block
        'block': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
            default('expr-statement'),
        ],

        # object in key-value pairs
        'object': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
            default(('#pop', 'object-sep', 'expr', 'colon', 'ident-or-string'))
        ],

        # a key of an object
        'ident-or-string': [
            include('spaces'),
            (ident_no_keyword, Name, '#pop'),
            (r"'", String.Single, ('#pop', 'string-single')),
            (r'"', String.Double, ('#pop', 'string-double')),
        ],

        # after a key-value pair in object
        'object-sep': [
            include('spaces'),
            (r'\}', Punctuation, '#pop'),
            (r',', Punctuation, ('#pop', 'object')),
        ],



    }

    def analyse_text(text):
        """Return a guess score based on how *text* begins.

        Yields 0.3 when the very start of the text is a run of word
        characters, a colon (optionally surrounded by whitespace) and one
        more word character.  Any other input falls off the end of the
        function, i.e. the result is ``None``.
        """
        opening = re.match(r'\w+\s*:\s*\w', text)
        if opening is not None:
            return 0.3
Example #25
0
class CoqLexer(RegexLexer):
    """
    For the `Coq <http://coq.inria.fr/>`_ theorem prover.

    .. versionadded:: 1.5
    """

    name = 'Coq'
    aliases = ['coq']
    filenames = ['*.v']
    mimetypes = ['text/x-coq']

    # Vernacular commands (emitted as Keyword.Namespace).
    keywords1 = (
        'Section', 'Module', 'End', 'Require', 'Import', 'Export',
        'Variable', 'Variables', 'Parameter', 'Parameters', 'Axiom',
        'Hypothesis', 'Hypotheses', 'Notation', 'Local', 'Tactic',
        'Reserved', 'Scope', 'Open', 'Close', 'Bind', 'Delimit',
        'Definition', 'Let', 'Ltac', 'Fixpoint', 'CoFixpoint', 'Morphism',
        'Relation', 'Implicit', 'Arguments', 'Set', 'Unset', 'Contextual',
        'Strict', 'Prenex', 'Implicits', 'Inductive', 'CoInductive',
        'Record', 'Structure', 'Canonical', 'Coercion', 'Theorem', 'Lemma',
        'Corollary', 'Proposition', 'Fact', 'Remark', 'Example', 'Proof',
        'Goal', 'Save', 'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite',
        'View', 'Search', 'Show', 'Print', 'Printing', 'All', 'Graph',
        'Projections', 'inside', 'outside', 'Check', 'Global', 'Instance',
        'Class', 'Existing', 'Universe', 'Polymorphic', 'Monomorphic',
        'Context',
    )
    # Gallina (emitted as Keyword).
    keywords2 = (
        'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct',
        'match', 'end', 'in', 'return', 'let', 'if', 'is', 'then', 'else',
        'for', 'of', 'nosimpl', 'with', 'as',
    )
    # Sorts (emitted as Keyword.Type).
    keywords3 = (
        'Type', 'Prop',
    )
    # Tactics (emitted as Keyword).
    keywords4 = (
        'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf',
        'intro', 'intros', 'generalize', 'rename', 'pattern', 'after',
        'destruct', 'induction', 'using', 'refine', 'inversion',
        'injection', 'rewrite', 'congr', 'unlock', 'compute', 'ring',
        'field', 'replace', 'fold', 'unfold', 'change', 'cutrewrite',
        'simpl', 'have', 'suff', 'wlog', 'suffices', 'without', 'loss',
        'nat_norm', 'assert', 'cut', 'trivial', 'revert', 'bool_congr',
        'nat_congr', 'symmetry', 'transitivity', 'auto', 'split', 'left',
        'right', 'autorewrite', 'tauto', 'setoid_rewrite', 'intuition',
        'eauto', 'eapply', 'econstructor', 'etransitivity', 'constructor',
        'erewrite', 'red', 'cbv', 'lazy', 'vm_compute', 'native_compute',
        'subst',
    )
    # Terminators (emitted as Keyword.Pseudo).
    # NOTE: 'tauto' is also listed in keywords4, whose rule is tried first,
    # so it is always emitted as a plain Keyword.
    keywords5 = (
        'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega',
        'assumption', 'solve', 'contradiction', 'discriminate',
        'congruence',
    )
    # Control (emitted as Keyword.Reserved).
    keywords6 = (
        'do', 'last', 'first', 'try', 'idtac', 'repeat',
    )
    # NOTE(review): the list below is not referenced by any rule in this
    # class; it looks like a leftover keyword list from another lexer.
    # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
    # 'downto', 'else', 'end', 'exception', 'external', 'false',
    # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
    # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
    # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
    # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
    # 'type', 'val', 'virtual', 'when', 'while', 'with'

    # Operator/punctuation fragments, already regex-escaped.  ORDER MATTERS:
    # the tuple is reversed and joined into one alternation in 'root', and
    # regex alternation picks the first alternative that matches, so longer
    # operators must appear *after* their prefixes here (e.g. '-' before
    # '->', '<' before '<-' before '<->').
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-', r'-\.',
        '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<', '<-',
        '<->', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<',
        r'\[>', r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]',
        r'\}', '~', '=>', r'/\\', r'\\/', r'\{\|', r'\|\}', u'Π', u'λ',
    )
    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\(\*', Comment, 'comment'),
            (words(keywords1, prefix=r'\b', suffix=r'\b'), Keyword.Namespace),
            (words(keywords2, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords3, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(keywords4, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords5, prefix=r'\b', suffix=r'\b'), Keyword.Pseudo),
            (words(keywords6, prefix=r'\b', suffix=r'\b'), Keyword.Reserved),
            # (r'\b([A-Z][\w\']*)(\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r"[^\W\d][\w']*", Name),
            # Numbers.  Rules are tried in order and the first match wins,
            # so the specific forms (float with exponent, prefixed hex /
            # octal / binary) must precede the plain integer rule; otherwise
            # the integer rule swallows the leading digit(s) and the
            # specific rules can never fire.  The decimal point is escaped
            # so that e.g. '0x1e5' is not mistaken for a float.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name),
        ],
        # Inside (* ... *); '#push' / '#pop' make comments nest.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        # Inside a double-quoted string; a doubled quote ("") stays inside.
        'string': [
            (r'[^"]+', String.Double),
            (r'""', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Only entered by the commented-out rule in 'root' above.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return ``True`` when *text* starts with ``(*``.

        Any other input falls through and yields ``None``.
        """
        if text.startswith('(*'):
            return True
Example #26
0
class PostScriptLexer(RegexLexer):
    """
    Lexer for PostScript files.

    The PostScript Language Reference published by Adobe at
    <http://partners.adobe.com/public/developer/en/ps/PLRM.pdf>
    is the authority for this.

    .. versionadded:: 1.4
    """
    name = 'PostScript'
    aliases = ['postscript', 'postscr']
    filenames = ['*.ps', '*.eps']
    mimetypes = ['application/postscript']

    # Characters that terminate a name or number (regex char-class body).
    delimiter = r'()<>\[\]{}/%\s'
    # Zero-width check that the next character is a delimiter.
    delimiter_end = r'(?=[%s])' % delimiter

    valid_name_chars = r'[^%s]' % delimiter
    valid_name = r"%s+%s" % (valid_name_chars, delimiter_end)

    # NOTE: rules within a state are tried top to bottom and the first one
    # that matches wins, so every specific rule below is placed ahead of the
    # more general rule that would otherwise shadow it.
    tokens = {
        'root': [
            # All comment types
            (r'^%!.+\n', Comment.Preproc),
            (r'%%.*\n', Comment.Special),
            (r'(^%.*\n){2,}', Comment.Multiline),
            (r'%.*\n', Comment.Single),

            # String literals are awkward; enter separate state.
            (r'\(', String, 'stringliteral'),

            # Hex strings: must precede the punctuation rule, which would
            # otherwise consume the opening '<' on its own.
            (r'<[0-9A-Fa-f]+>' + delimiter_end, Number.Hex),
            (r'[{}<>\[\]]', Punctuation),

            # Numbers
            # Slight abuse: use Oct to signify any explicit base system
            (r'[0-9]+\#(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)'
             r'((e|E)[0-9]+)?' + delimiter_end, Number.Oct),
            # Integers before floats: the float pattern also accepts a bare
            # run of digits, so it would shadow this rule if tried first.
            (r'(\-|\+)?[0-9]+' + delimiter_end, Number.Integer),
            (r'(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)((e|E)[0-9]+)?'
             + delimiter_end, Number.Float),

            # References
            (r'\/%s' % valid_name, Name.Variable),

            # Keywords and builtins: must precede the catch-all name rule
            # below, which matches every one of these words as well.
            #
            # These keywords taken from
            # <http://www.math.ubc.ca/~cass/graphics/manual/pdf/a1.pdf>
            # Is there an authoritative list anywhere that doesn't involve
            # trawling documentation?
            (r'(false|true)' + delimiter_end, Keyword.Constant),

            # Conditionals / flow control
            (r'(eq|ne|g[et]|l[et]|and|or|not|if(?:else)?|for(?:all)?)' +
             delimiter_end, Keyword.Reserved),
            (words(
                ('abs', 'add', 'aload', 'arc', 'arcn', 'array', 'atan',
                 'begin', 'bind', 'ceiling', 'charpath', 'clip', 'closepath',
                 'concat', 'concatmatrix', 'copy', 'cos', 'currentlinewidth',
                 'currentmatrix', 'currentpoint', 'curveto', 'cvi', 'cvs',
                 'def', 'defaultmatrix', 'dict', 'dictstackoverflow', 'div',
                 'dtransform', 'dup', 'end', 'exch', 'exec', 'exit', 'exp',
                 'fill', 'findfont', 'floor', 'get', 'getinterval', 'grestore',
                 'gsave', 'gt', 'identmatrix', 'idiv', 'idtransform', 'index',
                 'invertmatrix', 'itransform', 'length', 'lineto', 'ln',
                 'load', 'log', 'loop', 'matrix', 'mod', 'moveto', 'mul',
                 'neg', 'newpath', 'pathforall', 'pathbbox', 'pop', 'print',
                 'pstack', 'put', 'quit', 'rand', 'rangecheck', 'rcurveto',
                 'repeat', 'restore', 'rlineto', 'rmoveto', 'roll', 'rotate',
                 'round', 'run', 'save', 'scale', 'scalefont', 'setdash',
                 'setfont', 'setgray', 'setlinecap', 'setlinejoin',
                 'setlinewidth', 'setmatrix', 'setrgbcolor', 'shfill', 'show',
                 'showpage', 'sin', 'sqrt', 'stack', 'stringwidth', 'stroke',
                 'strokepath', 'sub', 'syntaxerror', 'transform', 'translate',
                 'truncate', 'typecheck', 'undefined', 'undefinedfilename',
                 'undefinedresult'),
                suffix=delimiter_end), Name.Builtin),

            # Names
            (valid_name, Name.Function),  # Anything else is executed

            (r'\s+', Text),
        ],
        # Inside ( ... ); '#push' / '#pop' track balanced nested parentheses.
        'stringliteral': [
            (r'[^()\\]+', String),
            (r'\\', String.Escape, 'escape'),
            (r'\(', String, '#push'),
            (r'\)', String, '#pop'),
        ],
        # Directly after a backslash inside a string literal.
        'escape': [
            # Octal character codes use one to three digits in 0-7.
            (r'[0-7]{1,3}|n|r|t|b|f|\\|\(|\)', String.Escape, '#pop'),
            default('#pop'),
        ],
    }
Example #27
0
class CrystalLexer(ExtendedRegexLexer):
    """
    For `Crystal <http://crystal-lang.org>`_ source code.

    .. versionadded:: 2.2
    """

    name = 'Crystal'
    aliases = ['cr', 'crystal']
    filenames = ['*.cr']
    mimetypes = ['text/x-crystal']

    # '.' also matches newlines; '^' and '$' match at every line boundary.
    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """Lex a heredoc opener, the rest of its line, and the heredoc body.

        Yields ``(index, tokentype, value)`` triples.  The opener pieces are
        emitted first, then the remainder of the opening line is lexed by
        re-entering ``get_tokens_unprocessed`` on the same context (which may
        register further heredocs on ``ctx.heredocstack``).  Only the
        outermost invocation then consumes the following lines as heredoc
        bodies, one per registered heredoc, in registration order.
        """
        # okay, this is the hardest part of parsing Crystal...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)  # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        # The pending-heredoc list lives on the context object so that nested
        # invocations (triggered by the recursive lexing below) share it.
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        # Entry: (True if the opener was '<<-', terminator name).
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # Restrict the context to the rest of the opening line and lex it.
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                # line_re is defined elsewhere in the module (not shown here);
                # NOTE: this loop rebinds the name `match`.
                for match in line_re.finditer(ctx.text, ctx.pos):
                    # '<<-' form: ignore whitespace on both sides of the
                    # terminator line; plain '<<': only trailing whitespace.
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        # Terminator found: flush the body, then the
                        # terminator line itself.
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group(
                            )
                        yield match.start(), String.Delimiter, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            # Lift the end restriction set above and forget all heredocs.
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_crystalstrings_rules():
        """Build and return the string-related lexer states as a dict.

        Takes no arguments: it is called once while the class body executes
        (see the ``tokens.update(...)`` line at the end of the class) and is
        not used as an instance method.  The returned dict contains the
        ``'strings'`` state, the ``'simple-*'`` states and, for each of the
        four bracket kinds, ``*-intp-string``, ``*-string`` and ``*-regex``
        states.
        """
        def intp_regex_callback(self, match, ctx):
            """Emit a ``%r`` regex literal, lexing its body for interpolation.

            Groups 1 and 4 (opener, closer plus flags) are emitted as
            String.Regex; group 3 is lexed in a fresh context that starts in
            the 'interpolated-regex' state, with token offsets shifted back
            into the coordinates of the outer text.
            """
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            """Emit a ``%``-string literal, lexing its body for interpolation.

            Same scheme as ``intp_regex_callback`` but with String.Other for
            the delimiters and the 'interpolated-string' start state.
            """
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        # CRYSTAL_OPERATORS is defined elsewhere in the module (not shown here).
        states['strings'] = [
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        # (also the backtick form); 'sym' gets escapes only, the other two
        # get escapes plus #{...} interpolation.
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-' + name] = [
                include('string-escaped' if name ==
                        'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        # lbrace / rbrace: regex for the opening / closing bracket;
        # bracecc: both brackets, for use inside a character class;
        # name: state-name prefix for this bracket kind.
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            # %<brace> ... : nesting-aware, with escapes and interpolation.
            states[name + '-intp-string'] = [
                (r'\\[' + lbrace + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append(
                (r'%' + lbrace, String.Other, name + '-intp-string'))
            # %w / %i <brace> ... : nesting-aware, no interpolation include.
            states[name + '-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states['strings'].append(
                (r'%[wi]' + lbrace, String.Other, name + '-string'))
            # %r <brace> ... : regex with interpolation; the closing bracket
            # may be followed by [imsx] flags.
            states[name + '-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append(
                (r'%r' + lbrace, String.Regex, name + '-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)', intp_regex_callback
             ),
            # regular fancy strings with qsw
            (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback),
        ]

        return states

    # Hand-written states.  The 'strings' state included from 'root' (and the
    # other string states) is merged in from gen_crystalstrings_rules() at
    # the end of the class body.
    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # keywords
            (words('''
                abstract asm as begin break case do else elsif end ensure extend ifdef if
                include instance_sizeof next of pointerof private protected rescue return
                require sizeof super then typeof unless until when while with yield
            '''.split(),
                   suffix=r'\b'), Keyword),
            (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
            # start of function, class and module names
            (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'funcname'),
            # 'def' immediately followed by an operator character
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'classname'),
            (r'(self|out|uninitialized)\b|(is_a|responds_to)\?',
             Keyword.Pseudo),
            # macros
            (words('''
                debugger record pp assert_responds_to spawn parallel
                getter setter property delegate def_hash def_equals def_equals_and_hash
                forward_missing_to
            '''.split(),
                   suffix=r'\b'), Name.Builtin.Pseudo),
            (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b',
             Name.Builtin.Pseudo),
            # builtins
            # http://crystal-lang.org/api/toplevel.html
            # (not matched when directly preceded by a '.')
            (words('''
                Object Value Struct Reference Proc Class Nil Symbol Enum Void
                Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64
                Float Float32 Float64 Char String
                Pointer Slice Range Exception Regex
                Mutex StaticArray Array Hash Set Tuple Deque Box Process File
                Dir Time Channel Concurrent Scheduler
                abort at_exit caller delay exit fork future get_stack_top gets
                lazy loop main p print printf puts
                raise rand read_line sleep sprintf system with_color
            '''.split(),
                   prefix=r'(?<!\.)',
                   suffix=r'\b'), Name.Builtin),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text,
                                            String.Regex), 'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            # 3 separate expressions for floats because any of the 3 optional
            # parts makes it a float
            # (1) fraction required, exponent and f-suffix optional
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            # (2) exponent required, fraction and f-suffix optional
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            # (3) f-suffix required, fraction and exponent optional
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+))(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (
                r'\?(\\[MC]-)*'  # modifiers
                r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
                r'(?!\w)',
                String.Char),
            (r'[A-Z][A-Z_]+\b', Name.Constant),
            # macro expansion
            (r'\{%', String.Interpol, 'in-macro-control'),
            (r'\{\{', String.Interpol, 'in-macro-expr'),
            # attributes
            (r'(@\[)(\s*)([A-Z]\w*)', bygroups(Operator, Text,
                                               Name.Decorator), 'in-attr'),
            # this is needed because Crystal attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(CRYSTAL_OPERATORS,
                   prefix=r'(\.|::)'), bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            # Names can end with [!?] unless it's "!="
            (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
            (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        # After def/fun/macro: optional 'Receiver.' then a method name or an
        # operator; always pops, even when nothing matches.
        'funcname': [(r'(?:([a-zA-Z_]\w*)(\.))?'
                      r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
                      r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
                      bygroups(Name.Class, Operator, Name.Function), '#pop'),
                     default('#pop')],
        # After class/struct/union/type/alias/enum: consume class names
        # (optionally parenthesised) and pop on anything else.
        'classname': [(r'[A-Z_]\w*', Name.Class),
                      (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
                       bygroups(Punctuation, Text, Name.Class, Text,
                                Punctuation)),
                      default('#pop')],
        # Inside #{ ... }: ordinary code, with brace nesting tracked.
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
        ],
        'string-escaped':
        [(r'\\([\\befnstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)],
        'string-intp-escaped': [
            include('string-intp'),
            include('string-escaped'),
        ],
        # Start state used by intp_regex_callback for a regex body.
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        # Start state used by intp_string_callback for a string body.
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        # Inside / ... /: ends at an unescaped '/', optionally followed by
        # [imsx] flags.
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[imsx]*', String.Regex, '#pop'),
        ],
        # After __END__: with re.DOTALL set above, '.+' consumes all the
        # remaining input as a single token.
        'end-part': [(r'.+', Comment.Preproc, '#pop')],
        # Inside {% ... %}
        'in-macro-control': [
            (r'\{%', String.Interpol, '#push'),
            (r'%\}', String.Interpol, '#pop'),
            (r'for\b|in\b', Keyword),
            include('root'),
        ],
        # Inside {{ ... }}
        'in-macro-expr': [
            (r'\{\{', String.Interpol, '#push'),
            (r'\}\}', String.Interpol, '#pop'),
            include('root'),
        ],
        # Inside @[ ... ]; nested square brackets are tracked.
        'in-attr': [
            (r'\[', Operator, '#push'),
            (r'\]', Operator, '#pop'),
            include('root'),
        ],
    }
    # Merge in the generated string states ('strings', 'simple-*', and the
    # per-bracket '*-intp-string' / '*-string' / '*-regex' states).
    tokens.update(gen_crystalstrings_rules())
Example #28
0
class AsymptoteLexer(RegexLexer):
    """
    For `Asymptote <http://asymptote.sf.net/>`_ source code.

    The regex rules emit plain ``Name`` tokens for identifiers;
    :meth:`get_tokens_unprocessed` afterwards promotes those that are listed
    in the Asymptote builtin tables to ``Name.Function`` / ``Name.Variable``.

    .. versionadded:: 1.2
    """
    name = 'Asymptote'
    aliases = ['asy', 'asymptote']
    filenames = ['*.asy']
    mimetypes = ['text/x-asymptote']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            # backslash-newline: line continuation
            (r'\\\n', Text),
            # '//' comment; a backslash before the newline continues it
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
            # '/* ... */' comment
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
        ],
        'statements': [
            # Double-quoted string (TeX friendly): matched in one piece.
            (r'"(\\\\|\\"|[^"])*"', String),
            # Single-quoted, C style string: its escapes are handled in the
            # dedicated 'string' state.
            (r"'", String, 'string'),
            # Numeric literals.
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            # 'case <expr>:' -- the expression is lexed recursively.
            (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
            # Reserved words.
            (r'(' + '|'.join((
                'and', 'controls', 'tension', 'atleast', 'curl', 'if',
                'else', 'while', 'for', 'do',
                'return', 'break', 'continue', 'struct', 'typedef', 'new',
                'access', 'import',
                'unravel', 'from', 'include', 'quote', 'static', 'public',
                'private', 'restricted',
                'this', 'explicit', 'true', 'false', 'null', 'cycle',
                'newframe', 'operator',
            )) + r')\b', Keyword),
            # An Asymptote type name can also be the name of a function, so
            # these names only count as types when followed by whitespace
            # and a letter.  This heuristic is not perfect.
            (r'(' + '|'.join((
                'Braid', 'FitResult', 'Label', 'Legend', 'TreeNode',
                'abscissa', 'arc', 'arrowhead',
                'binarytree', 'binarytreeNode', 'block', 'bool', 'bool3',
                'bounds', 'bqe', 'circle',
                'conic', 'coord', 'coordsys', 'cputime', 'ellipse', 'file',
                'filltype', 'frame', 'grid3',
                'guide', 'horner', 'hsv', 'hyperbola', 'indexedTransform',
                'int', 'inversion', 'key',
                'light', 'line', 'linefit', 'marginT', 'marker', 'mass',
                'object', 'pair', 'parabola', 'path',
                'path3', 'pen', 'picture', 'point', 'position', 'projection',
                'real', 'revolution',
                'scaleT', 'scientific', 'segment', 'side', 'slice',
                'splitface', 'string', 'surface',
                'tensionSpecifier', 'ticklocate', 'ticksgridT', 'tickvalues',
                'transform',
                'transformation', 'tree', 'triangle', 'trilinear', 'triple',
                'vector',
                'vertex', 'void',
            )) + r')(?=\s+[a-zA-Z])', Keyword.Type),
            # Type names that are not also builtin function names (unless
            # user code defines such a function).  Possibly redundant.
            (r'(' + '|'.join((
                'Braid', 'FitResult', 'TreeNode', 'abscissa', 'arrowhead',
                'block', 'bool', 'bool3',
                'bounds', 'coord', 'frame', 'guide', 'horner', 'int',
                'linefit', 'marginT', 'pair', 'pen',
                'picture', 'position', 'real', 'revolution', 'slice',
                'splitface', 'ticksgridT',
                'tickvalues', 'tree', 'triple', 'vertex', 'void',
            )) + r')\b', Keyword.Type),
            (r'[a-zA-Z_]\w*:(?!:)', Name.Label),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # Function definition: "<return type> name(signature) {".
            (r'((?:[\w*\s])+?(?:\s|\*))'   # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'(' + _ws + r')(\{)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # Function declaration: same head, terminated by ';'.
            (r'((?:[\w*\s])+?(?:\s|\*))'   # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'(' + _ws + r')(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            # Anything else is lexed as a statement.
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            # The terminating ';' returns to 'root'.
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            # Track nested braces so the state is left at the matching '}'.
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r"'", String, '#pop'),
            (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'\n', String),
            (r"[^\\'\n]+", String),  # all other characters
            (r'\\\n', String),  # line continuation
            # backslash followed by the letter 'n'; the escape rule above
            # already consumes this sequence
            (r'\\n', String),
            (r'\\', String),  # stray backslash
        ],
    }

    def get_tokens_unprocessed(self, text):
        """
        Lex ``text`` and reclassify builtin names.

        Tokens come from the regular ``RegexLexer`` machinery.  A token whose
        type is exactly ``Name`` becomes ``Name.Function`` if its value is in
        ``ASYFUNCNAME``, otherwise ``Name.Variable`` if it is in
        ``ASYVARNAME``.  All other tokens pass through unchanged.

        Yields ``(index, token, value)`` tuples.
        """
        # Imported here rather than at module level, as in the original code.
        from testflows._core.contrib.pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME

        stream = RegexLexer.get_tokens_unprocessed(self, text)
        for index, token, value in stream:
            if token is Name:
                if value in ASYFUNCNAME:
                    token = Name.Function
                elif value in ASYVARNAME:
                    token = Name.Variable
            yield index, token, value
Example #29
0
class Perl6Lexer(ExtendedRegexLexer):
    """
    For `Perl 6 <http://www.perl6.org>`_ source code.

    .. versionadded:: 2.0
    """

    name = 'Perl6'
    aliases = ['perl6', 'pl6']
    filenames = [
        '*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm',
        '*.p6m', '*.pm6', '*.t'
    ]
    mimetypes = ['text/x-perl6', 'application/x-perl6']
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    PERL6_IDENTIFIER_RANGE = r"['\w:-]"

    PERL6_KEYWORDS = (
        'BEGIN',
        'CATCH',
        'CHECK',
        'CONTROL',
        'END',
        'ENTER',
        'FIRST',
        'INIT',
        'KEEP',
        'LAST',
        'LEAVE',
        'NEXT',
        'POST',
        'PRE',
        'START',
        'TEMP',
        'UNDO',
        'as',
        'assoc',
        'async',
        'augment',
        'binary',
        'break',
        'but',
        'cached',
        'category',
        'class',
        'constant',
        'contend',
        'continue',
        'copy',
        'deep',
        'default',
        'defequiv',
        'defer',
        'die',
        'do',
        'else',
        'elsif',
        'enum',
        'equiv',
        'exit',
        'export',
        'fail',
        'fatal',
        'for',
        'gather',
        'given',
        'goto',
        'grammar',
        'handles',
        'has',
        'if',
        'inline',
        'irs',
        'is',
        'last',
        'leave',
        'let',
        'lift',
        'loop',
        'looser',
        'macro',
        'make',
        'maybe',
        'method',
        'module',
        'multi',
        'my',
        'next',
        'of',
        'ofs',
        'only',
        'oo',
        'ors',
        'our',
        'package',
        'parsed',
        'prec',
        'proto',
        'readonly',
        'redo',
        'ref',
        'regex',
        'reparsed',
        'repeat',
        'require',
        'required',
        'return',
        'returns',
        'role',
        'rule',
        'rw',
        'self',
        'slang',
        'state',
        'sub',
        'submethod',
        'subset',
        'supersede',
        'take',
        'temp',
        'tighter',
        'token',
        'trusts',
        'try',
        'unary',
        'unless',
        'until',
        'use',
        'warn',
        'when',
        'where',
        'while',
        'will',
    )

    PERL6_BUILTINS = (
        'ACCEPTS',
        'HOW',
        'REJECTS',
        'VAR',
        'WHAT',
        'WHENCE',
        'WHERE',
        'WHICH',
        'WHO',
        'abs',
        'acos',
        'acosec',
        'acosech',
        'acosh',
        'acotan',
        'acotanh',
        'all',
        'any',
        'approx',
        'arity',
        'asec',
        'asech',
        'asin',
        'asinh',
        'assuming',
        'atan',
        'atan2',
        'atanh',
        'attr',
        'bless',
        'body',
        'by',
        'bytes',
        'caller',
        'callsame',
        'callwith',
        'can',
        'capitalize',
        'cat',
        'ceiling',
        'chars',
        'chmod',
        'chomp',
        'chop',
        'chr',
        'chroot',
        'circumfix',
        'cis',
        'classify',
        'clone',
        'close',
        'cmp_ok',
        'codes',
        'comb',
        'connect',
        'contains',
        'context',
        'cos',
        'cosec',
        'cosech',
        'cosh',
        'cotan',
        'cotanh',
        'count',
        'defined',
        'delete',
        'diag',
        'dies_ok',
        'does',
        'e',
        'each',
        'eager',
        'elems',
        'end',
        'eof',
        'eval',
        'eval_dies_ok',
        'eval_elsewhere',
        'eval_lives_ok',
        'evalfile',
        'exists',
        'exp',
        'first',
        'flip',
        'floor',
        'flunk',
        'flush',
        'fmt',
        'force_todo',
        'fork',
        'from',
        'getc',
        'gethost',
        'getlogin',
        'getpeername',
        'getpw',
        'gmtime',
        'graphs',
        'grep',
        'hints',
        'hyper',
        'im',
        'index',
        'infix',
        'invert',
        'is_approx',
        'is_deeply',
        'isa',
        'isa_ok',
        'isnt',
        'iterator',
        'join',
        'key',
        'keys',
        'kill',
        'kv',
        'lastcall',
        'lazy',
        'lc',
        'lcfirst',
        'like',
        'lines',
        'link',
        'lives_ok',
        'localtime',
        'log',
        'log10',
        'map',
        'max',
        'min',
        'minmax',
        'name',
        'new',
        'nextsame',
        'nextwith',
        'nfc',
        'nfd',
        'nfkc',
        'nfkd',
        'nok_error',
        'nonce',
        'none',
        'normalize',
        'not',
        'nothing',
        'ok',
        'once',
        'one',
        'open',
        'opendir',
        'operator',
        'ord',
        'p5chomp',
        'p5chop',
        'pack',
        'pair',
        'pairs',
        'pass',
        'perl',
        'pi',
        'pick',
        'plan',
        'plan_ok',
        'polar',
        'pop',
        'pos',
        'postcircumfix',
        'postfix',
        'pred',
        'prefix',
        'print',
        'printf',
        'push',
        'quasi',
        'quotemeta',
        'rand',
        're',
        'read',
        'readdir',
        'readline',
        'reduce',
        'reverse',
        'rewind',
        'rewinddir',
        'rindex',
        'roots',
        'round',
        'roundrobin',
        'run',
        'runinstead',
        'sameaccent',
        'samecase',
        'say',
        'sec',
        'sech',
        'sech',
        'seek',
        'shape',
        'shift',
        'sign',
        'signature',
        'sin',
        'sinh',
        'skip',
        'skip_rest',
        'sleep',
        'slurp',
        'sort',
        'splice',
        'split',
        'sprintf',
        'sqrt',
        'srand',
        'strand',
        'subst',
        'substr',
        'succ',
        'sum',
        'symlink',
        'tan',
        'tanh',
        'throws_ok',
        'time',
        'times',
        'to',
        'todo',
        'trim',
        'trim_end',
        'trim_start',
        'true',
        'truncate',
        'uc',
        'ucfirst',
        'undef',
        'undefine',
        'uniq',
        'unlike',
        'unlink',
        'unpack',
        'unpolar',
        'unshift',
        'unwrap',
        'use_ok',
        'value',
        'values',
        'vec',
        'version_lt',
        'void',
        'wait',
        'want',
        'wrap',
        'write',
        'zip',
    )

    PERL6_BUILTIN_CLASSES = (
        'Abstraction',
        'Any',
        'AnyChar',
        'Array',
        'Associative',
        'Bag',
        'Bit',
        'Blob',
        'Block',
        'Bool',
        'Buf',
        'Byte',
        'Callable',
        'Capture',
        'Char',
        'Class',
        'Code',
        'Codepoint',
        'Comparator',
        'Complex',
        'Decreasing',
        'Exception',
        'Failure',
        'False',
        'Grammar',
        'Grapheme',
        'Hash',
        'IO',
        'Increasing',
        'Int',
        'Junction',
        'KeyBag',
        'KeyExtractor',
        'KeyHash',
        'KeySet',
        'KitchenSink',
        'List',
        'Macro',
        'Mapping',
        'Match',
        'Matcher',
        'Method',
        'Module',
        'Num',
        'Object',
        'Ordered',
        'Ordering',
        'OrderingPair',
        'Package',
        'Pair',
        'Positional',
        'Proxy',
        'Range',
        'Rat',
        'Regex',
        'Role',
        'Routine',
        'Scalar',
        'Seq',
        'Set',
        'Signature',
        'Str',
        'StrLen',
        'StrPos',
        'Sub',
        'Submethod',
        'True',
        'UInt',
        'Undef',
        'Version',
        'Void',
        'Whatever',
        'bit',
        'bool',
        'buf',
        'buf1',
        'buf16',
        'buf2',
        'buf32',
        'buf4',
        'buf64',
        'buf8',
        'complex',
        'int',
        'int1',
        'int16',
        'int2',
        'int32',
        'int4',
        'int64',
        'int8',
        'num',
        'rat',
        'rat1',
        'rat16',
        'rat2',
        'rat32',
        'rat4',
        'rat64',
        'rat8',
        'uint',
        'uint1',
        'uint16',
        'uint2',
        'uint32',
        'uint4',
        'uint64',
        'uint8',
        'utf16',
        'utf32',
        'utf8',
    )

    PERL6_OPERATORS = (
        'X',
        'Z',
        'after',
        'also',
        'and',
        'andthen',
        'before',
        'cmp',
        'div',
        'eq',
        'eqv',
        'extra',
        'ff',
        'fff',
        'ge',
        'gt',
        'le',
        'leg',
        'lt',
        'm',
        'mm',
        'mod',
        'ne',
        'or',
        'orelse',
        'rx',
        's',
        'tr',
        'x',
        'xor',
        'xx',
        '++',
        '--',
        '**',
        '!',
        '+',
        '-',
        '~',
        '?',
        '|',
        '||',
        '+^',
        '~^',
        '?^',
        '^',
        '*',
        '/',
        '%',
        '%%',
        '+&',
        '+<',
        '+>',
        '~&',
        '~<',
        '~>',
        '?&',
        'gcd',
        'lcm',
        '+',
        '-',
        '+|',
        '+^',
        '~|',
        '~^',
        '?|',
        '?^',
        '~',
        '&',
        '^',
        'but',
        'does',
        '<=>',
        '..',
        '..^',
        '^..',
        '^..^',
        '!=',
        '==',
        '<',
        '<=',
        '>',
        '>=',
        '~~',
        '===',
        '!eqv',
        '&&',
        '||',
        '^^',
        '//',
        'min',
        'max',
        '??',
        '!!',
        'ff',
        'fff',
        'so',
        'not',
        '<==',
        '==>',
        '<<==',
        '==>>',
    )

    # Perl 6 has a *lot* of possible bracketing characters
    # this list was lifted from STD.pm6 (https://github.com/perl6/std)
    PERL6_BRACKETS = {
        u'\u0028': u'\u0029',
        u'\u003c': u'\u003e',
        u'\u005b': u'\u005d',
        u'\u007b': u'\u007d',
        u'\u00ab': u'\u00bb',
        u'\u0f3a': u'\u0f3b',
        u'\u0f3c': u'\u0f3d',
        u'\u169b': u'\u169c',
        u'\u2018': u'\u2019',
        u'\u201a': u'\u2019',
        u'\u201b': u'\u2019',
        u'\u201c': u'\u201d',
        u'\u201e': u'\u201d',
        u'\u201f': u'\u201d',
        u'\u2039': u'\u203a',
        u'\u2045': u'\u2046',
        u'\u207d': u'\u207e',
        u'\u208d': u'\u208e',
        u'\u2208': u'\u220b',
        u'\u2209': u'\u220c',
        u'\u220a': u'\u220d',
        u'\u2215': u'\u29f5',
        u'\u223c': u'\u223d',
        u'\u2243': u'\u22cd',
        u'\u2252': u'\u2253',
        u'\u2254': u'\u2255',
        u'\u2264': u'\u2265',
        u'\u2266': u'\u2267',
        u'\u2268': u'\u2269',
        u'\u226a': u'\u226b',
        u'\u226e': u'\u226f',
        u'\u2270': u'\u2271',
        u'\u2272': u'\u2273',
        u'\u2274': u'\u2275',
        u'\u2276': u'\u2277',
        u'\u2278': u'\u2279',
        u'\u227a': u'\u227b',
        u'\u227c': u'\u227d',
        u'\u227e': u'\u227f',
        u'\u2280': u'\u2281',
        u'\u2282': u'\u2283',
        u'\u2284': u'\u2285',
        u'\u2286': u'\u2287',
        u'\u2288': u'\u2289',
        u'\u228a': u'\u228b',
        u'\u228f': u'\u2290',
        u'\u2291': u'\u2292',
        u'\u2298': u'\u29b8',
        u'\u22a2': u'\u22a3',
        u'\u22a6': u'\u2ade',
        u'\u22a8': u'\u2ae4',
        u'\u22a9': u'\u2ae3',
        u'\u22ab': u'\u2ae5',
        u'\u22b0': u'\u22b1',
        u'\u22b2': u'\u22b3',
        u'\u22b4': u'\u22b5',
        u'\u22b6': u'\u22b7',
        u'\u22c9': u'\u22ca',
        u'\u22cb': u'\u22cc',
        u'\u22d0': u'\u22d1',
        u'\u22d6': u'\u22d7',
        u'\u22d8': u'\u22d9',
        u'\u22da': u'\u22db',
        u'\u22dc': u'\u22dd',
        u'\u22de': u'\u22df',
        u'\u22e0': u'\u22e1',
        u'\u22e2': u'\u22e3',
        u'\u22e4': u'\u22e5',
        u'\u22e6': u'\u22e7',
        u'\u22e8': u'\u22e9',
        u'\u22ea': u'\u22eb',
        u'\u22ec': u'\u22ed',
        u'\u22f0': u'\u22f1',
        u'\u22f2': u'\u22fa',
        u'\u22f3': u'\u22fb',
        u'\u22f4': u'\u22fc',
        u'\u22f6': u'\u22fd',
        u'\u22f7': u'\u22fe',
        u'\u2308': u'\u2309',
        u'\u230a': u'\u230b',
        u'\u2329': u'\u232a',
        u'\u23b4': u'\u23b5',
        u'\u2768': u'\u2769',
        u'\u276a': u'\u276b',
        u'\u276c': u'\u276d',
        u'\u276e': u'\u276f',
        u'\u2770': u'\u2771',
        u'\u2772': u'\u2773',
        u'\u2774': u'\u2775',
        u'\u27c3': u'\u27c4',
        u'\u27c5': u'\u27c6',
        u'\u27d5': u'\u27d6',
        u'\u27dd': u'\u27de',
        u'\u27e2': u'\u27e3',
        u'\u27e4': u'\u27e5',
        u'\u27e6': u'\u27e7',
        u'\u27e8': u'\u27e9',
        u'\u27ea': u'\u27eb',
        u'\u2983': u'\u2984',
        u'\u2985': u'\u2986',
        u'\u2987': u'\u2988',
        u'\u2989': u'\u298a',
        u'\u298b': u'\u298c',
        u'\u298d': u'\u298e',
        u'\u298f': u'\u2990',
        u'\u2991': u'\u2992',
        u'\u2993': u'\u2994',
        u'\u2995': u'\u2996',
        u'\u2997': u'\u2998',
        u'\u29c0': u'\u29c1',
        u'\u29c4': u'\u29c5',
        u'\u29cf': u'\u29d0',
        u'\u29d1': u'\u29d2',
        u'\u29d4': u'\u29d5',
        u'\u29d8': u'\u29d9',
        u'\u29da': u'\u29db',
        u'\u29f8': u'\u29f9',
        u'\u29fc': u'\u29fd',
        u'\u2a2b': u'\u2a2c',
        u'\u2a2d': u'\u2a2e',
        u'\u2a34': u'\u2a35',
        u'\u2a3c': u'\u2a3d',
        u'\u2a64': u'\u2a65',
        u'\u2a79': u'\u2a7a',
        u'\u2a7d': u'\u2a7e',
        u'\u2a7f': u'\u2a80',
        u'\u2a81': u'\u2a82',
        u'\u2a83': u'\u2a84',
        u'\u2a8b': u'\u2a8c',
        u'\u2a91': u'\u2a92',
        u'\u2a93': u'\u2a94',
        u'\u2a95': u'\u2a96',
        u'\u2a97': u'\u2a98',
        u'\u2a99': u'\u2a9a',
        u'\u2a9b': u'\u2a9c',
        u'\u2aa1': u'\u2aa2',
        u'\u2aa6': u'\u2aa7',
        u'\u2aa8': u'\u2aa9',
        u'\u2aaa': u'\u2aab',
        u'\u2aac': u'\u2aad',
        u'\u2aaf': u'\u2ab0',
        u'\u2ab3': u'\u2ab4',
        u'\u2abb': u'\u2abc',
        u'\u2abd': u'\u2abe',
        u'\u2abf': u'\u2ac0',
        u'\u2ac1': u'\u2ac2',
        u'\u2ac3': u'\u2ac4',
        u'\u2ac5': u'\u2ac6',
        u'\u2acd': u'\u2ace',
        u'\u2acf': u'\u2ad0',
        u'\u2ad1': u'\u2ad2',
        u'\u2ad3': u'\u2ad4',
        u'\u2ad5': u'\u2ad6',
        u'\u2aec': u'\u2aed',
        u'\u2af7': u'\u2af8',
        u'\u2af9': u'\u2afa',
        u'\u2e02': u'\u2e03',
        u'\u2e04': u'\u2e05',
        u'\u2e09': u'\u2e0a',
        u'\u2e0c': u'\u2e0d',
        u'\u2e1c': u'\u2e1d',
        u'\u2e20': u'\u2e21',
        u'\u3008': u'\u3009',
        u'\u300a': u'\u300b',
        u'\u300c': u'\u300d',
        u'\u300e': u'\u300f',
        u'\u3010': u'\u3011',
        u'\u3014': u'\u3015',
        u'\u3016': u'\u3017',
        u'\u3018': u'\u3019',
        u'\u301a': u'\u301b',
        u'\u301d': u'\u301e',
        u'\ufd3e': u'\ufd3f',
        u'\ufe17': u'\ufe18',
        u'\ufe35': u'\ufe36',
        u'\ufe37': u'\ufe38',
        u'\ufe39': u'\ufe3a',
        u'\ufe3b': u'\ufe3c',
        u'\ufe3d': u'\ufe3e',
        u'\ufe3f': u'\ufe40',
        u'\ufe41': u'\ufe42',
        u'\ufe43': u'\ufe44',
        u'\ufe47': u'\ufe48',
        u'\ufe59': u'\ufe5a',
        u'\ufe5b': u'\ufe5c',
        u'\ufe5d': u'\ufe5e',
        u'\uff08': u'\uff09',
        u'\uff1c': u'\uff1e',
        u'\uff3b': u'\uff3d',
        u'\uff5b': u'\uff5d',
        u'\uff5f': u'\uff60',
        u'\uff62': u'\uff63',
    }

    def _build_word_match(words,
                          boundary_regex_fragment=None,
                          prefix='',
                          suffix=''):
        if boundary_regex_fragment is None:
            return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \
                suffix + r')\b'
        else:
            return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
                r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \
                boundary_regex_fragment + r')'

    def brackets_callback(token_class):
        """Create a lexer callback that consumes one delimited construct.

        The returned callback expects a match with a named group
        ``delimiter`` (one or more repetitions of the opening character) and
        optionally a named group ``adverbs``.  It scans ``context.text`` for
        the end of the construct, yields everything from the start of the
        match through the closing delimiter as one ``token_class`` token, and
        moves ``context.pos`` behind it.

        * If the opening character is a key of ``PERL6_BRACKETS``, the
          mirrored closing run is searched for, balancing nested
          opener/closer pairs.
        * Otherwise the construct ends at the next occurrence of the same
          delimiter run.
        * If no terminator is found, the token runs to the end of the text.
        * If the adverbs contain ``:to``, the delimited text is used as a
          heredoc terminator: the token is extended through the first
          following line consisting of that terminator (optionally surrounded
          by whitespace), or to the end of the text when there is none.
        """
        def callback(lexer, match, context):
            named = match.groupdict()
            opener = named['delimiter']
            width = len(opener)
            adverbs = named.get('adverbs')

            mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])
            source = context.text
            delim_start = match.start('delimiter')

            if mirror is not None:
                # Mirrored bracket: find the matching closer while keeping
                # track of nested opener/closer pairs.
                closer_run = mirror * width
                depth = 1
                cursor = delim_start

                while depth > 0:
                    open_at = source.find(opener, cursor + width)
                    close_at = source.find(closer_run, cursor + width)

                    if close_at == -1:
                        # unterminated: consume the remainder of the text
                        close_at = len(source)
                        depth = 0
                    elif open_at != -1 and open_at < close_at:
                        depth += 1
                        cursor = open_at
                    else:
                        # the closer comes before any further opener
                        depth -= 1
                        cursor = close_at

                end_pos = close_at
            else:
                # Not a mirrored character: the next occurrence of the same
                # delimiter run closes the construct.
                end_pos = source.find(opener, delim_start + width)

            if end_pos < 0:
                # no closer at all: highlight the rest of the text
                end_pos = len(source)

            if adverbs is not None and re.search(r':to\b', adverbs):
                terminator = source[delim_start + width:end_pos]
                terminator_line = re.search(
                    r'^\s*' + re.escape(terminator) + r'\s*$',
                    source[end_pos:], re.MULTILINE)

                if terminator_line:
                    end_pos += terminator_line.end()
                else:
                    end_pos = len(source)

            stop = end_pos + width
            yield match.start(), token_class, source[match.start():stop]
            context.pos = stop

        return callback

    def opening_brace_callback(lexer, match, context):
        """Emit the matched ``{`` as ``Text`` and count it if necessary.

        After yielding the token and advancing ``context.pos``, the brace
        nesting counter ``context.perl6_token_nesting_level`` is incremented
        when the state directly below the current one is ``'token'``.  That
        counter is what later tells the closing-brace handler when to return
        to the token rules.
        """
        state_stack = context.stack
        start, end = match.span()

        yield start, Text, context.text[start:end]
        context.pos = end

        # One level below a 'token' state: this brace opens a nested block
        # of embedded code, so remember it.
        if len(state_stack) > 2 and state_stack[-2] == 'token':
            context.perl6_token_nesting_level += 1

    def closing_brace_callback(lexer, match, context):
        """Emit the matched ``}`` as ``Text`` and unwind if it is the last.

        After yielding the token and advancing ``context.pos``, and only when
        the state directly below the current one is ``'token'``, the counter
        ``context.perl6_token_nesting_level`` is decremented.  When it
        reaches zero the current state is popped, returning to the token
        state.
        """
        state_stack = context.stack
        start, end = match.span()

        yield start, Text, context.text[start:end]
        context.pos = end

        # Not one level below a 'token' state: nothing to track.
        if len(state_stack) <= 2 or state_stack[-2] != 'token':
            return

        context.perl6_token_nesting_level -= 1
        if context.perl6_token_nesting_level == 0:
            # the brace that opened the embedded code has been closed
            state_stack.pop()

    def embedded_perl6_callback(lexer, match, context):
        """Switch to the ``'root'`` rules for the matched opening brace.

        Sets ``context.perl6_token_nesting_level`` to 1, emits the matched
        text as ``Text``, advances ``context.pos`` past it and pushes
        ``'root'`` onto the state stack.
        """
        context.perl6_token_nesting_level = 1

        start, end = match.span()
        yield start, Text, context.text[start:end]

        context.pos = end
        context.stack.append('root')

    # If you're modifying these rules, be careful if you need to process '{' or '}'
    # characters. We have special logic for processing these characters (due to the fact
    # that you can nest Perl 6 code in regex blocks), so if you need to process one of
    # them, make sure you also process the corresponding one!
    tokens = {
        # Rules shared by the 'root' and 'pre-token' states.
        'common': [
            # '#' followed by '`', '|' or '=' and a run of one repeated
            # opening bracket: bracketed comment, consumed by the callback.
            (r'#[`|=](?P<delimiter>(?P<first_char>[' +
             ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
             brackets_callback(Comment.Multiline)),
            # '#' up to the end of the line
            (r'#[^\n]*$', Comment.Single),
            # '=begin NAME' ... '=end NAME'; the '=end' line must repeat the
            # leading whitespace of '=begin' (backreference \1) and the same
            # name (backreference \2)
            (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
            # '=for' block, terminated by a whitespace-only line
            (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
            # any other line starting with '=', up to a whitespace-only line
            (r'^=.*?\n\s*?\n', Comment.Multiline),
            # regex/token/rule whose name ends in ':sym'; the bracketed part
            # that may follow is handled in 'token-sym-brackets'
            (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
             bygroups(Keyword, Name), 'token-sym-brackets'),
            # regex/token/rule with an optional plain name
            (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' +
             PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword,
                                                      Name), 'pre-token'),
            # deal with a special case in the Perl 6 grammar (role q { ... })
            (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
            # Word lists; a word only matches when it is not directly
            # preceded or followed by an identifier character.
            (_build_word_match(PERL6_KEYWORDS,
                               PERL6_IDENTIFIER_RANGE), Keyword),
            (_build_word_match(PERL6_BUILTIN_CLASSES,
                               PERL6_IDENTIFIER_RANGE,
                               suffix='(?::[UD])?'), Name.Builtin),
            (_build_word_match(PERL6_BUILTINS,
                               PERL6_IDENTIFIER_RANGE), Name.Builtin),
            # copied from PerlLexer
            # sigil, optional twigil, identifier and any trailing <...>,
            # <<...>> or «...» subscripts
            (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE +
             u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable),
            # '$!' and '$/', optionally subscripted
            (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            # '::?NAME'
            (r'::\?\w+', Name.Variable.Global),
            # variables with the '*' twigil
            (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE +
             u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            # '$<name>' style variables
            (r'\$(?:<.*?>)+', Name.Variable),
            # q / qq / Q quoting with optional adverbs; the delimiter is a
            # run of one repeated character, handled by the callback
            (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
             r'(?P=first_char)*)', brackets_callback(String)),
            # copied from PerlLexer
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            # '/.../' is taken as a regex only directly after '~~', '=',
            # '(' or ',' (lookbehind)
            (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            # a word starting with 'm' directly followed by '(' is a plain
            # name, not the start of an m/.../ match
            (r'm\w+(?=\()', Name),
            # m / ms / rx with optional adverbs and an arbitrary delimiter
            (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
             r'(?P=first_char)*)', brackets_callback(String.Regex)),
            # s / ss / tr with '/' delimiters: pattern and replacement
            (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
             String.Regex),
            # '<...>' whose content neither starts with whitespace or '=' nor
            # ends with whitespace
            (r'<[^\s=].*?\S>', String),
            (_build_word_match(PERL6_OPERATORS), Operator),
            (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        ],
        'root': [
            include('common'),
            # braces go through callbacks so that code embedded in a regex
            # block can find its way back to the 'token' state
            (r'\{', opening_brace_callback),
            (r'\}', closing_brace_callback),
            # fallback: anything not matched above
            (r'.+?', Text),
        ],
        # Between a regex/token/rule keyword and the '{' opening its body.
        'pre-token': [
            include('common'),
            # the opening brace replaces this state with 'token'
            (r'\{', Text, ('#pop', 'token')),
            (r'.+?', Text),
        ],
        # Optional bracketed part directly after a ':sym' name; either way
        # this state is replaced by 'pre-token'.
        'token-sym-brackets': [
            (r'(?P<delimiter>(?P<first_char>[' +
             ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
             brackets_callback(Name), ('#pop', 'pre-token')),
            default(('#pop', 'pre-token')),
        ],
        # Body of a regex/token/rule block.
        'token': [
            (r'\}', Text, '#pop'),
            # a declaration directly after ':' up to the next ';' is lexed
            # recursively with this lexer
            (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
            # make sure that quotes in character classes aren't treated as strings
            (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
            # make sure that '#' characters in quotes aren't treated as comments
            (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
            (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
            (r'#.*?$', Comment.Single),
            # '{' starts embedded code: the callback pushes 'root'
            (r'\{', embedded_perl6_callback),
            ('.+?', String.Regex),
        ],
    }

    def analyse_text(text):
        """Estimate how likely ``text`` is Perl 6 source code.

        POD sections are removed first.  The result is then:

        * ``True`` if the shebang line names perl6, rakudo, niecza or pugs;
        * ``True`` if a ``v6;`` / ``use v6...;`` statement is reached before
          any other code, skipping blank lines, comment-only lines and
          class-like declarations;
        * ``True`` if a module/class/role/enum/grammar declaration is reached
          in the same way and either carries a ``my``/``our`` scope or a
          ``my``/``our``/``has`` declaration was seen anywhere in the text;
        * ``0.8`` if only such a ``my``/``our``/``has`` declaration was seen;
        * ``0.05`` if only an unscoped class-like declaration was seen;
        * ``False`` otherwise.
        """
        # XXX handle block comments

        # Drop POD: a line starting with '=word' opens a POD section and a
        # line starting with '=end' or '=cut' closes it.  The marker lines
        # themselves are dropped as well.
        code_lines = []
        inside_pod = False
        for raw_line in text.splitlines():
            if re.match(r'^=(?:end|cut)', raw_line):
                inside_pod = False
            elif re.match(r'^=\w+', raw_line):
                inside_pod = True
            elif not inside_pod:
                code_lines.append(raw_line)
        text = '\n'.join(code_lines)

        if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
            return True

        # check for my/our/has declarations
        declaration_re = (r"(?:my|our|has)\s+(?:" +
                          Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                          r"+\s+)?[$@%&(]")
        saw_perl_decl = re.search(declaration_re, text) is not None
        rating = 0.8 if saw_perl_decl else False

        for line in code_lines:
            # ignore comments and lines that are blank without them
            line = re.sub('#.*', '', line)
            if re.match(r'^\s*$', line):
                continue

            # match v6; use v6; use v6.0; use v6.0.0;
            if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
                return True

            # match class, module, role, enum, grammar declarations
            class_decl = re.match(
                r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)',
                line)
            if not class_decl:
                # first line of ordinary code: stop looking
                break
            if saw_perl_decl or class_decl.group('scope') is not None:
                return True
            rating = 0.05

        return rating

    def __init__(self, **options):
        """Initialise the lexer with the given keyword ``options``.

        All options are forwarded to the base class initialiser.  Afterwards
        ``self.encoding`` is set from the ``encoding`` option, falling back
        to ``'utf-8'`` when the caller did not supply one.
        """
        super(Perl6Lexer, self).__init__(**options)
        # Assigned after the base initialiser so this value is the one that
        # ends up on the instance.
        self.encoding = options.get('encoding', 'utf-8')
Example #30
0
class GnuplotLexer(RegexLexer):
    """
    For `Gnuplot <http://gnuplot.info/>`_ plotting scripts.

    The token table is organised around commands: a command keyword matched
    in ``root`` enters a state describing that command's arguments
    (``genericargs``, ``noargs``, ``plot``, ...), and those states are left
    again on ``;`` or a newline (see ``noargs``).

    .. versionadded:: 0.11
    """

    name = 'Gnuplot'
    aliases = ['gnuplot']
    filenames = ['*.plot', '*.plt']
    mimetypes = ['text/x-gnuplot']

    # Each key is a lexer state; each value is an ordered list of rules.
    # Rule order is significant (earlier rules win), so keep it when editing.
    #
    # _shortened / _shortened_many are helpers defined elsewhere in this
    # file.  NOTE(review): the '$' in each word presumably marks the shortest
    # accepted abbreviation (e.g. 'bi$nd' -> bi, bin, bind) -- confirm
    # against the helper's definition.
    tokens = {
        # Start of a statement: recognise the command and enter the state
        # that lexes its arguments.
        'root': [
            include('whitespace'),
            (_shortened('bi$nd'), Keyword, 'bind'),
            (_shortened_many('ex$it', 'q$uit'), Keyword, 'quit'),
            (_shortened('f$it'), Keyword, 'fit'),
            (r'(if)(\s*)(\()', bygroups(Keyword, Text, Punctuation), 'if'),
            (r'else\b', Keyword),
            (_shortened('pa$use'), Keyword, 'pause'),
            (_shortened_many('p$lot', 'rep$lot', 'sp$lot'), Keyword, 'plot'),
            (_shortened('sa$ve'), Keyword, 'save'),
            # Two states are entered at once: the option name is lexed in
            # 'optionarg' (which pops itself after one match), leaving
            # 'genericargs' for the option's value.
            (_shortened('se$t'), Keyword, ('genericargs', 'optionarg')),
            # Same idea, but nothing is expected after the option name.
            (_shortened_many('sh$ow',
                             'uns$et'), Keyword, ('noargs', 'optionarg')),
            # Commands followed by free-form arguments.
            (_shortened_many('low$er', 'ra$ise', 'ca$ll', 'cd$', 'cl$ear',
                             'h$elp', '\\?$', 'hi$story', 'l$oad', 'pr$int',
                             'pwd$', 're$read', 'res$et', 'scr$eendump',
                             'she$ll', 'sy$stem',
                             'up$date'), Keyword, 'genericargs'),
            # Commands that take no arguments.
            # NOTE(review): 'pwd$', 're$read', 'res$et', 'scr$eendump' and
            # 'she$ll' are also listed in the rule above, which is tried
            # first, so only 'test$' appears reachable here -- confirm which
            # of the two states was intended for the duplicated words.
            (_shortened_many('pwd$', 're$read', 'res$et', 'scr$eendump',
                             'she$ll', 'test$'), Keyword, 'noargs'),
            # Variable assignment: name = ...
            (r'([a-zA-Z_]\w*)(\s*)(=)', bygroups(Name.Variable, Text,
                                                 Operator), 'genericargs'),
            # Function definition: name(params) = ...; the whole
            # parenthesised parameter part is emitted as a single Text token.
            (r'([a-zA-Z_]\w*)(\s*\(.*?\)\s*)(=)',
             bygroups(Name.Function, Text, Operator), 'genericargs'),
            (r'@[a-zA-Z_]\w*', Name.Constant),  # macros
            (r';', Keyword),
        ],
        # Body of a '#' comment (entered from 'whitespace').  A backslash
        # directly before a newline keeps the comment going on the next line.
        'comment': [
            (r'[^\\\n]', Comment),
            (r'\\\n', Comment),
            (r'\\', Comment),
            # don't add the newline to the Comment token
            default('#pop'),
        ],
        # Shared by most states: comment start and horizontal whitespace.
        # Newlines are deliberately not matched here; they terminate
        # argument lists (see 'noargs').
        'whitespace': [
            ('#', Comment, 'comment'),
            (r'[ \t\v\f]+', Text),
        ],
        # Commands without arguments: only whitespace/comments are accepted
        # until the statement ends.
        'noargs': [
            include('whitespace'),
            # semicolon and newline end the argument list
            (r';', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        # Inside a double-quoted string: backslash escapes are recognised.
        'dqstring': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
            (r'\n', String, '#pop'),  # newline ends the string too
        ],
        # Inside a single-quoted string: no escape sequences, only a doubled
        # quote stands for a literal quote.
        'sqstring': [
            (r"''", String),  # escaped single quote
            (r"'", String, '#pop'),
            (r"[^\\'\n]+", String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # normal backslash
            (r'\n', String, '#pop'),  # newline ends the string too
        ],
        # Generic expression arguments: strings, numbers, operators, names.
        # Includes 'noargs', so ';' and newline end the argument list here
        # as well.
        'genericargs': [
            include('noargs'),
            (r'"', String, 'dqstring'),
            (r"'", String, 'sqstring'),
            # Float patterns come before the integer pattern so that the
            # integer rule does not consume the leading digits of a float.
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            (r'(\d+\.\d*|\.\d+)', Number.Float),
            (r'-?\d+', Number.Integer),
            ('[,.~!%^&*+=|?:<>/-]', Operator),
            (r'[{}()\[\]]', Punctuation),
            # Word operators must precede the generic name rules below.
            (r'(eq|ne)\b', Operator.Word),
            # A name directly followed by '(' is a function call.
            (r'([a-zA-Z_]\w*)(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
            (r'@[a-zA-Z_]\w*', Name.Constant),  # macros
            (r'\\\n', Text),
        ],
        # The option name following set/show/unset; pops after one match so
        # the state pushed beneath it takes over.
        'optionarg': [
            include('whitespace'),
            (_shortened_many(
                "a$ll", "an$gles", "ar$row", "au$toscale", "b$ars",
                "bor$der", "box$width", "cl$abel", "c$lip", "cn$trparam",
                "co$ntour", "da$ta", "data$file", "dg$rid3d",
                "du$mmy", "enc$oding", "dec$imalsign", "fit$", "font$path",
                "fo$rmat", "fu$nction", "fu$nctions", "g$rid", "hid$den3d",
                "his$torysize", "is$osamples", "k$ey", "keyt$itle", "la$bel",
                "li$nestyle", "ls$", "loa$dpath", "loc$ale", "log$scale",
                "mac$ros", "map$ping", "map$ping3d", "mar$gin", "lmar$gin",
                "rmar$gin", "tmar$gin", "bmar$gin", "mo$use", "multi$plot",
                "mxt$ics", "nomxt$ics", "mx2t$ics", "nomx2t$ics", "myt$ics",
                "nomyt$ics", "my2t$ics", "nomy2t$ics", "mzt$ics", "nomzt$ics",
                "mcbt$ics", "nomcbt$ics", "of$fsets", "or$igin", "o$utput",
                "pa$rametric", "pm$3d", "pal$ette", "colorb$ox", "p$lot",
                "poi$ntsize", "pol$ar", "pr$int", "obj$ect", "sa$mples",
                "si$ze", "st$yle", "su$rface", "table$", "t$erminal",
                "termo$ptions", "ti$cs", "ticsc$ale", "ticsl$evel", "timef$mt",
                "tim$estamp", "tit$le", "v$ariables", "ve$rsion", "vi$ew",
                "xyp$lane", "xda$ta", "x2da$ta", "yda$ta", "y2da$ta", "zda$ta",
                "cbda$ta", "xl$abel", "x2l$abel", "yl$abel", "y2l$abel",
                "zl$abel", "cbl$abel", "xti$cs", "noxti$cs", "x2ti$cs",
                "nox2ti$cs", "yti$cs", "noyti$cs", "y2ti$cs", "noy2ti$cs",
                "zti$cs", "nozti$cs", "cbti$cs", "nocbti$cs", "xdti$cs",
                "noxdti$cs", "x2dti$cs", "nox2dti$cs", "ydti$cs", "noydti$cs",
                "y2dti$cs", "noy2dti$cs", "zdti$cs", "nozdti$cs", "cbdti$cs",
                "nocbdti$cs", "xmti$cs", "noxmti$cs", "x2mti$cs", "nox2mti$cs",
                "ymti$cs", "noymti$cs", "y2mti$cs", "noy2mti$cs", "zmti$cs",
                "nozmti$cs", "cbmti$cs", "nocbmti$cs", "xr$ange", "x2r$ange",
                "yr$ange", "y2r$ange", "zr$ange", "cbr$ange", "rr$ange",
                "tr$ange", "ur$ange", "vr$ange", "xzeroa$xis", "x2zeroa$xis",
                "yzeroa$xis", "y2zeroa$xis", "zzeroa$xis", "zeroa$xis",
                "z$ero"), Name.Builtin, '#pop'),
        ],
        # Arguments of 'bind'; a '!' leaves the state.
        'bind': [
            ('!', Keyword, '#pop'),
            (_shortened('all$windows'), Name.Builtin),
            include('genericargs'),
        ],
        # After exit/quit: the word 'gnuplot' is highlighted as a keyword,
        # otherwise no arguments are accepted.
        'quit': [
            (r'gnuplot\b', Keyword),
            include('noargs'),
        ],
        # Arguments of 'fit': 'via' plus everything 'plot' accepts.
        'fit': [
            (r'via\b', Name.Builtin),
            include('plot'),
        ],
        # Condition of an 'if (': ends at the first ')' seen in this state.
        'if': [
            (r'\)', Punctuation, '#pop'),
            include('genericargs'),
        ],
        # Arguments of 'pause': mouse/key event names, then generic
        # arguments.
        'pause': [
            (r'(mouse|any|button1|button2|button3)\b', Name.Builtin),
            (_shortened('key$press'), Name.Builtin),
            include('genericargs'),
        ],
        # Arguments of plot/replot/splot: plot modifiers are highlighted as
        # builtins, the rest is lexed as generic arguments.
        'plot': [
            (_shortened_many('ax$es', 'axi$s', 'bin$ary', 'ev$ery', 'i$ndex',
                             'mat$rix', 's$mooth', 'thru$', 't$itle',
                             'not$itle', 'u$sing', 'w$ith'), Name.Builtin),
            include('genericargs'),
        ],
        # Arguments of 'save': the kind of data to save, then generic
        # arguments (typically the file name).
        'save': [
            (_shortened_many('f$unctions', 's$et', 't$erminal',
                             'v$ariables'), Name.Builtin),
            include('genericargs'),
        ],
    }