Example #1
0
class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric `BrainFuck <http://www.muppetlabs.com/~breadbox/bf/>`_
    language.

    Bracket nesting is tracked on the state stack: a ``[`` seen in ``root``
    enters the ``loop`` state, a ``[`` inside ``loop`` pushes it again and a
    ``]`` inside ``loop`` pops it.  A ``]`` that is seen in ``root`` therefore
    has no matching ``[`` and is emitted as an error token.
    """

    name = 'Brainfuck'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']

    tokens = {
        # rules shared by 'root' and 'loop' (pulled in via include())
        'common': [
            # use different colors for different instruction types
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # any run of characters that is not one of the eight
            # instruction characters is treated as a comment
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        'root': [
            (r'\[', Keyword, 'loop'),
            # closing bracket outside of any loop
            (r'\]', Error),
            include('common'),
        ],
        'loop': [
            # nested loop: stack another 'loop' state
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ]
    }
Example #2
0
class VCLSnippetLexer(VCLLexer):
    """
    For Varnish Configuration Language snippets.

    Extends the ``root`` state of ``VCLLexer`` with a few snippet-only rules
    placed before and after the inherited ones.

    .. versionadded:: 2.2
    """
    name = 'VCLSnippets'
    aliases = ['vclsnippets', 'vclsnippet']
    mimetypes = ['text/x-vclsnippet']
    # no filename patterns are registered for this lexer
    filenames = []

    def analyse_text(text):
        # override method inherited from VCLLexer
        # always reports 0, regardless of the text
        return 0

    tokens = {
        # rules tried before the inherited 'root' rules
        'snippetspre': [
            # three or more consecutive dots
            (r'\.\.\.+', Comment),
            # a bare object name at end of line, or one followed by ".*"
            (r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|'
             r'storage)($|\.\*)', Name.Variable),
        ],
        # rules tried after the inherited 'root' rules
        'snippetspost': [
            (r'(backend)\b', Keyword.Reserved),
        ],
        'root': [
            include('snippetspre'),
            # NOTE(review): `inherit` is expected to splice in the parent
            # lexer's 'root' rules at this position - confirm against the
            # Pygments RegexLexer documentation.
            inherit,
            include('snippetspost'),
        ],
    }
Example #3
0
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    The ``slashstartsregex`` state is pushed after tokens that may be
    followed by a regex literal; in that state a ``/.../`` literal is tried
    first and the state is popped again as soon as anything else follows.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    # Not referenced by the rules below; kept because they are public
    # class attributes.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            # not a regex literal: fall back to the previous state
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            # at the start of a line a regex literal may follow
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            # 'fetch' used to be listed twice in this alternation
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            # 'char' used to be listed twice in this alternation
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # The three branches are mutually exclusive (escaped backslash,
            # any other escape, any character that is neither quote nor
            # backslash).  The previous form let a backslash match several
            # branches, which backtracks exponentially on an unterminated
            # string containing many backslashes.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
Example #4
0
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.

    Math mode is handled by two states, ``inlinemath`` and ``displaymath``,
    which share their body rules through ``math``.  A control sequence in
    text mode enters ``command`` so that a following ``[...]`` argument or
    ``*`` can be highlighted.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # comments and special characters, valid in text and in math
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],

        'root': [
            # math openers
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # control word or control symbol
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            # backslash at the very end of a line
            (r'\\$', Keyword),
            include('general'),
            # plain text up to the next special character
            (r'[^\\$%&_^{}]+', Text),
        ],

        # body rules shared by both math states
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],

        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],

        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            # a single dollar sign does not close display math
            (r'\$', Name.Builtin),
            include('math'),
        ],

        # what may directly follow a control sequence
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return True when *text* opens with a typical (La)TeX command.

        Falls through (returning None) when none of the prefixes match.
        """
        openers = (
            "\\documentclass",
            "\\input",
            "\\documentstyle",
            "\\relax",
        )
        if any(text[:len(opener)] == opener for opener in openers):
            return True
Example #5
0
class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    .. versionadded:: 0.10
    """

    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    # change-type words; joined into one alternation for the rule in 'root'
    DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
                       'replace')

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'\{', Operator),
            (r'\}', Operator),
            # two-line patch header that is closed by ']' right away
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text, Operator)),
            # same header without the closing ']': the lines that follow are
            # lexed in the 'comment' state until ']' is found
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            # optional indentation, a change-type keyword, rest of the line
            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
             bygroups(Text, Keyword, Text)),
            # '+' / '-' start an inserted / deleted line
            (r'\+', Generic.Inserted, "insert"),
            (r'-', Generic.Deleted, "delete"),
            # any other line
            (r'.*\n', Text),
        ],
        'comment': [
            # a line that does not start with ']'
            (r'[^\]].*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'specialText': [  # darcs add [_CODE_] special operators for clarity
            (r'\n', Text, "#pop"),  # line-based
            (r'\[_[^_]*_]', Operator),
        ],
        'insert': [
            include('specialText'),
            # a '[' that does not start a [_CODE_] operator
            (r'\[', Generic.Inserted),
            (r'[^\n\[]+', Generic.Inserted),
        ],
        'delete': [
            include('specialText'),
            # a '[' that does not start a [_CODE_] operator
            (r'\[', Generic.Deleted),
            (r'[^\n\[]+', Generic.Deleted),
        ],
    }
Example #6
0
def gen_elixir_string_rules(name, symbol, token):
    """Return a one-entry state table for a string delimited by *symbol*.

    The single key is ``'string_' + name``.  *symbol* is interpolated into
    the regexes as-is and *token* is the token type given to the string
    contents and to the closing delimiter.
    """
    rules = [
        # plain characters: anything but '#', the delimiter and a backslash
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        # a backslash pair that the 'escapes' rules did not consume
        (r'\\.', token),
        # closing delimiter
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol')
    ]
    return {'string_' + name: rules}
Example #7
0
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    A line that starts with ``.`` is lexed in the ``request`` state, any
    other line in the ``textline`` state; both are left at the newline.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            # the line starts with a backslash or is empty
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # backslash directly before the newline
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Score how likely *text* is (g)roff source.

        Returns False when the text does not start with a dot, True for a
        leading ``.\\"`` comment or ``.TH `` request, and 0.9 when the text
        starts with a dot, two alphanumeric characters and whitespace.
        Falls through (returning None) otherwise.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice rather than index: ``text[3]`` raised IndexError for inputs
        # shorter than four characters (e.g. ".ab").  An empty slice simply
        # fails the isspace() test.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
Example #8
0
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    The ``slashstartsregex`` state is pushed after tokens that may be
    followed by a regex literal, so that ``/`` in that position is tried as
    the start of a regex before anything else.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # a '/' that does not start a well-formed regex literal
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # at the start of a line a regex literal may follow
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # The three branches are mutually exclusive (escaped backslash,
            # any other escape, any character that is neither quote nor
            # backslash).  The previous form let a backslash match several
            # branches, which backtracks exponentially on an unterminated
            # string containing many backslashes.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
Example #9
0
    def gen_elixir_sigil_rules():
        """Build the state table that lexes sigils.

        The returned dict holds a ``'sigils'`` entry with the rules that
        recognise the start of a sigil, plus the body states those rules
        jump to.  A lower-case sigil letter selects the ``-intp`` body
        state, an upper-case letter the ``-no-intp`` one.
        """
        sigil_token = String.Other

        # heredoc delimiters: (regex, state-name stem)
        heredoc_delims = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        # every other valid delimiter pair:
        # (opening regex, closing regex, state-name stem)
        simple_delims = [
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'<', r'>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        ]

        table = {'sigils': []}
        entry_rules = table['sigils']

        # Heredoc sigils are registered first so that they are tried before
        # the single-quote forms.
        for delim, stem in heredoc_delims:
            end_state = stem + '-end'
            intp_state = stem + '-intp'
            raw_state = stem + '-no-intp'

            entry_rules.extend([
                (r'(~[a-z])(%s)' % (delim,),
                 bygroups(sigil_token, String.Heredoc),
                 (end_state, intp_state)),
                (r'(~[A-Z])(%s)' % (delim,),
                 bygroups(sigil_token, String.Heredoc),
                 (end_state, raw_state)),
            ])

            # optional letters after the closing delimiter
            table[end_state] = [
                (r'[a-zA-Z]+', sigil_token, '#pop'),
                default('#pop'),
            ]
            # the closing delimiter, possibly indented, at the start of a line
            table[intp_state] = [
                (r'^\s*' + delim, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            table[raw_state] = [
                (r'^\s*' + delim, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for opener, closer, stem in simple_delims:
            intp_state = stem + '-intp'
            raw_state = stem + '-no-intp'

            entry_rules.extend([
                (r'~[a-z]' + opener, sigil_token, intp_state),
                (r'~[A-Z]' + opener, sigil_token, raw_state),
            ])
            table[intp_state] = gen_elixir_sigstr_rules(closer, sigil_token)
            table[raw_state] = gen_elixir_sigstr_rules(
                closer, sigil_token, interpol=False)

        return table
Example #10
0
class ClayLexer(RegexLexer):
    """
    For `Clay <http://claylabs.com/clay/>`_ source.

    String literals are lexed in dedicated states: ``dqs`` for ``"..."``
    and ``tdqs`` for ``\"\"\"...\"\"\"``.

    .. versionadded:: 2.0
    """
    name = 'Clay'
    filenames = ['*.clay']
    aliases = ['clay']
    mimetypes = ['text/x-clay']
    tokens = {
        'root': [
            # one whitespace character per token
            (r'\s', Text),
            (r'//.*?$', Comment.Single),
            # block comment; a backslash-newline is tolerated between the
            # '/' and '*' of either delimiter
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'\b(public|private|import|as|record|variant|instance'
             r'|define|overload|default|external|alias'
             r'|rvalue|ref|forward|inline|noinline|forceinline'
             r'|enum|var|and|or|not|if|else|goto|return|while'
             r'|switch|case|break|continue|for|in|true|false|try|catch|throw'
             r'|finally|onerror|staticassert|eval|when|newtype'
             r'|__FILE__|__LINE__|__COLUMN__|__ARG__'
             r')\b', Keyword),
            (r'[~!%^&*+=|:<>/-]', Operator),
            (r'[#(){}\[\],;.]', Punctuation),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'\d+[LlUu]*', Number.Integer),
            # NOTE(review): `true` and `false` are also part of the Keyword
            # alternation above, which is tried first, so this rule never
            # fires as written - decide which token type is intended.
            (r'\b(true|false)\b', Name.Builtin),
            (r'(?i)[a-z_?][\w?]*', Name),
            # the triple-quote opener must be tried before the single quote
            (r'"""', String, 'tdqs'),
            (r'"', String, 'dqs'),
        ],
        # string body rules shared by both string states
        'strings': [
            (r'(?i)\\(x[0-9a-f]{2}|.)', String.Escape),
            (r'.', String),
        ],
        # newline inside a string; only included by 'tdqs'
        'nl': [
            (r'\n', String),
        ],
        'dqs': [
            (r'"', String, '#pop'),
            include('strings'),
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl'),
        ],
    }
Example #11
0
def gen_elixir_sigstr_rules(term, token, interpol=True):
    """Return the rule list for the body of a sigil closed by *term*.

    *term* is interpolated into the regexes as-is and *token* is the token
    type given to every part of the body.  With *interpol* true the
    ``escapes`` and ``interpol`` rule sets are included and ``#`` is
    excluded from the plain-character run; otherwise only plain
    characters, backslash pairs and the terminator are recognised.
    """
    # a backslash followed by any character
    escaped_char = (r'\\.', token)
    # the terminator plus any letters directly after it
    closing = (r'%s[a-zA-Z]*' % (term,), token, '#pop')

    if not interpol:
        return [
            (r'[^%s\\]+' % (term,), token),
            escaped_char,
            closing,
        ]

    return [
        (r'[^#%s\\]+' % (term,), token),
        include('escapes'),
        escaped_char,
        closing,
        include('interpol')
    ]
Example #12
0
class NewspeakLexer(RegexLexer):
    """
    For `Newspeak <http://newspeaklanguage.org/>`_ syntax.

    .. versionadded:: 1.1
    """
    name = 'Newspeak'
    filenames = ['*.ns2']
    aliases = [
        'newspeak',
    ]
    mimetypes = ['text/x-newspeak']

    tokens = {
        'root':
        [(r'\b(Newsqueak2)\b', Keyword.Declaration), (r"'[^']*'", String),
         # class header: keyword, whitespace, class name
         (r'\b(class)(\s+)(\w+)(\s*)',
          bygroups(Keyword.Declaration, Text, Name.Class, Text)),
         (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b',
          Keyword),
         # "word:" directly followed by an identifier
         (r'(\w+\:)(\s*)([a-zA-Z_]\w+)',
          bygroups(Name.Function, Text, Name.Variable)),
         # "word =" pair
         (r'(\w+)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)),
         (r'<\w+>', Comment.Special),
         include('expressionstat'),
         include('whitespace')],
        'expressionstat': [
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'\d+', Number.Integer),
            (r':\w+', Name.Variable),
            (r'(\w+)(::)', bygroups(Name.Variable, Operator)),
            (r'\w+:', Name.Function),
            (r'\w+', Name.Variable),
            (r'\(|\)', Punctuation),
            (r'\[|\]', Punctuation),
            (r'\{|\}', Punctuation),
            (r'(\^|\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-|:)', Operator),
            (r'\.|;', Punctuation),
            include('whitespace'),
            include('literals'),
        ],
        # '$' + one character, quoted strings, and '#'-prefixed symbols
        'literals':
        [(r'\$.', String), (r"'[^']*'", String), (r"#'[^']*'", String.Symbol),
         (r"#\w+:?", String.Symbol),
         (r"#(\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-)+", String.Symbol)],
        # double-quoted text is emitted as a comment
        'whitespace': [(r'\s+', Text), (r'"[^"]*"', Comment)],
    }
Example #13
0
class OdinLexer(AtomsLexer):
    """
    Lexer for ODIN syntax.

    The ``whitespace`` and ``values`` states included below are not defined
    in this class; presumably they are inherited from ``AtomsLexer``
    (verify against that class).

    .. versionadded:: 2.1
    """
    name = 'ODIN'
    aliases = ['odin']
    filenames = ['*.odin']
    mimetypes = ['text/odin']

    tokens = {
        # entered from 'root' on '/'; left on '>', on a comma, or on
        # whitespace
        'path': [
            (r'>', Punctuation, '#pop'),
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'/', Punctuation),
            (r'\[', Punctuation, 'key'),
            (r'\s*,\s*', Punctuation, '#pop'),
            (r'\s+', Text, '#pop'),
        ],
        # contents of '[...]'
        'key': [
            include('values'),
            (r'\]', Punctuation, '#pop'),
        ],
        # contents of '(...)': everything up to ')' is one Name.Class token
        'type_cast': [
            (r'\)', Punctuation, '#pop'),
            (r'[^)]+', Name.Class),
        ],
        'root': [
            include('whitespace'),
            (r'([Tt]rue|[Ff]alse)', Literal),
            include('values'),
            # x-ref path
            (r'/', Punctuation, 'path'),
            # x-ref path starting with key
            (r'\[', Punctuation, 'key'),
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'=', Operator),
            (r'\(', Punctuation, 'type_cast'),
            (r',', Punctuation),
            (r'<', Punctuation),
            (r'>', Punctuation),
            (r';', Punctuation),
        ],
    }
Example #14
0
class ParaSailLexer(RegexLexer):
    """
    For `ParaSail <http://www.parasail-lang.org>`_ source code.

    .. versionadded:: 2.1
    """

    name = 'ParaSail'
    aliases = ['parasail']
    filenames = ['*.psi', '*.psl']
    mimetypes = ['text/x-parasail']

    flags = re.MULTILINE

    tokens = {
        'root': [
            # whitespace other than newlines
            (r'[^\S\n]+', Text),
            (r'//.*?\n', Comment.Single),
            # word operators used as compound assignment
            (r'\b(and|or|xor)=', Operator.Word),
            (r'\b(and(\s+then)?|or(\s+else)?|xor|rem|mod|'
             r'(is|not)\s+null)\b', Operator.Word),
            # Keywords
            (
                r'\b(abs|abstract|all|block|class|concurrent|const|continue|'
                r'each|end|exit|extends|exports|forward|func|global|implements|'
                r'import|in|interface|is|lambda|locked|new|not|null|of|op|'
                r'optional|private|queued|ref|return|reverse|separate|some|'
                r'type|until|var|with|'
                # Control flow
                r'if|then|else|elsif|case|for|while|loop)\b',
                Keyword.Reserved),
            # Word boundaries are required here: without them an identifier
            # that merely starts with one of these words (e.g. ``types`` or
            # ``opcode``) was split into a keyword and a trailing name.
            # Every word in this rule is also in the reserved-word rule
            # above, which is tried first.
            (r'\b(abstract\s+)?(interface|class|op|func|type)\b',
             Keyword.Declaration),
            # Literals
            (r'"[^"]*"', String),
            (r'\\[\'ntrf"0]', String.Escape),
            (r'#[a-zA-Z]\w*', Literal),  # Enumeration
            include('numbers'),
            (r"'[^']'", String.Char),
            (r'[a-zA-Z]\w*', Name),
            # Operators and Punctuation
            (r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|'
             r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\|=|\||/=|\+|-|\*|/|'
             r'\.\.|<\.\.|\.\.<|<\.\.<)', Operator),
            # NOTE(review): the bare `.` in this alternation matches any
            # character except a newline, not just a literal dot - confirm
            # whether a catch-all is intended before escaping it.
            (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', Punctuation),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'\d[0-9_]*#[0-9a-fA-F][0-9a-fA-F_]*#', Number.Hex),  # any base
            (r'0[xX][0-9a-fA-F][0-9a-fA-F_]*', Number.Hex),  # C-like hex
            (r'0[bB][01][01_]*', Number.Bin),  # C-like bin
            (
                r'\d[0-9_]*\.\d[0-9_]*[eE][+-]\d[0-9_]*',  # float exp
                Number.Float),
            (r'\d[0-9_]*\.\d[0-9_]*', Number.Float),  # float
            (r'\d[0-9_]*', Number.Integer),  # integer
        ],
    }
Example #15
0
class BSTLexer(RegexLexer):
    """
    A lexer for BibTeX bibliography styles.

    Each command keyword is followed by a fixed number of ``{...}`` groups;
    that number is expressed by how many ``group`` states the keyword rule
    pushes.

    .. versionadded:: 2.2
    """

    name = 'BST'
    aliases = ['bst', 'bst-pybtex']
    filenames = ['*.bst']
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            include('whitespace'),
            # commands without a group
            (words(['read', 'sort']), Keyword),
            # commands followed by one group
            (words(['execute', 'integers', 'iterate', 'reverse',
                    'strings']), Keyword, ('group')),
            # commands followed by two groups
            (words(['function', 'macro']), Keyword, ('group', 'group')),
            # commands followed by three groups
            (words(['entry']), Keyword, ('group', 'group', 'group')),
        ],
        'group': [
            include('whitespace'),
            # replace this state by the group body followed by its '}'
            (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
        ],
        'group-end': [
            include('whitespace'),
            (r'\}', Punctuation, '#pop'),
        ],
        'body': [
            include('whitespace'),
            (r"\'[^#\"\{\}\s]+", Name.Function),
            (r'[^#\"\{\}\s]+\$', Name.Builtin),
            (r'[^#\"\{\}\s]+', Name.Variable),
            (r'"[^\"]*"', String),
            (r'#-?\d+', Number),
            # nested group
            (r'\{', Punctuation, ('group-end', 'body')),
            default('#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
            # was Comment.SingleLine, which is not one of the standard
            # token types; Comment.Single is the standard one for
            # single-line comments
            ('%.*?$', Comment.Single),
        ],
    }
Example #16
0
class MscgenLexer(RegexLexer):
    """
    For `Mscgen <http://www.mcternan.me.uk/mscgen/>`_ files.

    .. versionadded:: 1.6
    """
    name = 'Mscgen'
    aliases = ['mscgen', 'msc']
    filenames = ['*.msc']

    # a bare word, or a double-quoted string in which \" is an escape
    _var = r'(\w+|"(?:\\"|[^"])*")'

    tokens = {
        'root': [
            (r'msc\b', Keyword.Type),
            # Options
            (r'(hscale|HSCALE|width|WIDTH|wordwraparcs|WORDWRAPARCS'
             r'|arcgradient|ARCGRADIENT)\b', Name.Property),
            # Operators
            (r'(abox|ABOX|rbox|RBOX|box|BOX|note|NOTE)\b', Operator.Word),
            # three characters, each one of '.', '-' or '|'
            (r'(\.|-|\|){3}', Keyword),
            (r'(?:-|=|\.|:){2}'
             r'|<<=>>|<->|<=>|<<>>|<:>'
             r'|->|=>>|>>|=>|:>|-x|-X'
             r'|<-|<<=|<<|<=|<:|x-|X-|=', Operator),
            # Names
            (r'\*', Name.Builtin),
            (_var, Name.Variable),
            # Other
            (r'\[', Punctuation, 'attrs'),
            (r'\{|\}|,|;', Punctuation),
            include('comments')
        ],
        # contents of '[...]': comma-separated "name = value" pairs
        'attrs': [(r'\]', Punctuation, '#pop'),
                  (_var + r'(\s*)(=)(\s*)' + _var,
                   bygroups(Name.Attribute, Text.Whitespace, Operator,
                            Text.Whitespace, String)), (r',', Punctuation),
                  include('comments')],
        # '//' and '#' line comments, block comments, and whitespace
        'comments': [(r'(?://|#).*?\n', Comment.Single),
                     (r'/\*(?:.|\n)*?\*/', Comment.Multiline),
                     (r'[ \t\r\n]+', Text.Whitespace)]
    }
Example #17
0
class EiffelLexer(RegexLexer):
    """
    For `Eiffel <http://www.eiffel.com>`_ source code.

    .. versionadded:: 2.0
    """
    name = 'Eiffel'
    aliases = ['eiffel']
    filenames = ['*.e']
    mimetypes = ['text/x-eiffel']

    tokens = {
        'root': [
            # whitespace other than newlines (this rule used to be listed a
            # second time after the comment rule, where it could never fire)
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            # Please note that keyword and operator are case insensitive.
            (r'(?i)(true|false|void|current|result|precursor)\b',
             Keyword.Constant),
            (r'(?i)(and(\s+then)?|not|xor|implies|or(\s+else)?)\b',
             Operator.Word),
            (words(
                ('across', 'agent', 'alias', 'all', 'as', 'assign', 'attached',
                 'attribute', 'check', 'class', 'convert', 'create', 'debug',
                 'deferred', 'detachable', 'do', 'else', 'elseif', 'end',
                 'ensure', 'expanded', 'export', 'external', 'feature', 'from',
                 'frozen', 'if', 'inherit', 'inspect', 'invariant', 'like',
                 'local', 'loop', 'none', 'note', 'obsolete', 'old', 'once',
                 'only', 'redefine', 'rename', 'require', 'rescue', 'retry',
                 'select', 'separate', 'then', 'undefine', 'until', 'variant',
                 'when'),
                prefix=r'(?i)\b',
                suffix=r'\b'), Keyword.Reserved),
            # "[ ... ]" string, which may span lines
            (r'"\[(([^\]%]|\n)|%(.|\n)|\][^"])*?\]"', String),
            # ordinary string; '%' introduces an escape
            (r'"([^"%\n]|%.)*?"', String),
            include('numbers'),
            (r"'([^'%]|%'|%%)'", String.Char),
            (r"(//|\\\\|>=|<=|:=|/=|~|/~|[\\?!#%&@|+/\-=>*$<^\[\]])",
             Operator),
            (r"([{}():;,.])", Punctuation),
            # identifiers that contain a lower-case letter
            (r'([a-z]\w*)|([A-Z][A-Z0-9_]*[a-z]\w*)', Name),
            # identifiers made of upper-case letters, digits and '_' only
            (r'([A-Z][A-Z0-9_]*)', Name.Class),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'0[bB][01]+', Number.Bin),
            (r'0[cC][0-7]+', Number.Oct),
            (r'([0-9]+\.[0-9]*)|([0-9]*\.[0-9]+)', Number.Float),
            (r'[0-9]+', Number.Integer),
        ],
    }
Example #18
0
 def _make_follow_state(compound,
                        _label=_label,
                        _label_compound=_label_compound,
                        _nl=_nl,
                        _space=_space,
                        _start_label=_start_label,
                        _token=_token,
                        _token_compound=_token_compound,
                        _ws=_ws):
     """Return the rule list for a "follow" state.

     With *compound* true the list starts with a rule that pops on a
     lookahead for ``)``, the compound label pattern is used, and the
     ``redirect/compound`` rules are included instead of ``redirect``.
     The underscore-prefixed parameters default to the same-named values
     of the enclosing scope.
     """
     label_pattern = _label_compound if compound else _label
     redirect_state = 'redirect%s' % ('/compound' if compound else '')

     # a closing parenthesis ends a compound block
     rules = [(r'(?=\))', Text, '#pop')] if compound else []

     rules.extend([
         # label line
         (r'%s([%s]*)(%s)(.*)' % (_start_label, _ws, label_pattern),
          bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
         include(redirect_state),
         # pop when one of the `_nl` characters is next
         (r'(?=[%s])' % _nl, Text, '#pop'),
         (r'\|\|?|&&?', Punctuation, '#pop'),
         include('text')
     ])
     return rules
Example #19
0
class AmbientTalkLexer(RegexLexer):
    """
    Lexer for `AmbientTalk <https://code.google.com/p/ambienttalk>`_ source code.

    .. versionadded:: 2.0
    """
    name = 'AmbientTalk'
    filenames = ['*.at']
    aliases = ['at', 'ambienttalk', 'ambienttalk/2']
    mimetypes = ['text/x-ambienttalk']

    flags = re.MULTILINE | re.DOTALL

    # keyword-message parts that are highlighted as builtins
    builtin = words(('if:', 'then:', 'else:', 'when:', 'whenever:',
                     'discovered:', 'disconnected:', 'reconnected:',
                     'takenOffline:', 'becomes:', 'export:', 'as:', 'object:',
                     'actor:', 'mirror:', 'taggedAs:', 'mirroredBy:', 'is:'))
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(def|deftype|import|alias|exclude)\b', Keyword),
            (builtin, Name.Builtin),
            (r'(true|false|nil)\b', Keyword.Constant),
            # namespace root followed by a dot
            (r'(~|lobby|jlobby|/)\.', Keyword.Constant, 'namespace'),
            # The three branches are mutually exclusive (escaped backslash,
            # any other escape, any character that is neither quote nor
            # backslash).  The previous form let a backslash match several
            # branches, which backtracks exponentially on an unterminated
            # string containing many backslashes.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'\|', Punctuation, 'arglist'),
            (r'<:|[*^!%&<>+=,./?-]|:=', Operator),
            (r"`[a-zA-Z_]\w*", String.Symbol),
            (r"[a-zA-Z_]\w*:", Name.Function),
            (r"[{}()\[\];`]", Punctuation),
            (r'(self|super)\b', Name.Variable.Instance),
            (r"[a-zA-Z_]\w*", Name.Variable),
            (r"@[a-zA-Z_]\w*", Name.Class),
            (r"@\[", Name.Class, 'annotations'),
            include('numbers'),
        ],
        'numbers': [(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
                    (r'\d+', Number.Integer)],
        # dotted path after a namespace root; left at the last component
        'namespace': [(r'[a-zA-Z_]\w*\.', Name.Namespace),
                      (r'[a-zA-Z_]\w*:', Name.Function, '#pop'),
                      (r'[a-zA-Z_]\w*(?!\.)', Name.Function, '#pop')],
        # everything up to the closing ']' of "@[...]"
        'annotations': [(r"(.*?)\]", Name.Class, '#pop')],
        # parameter list between '|' ... '|'
        'arglist': [
            (r'\|', Punctuation, '#pop'),
            (r'\s*(,)\s*', Punctuation),
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
    }
Example #20
0
class SilverLexer(RegexLexer):
    """
    For `Silver <https://bitbucket.org/viperproject/silver>`_ source code.

    Block comments nest: inside the ``comment`` state an opening ``/*``
    pushes the state again and ``*/`` pops it.

    .. versionadded:: 2.2
    """
    name = 'Silver'
    aliases = ['silver']
    filenames = ['*.sil', '*.vpr']

    tokens = {
        'root': [
            # Whitespace and Comments
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            # '///' and '//!' comments; must be tried before plain '//'
            (r'//[/!](.*?)\n', Comment.Doc),
            (r'//(.*?)\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # keywords
            (words(('result', 'true', 'false', 'null', 'method', 'function',
                    'predicate', 'program', 'domain', 'axiom', 'var',
                    'returns', 'field', 'define', 'requires', 'ensures',
                    'invariant', 'fold', 'unfold', 'inhale', 'exhale', 'new',
                    'assert', 'assume', 'goto', 'while', 'if', 'elseif',
                    'else', 'fresh', 'constraining', 'Seq', 'Set', 'Multiset',
                    'union', 'intersection', 'setminus', 'subset', 'unfolding',
                    'in', 'old', 'forall', 'exists', 'acc', 'wildcard',
                    'write', 'none', 'epsilon', 'perm', 'unique', 'apply',
                    'package', 'folding', 'label', 'forperm'),
                   suffix=r'\b'), Keyword),
            # type names
            (words(('Int', 'Perm', 'Bool', 'Ref'),
                   suffix=r'\b'), Keyword.Type),
            include('numbers'),
            (r'[!%&*+=|?:<>/\-\[\]]', Operator),
            (r'([{}():;,.])', Punctuation),
            # Identifier
            (r'[\w$]\w*', Name),
        ],
        'comment': [
            # anything that cannot start or end a comment delimiter
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            # a lone '*' or '/' that is not part of a delimiter
            (r'[*/]', Comment.Multiline),
        ],
        'numbers': [
            (r'[0-9]+', Number.Integer),
        ],
    }
Example #21
0
class ScdocLexer(RegexLexer):
    """
    `scdoc` is a simple man page generator for POSIX systems written in C99.
    https://git.sr.ht/~sircmpwn/scdoc

    Line-oriented constructs are matched in ``root``; everything else falls
    through to the ``inline`` rules.

    .. versionadded:: 2.5
    """
    name = 'scdoc'
    aliases = ['scdoc', 'scd']
    filenames = ['*.scd', '*.scdoc']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # comment
            (r'^(;.+\n)', bygroups(Comment)),

            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted lists
            # (the item text is handed to `using(this, state='inline')`)
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)(\.+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            # (opened by a line holding only ``` and closed by the next one)
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # underlines
            (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            (r'(\s)(\*[^\*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }
Example #22
0
class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    The ``root`` state lexes the left-hand side of a rule; an ``=`` enters
    the ``production`` state, which lexes the right-hand side until a
    terminating ``;`` or ``.`` pops back to ``root``.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        # Left-hand side of a rule, up to and including the '='.
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        # Right-hand side of a rule.
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            # double- and single-quoted terminals (no escape handling)
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            # text enclosed in question marks: ? ... ?
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            # either terminator ends the production and returns to 'root'
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        # '(*' opens a comment from any state that includes this one.
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            # consumed one character at a time
            (r'[^*)]', Comment.Multiline),
            # NOTE(review): the rule above also matches '(', so the '(*'
            # pattern included here can never be the first match in this
            # state; nested comments are therefore not tracked. Confirm
            # whether that is intended.
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            # a lone '*' or ')' that is not part of the closing '*)'
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            # starts with an ASCII letter; may continue with word characters,
            # spaces and hyphens
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }
Example #23
0
 def _make_arithmetic_state(compound,
                            _nl=_nl,
                            _punct=_punct,
                            _string=_string,
                            _variable=_variable,
                            _ws=_ws):
     """Build the rule list for an arithmetic-expression lexer state.

     ``compound`` selects the variant: when true, the list starts with a
     rule that pops the state on a pending ``)`` (without consuming it) and
     the operand pattern's final alternative is ``[^)]`` instead of
     ``[\\w\\W]``.

     The underscore-prefixed parameters are bound at definition time to
     same-named values from the enclosing scope and are interpolated into
     the operand pattern (presumably regex fragments; their definitions are
     not visible here -- verify against the enclosing class).

     Returns a new list of rule tuples, terminated by ``include('follow')``.
     """
     operator_chars = r'=+\-*/!~'
     # What may follow a caret in the last alternative of the operand pattern.
     after_caret = r'[^)]' if compound else r'[\w\W]'

     operator_re = r'([%s]|%%|\^\^)+' % operator_chars
     operand_re = (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
                   (_string, _variable, _nl, operator_chars, _nl, _punct,
                    _ws, _nl, _ws, after_caret))

     # Only the compound variant leaves the state on a lookahead ')'.
     rules = [(r'(?=\))', Text, '#pop')] if compound else []
     rules.extend((
         (r'0[0-7]+', Number.Oct),
         (r'0x[\da-f]+', Number.Hex),
         (r'\d+', Number.Integer),
         (r'[(),]+', Punctuation),
         (operator_re, Operator),
         # operands are re-lexed with the 'variable' state
         (operand_re, using(this, state='variable')),
         # NUL, '|' or '&' ahead ends the expression without consuming it
         (r'(?=[\x00|&])', Text, '#pop'),
         include('follow'),
     ))
     return rules
Example #24
0
class BoogieLexer(RegexLexer):
    """
    For `Boogie <https://boogie.codeplex.com/>`_ source code.

    Recognises line and (nestable) block comments, a fixed set of keywords
    and built-in type names, decimal integers, operators, punctuation and
    identifiers.

    .. versionadded:: 2.1
    """
    name = 'Boogie'
    aliases = ['boogie']
    filenames = ['*.bpl']

    tokens = {
        'root': [
            # Whitespace and Comments
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            # '///' and '//!' are doc comments; this rule must precede the
            # plain '//' rule, which would otherwise match them too.
            # Both rules require the terminating newline to be present.
            (r'//[/!](.*?)\n', Comment.Doc),
            (r'//(.*?)\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Keywords: the '\b' suffix keeps e.g. 'iffy' from matching 'if';
            # longer identifiers are picked up by the Name rule below.
            (words(('axiom', 'break', 'call', 'ensures', 'else', 'exists',
                    'function', 'forall', 'if', 'invariant', 'modifies',
                    'procedure', 'requires', 'then', 'var', 'while'),
                   suffix=r'\b'), Keyword),
            (words(('const', ), suffix=r'\b'), Keyword.Reserved),
            (words(('bool', 'int', 'ref'), suffix=r'\b'), Keyword.Type),
            include('numbers'),
            # Multi-character operators are listed before the single-character
            # class so that e.g. '==>' is one token.
            (r"(>=|<=|:=|!=|==>|&&|\|\||[+/\-=>*<\[\]])", Operator),
            (r"([{}():;,.])", Punctuation),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Block comments nest: '/*' pushes another level, '*/' pops one.
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            # a lone '*' or '/' that is not part of a delimiter
            (r'[*/]', Comment.Multiline),
        ],
        'numbers': [
            # decimal integer literals only
            (r'[0-9]+', Number.Integer),
        ],
    }
Example #25
0
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    Overrides two states of the parent lexer so that lexing starts directly
    with the rules of the ``objectvalue`` state and a closing ``}`` is
    reported as an error.

    .. versionadded:: 2.2
    """

    name = 'JSONBareObject'
    aliases = ['json-object']
    # No filename patterns are registered for this lexer.
    filenames = []
    mimetypes = ['application/json-object']

    tokens = {
        'root': [
            # there is no opening brace, so a closing one is always an error
            (r'\}', Error),
            # 'objectvalue' is not defined in this class; it is expected to
            # come from JsonLexer (not visible here)
            include('objectvalue'),
        ],
        'objectattribute': [
            # checked before the inherited rules, so '}' is flagged here too
            (r'\}', Error),
            # followed by the parent lexer's rules for this state
            inherit,
        ],
    }
Example #26
0
class CMakeLexer(RegexLexer):
    """
    Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files.

    Any identifier directly followed by ``(`` is treated as a command name
    and switches to the ``args`` state, which handles nested parentheses,
    variable references, quoted strings and unquoted arguments.

    .. versionadded:: 1.2
    """
    name = 'CMake'
    aliases = ['cmake']
    filenames = ['*.cmake', 'CMakeLists.txt']
    mimetypes = ['text/x-cmake']

    tokens = {
        'root': [
            # Legacy explicit list of command names, left disabled; the
            # generic rule right after it matches any identifier followed by
            # '(' instead.
            # (r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|'
            # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|'
            # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|'
            # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|'
            # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|'
            # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|'
            # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|'
            # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|'
            # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|'
            # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|'
            # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|'
            # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|'
            # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|'
            # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|'
            # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|'
            # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|'
            # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|'
            # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|'
            # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|'
            # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|'
            # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|'
            # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|'
            # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|'
            # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|'
            # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|'
            # r'COUNTARGS)\b', Name.Builtin, 'args'),
            # command name, optional blanks, opening parenthesis -> 'args'
            (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text,
                                              Punctuation), 'args'),
            include('keywords'),
            include('ws')
        ],
        'args': [
            # nested parentheses are tracked by pushing/popping this state
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            # ${VAR}, $ENV{VAR} and $<...> references
            (r'(\$\{)(.+?)(\})', bygroups(Operator, Name.Variable, Operator)),
            (r'(\$ENV\{)(.+?)(\})', bygroups(Operator, Name.Variable, Operator)),
            (r'(\$<)(.+?)(>)', bygroups(Operator, Name.Variable, Operator)),
            # double-quoted string; (?s) lets it span several lines
            (r'(?s)".*?"', String.Double),
            # backslash followed by a run of non-whitespace characters
            (r'\\\S+', String),
            # unquoted argument
            (r'[^)$"# \t\n]+', String),
            (r'\n', Text),  # explicitly legal
            include('keywords'),
            include('ws')
        ],
        # NOTE(review): empty state; no rule in this class transitions to it.
        'string': [

        ],
        'keywords': [
            # platform/compiler identifiers highlighted as keywords
            (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|'
             r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword),
        ],
        'ws': [
            (r'[ \t]+', Text),
            # '#' comment running to (and including) the end of the line
            (r'#.*\n', Comment),
        ]
    }

    def analyse_text(text):
        """Guess whether *text* is a CMake file.

        Returns ``0.8`` when some line consists of a
        ``cmake_minimum_required(VERSION ...)`` call (matched
        case-insensitively), otherwise ``0.0``.

        The version may have multi-digit components (``3.10``,
        ``2.8.12``) and may be a ``<min>...<max>`` range.  Blanks may be
        spaces or tabs, ``FATAL_ERROR`` is optional, and a trailing ``#``
        comment on the same line is tolerated.
        """
        version = r'\d+(\.\d+)*'
        exp = (
            r'^[ \t]*CMAKE_MINIMUM_REQUIRED[ \t]*'
            r'\([ \t]*VERSION[ \t]*' + version +
            # optional policy_max part of "<min>...<max>"
            r'(\.\.\.' + version + r')?'
            r'([ \t]+FATAL_ERROR)?[ \t]*\)[ \t]*'
            r'(#[^\n]*)?$'
        )
        if re.search(exp, text, flags=re.MULTILINE | re.IGNORECASE):
            return 0.8
        return 0.0
Example #27
0
class Perl6Lexer(ExtendedRegexLexer):
    """
    For `Perl 6 <http://www.perl6.org>`_ source code.

    .. versionadded:: 2.0
    """

    name = 'Perl6'
    aliases = ['perl6', 'pl6']
    filenames = [
        '*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm',
        '*.p6m', '*.pm6', '*.t'
    ]
    mimetypes = ['text/x-perl6', 'application/x-perl6']
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    PERL6_IDENTIFIER_RANGE = r"['\w:-]"

    PERL6_KEYWORDS = (
        'BEGIN',
        'CATCH',
        'CHECK',
        'CONTROL',
        'END',
        'ENTER',
        'FIRST',
        'INIT',
        'KEEP',
        'LAST',
        'LEAVE',
        'NEXT',
        'POST',
        'PRE',
        'START',
        'TEMP',
        'UNDO',
        'as',
        'assoc',
        'async',
        'augment',
        'binary',
        'break',
        'but',
        'cached',
        'category',
        'class',
        'constant',
        'contend',
        'continue',
        'copy',
        'deep',
        'default',
        'defequiv',
        'defer',
        'die',
        'do',
        'else',
        'elsif',
        'enum',
        'equiv',
        'exit',
        'export',
        'fail',
        'fatal',
        'for',
        'gather',
        'given',
        'goto',
        'grammar',
        'handles',
        'has',
        'if',
        'inline',
        'irs',
        'is',
        'last',
        'leave',
        'let',
        'lift',
        'loop',
        'looser',
        'macro',
        'make',
        'maybe',
        'method',
        'module',
        'multi',
        'my',
        'next',
        'of',
        'ofs',
        'only',
        'oo',
        'ors',
        'our',
        'package',
        'parsed',
        'prec',
        'proto',
        'readonly',
        'redo',
        'ref',
        'regex',
        'reparsed',
        'repeat',
        'require',
        'required',
        'return',
        'returns',
        'role',
        'rule',
        'rw',
        'self',
        'slang',
        'state',
        'sub',
        'submethod',
        'subset',
        'supersede',
        'take',
        'temp',
        'tighter',
        'token',
        'trusts',
        'try',
        'unary',
        'unless',
        'until',
        'use',
        'warn',
        'when',
        'where',
        'while',
        'will',
    )

    PERL6_BUILTINS = (
        'ACCEPTS',
        'HOW',
        'REJECTS',
        'VAR',
        'WHAT',
        'WHENCE',
        'WHERE',
        'WHICH',
        'WHO',
        'abs',
        'acos',
        'acosec',
        'acosech',
        'acosh',
        'acotan',
        'acotanh',
        'all',
        'any',
        'approx',
        'arity',
        'asec',
        'asech',
        'asin',
        'asinh',
        'assuming',
        'atan',
        'atan2',
        'atanh',
        'attr',
        'bless',
        'body',
        'by',
        'bytes',
        'caller',
        'callsame',
        'callwith',
        'can',
        'capitalize',
        'cat',
        'ceiling',
        'chars',
        'chmod',
        'chomp',
        'chop',
        'chr',
        'chroot',
        'circumfix',
        'cis',
        'classify',
        'clone',
        'close',
        'cmp_ok',
        'codes',
        'comb',
        'connect',
        'contains',
        'context',
        'cos',
        'cosec',
        'cosech',
        'cosh',
        'cotan',
        'cotanh',
        'count',
        'defined',
        'delete',
        'diag',
        'dies_ok',
        'does',
        'e',
        'each',
        'eager',
        'elems',
        'end',
        'eof',
        'eval',
        'eval_dies_ok',
        'eval_elsewhere',
        'eval_lives_ok',
        'evalfile',
        'exists',
        'exp',
        'first',
        'flip',
        'floor',
        'flunk',
        'flush',
        'fmt',
        'force_todo',
        'fork',
        'from',
        'getc',
        'gethost',
        'getlogin',
        'getpeername',
        'getpw',
        'gmtime',
        'graphs',
        'grep',
        'hints',
        'hyper',
        'im',
        'index',
        'infix',
        'invert',
        'is_approx',
        'is_deeply',
        'isa',
        'isa_ok',
        'isnt',
        'iterator',
        'join',
        'key',
        'keys',
        'kill',
        'kv',
        'lastcall',
        'lazy',
        'lc',
        'lcfirst',
        'like',
        'lines',
        'link',
        'lives_ok',
        'localtime',
        'log',
        'log10',
        'map',
        'max',
        'min',
        'minmax',
        'name',
        'new',
        'nextsame',
        'nextwith',
        'nfc',
        'nfd',
        'nfkc',
        'nfkd',
        'nok_error',
        'nonce',
        'none',
        'normalize',
        'not',
        'nothing',
        'ok',
        'once',
        'one',
        'open',
        'opendir',
        'operator',
        'ord',
        'p5chomp',
        'p5chop',
        'pack',
        'pair',
        'pairs',
        'pass',
        'perl',
        'pi',
        'pick',
        'plan',
        'plan_ok',
        'polar',
        'pop',
        'pos',
        'postcircumfix',
        'postfix',
        'pred',
        'prefix',
        'print',
        'printf',
        'push',
        'quasi',
        'quotemeta',
        'rand',
        're',
        'read',
        'readdir',
        'readline',
        'reduce',
        'reverse',
        'rewind',
        'rewinddir',
        'rindex',
        'roots',
        'round',
        'roundrobin',
        'run',
        'runinstead',
        'sameaccent',
        'samecase',
        'say',
        'sec',
        'sech',
        'sech',
        'seek',
        'shape',
        'shift',
        'sign',
        'signature',
        'sin',
        'sinh',
        'skip',
        'skip_rest',
        'sleep',
        'slurp',
        'sort',
        'splice',
        'split',
        'sprintf',
        'sqrt',
        'srand',
        'strand',
        'subst',
        'substr',
        'succ',
        'sum',
        'symlink',
        'tan',
        'tanh',
        'throws_ok',
        'time',
        'times',
        'to',
        'todo',
        'trim',
        'trim_end',
        'trim_start',
        'true',
        'truncate',
        'uc',
        'ucfirst',
        'undef',
        'undefine',
        'uniq',
        'unlike',
        'unlink',
        'unpack',
        'unpolar',
        'unshift',
        'unwrap',
        'use_ok',
        'value',
        'values',
        'vec',
        'version_lt',
        'void',
        'wait',
        'want',
        'wrap',
        'write',
        'zip',
    )

    PERL6_BUILTIN_CLASSES = (
        'Abstraction',
        'Any',
        'AnyChar',
        'Array',
        'Associative',
        'Bag',
        'Bit',
        'Blob',
        'Block',
        'Bool',
        'Buf',
        'Byte',
        'Callable',
        'Capture',
        'Char',
        'Class',
        'Code',
        'Codepoint',
        'Comparator',
        'Complex',
        'Decreasing',
        'Exception',
        'Failure',
        'False',
        'Grammar',
        'Grapheme',
        'Hash',
        'IO',
        'Increasing',
        'Int',
        'Junction',
        'KeyBag',
        'KeyExtractor',
        'KeyHash',
        'KeySet',
        'KitchenSink',
        'List',
        'Macro',
        'Mapping',
        'Match',
        'Matcher',
        'Method',
        'Module',
        'Num',
        'Object',
        'Ordered',
        'Ordering',
        'OrderingPair',
        'Package',
        'Pair',
        'Positional',
        'Proxy',
        'Range',
        'Rat',
        'Regex',
        'Role',
        'Routine',
        'Scalar',
        'Seq',
        'Set',
        'Signature',
        'Str',
        'StrLen',
        'StrPos',
        'Sub',
        'Submethod',
        'True',
        'UInt',
        'Undef',
        'Version',
        'Void',
        'Whatever',
        'bit',
        'bool',
        'buf',
        'buf1',
        'buf16',
        'buf2',
        'buf32',
        'buf4',
        'buf64',
        'buf8',
        'complex',
        'int',
        'int1',
        'int16',
        'int2',
        'int32',
        'int4',
        'int64',
        'int8',
        'num',
        'rat',
        'rat1',
        'rat16',
        'rat2',
        'rat32',
        'rat4',
        'rat64',
        'rat8',
        'uint',
        'uint1',
        'uint16',
        'uint2',
        'uint32',
        'uint4',
        'uint64',
        'uint8',
        'utf16',
        'utf32',
        'utf8',
    )

    PERL6_OPERATORS = (
        'X',
        'Z',
        'after',
        'also',
        'and',
        'andthen',
        'before',
        'cmp',
        'div',
        'eq',
        'eqv',
        'extra',
        'ff',
        'fff',
        'ge',
        'gt',
        'le',
        'leg',
        'lt',
        'm',
        'mm',
        'mod',
        'ne',
        'or',
        'orelse',
        'rx',
        's',
        'tr',
        'x',
        'xor',
        'xx',
        '++',
        '--',
        '**',
        '!',
        '+',
        '-',
        '~',
        '?',
        '|',
        '||',
        '+^',
        '~^',
        '?^',
        '^',
        '*',
        '/',
        '%',
        '%%',
        '+&',
        '+<',
        '+>',
        '~&',
        '~<',
        '~>',
        '?&',
        'gcd',
        'lcm',
        '+',
        '-',
        '+|',
        '+^',
        '~|',
        '~^',
        '?|',
        '?^',
        '~',
        '&',
        '^',
        'but',
        'does',
        '<=>',
        '..',
        '..^',
        '^..',
        '^..^',
        '!=',
        '==',
        '<',
        '<=',
        '>',
        '>=',
        '~~',
        '===',
        '!eqv',
        '&&',
        '||',
        '^^',
        '//',
        'min',
        'max',
        '??',
        '!!',
        'ff',
        'fff',
        'so',
        'not',
        '<==',
        '==>',
        '<<==',
        '==>>',
    )

    # Perl 6 has a *lot* of possible bracketing characters
    # this list was lifted from STD.pm6 (https://github.com/perl6/std)
    PERL6_BRACKETS = {
        u'\u0028': u'\u0029',
        u'\u003c': u'\u003e',
        u'\u005b': u'\u005d',
        u'\u007b': u'\u007d',
        u'\u00ab': u'\u00bb',
        u'\u0f3a': u'\u0f3b',
        u'\u0f3c': u'\u0f3d',
        u'\u169b': u'\u169c',
        u'\u2018': u'\u2019',
        u'\u201a': u'\u2019',
        u'\u201b': u'\u2019',
        u'\u201c': u'\u201d',
        u'\u201e': u'\u201d',
        u'\u201f': u'\u201d',
        u'\u2039': u'\u203a',
        u'\u2045': u'\u2046',
        u'\u207d': u'\u207e',
        u'\u208d': u'\u208e',
        u'\u2208': u'\u220b',
        u'\u2209': u'\u220c',
        u'\u220a': u'\u220d',
        u'\u2215': u'\u29f5',
        u'\u223c': u'\u223d',
        u'\u2243': u'\u22cd',
        u'\u2252': u'\u2253',
        u'\u2254': u'\u2255',
        u'\u2264': u'\u2265',
        u'\u2266': u'\u2267',
        u'\u2268': u'\u2269',
        u'\u226a': u'\u226b',
        u'\u226e': u'\u226f',
        u'\u2270': u'\u2271',
        u'\u2272': u'\u2273',
        u'\u2274': u'\u2275',
        u'\u2276': u'\u2277',
        u'\u2278': u'\u2279',
        u'\u227a': u'\u227b',
        u'\u227c': u'\u227d',
        u'\u227e': u'\u227f',
        u'\u2280': u'\u2281',
        u'\u2282': u'\u2283',
        u'\u2284': u'\u2285',
        u'\u2286': u'\u2287',
        u'\u2288': u'\u2289',
        u'\u228a': u'\u228b',
        u'\u228f': u'\u2290',
        u'\u2291': u'\u2292',
        u'\u2298': u'\u29b8',
        u'\u22a2': u'\u22a3',
        u'\u22a6': u'\u2ade',
        u'\u22a8': u'\u2ae4',
        u'\u22a9': u'\u2ae3',
        u'\u22ab': u'\u2ae5',
        u'\u22b0': u'\u22b1',
        u'\u22b2': u'\u22b3',
        u'\u22b4': u'\u22b5',
        u'\u22b6': u'\u22b7',
        u'\u22c9': u'\u22ca',
        u'\u22cb': u'\u22cc',
        u'\u22d0': u'\u22d1',
        u'\u22d6': u'\u22d7',
        u'\u22d8': u'\u22d9',
        u'\u22da': u'\u22db',
        u'\u22dc': u'\u22dd',
        u'\u22de': u'\u22df',
        u'\u22e0': u'\u22e1',
        u'\u22e2': u'\u22e3',
        u'\u22e4': u'\u22e5',
        u'\u22e6': u'\u22e7',
        u'\u22e8': u'\u22e9',
        u'\u22ea': u'\u22eb',
        u'\u22ec': u'\u22ed',
        u'\u22f0': u'\u22f1',
        u'\u22f2': u'\u22fa',
        u'\u22f3': u'\u22fb',
        u'\u22f4': u'\u22fc',
        u'\u22f6': u'\u22fd',
        u'\u22f7': u'\u22fe',
        u'\u2308': u'\u2309',
        u'\u230a': u'\u230b',
        u'\u2329': u'\u232a',
        u'\u23b4': u'\u23b5',
        u'\u2768': u'\u2769',
        u'\u276a': u'\u276b',
        u'\u276c': u'\u276d',
        u'\u276e': u'\u276f',
        u'\u2770': u'\u2771',
        u'\u2772': u'\u2773',
        u'\u2774': u'\u2775',
        u'\u27c3': u'\u27c4',
        u'\u27c5': u'\u27c6',
        u'\u27d5': u'\u27d6',
        u'\u27dd': u'\u27de',
        u'\u27e2': u'\u27e3',
        u'\u27e4': u'\u27e5',
        u'\u27e6': u'\u27e7',
        u'\u27e8': u'\u27e9',
        u'\u27ea': u'\u27eb',
        u'\u2983': u'\u2984',
        u'\u2985': u'\u2986',
        u'\u2987': u'\u2988',
        u'\u2989': u'\u298a',
        u'\u298b': u'\u298c',
        u'\u298d': u'\u298e',
        u'\u298f': u'\u2990',
        u'\u2991': u'\u2992',
        u'\u2993': u'\u2994',
        u'\u2995': u'\u2996',
        u'\u2997': u'\u2998',
        u'\u29c0': u'\u29c1',
        u'\u29c4': u'\u29c5',
        u'\u29cf': u'\u29d0',
        u'\u29d1': u'\u29d2',
        u'\u29d4': u'\u29d5',
        u'\u29d8': u'\u29d9',
        u'\u29da': u'\u29db',
        u'\u29f8': u'\u29f9',
        u'\u29fc': u'\u29fd',
        u'\u2a2b': u'\u2a2c',
        u'\u2a2d': u'\u2a2e',
        u'\u2a34': u'\u2a35',
        u'\u2a3c': u'\u2a3d',
        u'\u2a64': u'\u2a65',
        u'\u2a79': u'\u2a7a',
        u'\u2a7d': u'\u2a7e',
        u'\u2a7f': u'\u2a80',
        u'\u2a81': u'\u2a82',
        u'\u2a83': u'\u2a84',
        u'\u2a8b': u'\u2a8c',
        u'\u2a91': u'\u2a92',
        u'\u2a93': u'\u2a94',
        u'\u2a95': u'\u2a96',
        u'\u2a97': u'\u2a98',
        u'\u2a99': u'\u2a9a',
        u'\u2a9b': u'\u2a9c',
        u'\u2aa1': u'\u2aa2',
        u'\u2aa6': u'\u2aa7',
        u'\u2aa8': u'\u2aa9',
        u'\u2aaa': u'\u2aab',
        u'\u2aac': u'\u2aad',
        u'\u2aaf': u'\u2ab0',
        u'\u2ab3': u'\u2ab4',
        u'\u2abb': u'\u2abc',
        u'\u2abd': u'\u2abe',
        u'\u2abf': u'\u2ac0',
        u'\u2ac1': u'\u2ac2',
        u'\u2ac3': u'\u2ac4',
        u'\u2ac5': u'\u2ac6',
        u'\u2acd': u'\u2ace',
        u'\u2acf': u'\u2ad0',
        u'\u2ad1': u'\u2ad2',
        u'\u2ad3': u'\u2ad4',
        u'\u2ad5': u'\u2ad6',
        u'\u2aec': u'\u2aed',
        u'\u2af7': u'\u2af8',
        u'\u2af9': u'\u2afa',
        u'\u2e02': u'\u2e03',
        u'\u2e04': u'\u2e05',
        u'\u2e09': u'\u2e0a',
        u'\u2e0c': u'\u2e0d',
        u'\u2e1c': u'\u2e1d',
        u'\u2e20': u'\u2e21',
        u'\u3008': u'\u3009',
        u'\u300a': u'\u300b',
        u'\u300c': u'\u300d',
        u'\u300e': u'\u300f',
        u'\u3010': u'\u3011',
        u'\u3014': u'\u3015',
        u'\u3016': u'\u3017',
        u'\u3018': u'\u3019',
        u'\u301a': u'\u301b',
        u'\u301d': u'\u301e',
        u'\ufd3e': u'\ufd3f',
        u'\ufe17': u'\ufe18',
        u'\ufe35': u'\ufe36',
        u'\ufe37': u'\ufe38',
        u'\ufe39': u'\ufe3a',
        u'\ufe3b': u'\ufe3c',
        u'\ufe3d': u'\ufe3e',
        u'\ufe3f': u'\ufe40',
        u'\ufe41': u'\ufe42',
        u'\ufe43': u'\ufe44',
        u'\ufe47': u'\ufe48',
        u'\ufe59': u'\ufe5a',
        u'\ufe5b': u'\ufe5c',
        u'\ufe5d': u'\ufe5e',
        u'\uff08': u'\uff09',
        u'\uff1c': u'\uff1e',
        u'\uff3b': u'\uff3d',
        u'\uff5b': u'\uff5d',
        u'\uff5f': u'\uff60',
        u'\uff62': u'\uff63',
    }

    def _build_word_match(words,
                          boundary_regex_fragment=None,
                          prefix='',
                          suffix=''):
        if boundary_regex_fragment is None:
            return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \
                suffix + r')\b'
        else:
            return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
                r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \
                boundary_regex_fragment + r')'

    def brackets_callback(token_class):
        """Create a lexer callback that consumes a delimited construct.

        The returned callback expects ``match`` to carry a named group
        ``delimiter`` (the opening delimiter, possibly several repeated
        characters) and optionally a named group ``adverbs``.  It emits
        everything from the start of the match through the closing
        delimiter as a single ``token_class`` token and then moves
        ``context.pos`` past it.

        * If the first delimiter character is a key of ``PERL6_BRACKETS``,
          the matching closer is searched for with nesting taken into
          account.
        * Otherwise the next occurrence of the same delimiter closes it.
        * If no closer exists, the token runs to the end of the text.
        * If the adverbs contain ``:to``, the delimited text is used as a
          heredoc terminator and the token is extended through the first
          following line consisting only of that terminator (surrounding
          whitespace allowed), or to the end of the text if there is none.
        """
        def callback(lexer, match, context):
            named = match.groupdict()
            opener = named['delimiter']
            width = len(opener)
            adverbs = named.get('adverbs')
            source = context.text
            # first position after the opening delimiter
            body_start = match.start('delimiter') + width

            mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])
            if mirror is not None:
                # mirrored bracket: walk openers/closers, tracking depth
                closer = mirror * width
                depth = 1
                scan_from = body_start
                while depth > 0:
                    open_at = source.find(opener, scan_from)
                    close_at = source.find(closer, scan_from)
                    if close_at == -1:
                        # unterminated: take everything that is left
                        close_at = len(source)
                        depth = 0
                    elif open_at != -1 and open_at < close_at:
                        depth += 1
                        scan_from = open_at + width
                    else:
                        depth -= 1
                        scan_from = close_at + width
                end_pos = close_at
            else:
                # same character opens and closes: next occurrence wins
                end_pos = source.find(opener, body_start)

            if end_pos < 0:
                end_pos = len(source)

            if adverbs is not None and re.search(r':to\b', adverbs):
                terminator = source[body_start:end_pos]
                terminator_line = re.search(
                    r'^\s*' + re.escape(terminator) + r'\s*$',
                    source[end_pos:], re.MULTILINE)
                if terminator_line:
                    end_pos += terminator_line.end()
                else:
                    end_pos = len(source)

            token_start = match.start()
            token_stop = end_pos + width
            yield token_start, token_class, source[token_start:token_stop]
            context.pos = token_stop

        return callback

    def opening_brace_callback(lexer, match, context):
        """Emit the matched text as ``Text`` and track brace depth.

        When the state directly beneath the current one is ``'token'``
        (i.e. code embedded in a regex block is being lexed), the
        context's ``perl6_token_nesting_level`` counter is incremented so
        that the matching closing brace can later be told apart from the
        one that ends the embedded code.
        """
        state_stack = context.stack
        begin, finish = match.span()

        yield begin, Text, context.text[begin:finish]
        context.pos = finish

        directly_above_token = (len(state_stack) > 2
                                and state_stack[-2] == 'token')
        if directly_above_token:
            context.perl6_token_nesting_level += 1

    def closing_brace_callback(lexer, match, context):
        """Emit the matched text as ``Text`` and unwind brace depth.

        When the state directly beneath the current one is ``'token'``,
        the context's ``perl6_token_nesting_level`` counter is
        decremented; once it reaches zero the current state is popped,
        which returns the lexer to the ``'token'`` state.
        """
        state_stack = context.stack
        begin, finish = match.span()

        yield begin, Text, context.text[begin:finish]
        context.pos = finish

        if not (len(state_stack) > 2 and state_stack[-2] == 'token'):
            # a brace outside embedded code needs no bookkeeping
            return

        context.perl6_token_nesting_level -= 1
        if context.perl6_token_nesting_level == 0:
            state_stack.pop()

    def embedded_perl6_callback(lexer, match, context):
        """Start lexing code embedded in a regex block.

        Resets the context's ``perl6_token_nesting_level`` counter to 1,
        emits the matched text as ``Text`` and pushes the ``'root'`` state
        so that what follows is lexed with the ordinary rules.
        """
        context.perl6_token_nesting_level = 1

        begin, finish = match.span()
        yield begin, Text, context.text[begin:finish]

        context.pos = finish
        context.stack.append('root')

    # If you're modifying these rules, be careful if you need to process '{' or '}'
    # characters. We have special logic for processing these characters (due to the fact
    # that you can nest Perl 6 code in regex blocks), so if you need to process one of
    # them, make sure you also process the corresponding one!
    tokens = {
        'common': [
            (r'#[`|=](?P<delimiter>(?P<first_char>[' +
             ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
             brackets_callback(Comment.Multiline)),
            (r'#[^\n]*$', Comment.Single),
            (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
            (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
            (r'^=.*?\n\s*?\n', Comment.Multiline),
            (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
             bygroups(Keyword, Name), 'token-sym-brackets'),
            (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' +
             PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword,
                                                      Name), 'pre-token'),
            # deal with a special case in the Perl 6 grammar (role q { ... })
            (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
            (_build_word_match(PERL6_KEYWORDS,
                               PERL6_IDENTIFIER_RANGE), Keyword),
            (_build_word_match(PERL6_BUILTIN_CLASSES,
                               PERL6_IDENTIFIER_RANGE,
                               suffix='(?::[UD])?'), Name.Builtin),
            (_build_word_match(PERL6_BUILTINS,
                               PERL6_IDENTIFIER_RANGE), Name.Builtin),
            # copied from PerlLexer
            (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE +
             u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable),
            (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            (r'::\?\w+', Name.Variable.Global),
            (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE +
             u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            (r'\$(?:<.*?>)+', Name.Variable),
            (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
             r'(?P=first_char)*)', brackets_callback(String)),
            # copied from PerlLexer
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'm\w+(?=\()', Name),
            (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
             r'(?P=first_char)*)', brackets_callback(String.Regex)),
            (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
             String.Regex),
            (r'<[^\s=].*?\S>', String),
            (_build_word_match(PERL6_OPERATORS), Operator),
            (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        ],
        'root': [
            include('common'),
            (r'\{', opening_brace_callback),
            (r'\}', closing_brace_callback),
            (r'.+?', Text),
        ],
        'pre-token': [
            include('common'),
            (r'\{', Text, ('#pop', 'token')),
            (r'.+?', Text),
        ],
        'token-sym-brackets': [
            (r'(?P<delimiter>(?P<first_char>[' +
             ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
             brackets_callback(Name), ('#pop', 'pre-token')),
            default(('#pop', 'pre-token')),
        ],
        'token': [
            (r'\}', Text, '#pop'),
            (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
            # make sure that quotes in character classes aren't treated as strings
            (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
            # make sure that '#' characters in quotes aren't treated as comments
            (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
            (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
            (r'#.*?$', Comment.Single),
            (r'\{', embedded_perl6_callback),
            ('.+?', String.Regex),
        ],
    }

    def analyse_text(text):
        """Estimate how likely *text* is to be Perl 6 source.

        POD sections are removed first.  The result is

        * ``True`` if the shebang names perl6, rakudo, niecza or pugs;
        * ``True`` if, before any other code line, a ``v6;`` / ``use v6;``
          statement appears, or a module/class/role/enum/grammar
          declaration that is either scoped with ``my``/``our`` or occurs
          in a text that also contains a ``my``/``our``/``has``
          declaration;
        * ``0.05`` if leading lines declare a module/class/role/enum/
          grammar without that supporting evidence;
        * ``0.8`` if only a ``my``/``our``/``has`` declaration was seen;
        * ``False`` otherwise.
        """
        def strip_pod(lines):
            # Drop every line from a '=directive' line up to and including
            # the matching '=end' / '=cut' line.
            kept = []
            inside_pod = False
            for current in lines:
                if re.match(r'^=(?:end|cut)', current):
                    inside_pod = False
                    continue
                if re.match(r'^=\w+', current):
                    inside_pod = True
                    continue
                if not inside_pod:
                    kept.append(current)
            return kept

        # XXX handle block comments
        lines = strip_pod(text.splitlines())
        text = '\n'.join(lines)

        if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
            return True

        # check for my/our/has declarations
        declaration_re = (r"(?:my|our|has)\s+(?:" +
                          Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                          r"+\s+)?[$@%&(]")
        saw_perl_decl = re.search(declaration_re, text) is not None
        rating = 0.8 if saw_perl_decl else False

        for raw_line in lines:
            code = re.sub('#.*', '', raw_line)
            if re.match(r'^\s*$', code):
                # blank or comment-only line
                continue

            # match v6; use v6; use v6.0; use v6.0.0;
            if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', code):
                return True

            # match class, module, role, enum, grammar declarations
            class_decl = re.match(
                r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)',
                code)
            if not class_decl:
                # first line of ordinary code: stop looking
                break
            if saw_perl_decl or class_decl.group('scope') is not None:
                return True
            rating = 0.05

        return rating

    def __init__(self, **options):
        """Initialise the lexer, defaulting the input encoding to UTF-8.

        All options are forwarded to the base class; afterwards
        ``self.encoding`` is set to the ``encoding`` option if one was
        given and to ``'utf-8'`` otherwise.
        """
        super(Perl6Lexer, self).__init__(**options)
        if 'encoding' in options:
            self.encoding = options['encoding']
        else:
            self.encoding = 'utf-8'
Example #28
0
class SolidityLexer(RegexLexer):
    """
    For Solidity source code.

    .. versionadded:: 2.5
    """

    name = 'Solidity'
    aliases = ['solidity']
    filenames = ['*.sol']
    mimetypes = []

    flags = re.MULTILINE | re.UNICODE

    # Elementary type names: ``address``, ``bool`` and the ``bytes`` /
    # ``hash`` / ``int`` / ``string`` / ``uint`` families with an optional
    # width suffix.  The pattern contributes four capturing groups, which is
    # why the ``bygroups`` call that uses it below starts with one real token
    # followed by three ``None`` placeholders.
    datatype = (
        r'\b(address|bool|((bytes|hash|int|string|uint)(8|16|24|32|40|48|56|64'
        r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208'
        r'|216|224|232|240|248|256)?))\b')

    # NOTE: whitespace is emitted as ``Text.Whitespace`` (the built-in token
    # type).  The previous spelling ``Text.WhiteSpace`` silently created a
    # separate, non-standard token type.
    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            # ``pragma solidity ...;`` is lexed in its own state.
            (r'\bpragma\s+solidity\b', Keyword, 'pragma'),
            # contract declaration: keyword, whitespace, contract name
            (r'\b(contract)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword, Text.Whitespace, Name.Entity)),
            # typed variable declaration with optional visibility specifier
            (datatype + r'(\s+)((external|public|internal|private)\s+)?' +
             r'([a-zA-Z_]\w*)',
             bygroups(Keyword.Type, None, None, None, Text.Whitespace, Keyword,
                      None, Name.Variable)),
            # named enum / event / function / struct definitions
            (r'\b(enum|event|function|struct)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword.Type, Text.Whitespace, Name.Variable)),
            # members of the msg / block / tx globals
            (r'\b(msg|block|tx)\.([A-Za-z_][A-Za-z0-9_]*)\b', Keyword),
            (words(('block', 'break', 'constant', 'constructor', 'continue',
                    'contract', 'do', 'else', 'external', 'false', 'for',
                    'function', 'if', 'import', 'inherited', 'internal', 'is',
                    'library', 'mapping', 'memory', 'modifier', 'msg', 'new',
                    'payable', 'private', 'public', 'require', 'return',
                    'returns', 'struct', 'suicide', 'throw', 'this', 'true',
                    'tx', 'var', 'while'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword.Type),
            (words(('keccak256', ), prefix=r'\b', suffix=r'\b'), Name.Builtin),
            (datatype, Keyword.Type),
            include('constants'),
            # any other identifier
            (r'[a-zA-Z_]\w*', Text),
            (r'[!<=>+*/-]', Operator),
            (r'[.;:{}(),\[\]]', Punctuation)
        ],
        'comments': [
            # line comment; a backslash right before the newline continues it
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            # terminated block comment
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # unterminated block comment: consume the rest of the input
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'constants': [
            (r'("([\\]"|.)*?")', String.Double),
            (r"('([\\]'|.)*?')", String.Single),
            (r'\b0[xX][0-9a-fA-F]+\b', Number.Hex),
            (r'\b\d+\b', Number.Decimal),
        ],
        'pragma': [
            include('whitespace'),
            include('comments'),
            # version constraint such as ``^0.4.0`` or ``>=0.4.0``
            (r'(\^|>=|<)(\s*)(\d+\.\d+\.\d+)',
             bygroups(Operator, Text.Whitespace, Keyword)),
            (r';', Punctuation, '#pop')
        ],
        'whitespace': [
            # ``\s+`` already consumes newlines, so the separate ``\n`` rule
            # that used to follow it could never match and has been dropped.
            (r'\s+', Text.Whitespace),
        ]
    }
Example #29
0
class JuliaLexer(RegexLexer):
    """
    For `Julia <http://julialang.org/>`_ source code.

    The ``root`` state handles comments, keywords, well-known type and
    builtin names, operators, literals and identifiers.  Dedicated states
    handle nestable ``#= ... =#`` block comments, the string / regex /
    command literals, and ``$( ... )`` interpolation inside them.

    .. versionadded:: 1.6
    """

    name = 'Julia'
    aliases = ['julia', 'jl']
    filenames = ['*.jl']
    mimetypes = ['text/x-julia', 'application/x-julia']

    flags = re.MULTILINE | re.UNICODE

    # Rule order inside each state matters: the first matching rule wins.
    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # ``#=`` must be tried before the single-line comment rule,
            # which would otherwise swallow it.
            (r'#=', Comment.Multiline, "blockcomment"),
            (r'#.*$', Comment),
            (r'[\[\]{}(),;]', Punctuation),

            # keywords
            (r'in\b', Keyword.Pseudo),
            (r'isa\b', Keyword.Pseudo),
            (r'(true|false)\b', Keyword.Constant),
            (r'(local|global|const)\b', Keyword.Declaration),
            (words([
                'function', 'type', 'typealias', 'abstract', 'immutable',
                'baremodule', 'begin', 'bitstype', 'break', 'catch', 'ccall',
                'continue', 'do', 'else', 'elseif', 'end', 'export', 'finally',
                'for', 'if', 'import', 'importall', 'let', 'macro', 'module',
                'mutable', 'primitive', 'quote', 'return', 'struct', 'try',
                'using', 'while'],
                suffix=r'\b'), Keyword),

            # NOTE
            # Patterns below work only for definition sites and thus hardly reliable.
            #
            # functions
            # (r'(function)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Function)),
            #
            # types
            # (r'(type|typealias|abstract|immutable)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Class)),

            # type names
            (words([
                'ANY', 'ASCIIString', 'AbstractArray', 'AbstractChannel',
                'AbstractFloat', 'AbstractMatrix', 'AbstractRNG',
                'AbstractSparseArray', 'AbstractSparseMatrix',
                'AbstractSparseVector', 'AbstractString', 'AbstractVecOrMat',
                'AbstractVector', 'Any', 'ArgumentError', 'Array',
                'AssertionError', 'Associative', 'Base64DecodePipe',
                'Base64EncodePipe', 'Bidiagonal', 'BigFloat', 'BigInt',
                'BitArray', 'BitMatrix', 'BitVector', 'Bool', 'BoundsError',
                'Box', 'BufferStream', 'CapturedException', 'CartesianIndex',
                'CartesianRange', 'Cchar', 'Cdouble', 'Cfloat', 'Channel',
                'Char', 'Cint', 'Cintmax_t', 'Clong', 'Clonglong',
                'ClusterManager', 'Cmd', 'Coff_t', 'Colon', 'Complex',
                'Complex128', 'Complex32', 'Complex64', 'CompositeException',
                'Condition', 'Cptrdiff_t', 'Cshort', 'Csize_t', 'Cssize_t',
                'Cstring', 'Cuchar', 'Cuint', 'Cuintmax_t', 'Culong',
                'Culonglong', 'Cushort', 'Cwchar_t', 'Cwstring', 'DataType',
                'Date', 'DateTime', 'DenseArray', 'DenseMatrix',
                'DenseVecOrMat', 'DenseVector', 'Diagonal', 'Dict',
                'DimensionMismatch', 'Dims', 'DirectIndexString', 'Display',
                'DivideError', 'DomainError', 'EOFError', 'EachLine', 'Enum',
                'Enumerate', 'ErrorException', 'Exception', 'Expr',
                'Factorization', 'FileMonitor', 'FileOffset', 'Filter',
                'Float16', 'Float32', 'Float64', 'FloatRange', 'Function',
                'GenSym', 'GlobalRef', 'GotoNode', 'HTML', 'Hermitian', 'IO',
                'IOBuffer', 'IOStream', 'IPv4', 'IPv6', 'InexactError',
                'InitError', 'Int', 'Int128', 'Int16', 'Int32', 'Int64', 'Int8',
                'IntSet', 'Integer', 'InterruptException', 'IntrinsicFunction',
                'InvalidStateException', 'Irrational', 'KeyError', 'LabelNode',
                'LambdaStaticData', 'LinSpace', 'LineNumberNode', 'LoadError',
                'LocalProcess', 'LowerTriangular', 'MIME', 'Matrix',
                'MersenneTwister', 'Method', 'MethodError', 'MethodTable',
                'Module', 'NTuple', 'NewvarNode', 'NullException', 'Nullable',
                'Number', 'ObjectIdDict', 'OrdinalRange', 'OutOfMemoryError',
                'OverflowError', 'Pair', 'ParseError', 'PartialQuickSort',
                'Pipe', 'PollingFileWatcher', 'ProcessExitedException',
                'ProcessGroup', 'Ptr', 'QuoteNode', 'RandomDevice', 'Range',
                'Rational', 'RawFD', 'ReadOnlyMemoryError', 'Real',
                'ReentrantLock', 'Ref', 'Regex', 'RegexMatch',
                'RemoteException', 'RemoteRef', 'RepString', 'RevString',
                'RopeString', 'RoundingMode', 'SegmentationFault',
                'SerializationState', 'Set', 'SharedArray', 'SharedMatrix',
                'SharedVector', 'Signed', 'SimpleVector', 'SparseMatrixCSC',
                'StackOverflowError', 'StatStruct', 'StepRange', 'StridedArray',
                'StridedMatrix', 'StridedVecOrMat', 'StridedVector', 'SubArray',
                'SubString', 'SymTridiagonal', 'Symbol', 'SymbolNode',
                'Symmetric', 'SystemError', 'TCPSocket', 'Task', 'Text',
                'TextDisplay', 'Timer', 'TopNode', 'Tridiagonal', 'Tuple',
                'Type', 'TypeConstructor', 'TypeError', 'TypeName', 'TypeVar',
                'UDPSocket', 'UInt', 'UInt128', 'UInt16', 'UInt32', 'UInt64',
                'UInt8', 'UTF16String', 'UTF32String', 'UTF8String',
                'UndefRefError', 'UndefVarError', 'UnicodeError', 'UniformScaling',
                'Union', 'UnitRange', 'Unsigned', 'UpperTriangular', 'Val',
                'Vararg', 'VecOrMat', 'Vector', 'VersionNumber', 'Void', 'WString',
                'WeakKeyDict', 'WeakRef', 'WorkerConfig', 'Zip'], suffix=r'\b'),
                Keyword.Type),

            # builtins
            (words([
                u'ARGS', u'CPU_CORES', u'C_NULL', u'DevNull', u'ENDIAN_BOM',
                u'ENV', u'I', u'Inf', u'Inf16', u'Inf32', u'Inf64',
                u'InsertionSort', u'JULIA_HOME', u'LOAD_PATH', u'MergeSort',
                u'NaN', u'NaN16', u'NaN32', u'NaN64', u'OS_NAME',
                u'QuickSort', u'RoundDown', u'RoundFromZero', u'RoundNearest',
                u'RoundNearestTiesAway', u'RoundNearestTiesUp',
                u'RoundToZero', u'RoundUp', u'STDERR', u'STDIN', u'STDOUT',
                u'VERSION', u'WORD_SIZE', u'catalan', u'e', u'eu',
                u'eulergamma', u'golden', u'im', u'nothing', u'pi', u'γ',
                u'π', u'φ'],
                suffix=r'\b'), Name.Builtin),

            # operators
            # see: https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
            (words([
                # prec-assignment
                u'=', u':=', u'+=', u'-=', u'*=', u'/=', u'//=', u'.//=', u'.*=', u'./=',
                u'\\=', u'.\\=', u'^=', u'.^=', u'÷=', u'.÷=', u'%=', u'.%=', u'|=', u'&=',
                u'$=', u'=>', u'<<=', u'>>=', u'>>>=', u'~', u'.+=', u'.-=',
                # prec-conditional
                u'?',
                # prec-arrow
                u'--', u'-->',
                # prec-lazy-or
                u'||',
                # prec-lazy-and
                u'&&',
                # prec-comparison
                u'>', u'<', u'>=', u'≥', u'<=', u'≤', u'==', u'===', u'≡', u'!=', u'≠',
                u'!==', u'≢', u'.>', u'.<', u'.>=', u'.≥', u'.<=', u'.≤', u'.==', u'.!=',
                u'.≠', u'.=', u'.!', u'<:', u'>:', u'∈', u'∉', u'∋', u'∌', u'⊆',
                u'⊈', u'⊂',
                u'⊄', u'⊊',
                # prec-pipe
                u'|>', u'<|',
                # prec-colon
                u':',
                # prec-plus
                u'+', u'-', u'.+', u'.-', u'|', u'∪', u'$',
                # prec-bitshift
                u'<<', u'>>', u'>>>', u'.<<', u'.>>', u'.>>>',
                # prec-times
                u'*', u'/', u'./', u'÷', u'.÷', u'%', u'⋅', u'.%', u'.*', u'\\', u'.\\', u'&', u'∩',
                # prec-rational
                u'//', u'.//',
                # prec-power
                u'^', u'.^',
                # prec-decl
                u'::',
                # prec-dot
                u'.',
                # unary op
                u'+', u'-', u'!', u'√', u'∛', u'∜'
            ]), Operator),

            # chars
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),

            # try to match trailing transpose
            # (one or more quotes directly after a dot, word character,
            # closing parenthesis or closing bracket)
            (r'(?<=[.\w)\]])\'+', Operator),

            # strings
            # (the triple-quoted opener is tried before the single quote)
            (r'"""', String, 'tqstring'),
            (r'"', String, 'string'),

            # regular expressions
            (r'r"""', String.Regex, 'tqregex'),
            (r'r"', String.Regex, 'regex'),

            # backticks
            (r'`', String.Backtick, 'command'),

            # names
            (allowed_variable, Name),
            (r'@' + allowed_variable, Name.Decorator),

            # numbers
            # (each form is listed twice: once with ``_`` digit separators,
            # once without)
            (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+(_\d+)+[eEf][+-]?[0-9]+', Number.Float),
            (r'\d+[eEf][+-]?[0-9]+', Number.Float),
            (r'0b[01]+(_[01]+)+', Number.Bin),
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+(_[0-7]+)+', Number.Oct),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[a-fA-F0-9]+(_[a-fA-F0-9]+)+', Number.Hex),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+(_\d+)+', Number.Integer),
            (r'\d+', Number.Integer)
        ],

        # Block comments nest: every ``#=`` pushes this state again and
        # every ``=#`` pops one level.
        "blockcomment": [
            (r'[^=#]', Comment.Multiline),
            (r'#=', Comment.Multiline, '#push'),
            (r'=#', Comment.Multiline, '#pop'),
            (r'[=#]', Comment.Multiline),
        ],

        # Body of a ``"..."`` string literal.
        'string': [
            (r'"', String, '#pop'),
            # FIXME: This escape pattern is not perfect.
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            # Interpolation is defined as "$" followed by the shortest full
            # expression, which is something we can't parse.
            # Include the most common cases here: $word, and $(paren'd expr).
            (r'\$' + allowed_variable, String.Interpol),
            # (r'\$[a-zA-Z_]+', String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            # @printf and @sprintf formats
            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
             String.Interpol),
            (r'.|\s', String),
        ],

        # Body of a ``"""..."""`` string literal; same escapes and
        # interpolation as 'string', but without the printf-format rule.
        'tqstring': [
            (r'"""', String, '#pop'),
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            (r'.|\s', String),
        ],

        # Body of an ``r"..."`` regex literal; ``\"`` does not end it.
        'regex': [
            (r'"', String.Regex, '#pop'),
            (r'\\"', String.Regex),
            (r'.|\s', String.Regex),
        ],

        # Body of an ``r"""..."""`` regex literal.
        'tqregex': [
            (r'"""', String.Regex, '#pop'),
            (r'.|\s', String.Regex),
        ],

        # Body of a backtick command literal, with ``$`` interpolation.
        'command': [
            (r'`', String.Backtick, '#pop'),
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            (r'.|\s', String.Backtick)
        ],

        # Inside ``$( ... )``: parentheses are counted by pushing/popping
        # this state, and everything else is lexed with the 'root' rules.
        'in-intp': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ]
    }

    def analyse_text(text):
        # The guess relies solely on whether ``shebang_matches`` accepts a
        # ``julia`` interpreter for *text*.
        return shebang_matches(text, r'julia')
Example #30
0
class GherkinLexer(RegexLexer):
    """
    For `Gherkin <http://github.com/aslakhellesoy/gherkin/>`_ syntax.

    Keywords are recognised in many natural languages; the four
    ``*_keywords`` patterns below hold the translated alternatives.

    .. versionadded:: 1.2
    """
    name = 'Gherkin'
    aliases = ['cucumber', 'gherkin']
    filenames = ['*.feature']
    mimetypes = ['text/x-gherkin']

    # Feature header line.  Groups: (keyword)(colon)(rest of line).
    feature_keywords = u'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
    # Scenario / background / outline header line.
    # Groups: (indent)(keyword)(colon)(rest of line).
    feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
    # Examples header line.  Groups: (indent)(keyword)(colon)(rest of line).
    examples_keywords = u'^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
    # Step keyword at the start of a line.  Groups: (indent)(keyword).
    # Most alternatives carry their own trailing space.
    step_keywords = u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\\* )'

    # Rule order inside each state matters: the first matching rule wins.
    # Several states end with a one-character catch-all so that any input
    # is consumed.
    tokens = {
        'comments': [
            (r'^\s*#.*$', Comment),
        ],
        # Entered from 'root' after a scenario/background header.
        'feature_elements': [
            (step_keywords, Keyword, "step_content_stack"),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        # Entered from 'narrative' after a scenario/background header; a
        # step keyword pops two states at once.
        'feature_elements_on_stack': [
            (step_keywords, Keyword, "#pop:2"),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        # Entered after an examples header; the first table row is lexed
        # as the header row.
        'examples_table': [
            (r"\s+\|", Keyword, 'examples_table_header'),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        'examples_table_header': [
            # A pipe at the end of the line closes the header row and pops
            # two states.
            (r"\s+\|\s*$", Keyword, "#pop:2"),
            include('comments'),
            # escaped pipe inside a cell
            (r"\\\|", Name.Variable),
            (r"\s*\|", Keyword),
            (r"[^|]", Name.Variable),
        ],
        'scenario_sections_on_stack': [
            (feature_element_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), "feature_elements_on_stack"),
        ],
        # Free text following a feature header.
        'narrative': [
            include('scenario_sections_on_stack'),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        # ``<placeholder>`` references
        'table_vars': [
            (r'(<[^>]+>)', Name.Variable),
        ],
        # Numeric literals are emitted with the String token type.
        'numbers': [
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', String),
        ],
        'string': [
            include('table_vars'),
            (r'(\s|.)', String),
        ],
        # Body of a triple-quoted block; the closing quotes pop the state.
        'py_string': [
            (r'"""', Keyword, "#pop"),
            include('string'),
        ],
        # Rest of a step line when the step was matched in 'root'.
        'step_content_root': [
            (r"$", Keyword, "#pop"),
            include('step_content'),
        ],
        # Rest of a step line when the step was matched in
        # 'feature_elements'; end of line pops two states.
        'step_content_stack': [
            (r"$", Keyword, "#pop:2"),
            include('step_content'),
        ],
        'step_content': [
            (r'"', Name.Function, "double_string"),
            include('table_vars'),
            include('numbers'),
            include('comments'),
            (r'(\s|.)', Name.Function),
        ],
        # Cells of a table row started in 'root'.
        'table_content': [
            (r"\s+\|\s*$", Keyword, "#pop"),
            include('comments'),
            # escaped pipe inside a cell
            (r"\\\|", String),
            (r"\s*\|", Keyword),
            include('string'),
        ],
        'double_string': [
            (r'"', Name.Function, "#pop"),
            include('string'),
        ],
        'root': [
            (r'\n', Name.Function),
            include('comments'),
            (r'"""', Keyword, "py_string"),
            (r'\s+\|', Keyword, 'table_content'),
            (r'"', Name.Function, "double_string"),
            include('table_vars'),
            include('numbers'),
            # tags such as ``@wip``
            (r'(\s*)(@[^@\r\n\t ]+)', bygroups(Name.Function, Name.Tag)),
            (step_keywords, bygroups(Name.Function,
                                     Keyword), 'step_content_root'),
            (feature_keywords, bygroups(Keyword, Keyword,
                                        Name.Function), 'narrative'),
            (feature_element_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), 'feature_elements'),
            (examples_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), 'examples_table'),
            (r'(\s|.)', Name.Function),
        ]
    }