def gen_elixir_string_rules(name, symbol, token):
    """Return a one-entry token-state dict for an Elixir string literal.

    The generated ``'string_' + name`` state lexes plain character runs,
    escape sequences, a terminating *symbol* (which pops the state) and
    ``#{...}`` interpolation.
    """
    body = [
        # Anything that is not an interpolation start, the terminator
        # or a backslash is a plain run of string content.
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        (r'\\.', token),
        # The closing symbol ends the string state.
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol'),
    ]
    return {'string_' + name: body}
    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'<', r'>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

        for term, name in triquotes:
            states['sigils'] += [
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                    (name + '-end', name + '-intp')),
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                    (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, token, interpol=False)

        return states
def gen_elixir_sigstr_rules(term, token, interpol=True):
    """Return the rule list for the body of an Elixir sigil string.

    *term* is the (escaped) closing delimiter regex, *token* the token type
    to emit.  With ``interpol=False`` escape/interpolation handling is
    omitted (uppercase sigils are taken verbatim).
    """
    # The terminator may be followed by sigil modifier letters; it pops
    # the state in both variants.
    closing = (r'%s[a-zA-Z]*' % (term,), token, '#pop')

    if not interpol:
        return [
            (r'[^%s\\]+' % (term,), token),
            (r'\\.', token),
            closing,
        ]

    return [
        (r'[^#%s\\]+' % (term,), token),
        include('escapes'),
        (r'\\.', token),
        closing,
        include('interpol'),
    ]
# Beispiel #4 (scraper separator — commented out so the file stays parseable)
# 0
 def _make_follow_state(compound, _label=_label,
                        _label_compound=_label_compound, _nl=_nl,
                        _space=_space, _start_label=_start_label,
                        _token=_token, _token_compound=_token_compound,
                        _ws=_ws):
     """Build the 'follow'/'follow/compound' state rule list.

     The keyword defaults bind module-level regex fragments at definition
     time.  *compound* selects the variant used inside a parenthesised
     compound statement; only the state-name suffix and the label
     character class differ between the two variants.
     """
     suffix = '/compound' if compound else ''
     state = []
     if compound:
         # Inside a compound statement a closing paren ends this state.
         state.append((r'(?=\))', Text, '#pop'))
     state += [
         # A label line: start marker, whitespace, label name, then the
         # rest of the line as a comment.
         (r'%s([%s]*)(%s)(.*)' %
          (_start_label, _ws, _label_compound if compound else _label),
          bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
         include('redirect%s' % suffix),
         # End of line or a command separator pops back to the caller.
         (r'(?=[%s])' % _nl, Text, '#pop'),
         (r'\|\|?|&&?', Punctuation, '#pop'),
         include('text')
     ]
     return state
# Beispiel #5
# 0
 def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct,
                            _string=_string, _variable=_variable, _ws=_ws):
     """Build the 'arithmetic'/'arithmetic/compound' state rule list.

     Lexes ``set /a``-style arithmetic expressions.  The keyword defaults
     bind module-level regex fragments at definition time; *compound*
     adds the closing-paren pop used inside compound statements.
     """
     # Single characters treated as arithmetic operators (plus the
     # doubled forms %% and ^^ handled in the Operator rule below).
     op = r'=+\-*/!~'
     state = []
     if compound:
         # Inside a compound statement a closing paren ends this state.
         state.append((r'(?=\))', Text, '#pop'))
     state += [
         (r'0[0-7]+', Number.Oct),
         (r'0x[\da-f]+', Number.Hex),
         (r'\d+', Number.Integer),
         (r'[(),]+', Punctuation),
         (r'([%s]|%%|\^\^)+' % op, Operator),
         # Everything else (strings, variables, escaped characters) is
         # delegated to the 'variable' state via ``using``.
         (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
          (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws,
           r'[^)]' if compound else r'[\w\W]'),
          using(this, state='variable')),
         # NUL, pipe or ampersand terminate the expression.
         (r'(?=[\x00|&])', Text, '#pop'),
         include('follow')
     ]
     return state
# Beispiel #6
# 0
class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    # Shared regex fragments: identifiers, token/rule references, literals.
    # NOTE(review): several rules below embed '\s'/'\{' inside non-raw
    # strings; they behave correctly today but emit invalid-escape
    # warnings on newer Pythons — confirm before normalizing.
    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            # grammar declaration, e.g. "lexer grammar Foo;"
            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + ')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id +
             ')(!)?', bygroups(Keyword, Whitespace, Name.Label,
                               Punctuation), ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        # Everything between a rule's name and the ':' that starts its
        # alternatives: returns, throws, options, scopes, rule actions.
        'rule-prelims': [
            include('whitespace'),
            include('comments'),
            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace,
                      Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace,
                                           Punctuation), 'action'),
            (r'(scope)(\s+)(' + _id + ')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace,
                      Punctuation)),
            # ruleAction
            (r'(@' + _id + ')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            # TOKEN or TOKEN = 'literal', ending with ';'
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL +
             ')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace, String,
                      Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            # key = value; where value is an id, string, int or '*'
            (r'(' + _id + r')(\s*)(=)(\s*)(' + '|'.join(
                (_id, _STRING_LITERAL, _INT, '\*')) + ')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace, Text,
                      Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        # Target-language code inside { ... }; lexed as Other except for
        # $attribute references, with nesting via #push/#pop.
        'action': [
            (
                r'(' + r'|'.join((  # keep host code in largest possible chunks
                    r'[^${}\'"/\\]+',  # exclude unsafe characters

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'//.*$\n?',  # single line comment
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment

                    # regular expression: There's no reason for it to start
                    # with a * and this stops confusion with comments.
                    r'/(?!\*)(\\\\|\\/|[^/])*/',

                    # backslashes are okay, as long as we are not backslashing a %
                    r'\\(?!%)',

                    # Now that we've handled regex and javadoc comments
                    # it's safe to let / through.
                    r'/',
                )) + r')+',
                Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        # Same idea as 'action' but for [ ... ] argument blocks.
        'nested-arg-action': [
            (
                r'(' +
                r'|'.join((  # keep host code in largest possible chunks.
                    r'[^$\[\]\'"/]+',  # exclude unsafe characters

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'//.*$\n?',  # single line comment
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment

                    # regular expression: There's no reason for it to start
                    # with a * and this stops confusion with comments.
                    r'/(?!\*)(\\\\|\\/|[^/])*/',

                    # Now that we've handled regex and javadoc comments
                    # it's safe to let / through.
                    r'/',
                )) + r')+',
                Other),
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    # Pygments calls analyse_text as a static-style hook; no ``self``.
    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
# Beispiel #7
# 0
    def gen_crystalstrings_rules():
        """Build the 'strings' state plus all generated sub-states for
        Crystal string/symbol/regex literals (%(), %w, %i, %r, etc.).

        Rule order inside 'strings' is significant: the %<brace> rules
        appended in the loop must precede the generic %-delimiter rules
        added at the end.
        """
        def intp_regex_callback(self, match, ctx):
            # Re-lex the regex body in an 'interpolated-regex' context so
            # #{...} interpolation is highlighted inside it.
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # Same as above, for %-delimited strings.
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[' + lbrace + ']', String.Other),
                # Matching open brace nests; close brace pops.
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states['strings'].append((r'%[wi]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states
# Beispiel #8
# 0
class MathematicaLexer(RegexLexer):
    """Lexer for Mathematica / Wolfram Language source.

    Token regexes come from the project-level ``Regex`` and ``MToken``
    tables; each token is additionally post-processed by the
    ``MathematicaAnnotations`` passes in :meth:`get_tokens_unprocessed`.
    """
    name = "Mathematica"
    aliases = [
        "mathematica",
        "mathics",
        "mma",
        "nb",
        "wl",
        "wolfram",
        "wolfram-language",
    ]
    filenames = ["*.cdf", "*.m", "*.ma", "*.nb", "*.wl"]
    mimetypes = [
        "application/mathematica",
        "application/vnd.wolfram.mathematica",
        "application/vnd.wolfram.mathematica.package",
        "application/vnd.wolfram.cdf",
        "application/vnd.wolfram.cdf.text",
    ]
    tokens = {
        "root": [
            (r"\(\*", MToken.COMMENT, "comments"),
            (r'"', MToken.STRING, "strings"),
            include("numbers"),
            (Regex.PATTERNS, MToken.PATTERN),
            (Regex.SYMBOLS, MToken.SYMBOL),
            (
                Regex.MATHICS_MESSAGE,
                bygroups(MToken.OPERATOR, MToken.WHITESPACE, MToken.TEXT,
                         MToken.TEXT),
            ),
            (Regex.SLOTS, MToken.SLOT),
            (Regex.GROUPINGS, MToken.GROUP),
            (
                Regex.MESSAGES,
                bygroups(MToken.OPERATOR, MToken.WHITESPACE, MToken.MESSAGE),
            ),
            (Regex.OPERATORS, MToken.OPERATOR),
            (r"\s+", MToken.WHITESPACE),
            # Note IDENTIFER should come after tokens that have IDENTIFIER parts, like SYMBOLS.
            # Otherwise we may have System`foo matching identifier System over Symbol System`foo
            #
            # I don't understand why this is not a problem in  pygments-mathematica.
            (Regex.IDENTIFIER, MToken.SYMBOL),
        ],
        # (* ... *) comments nest via #push/#pop.
        "comments": [
            (r"[^\*\(\)]+", MToken.COMMENT),
            (r"\*[^\)]", MToken.COMMENT),
            (r"\(\*", MToken.COMMENT, "#push"),
            (r"\*\)", MToken.COMMENT, "#pop"),
            (r"\([^\*]?|[^\*]?\)", MToken.COMMENT),
        ],
        "numbers": [
            (Regex.BASE_NUMBER, MToken.NUMBER),
            (Regex.SCIENTIFIC_NUMBER, MToken.NUMBER),
            (Regex.REAL, MToken.NUMBER),
            (Regex.INTEGER, MToken.NUMBER),
        ],
        "strings": [
            (r'[^"\\]+', MToken.STRING),
            # NOTE(review): the '^' anchor in this rule looks suspicious
            # for a mid-string escape — confirm against upstream.
            (r'^[\\"]', MToken.STRING),
            (r"(\\n|\\r)", MToken.STRING),
            (r'\\"', MToken.STRING),
            (r"\\", MToken.STRING),
            (r'"', MToken.STRING, "#pop"),
        ],
    }

    def get_tokens_unprocessed(self, text, stack=("root", )):
        """Run the regex lexer, then pipe every (index, token, value)
        triple through the annotation passes in order.

        NOTE(review): ``stack`` is accepted but not forwarded to
        RegexLexer.get_tokens_unprocessed — confirm this is intended.
        """
        ma = MathematicaAnnotations()
        annotations = (ma.builtins, ma.unicode, ma.lexical_scope)
        for index, token, value in RegexLexer.get_tokens_unprocessed(
                self, text):
            result = (index, token, value)
            for func in annotations:
                result = func(*result)

            yield result
# Beispiel #9
# 0
class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String        , match.group(1)
        yield match.start(2), String        , match.group(2)
        yield match.start(3), Text          , match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name( match.group(2).strip() )
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        # re-lex the fenced block's body with the resolved lexer
        for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
            yield item

        yield match.start(5), String        , match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
            bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
            bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
            bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # handlecodeblocks (default True): re-lex fenced code blocks with
        # the named language's lexer when one is available.
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
# Beispiel #10
# 0
class MakoLexer(RegexLexer):
    """Lexer for Mako templates.

    Python fragments (% lines, <% ... %> blocks, ${...} substitutions)
    are delegated to ``PythonLexer`` via ``using``.
    """
    name = "Mako"
    aliases = ["mako"]
    filenames = ["*.mao"]

    tokens = {
        "root": [
            # "% endif"-style closing control line
            (
                r"(\s*)(\%)(\s*end(?:\w+))(\n|\Z)",
                bygroups(Text, Comment.Preproc, Keyword, Other),
            ),
            # "% ..." control line with Python code
            (
                r"(\s*)(\%(?!%))([^\n]*)(\n|\Z)",
                bygroups(Text, Comment.Preproc, using(PythonLexer), Other),
            ),
            # "## ..." Mako comment line
            (
                r"(\s*)(##[^\n]*)(\n|\Z)",
                bygroups(Text, Comment.Preproc, Other),
            ),
            (r"""(?s)<%doc>.*?</%doc>""", Comment.Preproc),
            # opening tag like <%def ...>
            (
                r"(<%)([\w\.\:]+)",
                bygroups(Comment.Preproc, Name.Builtin),
                "tag",
            ),
            (
                r"(</%)([\w\.\:]+)(>)",
                bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc),
            ),
            (r"<%(?=([\w\.\:]+))", Comment.Preproc, "ondeftags"),
            # <% ... %> / <%! ... %> Python block
            (
                r"(?s)(<%(?:!?))(.*?)(%>)",
                bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc),
            ),
            # ${...} substitution
            (
                r"(\$\{)(.*?)(\})",
                bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc),
            ),
            (
                r"""(?sx)
                (.+?)               # anything, followed by:
                (?:
                 (?<=\n)(?=%(?!%)|\#\#) |  # an eval or comment line
                 (?=\#\*) |          # multiline comment
                 (?=</?%) |         # a python block
                                    # call start or end
                 (?=\$\{) |         # a substitution
                 (?<=\n)(?=\s*%) |
                                    # - don't consume
                 (\\\n) |           # an escaped newline
                 \Z                 # end of string
                )
            """,
                bygroups(Other, Operator),
            ),
            (r"\s+", Text),
        ],
        "ondeftags": [
            (r"<%", Comment.Preproc),
            (r"(?<=<%)(include|inherit|namespace|page)", Name.Builtin),
            include("tag"),
        ],
        "tag": [
            (r'((?:\w+)\s*=)\s*(".*?")', bygroups(Name.Attribute, String)),
            (r"/?\s*>", Comment.Preproc, "#pop"),
            (r"\s+", Text),
        ],
        # NOTE(review): 'attr' is not referenced by any rule visible in
        # this file — confirm it is reachable before removing.
        "attr": [
            ('".*?"', String, "#pop"),
            ("'.*?'", String, "#pop"),
            (r"[^\s>]+", String, "#pop"),
        ],
    }
# Beispiel #11
# 0
class PhpLexer(RegexLexer):
    """
    For PHP source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    url = 'https://www.php.net/'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns
    # PHP uses a backslash as a namespace separator
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other),
                 (r'<', Other)],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            # heredoc / nowdoc: <<<ID ... ID;
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String,
                      String.Delimiter, Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # anonymous function: "function (" with no name
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally|match)\b', Keyword),
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            # bare identifier; may be promoted to Name.Builtin in
            # get_tokens_unprocessed below
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'0b[01]+', Number.Bin),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct',
                '__destruct',
                '__call',
                '__callStatic',
                '__get',
                '__set',
                '__isset',
                '__unset',
                '__sleep',
                '__wakeup',
                '__toString',
                '__invoke',
                '__set_state',
                '__clone',
                '__debugInfo',
            ),
                   suffix=r'\b'), Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__',
                '__FILE__',
                '__DIR__',
                '__FUNCTION__',
                '__CLASS__',
                '__TRAIT__',
                '__METHOD__',
                '__NAMESPACE__',
            ),
                   suffix=r'\b'), Name.Constant),
        ],
        'classname': [(_ident_inner, Name.Class, '#pop')],
        'functionname': [
            include('magicfuncs'), (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        # double-quoted string body with ${...} / {$...} interpolation
        'string':
        [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double),
         (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
         (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
          String.Interpol),
         (r'(\{\$\{)(.*?)(\}\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\{)(\$.*?)(\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\$\{)(\S+)(\})',
          bygroups(String.Interpol, Name.Variable, String.Interpol)),
         (r'[${\\]', String.Double)],
    }

    def __init__(self, **options):
        # See the class docstring for the meaning of these options.
        self.funcnamehighlighting = get_bool_opt(options,
                                                 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(options, 'disabledmodules',
                                            ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from pygments.lexers._php_builtins import MODULES
            for key, value in MODULES.items():
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Promote Name.Other tokens that name known builtin functions
        to Name.Builtin; everything else passes through unchanged."""
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    # Pygments calls analyse_text as a static-style hook; no ``self``.
    def analyse_text(text):
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
# Beispiel #12
# 0
class WhymlLexer(RegexLexer):
    """
    For the WhyML language (the specification and programming language of
    the Why3 verification platform).
    """

    name = 'Whyml'
    aliases = ['whyml']
    filenames = ['*.mlw', '*.why']
    mimetypes = ['text/x-why3']

    # Contract/specification clause keywords.
    keywords_type = ('invariant', 'variant', 'diverges', 'requires', 'ensures',
                     'pure',  'returns', 'raises', 'reads', 'writes', 'alias',
                     'assert', 'assume', 'check')

    # General language keywords.
    keywords_keyword = ('use', 'clone', 'scope', 'import', 'export',
                        'coinductive', 'inductive', 'external', 'constant',
                        'function', 'predicate', 'val', 'exception', 'axiom',
                        'lemma', 'goal', 'type', 'mutable', 'abstract',
                        'private', 'any', 'match', 'let', 'rec', 'in', 'if',
                        'then', 'else', 'begin', 'end', 'while', 'for', 'to',
                        'downto', 'do', 'done', 'loop', 'absurd', 'ghost',
                        'raise', 'return', 'break', 'continue', 'try', 'with',
                        'theory', 'uses', 'module', 'converter', 'fun', 'at',
                        'old', 'true', 'false', 'forall', 'exists', 'label',
                        'by', 'so', 'meta')

    # Punctuation-style operators; reversed below ([::-1]) so that longer
    # operators are tried before their shorter prefixes (e.g. '::' before ':').
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~',
        r'/\\', r'\\/'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list',
                  'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            # Module path prefix (capitalized identifier followed by a dot).
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords_keyword), Keyword),
            (r'\b(%s)\b' % '|'.join(keywords_type), Keyword.Type),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Floats: the decimal point must be escaped -- the original
            # unescaped '.' matched *any* character as the decimal point.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            # (* ... *) comments nest, hence the #push on an inner '(*'.
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
# Beispiel #13 (0)
class FortranLexer(RegexLexer):
    """
    Lexer for FORTRAN 90 code.

    .. versionadded:: 0.10
    """
    name = 'Fortran'
    aliases = ['fortran']
    filenames = ['*.f', '*.f90', '*.F', '*.F90']
    mimetypes = ['text/x-fortran']
    # Fortran is case-insensitive, so every pattern below matches ignoring case.
    flags = re.IGNORECASE

    # Data Types: INTEGER, REAL, COMPLEX, LOGICAL, CHARACTER and DOUBLE PRECISION
    # Operators: **, *, +, -, /, <, >, <=, >=, ==, /=
    # Logical (?): NOT, AND, OR, EQV, NEQV

    # Builtins:
    # http://gcc.gnu.org/onlinedocs/gcc-3.4.6/g77/Table-of-Intrinsic-Functions.html

    tokens = {
        'root': [
            # '!' starts a comment that runs to end of line.
            (r'!.*\n', Comment),
            include('strings'),
            include('core'),
            (r'[a-z]\w*', Name.Variable),
            include('nums'),
            (r'[\s]+', Text),
        ],
        'core': [
            # Statements
            (words((
                'ABSTRACT', 'ACCEPT', 'ALL', 'ALLSTOP', 'ALLOCATABLE', 'ALLOCATE',
                'ARRAY', 'ASSIGN', 'ASSOCIATE', 'ASYNCHRONOUS', 'BACKSPACE', 'BIND',
                'BLOCK', 'BLOCKDATA', 'BYTE', 'CALL', 'CASE', 'CLASS', 'CLOSE',
                'CODIMENSION', 'COMMON', 'CONCURRRENT', 'CONTIGUOUS', 'CONTAINS',
                'CONTINUE', 'CRITICAL', 'CYCLE', 'DATA', 'DEALLOCATE', 'DECODE',
                'DEFERRED', 'DIMENSION', 'DO', 'ELEMENTAL', 'ELSE', 'ENCODE', 'END',
                'ENTRY', 'ENUM', 'ENUMERATOR', 'EQUIVALENCE', 'EXIT', 'EXTENDS',
                'EXTERNAL', 'EXTRINSIC', 'FILE', 'FINAL', 'FORALL', 'FORMAT',
                'FUNCTION', 'GENERIC', 'GOTO', 'IF', 'IMAGES', 'IMPLICIT',
                'IMPORT', 'IMPURE', 'INCLUDE', 'INQUIRE', 'INTENT', 'INTERFACE',
                'INTRINSIC', 'IS', 'LOCK', 'MEMORY', 'MODULE', 'NAMELIST', 'NULLIFY',
                'NONE', 'NON_INTRINSIC', 'NON_OVERRIDABLE', 'NOPASS', 'OPEN', 'OPTIONAL',
                'OPTIONS', 'PARAMETER', 'PASS', 'PAUSE', 'POINTER', 'PRINT', 'PRIVATE',
                'PROGRAM', 'PROCEDURE', 'PROTECTED', 'PUBLIC', 'PURE', 'READ',
                'RECURSIVE', 'RESULT', 'RETURN', 'REWIND', 'SAVE', 'SELECT', 'SEQUENCE',
                'STOP', 'SUBMODULE', 'SUBROUTINE', 'SYNC', 'SYNCALL', 'SYNCIMAGES',
                'SYNCMEMORY', 'TARGET', 'THEN', 'TYPE', 'UNLOCK', 'USE', 'VALUE',
                'VOLATILE', 'WHERE', 'WRITE', 'WHILE'), prefix=r'\b', suffix=r'\s*\b'),
             Keyword),

            # Data Types
            (words((
                'CHARACTER', 'COMPLEX', 'DOUBLE PRECISION', 'DOUBLE COMPLEX', 'INTEGER',
                'LOGICAL', 'REAL', 'C_INT', 'C_SHORT', 'C_LONG', 'C_LONG_LONG', 'C_SIGNED_CHAR',
                'C_SIZE_T', 'C_INT8_T', 'C_INT16_T', 'C_INT32_T', 'C_INT64_T', 'C_INT_LEAST8_T',
                'C_INT_LEAST16_T', 'C_INT_LEAST32_T', 'C_INT_LEAST64_T', 'C_INT_FAST8_T',
                'C_INT_FAST16_T', 'C_INT_FAST32_T', 'C_INT_FAST64_T', 'C_INTMAX_T',
                'C_INTPTR_T', 'C_FLOAT', 'C_DOUBLE', 'C_LONG_DOUBLE', 'C_FLOAT_COMPLEX',
                'C_DOUBLE_COMPLEX', 'C_LONG_DOUBLE_COMPLEX', 'C_BOOL', 'C_CHAR', 'C_PTR',
                'C_FUNPTR'), prefix=r'\b', suffix=r'\s*\b'),
             Keyword.Type),

            # Operators
            (r'(\*\*|\*|\+|-|\/|<|>|<=|>=|==|\/=|=)', Operator),

            (r'(::)', Keyword.Declaration),

            (r'[()\[\],:&%;]', Punctuation),
            # Intrinsics
            (words((
                'Abort', 'Abs', 'Access', 'AChar', 'ACos', 'ACosH', 'AdjustL',
                'AdjustR', 'AImag', 'AInt', 'Alarm', 'All', 'Allocated', 'ALog',
                'AMax', 'AMin', 'AMod', 'And', 'ANInt', 'Any', 'ASin', 'ASinH',
                'Associated', 'ATan', 'ATanH', 'Atomic_Define', 'Atomic_Ref',
                'BesJ', 'BesJN', 'Bessel_J0', 'Bessel_J1', 'Bessel_JN', 'Bessel_Y0',
                'Bessel_Y1', 'Bessel_YN', 'BesY', 'BesYN', 'BGE', 'BGT', 'BLE',
                'BLT', 'Bit_Size', 'BTest', 'CAbs', 'CCos', 'Ceiling', 'CExp',
                'Char', 'ChDir', 'ChMod', 'CLog', 'Cmplx', 'Command_Argument_Count',
                'Complex', 'Conjg', 'Cos', 'CosH', 'Count', 'CPU_Time', 'CShift',
                'CSin', 'CSqRt', 'CTime', 'C_Funloc', 'C_Loc', 'C_Associated',
                'C_Null_Ptr', 'C_Null_Funptr', 'C_F_Pointer', 'C_F_ProcPointer',
                'C_Null_Char', 'C_Alert', 'C_Backspace', 'C_Form_Feed', 'C_FunLoc',
                'C_Loc', 'C_Sizeof', 'C_New_Line', 'C_Carriage_Return',
                'C_Horizontal_Tab', 'C_Vertical_Tab', 'DAbs', 'DACos', 'DASin',
                'DATan', 'Date_and_Time', 'DbesJ', 'DbesJ', 'DbesJN', 'DbesY',
                'DbesY', 'DbesYN', 'Dble', 'DCos', 'DCosH', 'DDiM', 'DErF',
                'DErFC', 'DExp', 'Digits', 'DiM', 'DInt', 'DLog', 'DLog', 'DMax',
                'DMin', 'DMod', 'DNInt', 'Dot_Product', 'DProd', 'DSign', 'DSinH',
                'DShiftL', 'DShiftR', 'DSin', 'DSqRt', 'DTanH', 'DTan', 'DTime',
                'EOShift', 'Epsilon', 'ErF', 'ErFC', 'ErFC_Scaled', 'ETime',
                'Execute_Command_Line', 'Exit', 'Exp', 'Exponent', 'Extends_Type_Of',
                'FDate', 'FGet', 'FGetC', 'FindLoc', 'Float', 'Floor', 'Flush',
                'FNum', 'FPutC', 'FPut', 'Fraction', 'FSeek', 'FStat', 'FTell',
                'Gamma', 'GError', 'GetArg', 'Get_Command', 'Get_Command_Argument',
                'Get_Environment_Variable', 'GetCWD', 'GetEnv', 'GetGId', 'GetLog',
                'GetPId', 'GetUId', 'GMTime', 'HostNm', 'Huge', 'Hypot', 'IAbs',
                'IAChar', 'IAll', 'IAnd', 'IAny', 'IArgC', 'IBClr', 'IBits',
                'IBSet', 'IChar', 'IDate', 'IDiM', 'IDInt', 'IDNInt', 'IEOr',
                'IErrNo', 'IFix', 'Imag', 'ImagPart', 'Image_Index', 'Index',
                'Int', 'IOr', 'IParity', 'IRand', 'IsaTty', 'IShft', 'IShftC',
                'ISign', 'Iso_C_Binding', 'Is_Contiguous', 'Is_Iostat_End',
                'Is_Iostat_Eor', 'ITime', 'Kill', 'Kind', 'LBound', 'LCoBound',
                'Len', 'Len_Trim', 'LGe', 'LGt', 'Link', 'LLe', 'LLt', 'LnBlnk',
                'Loc', 'Log', 'Log_Gamma', 'Logical', 'Long', 'LShift', 'LStat',
                'LTime', 'MaskL', 'MaskR', 'MatMul', 'Max', 'MaxExponent',
                'MaxLoc', 'MaxVal', 'MClock', 'Merge', 'Merge_Bits', 'Move_Alloc',
                'Min', 'MinExponent', 'MinLoc', 'MinVal', 'Mod', 'Modulo', 'MvBits',
                'Nearest', 'New_Line', 'NInt', 'Norm2', 'Not', 'Null', 'Num_Images',
                'Or', 'Pack', 'Parity', 'PError', 'Precision', 'Present', 'Product',
                'Radix', 'Rand', 'Random_Number', 'Random_Seed', 'Range', 'Real',
                'RealPart', 'Rename', 'Repeat', 'Reshape', 'RRSpacing', 'RShift',
                'Same_Type_As', 'Scale', 'Scan', 'Second', 'Selected_Char_Kind',
                'Selected_Int_Kind', 'Selected_Real_Kind', 'Set_Exponent', 'Shape',
                'ShiftA', 'ShiftL', 'ShiftR', 'Short', 'Sign', 'Signal', 'SinH',
                'Sin', 'Sleep', 'Sngl', 'Spacing', 'Spread', 'SqRt', 'SRand',
                'Stat', 'Storage_Size', 'Sum', 'SymLnk', 'System', 'System_Clock',
                'Tan', 'TanH', 'Time', 'This_Image', 'Tiny', 'TrailZ', 'Transfer',
                'Transpose', 'Trim', 'TtyNam', 'UBound', 'UCoBound', 'UMask',
                'Unlink', 'Unpack', 'Verify', 'XOr', 'ZAbs', 'ZCos', 'ZExp',
                'ZLog', 'ZSin', 'ZSqRt'), prefix=r'\b', suffix=r'\s*\b'),
             Name.Builtin),

            # Booleans
            (r'\.(true|false)\.', Name.Builtin),
            # Comparing Operators
            (r'\.(eq|ne|lt|le|gt|ge|not|and|or|eqv|neqv)\.', Operator.Word),
        ],

        'strings': [
            (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
        ],

        'nums': [
            # Integers may not be followed by '.' or 'e' -- those belong to
            # floats, which are matched by the rules below.
            (r'\d+(?![.e])(_[a-z]\w+)?', Number.Integer),
            (r'[+-]?\d*\.\d+(e[-+]?\d+)?(_[a-z]\w+)?', Number.Float),
            (r'[+-]?\d+\.\d*(e[-+]?\d+)?(_[a-z]\w+)?', Number.Float),
        ],
    }
class RustLexer(RegexLexer):
    """
    Lexer for the Rust programming language (version 1.47).

    .. versionadded:: 1.6
    """
    name = 'Rust'
    filenames = ['*.rs', '*.rs.in']
    aliases = ['rust', 'rs']
    mimetypes = ['text/rust', 'text/x-rust']

    # Primitive type names, highlighted as Keyword.Type.
    keyword_types = (words((
        'u8',
        'u16',
        'u32',
        'u64',
        'u128',
        'i8',
        'i16',
        'i32',
        'i64',
        'i128',
        'usize',
        'isize',
        'f32',
        'f64',
        'char',
        'str',
        'bool',
    ),
                           suffix=r'\b'), Keyword.Type)

    # Prelude traits/types/functions, highlighted as Name.Builtin.
    builtin_funcs_types = (words((
        'Copy',
        'Send',
        'Sized',
        'Sync',
        'Unpin',
        'Drop',
        'Fn',
        'FnMut',
        'FnOnce',
        'drop',
        'Box',
        'ToOwned',
        'Clone',
        'PartialEq',
        'PartialOrd',
        'Eq',
        'Ord',
        'AsRef',
        'AsMut',
        'Into',
        'From',
        'Default',
        'Iterator',
        'Extend',
        'IntoIterator',
        'DoubleEndedIterator',
        'ExactSizeIterator',
        'Option',
        'Some',
        'None',
        'Result',
        'Ok',
        'Err',
        'String',
        'ToString',
        'Vec',
    ),
                                 suffix=r'\b'), Name.Builtin)

    # Standard-library macros; the '!' suffix is part of the match.
    builtin_macros = (words((
        'asm',
        'assert',
        'assert_eq',
        'assert_ne',
        'cfg',
        'column',
        'compile_error',
        'concat',
        'concat_idents',
        'dbg',
        'debug_assert',
        'debug_assert_eq',
        'debug_assert_ne',
        'env',
        'eprint',
        'eprintln',
        'file',
        'format',
        'format_args',
        'format_args_nl',
        'global_asm',
        'include',
        'include_bytes',
        'include_str',
        'is_aarch64_feature_detected',
        'is_arm_feature_detected',
        'is_mips64_feature_detected',
        'is_mips_feature_detected',
        'is_powerpc64_feature_detected',
        'is_powerpc_feature_detected',
        'is_x86_feature_detected',
        'line',
        'llvm_asm',
        'log_syntax',
        'macro_rules',
        'matches',
        'module_path',
        'option_env',
        'panic',
        'print',
        'println',
        'stringify',
        'thread_local',
        'todo',
        'trace_macros',
        'unimplemented',
        'unreachable',
        'vec',
        'write',
        'writeln',
    ),
                            suffix=r'!'), Name.Function.Magic)

    tokens = {
        'root': [
            # rust allows a file to start with a shebang, but if the first line
            # starts with #![ then it's not a shebang but a crate attribute.
            (r'#![^[\r\n].*$', Comment.Preproc),
            default('base'),
        ],
        'base': [
            # Whitespace and Comments
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'//!.*?\n', String.Doc),
            (r'///(\n|[^/].*?\n)', String.Doc),
            (r'//(.*?)\n', Comment.Single),
            (r'/\*\*(\n|[^/*])', String.Doc, 'doccomment'),
            (r'/\*!', String.Doc, 'doccomment'),
            (r'/\*', Comment.Multiline, 'comment'),

            # Macro parameters
            (r"""\$([a-zA-Z_]\w*|\(,?|\),?|,?)""", Comment.Preproc),
            # Keywords
            (words(('as', 'async', 'await', 'box', 'const', 'crate', 'dyn',
                    'else', 'extern', 'for', 'if', 'impl', 'in', 'loop',
                    'match', 'move', 'mut', 'pub', 'ref', 'return', 'static',
                    'super', 'trait', 'unsafe', 'use', 'where', 'while'),
                   suffix=r'\b'), Keyword),
            (words(('abstract', 'become', 'do', 'final', 'macro', 'override',
                    'priv', 'typeof', 'try', 'unsized', 'virtual', 'yield'),
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false)\b', Keyword.Constant),
            (r'self\b', Name.Builtin.Pseudo),
            (r'mod\b', Keyword, 'modname'),
            (r'let\b', Keyword.Declaration),
            (r'fn\b', Keyword, 'funcname'),
            (r'(struct|enum|type|union)\b', Keyword, 'typename'),
            (r'(default)(\s+)(type|fn)\b', bygroups(Keyword, Text, Keyword)),
            keyword_types,
            (r'[sS]elf\b', Name.Builtin.Pseudo),
            # Prelude (taken from Rust's src/libstd/prelude.rs)
            builtin_funcs_types,
            builtin_macros,
            # Path separators, so types don't catch them.
            (r'::\b', Text),
            # Types in positions.
            (r'(?::|->)', Text, 'typename'),
            # Labels
            (r'(break|continue)(\b\s*)(\'[A-Za-z_]\w*)?',
             bygroups(Keyword, Text.Whitespace, Name.Label)),

            # Character literals
            (r"""'(\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0"""
             r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", String.Char),
            (r"""b'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\0"""
             r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", String.Char),

            # Binary literals
            (r'0b[01_]+', Number.Bin, 'number_lit'),
            # Octal literals
            (r'0o[0-7_]+', Number.Oct, 'number_lit'),
            # Hexadecimal literals
            (r'0[xX][0-9a-fA-F_]+', Number.Hex, 'number_lit'),
            # Decimal literals
            (r'[0-9][0-9_]*(\.[0-9_]+[eE][+\-]?[0-9_]+|'
             r'\.[0-9_]*(?!\.)|[eE][+\-]?[0-9_]+)', Number.Float, 'number_lit'
             ),
            (r'[0-9][0-9_]*', Number.Integer, 'number_lit'),

            # String literals
            (r'b"', String, 'bytestring'),
            (r'"', String, 'string'),
            (r'(?s)b?r(#*)".*?"\1', String),

            # Lifetime names
            (r"'", Operator, 'lifetime'),

            # Operators and Punctuation
            (r'\.\.=?', Operator),
            (r'[{}()\[\],.;]', Punctuation),
            (r'[+\-*/%&|<>^!~@=:?]', Operator),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
            # Raw identifiers
            (r'r#[a-zA-Z_]\w*', Name),

            # Attributes
            (r'#!?\[', Comment.Preproc, 'attribute['),

            # Misc
            # Lone hashes: not used in Rust syntax, but allowed in macro
            # arguments, most famously for quote::quote!()
            (r'#', Text),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'doccomment': [
            (r'[^*/]+', String.Doc),
            (r'/\*', String.Doc, '#push'),
            (r'\*/', String.Doc, '#pop'),
            (r'[*/]', String.Doc),
        ],
        'modname': [
            (r'\s+', Text),
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'funcname': [
            (r'\s+', Text),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'typename': [
            (r'\s+', Text),
            (r'&', Keyword.Pseudo),
            (r"'", Operator, 'lifetime'),
            builtin_funcs_types,
            keyword_types,
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        # After a single quote: 'static, '_ or a named lifetime.
        'lifetime': [
            (r"(static|_)", Name.Builtin),
            (r"[a-zA-Z_]+\w*", Name.Attribute),
            default('#pop'),
        ],
        # Optional numeric type suffix (e.g. 1u8, 2.0f64).
        'number_lit': [
            (r'[ui](8|16|32|64|size)', Keyword, '#pop'),
            (r'f(32|64)', Keyword, '#pop'),
            default('#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r"""\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0"""
             r"""|\\u\{[0-9a-fA-F]{1,6}\}""", String.Escape),
            (r'[^\\"]+', String),
            (r'\\', String),
        ],
        'bytestring': [
            # Byte strings additionally allow \x escapes with a high first
            # nibble (bytes outside the ASCII range).
            (r"""\\x[89a-fA-F][0-9a-fA-F]""", String.Escape),
            include('string'),
        ],
        'attribute_common': [
            (r'"', String, 'string'),
            (r'\[', Comment.Preproc, 'attribute['),
        ],
        'attribute[': [
            include('attribute_common'),
            (r'\]', Comment.Preproc, '#pop'),
            (r'[^"\]\[]+', Comment.Preproc),
        ],
    }
# Beispiel #15 (0)
class DolmenLexerLexer(RegexLexer):
    """Pygment custom lexer for Dolmen lexer descriptions"""

    # Properties for inclusion in the local Pygments distribution
    name = "DolmenLexer"
    aliases = ['dolmenlexer', 'jl']
    filenames = ['*.jl']

    # We want '.' to include newlines, and \b as well
    flags = re.MULTILINE | re.DOTALL

    # Auxiliary reg. exps and keywords lists
    jl_keywords = ("as", "eof", "import", "orelse", "private", "public",
                   "rule", "shortest", "static")

    # NOTE(review): the 'action' state below references `java_keywords`,
    # which is not defined in this class -- presumably a module-level list
    # defined elsewhere in this file; confirm it is in scope.

    # Lexer rules
    tokens = {
        'root': [
            # Comments
            include('_comments'),
            # Java actions
            (r'{', Generic, 'action'),
            # Literals
            (r'"', String, 'string'),
            (r'\'', String.Char, 'character'),
            (r'[0-9]+', Literal.Number),
            # Keywords and identifiers
            (words(jl_keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (r'[_a-zA-Z][_a-zA-Z0-9]*', Name.Function),
            # Operators
            (r'\+', Operator),
            (r'\*', Operator),
            (r'\?', Operator),
            (r'\^', Operator),
            (r'\|', Operator),
            (r'\#', Operator),
            (r'-', Operator),
            (r'_', Operator),
            (r'=', Operator),
            # Punctuation
            (r'\(', Punctuation),
            (r'\)', Punctuation),
            (r'\[', Punctuation),
            (r'\]', Punctuation),
            (r'<', Punctuation),
            (r'>', Punctuation),
            (r',', Punctuation),
            (r';', Punctuation),
            # Catch-all rule
            (r'.', Text)
        ],
        # Embedded Java code between braces; braces nest via #push/#pop.
        'action': [
            (r'}', Generic, '#pop'),
            (r'{', Generic, '#push'),
            # Comments
            include('_java_comments'),
            # Java keywords
            (words(java_keywords, prefix=r'\b', suffix=r'\b'), Generic.Emph),
            # Java Literals
            (r'"', Generic.Strong, 'java_string'),
            (r'\'', Generic.Strong, 'java_character'),
            # Catch-all rule
            (r'.', Generic)
        ],
        'string': [(r'"', String, '#pop'), (r'[^"\\]', String),
                   (r'\\', String, 'escapeSequence')],
        'character':
        [(r'[^\'\\]', String.Char, ('#pop', 'endCharacter')),
         (r'\\', String.Char, ('#pop', 'endCharacter', 'escapeSequence'))],
        'endCharacter': [(r'\'', String.Char, '#pop')],
        'escapeSequence': [(r'\\|\'|"|r|n|b|t|f', String, '#pop'),
                           (r'[0-9]{3}', String, '#pop'),
                           (r'u+[0-9a-fA-F]{4}', String, '#pop')],

        # A state only used for sharing comments between partitions
        # NOTE(review): `Comment.Singleline` is non-standard (Pygments'
        # stock token is Comment.Single); it works because token subtypes
        # are created on attribute access -- presumably intentional.
        '_comments': [(r'/\*', Comment.Multiline, 'mlcomment'),
                      (r'//.*?$', Comment.Singleline)],
        'mlcomment': [(r'\*/', Comment.Multiline, '#pop'),
                      (r'[^*]', Comment.Multiline),
                      (r'\*', Comment.Multiline)],
        'java_string': [(r'"', Generic.Strong, '#pop'),
                        (r'[^"\\]', Generic.Strong),
                        (r'\\', Generic.Strong, 'java_escapeSequence')],
        'java_character':
        [(r'[^\'\\]', Generic.Strong, ('#pop', 'java_endCharacter')),
         (r'\\', Generic.Strong, ('#pop', 'java_endCharacter',
                                  'java_escapeSequence'))],
        'java_endCharacter': [(r'\'', Generic.Strong, '#pop')],
        'java_escapeSequence': [(r'\\|\'|"|r|n|b|t|f', Generic.Strong, '#pop'),
                                (r'[0-9]{3}', Generic.Strong, '#pop'),
                                (r'u+[0-9a-fA-F]{4}', Generic.Strong, '#pop')],

        # A state only used for sharing comments between partitions
        '_java_comments': [(r'/\*', Generic.Deleted, 'java_mlcomment'),
                           (r'//.*?$', Generic.Deleted)],
        'java_mlcomment': [(r'\*/', Generic.Deleted, '#pop'),
                           (r'[^*]', Generic.Deleted),
                           (r'\*', Generic.Deleted)]
    }
# Beispiel #16 (0)
class LPyLexer(PythonLexer):
    """
    Lexer for the LPy language.
    """
    name = 'LPy'
    aliases = [
        'lpy',
        'Lpy',
        'LPy',
        'l-py',
        'L-py',
        'L-Py',
    ]
    filenames = ['*.lpy']
    mimetypes = ['text/x-python', 'application/x-python']

    def module_callback(lexer, match):
        """
        Detect and record words after the special words "axiom" and "module".
        These words are then colourized like other keywords.
        """
        possible_words = match.group().split(" ")
        for possible_word in possible_words:
            w = possible_word.split("(")[0]
            # Fixed: the original used ``w is not u""`` -- an identity test
            # against a string literal, whose result depends on CPython
            # string interning.  A plain truthiness test is what is meant.
            if w:
                # Stock "lpy modules"
                lexer.lpy_modules.append(w)
        # Colourize words after "axiom" and "module" in the same line.
        yield match.start(), Keyword, match.group()

    tokens = {
        'root': [include('lpykeywords'), inherit],
        'lpykeywords': [
            (r'(^Axiom|^module)', Generic.Subheading, 'module'),
            (r'(^derivation length|-->|-static->|decomposition|'
             r'^production|produce|homomorphism|^interpretation|group|'
             r'^endlsystem|endgroup|maximum depth|nproduce|nsproduce|'
             r'makestring|consider|ignore|forward|backward|isForward|'
             r'StartEach|EndEach|Start|End|getGroup|useGroup|getIterationNb|'
             r'module|@static|lpyimport)', Generic.Subheading),
        ],
        'module': [
            (r'(\w*)(\(.*\))', module_callback),
            (r'( )(\w*)( |$)', module_callback),
            (r'(:| )', Text),
        ]
    }

    def __init__(self, **options):
        super(LPyLexer, self).__init__(**options)
        # List used to stock "lpy modules" detected by module_callback.
        self.lpy_modules = list()

    def get_tokens_unprocessed(self, text):
        """Yield Python tokens, recolouring previously detected lpy module
        names from Name to Keyword."""
        for index, token, value in PythonLexer.get_tokens_unprocessed(
                self, text):
            if token is Name and value in self.lpy_modules:
                # Colourize previously detected modules
                yield index, Keyword, value
            else:
                yield index, token, value
# Beispiel #17 (0)
class CPSLFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)',
             bygroups(using(this), Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)',
             bygroups(using(this), Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimeter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        # Rules shared by statement and function bodies.
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # A '*/' outside of a comment is an error.
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/#@-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(('asm', 'auto', 'break', 'case', 'const', 'continue',
                    'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                    'if', 'register', 'restricted', 'return', 'sizeof',
                    'static', 'struct', 'switch', 'typedef', 'union',
                    'volatile', 'while', 'assert', 'ASSERT', 'next_event',
                    'NEXT_EVENT', 'always', 'ALWAYS', 'next', 'NEXT', 'until',
                    'UNTIL', 'before', 'BEFORE', 'abort', 'ABORT', 'SET', 'GET',
                    'START', 'END'),
                   suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'), suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words((
                'asm', 'int8', 'based', 'except', 'int16', 'stdcall', 'cdecl',
                'fastcall', 'int32', 'declspec', 'finally', 'int64', 'try',
                'leave', 'wchar_t', 'w64', 'unaligned', 'raise', 'noop',
                'identifier', 'forceinline', 'assume'),
                prefix=r'__', suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;{]*)(\{)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;]*)(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    # Type-name sets promoted to Keyword.Type by get_tokens_unprocessed below.
    stdlib_types = set((
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t',
        'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t',
        'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'))
    c99_types = set((
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t',
        'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t',
        'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t',
        'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
        'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t',
        'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'))
    linux_types = set((
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t',
        'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t',
        'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t'))

    def __init__(self, **options):
        # Boolean options controlling which type-name sets are highlighted.
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield tokens, promoting plain Name tokens that match a known
        type name (stdlib / C99 / Linux) to Keyword.Type per the options."""
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
# Beispiel #18 (0)
class DevicetreeLexer(RegexLexer):
    """
    Lexer for Devicetree files.

    .. versionadded:: 2.7
    """

    name = 'Devicetree'
    url = 'https://www.devicetree.org/'
    aliases = ['devicetree', 'dts']
    filenames = ['*.dts', '*.dtsi']
    mimetypes = ['text/x-c']

    #: optional Whitespace or /*...*/ style comment
    _ws = r'\s*(?:/[*][^*/]*?[*]/\s*)*'

    tokens = {
        'macro': [
            # Include preprocessor directives (C style):
            (r'(#include)(' + _ws + r')([^\n]+)',
             bygroups(Comment.Preproc, Comment.Multiline,
                      Comment.PreprocFile)),
            # Define preprocessor directives (C style):
            (r'(#define)(' + _ws + r')([^\n]+)',
             bygroups(Comment.Preproc, Comment.Multiline, Comment.Preproc)),
            # devicetree style with file:
            (r'(/[^*/{]+/)(' + _ws + r')("[^\n{]+")',
             bygroups(Comment.Preproc, Comment.Multiline,
                      Comment.PreprocFile)),
            # devicetree style with property:
            (r'(/[^*/{]+/)(' + _ws + r')([^\n;{]*)([;]?)',
             bygroups(Comment.Preproc, Comment.Multiline, Comment.Preproc,
                      Punctuation)),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            # Strings, with optional wide-character prefix (mimetype is C-ish)
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # A label ("name:"); pops the current state so the enclosing
            # state can rescan what follows the label
            (r'([^\s{}/*]*)(\s*)(:)', bygroups(Name.Label, Text,
                                               Punctuation), '#pop'),
            # Standard devicetree property names
            (words(('compatible', 'model', 'phandle', 'status',
                    '#address-cells', '#size-cells', 'reg', 'virtual-reg',
                    'ranges', 'dma-ranges', 'device_type', 'name'),
                   suffix=r'\b'), Keyword.Reserved),
            (r'([~!%^&*+=|?:<>/#-])', Operator),
            (r'[()\[\]{},.]', Punctuation),
            # Property name lookahead: identifier that is (possibly after a
            # comma-separated list and/or comments) followed by '=' or ';'
            (r'[a-zA-Z_][\w-]*(?=(?:\s*,\s*[a-zA-Z_][\w-]*|(?:' + _ws +
             r'))*\s*[=;])', Name),
            (r'[a-zA-Z_]\w*', Name.Attribute),
        ],
        'root': [
            include('whitespace'),
            include('macro'),

            # Nodes: "name@unit-address {" (or the root node "/ {")
            (r'([^/*@\s&]+|/)(@?)((?:0x)?[0-9a-fA-F,]*)(' + _ws + r')(\{)',
             bygroups(Name.Function, Operator, Number.Integer,
                      Comment.Multiline, Punctuation), 'node'),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation, '#pop'),
        ],
        'node': [
            include('whitespace'),
            include('macro'),
            # Nested node definitions push another 'node' state
            (r'([^/*@\s&]+|/)(@?)((?:0x)?[0-9a-fA-F,]*)(' + _ws + r')(\{)',
             bygroups(Name.Function, Operator, Number.Integer,
                      Comment.Multiline, Punctuation), '#push'),
            include('statements'),
            (r'\};', Punctuation, '#pop'),
            (';', Punctuation),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
    }
# Example #19 (aggregation-site artifact, kept as a comment so the file parses)
class NixLexer(RegexLexer):
    """
    For the `Nix language <http://nixos.org/nix/>`_.

    .. versionadded:: 2.0
    """

    name = 'Nix'
    aliases = ['nixos', 'nix']
    filenames = ['*.nix']
    mimetypes = ['text/x-nix']

    flags = re.MULTILINE | re.UNICODE

    keywords = [
        'rec', 'with', 'let', 'in', 'inherit', 'assert', 'if', 'else', 'then',
        '...'
    ]
    builtins = [
        'import', 'abort', 'baseNameOf', 'dirOf', 'isNull', 'builtins', 'map',
        'removeAttrs', 'throw', 'toString', 'derivation'
    ]
    operators = [
        '++', '+', '?', '.', '!', '//', '==', '!=', '&&', '||', '->', '='
    ]

    punctuations = ["(", ")", "[", "]", ";", "{", "}", ":", ",", "@"]

    tokens = {
        'root': [
            # comments starting with #
            (r'#.*$', Comment.Single),

            # multiline comments
            (r'/\*', Comment.Multiline, 'comment'),

            # whitespace
            (r'\s+', Text),

            # keywords
            ('(%s)' % '|'.join(re.escape(entry) + '\\b'
                               for entry in keywords), Keyword),

            # highlight the builtins
            ('(%s)' % '|'.join(re.escape(entry) + '\\b'
                               for entry in builtins), Name.Builtin),
            (r'\b(true|false|null)\b', Name.Constant),

            # operators
            ('(%s)' % '|'.join(re.escape(entry)
                               for entry in operators), Operator),

            # word operators
            (r'\b(or|and)\b', Operator.Word),

            # punctuations
            ('(%s)' % '|'.join(re.escape(entry)
                               for entry in punctuations), Punctuation),

            # integers
            (r'[0-9]+', Number.Integer),

            # strings
            (r'"', String.Double, 'doublequote'),
            (r"''", String.Single, 'singlequote'),

            # paths
            (r'[\w.+-]*(\/[\w.+-]+)+', Literal),
            (r'\<[\w.+-]+(\/[\w.+-]+)*\>', Literal),

            # urls
            (r'[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[\w%/?:@&=+$,\\.!~*\'-]+', Literal),

            # names of variables
            (r'[\w-]+\s*=', String.Symbol),
            (r'[a-zA-Z_][\w\'-]*', Text),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'singlequote': [
            # longest escapes first, so the closing '' rule cannot shadow them
            (r"'''", String.Escape),
            (r"''\$\{", String.Escape),
            (r"''\n", String.Escape),
            (r"''\r", String.Escape),
            (r"''\t", String.Escape),
            (r"''", String.Single, '#pop'),
            (r'\$\{', String.Interpol, 'antiquote'),
            (r"[^']", String.Single),
        ],
        'doublequote': [
            # Longest escapes first: the generic backslash rule must come
            # LAST, otherwise it consumes the backslash of \" and \${ and
            # the following character is re-scanned (so \" would wrongly
            # terminate the string).
            (r'\\"', String.Escape),
            # \${ suppresses interpolation.  The dollar must be escaped
            # (\$): a bare $ here would be an end-of-line anchor under
            # re.MULTILINE, not a literal dollar sign.
            (r'\\\$\{', String.Escape),
            (r'\\', String.Escape),
            (r'"', String.Double, '#pop'),
            (r'\$\{', String.Interpol, 'antiquote'),
            (r'[^"]', String.Double),
        ],
        'antiquote': [
            (r"\}", String.Interpol, '#pop'),
            # TODO: we should probably escape also here ''${ \${
            (r"\$\{", String.Interpol, '#push'),
            include('root'),
        ],
    }

    def analyse_text(text):
        """Heuristic score (0.0-1.0+) guessing whether *text* is Nix code."""
        rv = 0.0
        # TODO: let/in
        if re.search(r'import.+?<[^>]+>', text):
            rv += 0.4
        if re.search(r'mkDerivation\s+(\(|\{|rec)', text):
            rv += 0.4
        if re.search(r'=\s+mkIf\s+', text):
            rv += 0.4
        if re.search(r'\{[a-zA-Z,\s]+\}:', text):
            rv += 0.1
        return rv
# Example #20 (aggregation-site artifact, kept as a comment so the file parses)
class MaximaLexer(RegexLexer):
    """
    For `Maxima`_ source code.

    .. versionadded:: 1.2
    """
    # NOTE(review): despite the Maxima name and metadata, the comments and
    # keyword-type rules below refer to "asy" (Asymptote) names, and
    # get_tokens_unprocessed() imports pygments.lexers._asy_builtins --
    # this body looks copied from an Asymptote lexer; verify against the
    # intended upstream lexer.
    name = 'Maxima'
    aliases = ['maxima']
    filenames = ['*.mac']
    mimetypes = ['text/x-maxima']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
        ],
        'statements': [
            # simple string (TeX friendly)
            (r'"(\\\\|\\"|[^"])*"', String),
            # C style string (with character escapes)
            (r"'", String, 'string'),
            # numbers: float with exponent, plain float, hex, octal, int
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=#|?:<>/-]', Operator),
            (r'[()\[\]\{\},.;$]', Punctuation),
            (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
            (r'(and|do|else|elseif|false|for|if|in|not|or|step|then|thru|'
             r'true|while)\b', Keyword),
            # Since an asy-type-name can be also an asy-function-name,
            # in the following we test if the string "  [a-zA-Z]" follows
            # the Keyword.Type.
            # Of course it is not perfect !
            (r'(abasep|abs|absint|absolute_real_time|acos|acosh|acot|acoth|'
             r'acsc|acsch|activate|addcol|add_edge|add_edges|addmatrices|addrow|'
             r'add_vertex|add_vertices|adjacency_matrix|adjoin|adjoint|af|agd|airy_ai|'
             r'airy_bi|airy_dai|airy_dbi|algsys|alg_type|alias|allroots|alphacharp|'
             r'alphanumericp|antid|antidiff|AntiDifference|append|appendfile|apply|apply1|'
             r'apply2|applyb1|apropos|args|arithmetic|arithsum|array|arrayapply|'
             r'arrayinfo|arraymake|ascii|asec|asech|asin|asinh|askinteger|'
             r'asksign|assoc|assoc_legendre_p|assoc_legendre_q|assume|asympa|at|atan|'
             r'atan2|atanh|atensimp|atom|atvalue|augcoefmatrix|augmented_lagrangian_method|av|'
             r'average_degree|backtrace|barsplot|bashindices|batch|batchload|bc2|bdvac|'
             r'belln|bern|bernpoly|bessel|bessel_i|bessel_j|bessel_k|bessel_y|'
             r'beta|bezout|bffac|bfhzeta|bfloat|bfloatp|bfpsi|bfpsi0|'
             r'bfzeta|biconected_components|bimetric|binomial|bipartition|block|blockmatrixp|bode_gain|'
             r'bode_phase|bothcoef|box|boxplot|break|bug_report|build_info|buildq|'
             r'burn|cabs|canform|canten|cardinality|carg|cartan|cartesian_product|'
             r'catch|cbffac|cdf_bernoulli|cdf_beta|cdf_binomial|cdf_cauchy|cdf_chi2|cdf_continuous_uniform|'
             r'cdf_discrete_uniform|cdf_exp|cdf_f|cdf_gamma|cdf_geometric|cdf_gumbel|cdf_hypergeometric|cdf_laplace|'
             r'cdf_logistic|cdf_lognormal|cdf_negative_binomial|cdf_normal|cdf_pareto|cdf_poisson|cdf_rank_sum|cdf_rayleigh|'
             r'cdf_signed_rank|cdf_student_t|cdf_weibull|cdisplay|ceiling|central_moment|cequal|cequalignore|'
             r'cf|cfdisrep|cfexpand|cgeodesic|cgreaterp|cgreaterpignore|changename|changevar|'
             r'chaosgame|charat|charfun|charfun2|charlist|charp|charpoly|chebyshev_t|'
             r'chebyshev_u|checkdiv|check_overlaps|cholesky|christof|chromatic_index|chromatic_number|cint|'
             r'circulant_graph|clear_edge_weight|clear_rules|clear_vertex_label|clebsch_graph|clessp|clesspignore|close|'
             r'closefile|cmetric|coeff|coefmatrix|cograd|col|collapse|collectterms|'
             r'columnop|columnspace|columnswap|columnvector|combination|combine|comp2pui|compare|'
             r'compfile|compile|compile_file|complement_graph|complete_bipartite_graph|complete_graph|components|concan|'
             r'concat|conjugate|conmetderiv|connected_components|connect_vertices|cons|constantp|constituent|'
             r'cont2part|content|continuous_freq|contortion|contour_plot|contract|contract_edge|contragrad|'
             r'contrib_ode|convert|coord|copy|copy_graph|copylist|copymatrix|cor|'
             r'cos|cosh|cot|coth|cov|cov1|covdiff|covect|'
             r'covers|create_graph|create_list|csc|csch|csetup|cspline|ctaylor|'
             r'ct_coordsys|ctransform|ctranspose|cube_graph|cunlisp|cv|cycle_digraph|cycle_graph|'
             r'dblint|deactivate|declare|declare_translated|declare_weight|decsym|defcon|define|'
             r'define_variable|defint|defmatch|defrule|deftaylor|degree_sequence|del|delete|'
             r'deleten|delta|demo|demoivre|denom|depends|derivdegree|derivlist|'
             r'describe|desolve|determinant|dgauss_a|dgauss_b|dgeev|dgesvd|diag|'
             r'diagmatrix|diag_matrix|diagmatrixp|diameter|diff|digitcharp|dimacs_export|dimacs_import|'
             r'dimension|direct|discrete_freq|disjoin|disjointp|disolate|disp|dispcon|'
             r'dispform|dispfun|dispJordan|display|disprule|dispterms|distrib|divide|'
             r'divisors|divsum|dkummer_m|dkummer_u|dlange|dodecahedron_graph|dotproduct|dotsimp|'
             r'dpart|draw|draw2d|draw3d|draw_graph|dscalar|echelon|edge_coloring|'
             r'edges|eigens_by_jacobi|eigenvalues|eigenvectors|eighth|einstein|eivals|eivects|'
             r'elapsed_real_time|elapsed_run_time|ele2comp|ele2polynome|ele2pui|elem|elementp|eliminate|'
             r'elliptic_e|elliptic_ec|elliptic_eu|elliptic_f|elliptic_kc|elliptic_pi|ematrix|empty_graph|'
             r'emptyp|endcons|entermatrix|entertensor|entier|equal|equalp|equiv_classes|'
             r'erf|errcatch|error|errormsg|euler|ev|eval_string|evenp|'
             r'every|evolution|evolution2d|evundiff|example|exp|expand|expandwrt|'
             r'expandwrt_factored|explose|exponentialize|express|expt|exsec|extdiff|extract_linear_equations|'
             r'extremal_subset|ezgcd|f90|facsum|factcomb|factor|factorfacsum|factorial|'
             r'factorout|factorsum|facts|fast_central_elements|fast_linsolve|fasttimes|featurep|fft|'
             r'fib|fibtophi|fifth|filename_merge|file_search|file_type|fillarray|findde|'
             r'find_root|first|fix|flatten|flength|float|floatnump|floor|'
             r'flower_snark|flush|flush1deriv|flushd|flushnd|forget|fortran|fourcos|'
             r'fourexpand|fourier|fourint|fourintcos|fourintsin|foursimp|foursin|fourth|'
             r'fposition|frame_bracket|freeof|freshline|from_adjacency_matrix|frucht_graph|full_listify|fullmap|'
             r'fullmapl|fullratsimp|fullratsubst|fullsetify|funcsolve|fundef|funmake|funp|'
             r'gamma|gauss_a|gauss_b|gaussprob|gcd|gcdex|gcdivide|gcfac|'
             r'gcfactor|gd|genfact|gen_laguerre|genmatrix|geometric|geometric_mean|geosum|'
             r'get|get_edge_weight|get_lu_factors|get_pixel|get_vertex_label|gfactor|gfactorsum|ggf|'
             r'girth|global_variances|gnuplot_close|gnuplot_replot|gnuplot_reset|gnuplot_restart|gnuplot_start|go|'
             r'Gosper|GosperSum|gradef|gramschmidt|graph6_decode|graph6_encode|graph6_export|graph6_import|'
             r'graph_center|graph_charpoly|graph_eigenvalues|graph_order|graph_periphery|graph_product|graph_size|graph_union|'
             r'grid_graph|grind|grobner_basis|grotzch_graph|hamilton_cycle|hamilton_path|hankel|harmonic|'
             r'harmonic_mean|hav|heawood_graph|hermite|hessian|hilbert_matrix|hipow|histogram|'
             r'hodge|horner|ic1|ic2|ic_convert|ichr1|ichr2|icosahedron_graph|'
             r'icurvature|ident|identfor|identity|idiff|idim|idummy|ieqn|'
             r'ifactors|iframes|ifs|ift|igeodesic_coords|ilt|imagpart|imetric|'
             r'implicit_derivative|implicit_plot|indexed_tensor|indices|induced_subgraph|inferencep|inference_result|infix|'
             r'init_atensor|init_ctensor|in_neighbors|innerproduct|inpart|inprod|inrt|integerp|'
             r'integer_partitions|integrate|intersect|intersection|intervalp|intopois|intosum|invariant1|'
             r'invariant2|inverse_jacobi_cd|inverse_jacobi_cn|inverse_jacobi_cs|inverse_jacobi_dc|inverse_jacobi_dn|inverse_jacobi_ds|inverse_jacobi_nc|'
             r'inverse_jacobi_nd|inverse_jacobi_ns|inverse_jacobi_sc|inverse_jacobi_sd|inverse_jacobi_sn|invert|invert_by_lu|inv_mod|'
             r'is|is_biconnected|is_bipartite|is_connected|is_digraph|is_edge_in_graph|is_graph|is_graph_or_digraph|'
             r'ishow|is_isomorphic|isolate|isomorphism|is_planar|isqrt|is_sconnected|is_tree|'
             r'is_vertex_in_graph|items_inference|jacobi|jacobian|jacobi_cd|jacobi_cn|jacobi_cs|jacobi_dc|'
             r'jacobi_dn|jacobi_ds|jacobi_nc|jacobi_nd|jacobi_ns|jacobi_p|jacobi_sc|jacobi_sd|'
             r'jacobi_sn|JF|join|jordan|julia|kdels|kdelta|kill|'
             r'killcontext|kostka|kron_delta|kronecker_product|kummer_m|kummer_u|kurtosis|kurtosis_bernoulli|'
             r'kurtosis_beta|kurtosis_binomial|kurtosis_chi2|kurtosis_continuous_uniform|kurtosis_discrete_uniform|kurtosis_exp|kurtosis_f|kurtosis_gamma|'
             r'kurtosis_geometric|kurtosis_gumbel|kurtosis_hypergeometric|kurtosis_laplace|kurtosis_logistic|kurtosis_lognormal|kurtosis_negative_binomial|kurtosis_normal|'
             r'kurtosis_pareto|kurtosis_poisson|kurtosis_rayleigh|kurtosis_student_t|kurtosis_weibull|labels|lagrange|laguerre|'
             r'lambda|laplace|laplacian_matrix|last|lbfgs|lc2kdt|lcharp|lc_l|'
             r'lcm|lc_u|ldefint|ldisp|ldisplay|legendre_p|legendre_q|leinstein|'
             r'length|let|letrules|letsimp|levi_civita|lfreeof|lgtreillis|lhs|'
             r'li|liediff|limit|Lindstedt|linear|linearinterpol|linear_program|line_graph|'
             r'linsolve|listarray|list_correlations|listify|list_nc_monomials|listoftens|listofvars|listp|'
             r'lmax|lmin|load|loadfile|local|locate_matrix_entry|log|logand|'
             r'logarc|logcontract|logor|logxor|lopow|lorentz_gauge|lowercasep|lpart|'
             r'lratsubst|lreduce|lriemann|lsquares_estimates|lsquares_estimates_approximate|lsquares_estimates_exact|lsquares_mse|lsquares_residual_mse|'
             r'lsquares_residuals|lsum|ltreillis|lu_backsub|lu_factor|macroexpand|macroexpand1|make_array|'
             r'makebox|makefact|makegamma|make_level_picture|makelist|makeOrders|make_poly_continent|make_poly_country|'
             r'make_polygon|make_random_state|make_rgb_picture|makeset|make_transform|mandelbrot|map|mapatom|'
             r'maplist|matchdeclare|matchfix|mat_cond|mat_fullunblocker|mat_function|mat_norm|matrix|'
             r'matrixmap|matrixp|matrix_size|mattrace|mat_trace|mat_unblocker|max|max_clique|'
             r'max_degree|max_flow|maxi|maximize_lp|max_independent_set|max_matching|maybe|mean|'
             r'mean_bernoulli|mean_beta|mean_binomial|mean_chi2|mean_continuous_uniform|mean_deviation|mean_discrete_uniform|mean_exp|'
             r'mean_f|mean_gamma|mean_geometric|mean_gumbel|mean_hypergeometric|mean_laplace|mean_logistic|mean_lognormal|'
             r'mean_negative_binomial|mean_normal|mean_pareto|mean_poisson|mean_rayleigh|mean_student_t|mean_weibull|median|'
             r'median_deviation|member|metricexpandall|min|min_degree|minfactorial|mini|minimalPoly|'
             r'minimize_lp|minimum_spanning_tree|minor|mnewton|mod|mode_declare|mode_identity|ModeMatrix|'
             r'moebius|mon2schur|mono|monomial_dimensions|multi_elem|multinomial|multinomial_coeff|multi_orbit|'
             r'multi_pui|multsym|multthru|mycielski_graph|nary|nc_degree|ncexpt|ncharpoly|'
             r'negative_picture|neighbors|newcontext|newdet|new_graph|newline|newton|next_prime|'
             r'niceindices|ninth|noncentral_moment|nonmetricity|nonnegintegerp|nonscalarp|nonzeroandfreeof|notequal|'
             r'nounify|nptetrad|nroots|nterms|ntermst|nthroot|nullity|nullspace|'
             r'num|numbered_boundaries|numberp|num_distinct_partitions|numerval|numfactor|num_partitions|nusum|'
             r'odd_girth|oddp|ode2|ode_check|odelin|op|opena|openr|'
             r'openw|operatorp|opsubst|optimize|orbit|orbits|ordergreat|ordergreatp|'
             r'orderless|orderlessp|orthogonal_complement|orthopoly_recur|orthopoly_weight|outermap|out_neighbors|outofpois|'
             r'pade|parGosper|parse_string|part|part2cont|partfrac|partition|partition_set|'
             r'partpol|path_digraph|path_graph|pdf_bernoulli|pdf_beta|pdf_binomial|pdf_cauchy|pdf_chi2|'
             r'pdf_continuous_uniform|pdf_discrete_uniform|pdf_exp|pdf_f|pdf_gamma|pdf_geometric|pdf_gumbel|pdf_hypergeometric|'
             r'pdf_laplace|pdf_logistic|pdf_lognormal|pdf_negative_binomial|pdf_normal|pdf_pareto|pdf_poisson|pdf_rank_sum|'
             r'pdf_rayleigh|pdf_signed_rank|pdf_student_t|pdf_weibull|pearson_skewness|permanent|permut|permutation|'
             r'permutations|petersen_graph|petrov|pickapart|picture_equalp|picturep|piechart|planar_embedding|'
             r'playback|plog|plot2d|plot3d|plotdf|plsquares|pochhammer|poisdiff|'
             r'poisexpt|poisint|poismap|poisplus|poissimp|poissubst|poistimes|poistrim|'
             r'polarform|polartorect|poly_add|poly_buchberger|poly_buchberger_criterion|poly_colon_ideal|poly_content|polydecomp|'
             r'poly_depends_p|poly_elimination_ideal|poly_exact_divide|poly_expand|poly_expt|poly_gcd|poly_grobner|poly_grobner_equal|'
             r'poly_grobner_member|poly_grobner_subsetp|poly_ideal_intersection|poly_ideal_polysaturation|poly_ideal_polysaturation1|poly_ideal_saturation|poly_ideal_saturation1|poly_lcm|'
             r'poly_minimization|polymod|poly_multiply|polynome2ele|polynomialp|poly_normal_form|poly_normalize|poly_normalize_list|'
             r'poly_polysaturation_extension|poly_primitive_part|poly_pseudo_divide|poly_reduced_grobner|poly_reduction|poly_saturation_extension|poly_s_polynomial|poly_subtract|'
             r'polytocompanion|potential|power_mod|powers|powerseries|powerset|prev_prime|primep|'
             r'print|printf|print_graph|printpois|printprops|prodrac|product|properties|'
             r'propvars|psi|ptriangularize|pui|pui2comp|pui2ele|pui2polynome|pui_direct|'
             r'puireduc|put|qput|qrange|quad_qag|quad_qagi|quad_qags|quad_qawc|'
             r'quad_qawf|quad_qawo|quad_qaws|quantile|quantile_bernoulli|quantile_beta|quantile_binomial|quantile_cauchy|'
             r'quantile_chi2|quantile_continuous_uniform|quantile_discrete_uniform|quantile_exp|quantile_f|quantile_gamma|quantile_geometric|quantile_gumbel|'
             r'quantile_hypergeometric|quantile_laplace|quantile_logistic|quantile_lognormal|quantile_negative_binomial|quantile_normal|quantile_pareto|quantile_poisson|'
             r'quantile_rayleigh|quantile_student_t|quantile_weibull|quartile_skewness|quit|qunit|quotient|radcan|'
             r'radius|random|random_bernoulli|random_beta|random_binomial|random_cauchy|random_chi2|random_continuous_uniform|'
             r'random_digraph|random_discrete_uniform|random_exp|random_f|random_gamma|random_geometric|random_graph|random_graph1|'
             r'random_gumbel|random_hypergeometric|random_laplace|random_logistic|random_lognormal|random_negative_binomial|random_network|random_normal|'
             r'random_pareto|random_permutation|random_poisson|random_rayleigh|random_regular_graph|random_student_t|random_tournament|random_tree|'
             r'random_weibull|range|rank|rat|ratcoef|ratdenom|ratdiff|ratdisrep|'
             r'ratexpand|rational|rationalize|ratnumer|ratnump|ratp|ratsimp|ratsubst|'
             r'ratvars|ratweight|read|read_hashed_array|readline|read_lisp_array|read_list|read_matrix|'
             r'read_maxima_array|read_nested_list|readonly|read_xpm|realpart|realroots|rearray|rectform|'
             r'recttopolar|rediff|reduce_consts|reduce_order|region_boundaries|rem|remainder|remarray|'
             r'rembox|remcomps|remcon|remcoord|remfun|remfunction|remlet|remove|'
             r'remove_edge|remove_vertex|rempart|remrule|remsym|remvalue|rename|reset|'
             r'residue|resolvante|resolvante_alternee1|resolvante_bipartite|resolvante_diedrale|resolvante_klein|resolvante_klein3|resolvante_produit_sym|'
             r'resolvante_unitaire|resolvante_vierer|rest|resultant|return|reveal|reverse|revert|'
             r'revert2|rgb2level|rhs|ricci|riemann|rinvariant|risch|rk|'
             r'rncombine|romberg|room|rootscontract|row|rowop|rowswap|rreduce|'
             r'run_testsuite|save|scalarp|scaled_bessel_i|scaled_bessel_i0|scaled_bessel_i1|scalefactors|scanmap|'
             r'scatterplot|schur2comp|sconcat|scopy|scsimp|scurvature|sdowncase|sec|'
             r'sech|second|sequal|sequalignore|setdifference|set_edge_weight|setelmx|setequalp|'
             r'setify|setp|set_partitions|set_plot_option|set_random_state|setunits|setup_autoload|set_up_dot_simplifications|'
             r'set_vertex_label|seventh|sexplode|sf|shortest_path|show|showcomps|showratvars|'
             r'sign|signum|similaritytransform|simple_linear_regression|simplify_sum|simplode|simpmetderiv|simtran|'
             r'sin|sinh|sinsert|sinvertcase|sixth|skewness|skewness_bernoulli|skewness_beta|'
             r'skewness_binomial|skewness_chi2|skewness_continuous_uniform|skewness_discrete_uniform|skewness_exp|skewness_f|skewness_gamma|skewness_geometric|'
             r'skewness_gumbel|skewness_hypergeometric|skewness_laplace|skewness_logistic|skewness_lognormal|skewness_negative_binomial|skewness_normal|skewness_pareto|'
             r'skewness_poisson|skewness_rayleigh|skewness_student_t|skewness_weibull|slength|smake|smismatch|solve|'
             r'solve_rec|solve_rec_rat|some|somrac|sort|sparse6_decode|sparse6_encode|sparse6_export|'
             r'sparse6_import|specint|spherical_bessel_j|spherical_bessel_y|spherical_hankel1|spherical_hankel2|spherical_harmonic|splice|'
             r'split|sposition|sprint|sqfr|sqrt|sqrtdenest|sremove|sremovefirst|'
             r'sreverse|ssearch|ssort|sstatus|ssubst|ssubstfirst|staircase|status|'
             r'std|std1|std_bernoulli|std_beta|std_binomial|std_chi2|std_continuous_uniform|std_discrete_uniform|'
             r'std_exp|std_f|std_gamma|std_geometric|std_gumbel|std_hypergeometric|std_laplace|std_logistic|'
             r'std_lognormal|std_negative_binomial|std_normal|std_pareto|std_poisson|std_rayleigh|std_student_t|std_weibull|'
             r'stirling|stirling1|stirling2|strim|striml|strimr|string|stringout|'
             r'stringp|strong_components|sublis|sublist|sublist_indices|submatrix|subsample|subset|'
             r'subsetp|subst|substinpart|substpart|substring|subvar|subvarp|sum|'
             r'sumcontract|summand_to_rec|supcase|supcontext|symbolp|symmdifference|symmetricp|system|'
             r'take_channel|take_inference|tan|tanh|taylor|taylorinfo|taylorp|taylor_simplifier|'
             r'taytorat|tcl_output|tcontract|tellrat|tellsimp|tellsimpafter|tentex|tenth|'
             r'test_mean|test_means_difference|test_normality|test_rank_sum|test_sign|test_signed_rank|test_variance|test_variance_ratio|'
             r'tex|texput|%th|third|throw|time|timedate|timer|'
             r'timer_info|tldefint|tlimit|todd_coxeter|toeplitz|tokens|to_lisp|topological_sort|'
             r'totaldisrep|totalfourier|totient|tpartpol|trace|tracematrix|trace_options|translate|'
             r'translate_file|transpose|tree_reduce|treillis|treinat|triangularize|trigexpand|trigrat|'
             r'trigreduce|trigsimp|trunc|tr_warnings_get|ueivects|uforget|ultraspherical|underlying_graph|'
             r'undiff|union|unique|uniteigenvectors|unit_step|unitvector|unknown|unorder|'
             r'unsum|untellrat|untimer|untrace|uppercasep|uricci|uriemann|uvect|'
             r'vandermonde_matrix|var|var1|var_bernoulli|var_beta|var_binomial|var_chi2|var_continuous_uniform|'
             r'var_discrete_uniform|var_exp|var_f|var_gamma|var_geometric|var_gumbel|var_hypergeometric|var_laplace|'
             r'var_logistic|var_lognormal|var_negative_binomial|var_normal|var_pareto|var_poisson|var_rayleigh|var_student_t|'
             r'var_weibull|vectorpotential|vectorsimp|verbify|vers|vertex_coloring|vertex_degree|vertex_distance|'
             r'vertex_eccentricity|vertex_in_degree|vertex_out_degree|vertices|vertices_to_cycle|vertices_to_path|weyl|wheel_graph|'
             r'with_stdout|write_data|writefile|wronskian|xgraph_curves|xreduce|xthru|Zeilberger|'
             r'zeroequiv|zerofor|zeromatrix|zeromatrixp|zeta|zlange)\b',
             Keyword.Type),
            # Now the asy-type-name which are not asy-function-name
            # except yours !
            # Perhaps useless
            (r'(_|__|%|%%|absboxchar|activecontexts|additive|algebraic|algepsilon|'
             r'algexact|aliases|all_dotsimp_denoms|allbut|allsym|arrays|askexp|assume_pos|assume_pos_pred|'
             r'assumescalar|atomgrad|backsubst|berlefact|besselexpand|bftorat|bftrunc|boxchar|breakup|'
             r'cauchysum|cflength|cframe_flag|cnonmet_flag|context|contexts|cosnpiflag|ctaypov|ctaypt|'
             r'ctayswitch|ctayvar|ct_coords|ctorsion_flag|ctrgsimp|current_let_rule_package|debugmode|default_let_rule_package|demoivre|'
             r'dependencies|derivabbrev|derivsubst|detout|diagmetric|dim|dispflag|display2d|display_format_internal|'
             r'doallmxops|domain|domxexpt|domxmxops|domxnctimes|dontfactor|doscmxops|doscmxplus|dot0nscsimp|'
             r'dot0simp|dot1simp|dotassoc|dotconstrules|dotdistrib|dotexptsimp|dotident|dotscrules|draw_graph_program|'
             r'%edispflag|%emode|%enumer|epsilon_lp|erfflag|error|error_size|error_syms|%e_to_numlog|'
             r'evflag|evfun|expandwrt_denom|expon|exponentialize|expop|exptdispflag|exptisolate|exptsubst|'
             r'facexpand|factlim|factorflag|file_output_append|file_search_demo|file_search_lisp|file_search_maxima|find_root_abs|find_root_error|'
             r'find_root_rel|flipflag|float2bf|fortindent|fortspaces|fpprec|fpprintprec|functions|gammalim|'
             r'gdet|genindex|gensumnum|GGFCFMAX|GGFINFINITY|globalsolve|gradefs|grind|halfangles|'
             r'%iargs|ibase|icounter|idummyx|ieqnprint|iframe_bracket_form|igeowedge_flag|imetric|inchar|'
             r'infeval|inflag|infolists|in_netmath|integrate_use_rootsof|integration_constant|integration_constant_counter|intfaclim|isolate_wrt_times|'
             r'keepfloat|labels|letrat|let_rule_packages|lhospitallim|limsubst|linechar|linel|linenum|'
             r'linsolve_params|linsolvewarn|lispdisp|listarith|listconstvars|listdummyvars|lmxchar|loadprint|logabs|'
             r'logarc|logconcoeffp|logexpand|lognegint|lognumer|logsimp|m1pbranch|macroexpansion|maperror|'
             r'mapprint|matrix_element_add|matrix_element_mult|matrix_element_transpose|maxapplydepth|maxapplyheight|maxima_tempdir|maxima_userdir|maxnegex|'
             r'maxposex|maxpsifracdenom|maxpsifracnum|maxpsinegint|maxpsiposint|maxtayorder|method|mode_check_errorp|mode_checkp|'
             r'mode_check_warnp|modulus|multiplicities|myoptions|negdistrib|negsumdispflag|newtonepsilon|newtonmaxiter|niceindicespref|'
             r'nolabels|nonegative_lp|noundisp|obase|opproperties|opsubst|optimprefix|optionset|outchar|'
             r'packagefile|partswitch|pfeformat|%piargs|piece|plot_options|poislim|poly_coefficient_ring|poly_elimination_order|'
             r'poly_grobner_algorithm|poly_grobner_debug|poly_monomial_order|poly_primary_elimination_order|poly_return_term_list|poly_secondary_elimination_order|poly_top_reduction_only|powerdisp|prederror|'
             r'primep_number_of_tests|product_use_gamma|programmode|prompt|psexpand|radexpand|radsubstflag|random_beta_algorithm|random_binomial_algorithm|'
             r'random_chi2_algorithm|random_exp_algorithm|random_f_algorithm|random_gamma_algorithm|random_geometric_algorithm|random_hypergeometric_algorithm|random_negative_binomial_algorithm|random_normal_algorithm|random_poisson_algorithm|'
             r'random_student_t_algorithm|ratalgdenom|ratchristof|ratdenomdivide|rateinstein|ratepsilon|ratexpand|ratfac|ratmx|'
             r'ratprint|ratriemann|ratsimpexpons|ratvars|ratweights|ratweyl|ratwtlvl|realonly|refcheck|'
             r'rmxchar|%rnum_list|rombergabs|rombergit|rombergmin|rombergtol|rootsconmode|rootsepsilon|savedef|'
             r'savefactors|scalarmatrixp|setcheck|setcheckbreak|setval|showtime|simplify_products|simpsum|sinnpiflag|'
             r'solvedecomposes|solveexplicit|solvefactors|solve_inconsistent_error|solvenullwarn|solveradcan|solvetrigwarn|sparse|sqrtdispflag|'
             r'stardisp|stats_numer|stringdisp|sublis_apply_lambda|sumexpand|sumsplitfact|taylordepth|taylor_logexpand|taylor_order_coefficients|'
             r'taylor_truncate_polynomials|tensorkill|testsuite_files|timer_devalue|tlimswitch|transcompile|transrun|tr_array_as_ref|tr_bound_function_applyp|'
             r'tr_file_tty_messagesp|tr_float_can_branch_complex|tr_function_call_default|trigexpandplus|trigexpandtimes|triginverses|trigsign|tr_numer|tr_optimize_max_loop|'
             r'tr_semicompile|tr_state_vars|tr_warn_bad_function_calls|tr_warn_fexpr|tr_warn_meval|tr_warn_mode|tr_warn_undeclared|tr_warn_undefined_variable|tr_windy|'
             r'ttyoff|use_fast_arrays|values|vect_cross|verbose|zerobern|zeta%pi|'
             r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type),
            ('[a-zA-Z_]\w*:(?!:)', Name.Label),
            ('[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r"'", String, '#pop'),
            (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'\n', String),
            (r"[^\\'\n]+", String),  # all other characters
            (r'\\\n', String),
            (r'\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
    }

    def get_tokens_unprocessed(self, text):
        # Promote names found in the Asymptote builtin tables; see the
        # class-level NOTE(review) about the Maxima/Asymptote mismatch.
        from pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in ASYFUNCNAME:
                token = Name.Function
            elif token is Name and value in ASYVARNAME:
                token = Name.Variable
            yield index, token, value
class CylcLexer(RegexLexer):
    """Pygments lexer for the Cylc language.

    Cylc ``suite.rc`` / ``flow.cylc`` files mix an INI-like
    section/setting syntax with dependency-graph strings and optional
    EmPy / Jinja2 preprocessing, all handled by the states below.
    """

    # Pygments tokens for flow.cylc elements which have no direct translation.
    HEADING_TOKEN = Name.Tag
    SETTING_TOKEN = Name.Variable
    GRAPH_TASK_TOKEN = Keyword.Declaration
    GRAPH_XTRIGGER_TOKEN = Keyword.Type
    PARAMETERISED_TASK_TOKEN = Name.Builtin
    EXTERNAL_SUITE_TOKEN = Name.Builtin.Pseudo
    INTERCYCLE_OFFSET_TOKEN = Name.Builtin

    # %-format template for EmPy blocks; 'open'/'close' are the (escaped)
    # delimiter characters substituted when the 'empy' state is built.
    EMPY_BLOCK_REGEX = (
        r'@\%(open)s('  # open empy block
        r'[^\%(open)s\%(close)s]+|'  # either not a close character
        r'\%(open)s([^\%(close)s]+)?\%(close)s)+'  # or permit 1 level nesting
        r'\%(close)s')  # close empy block

    # Pygments values.
    name = 'Cylc'
    aliases = ['cylc', 'suiterc']
    filenames = ['suite.rc', 'flow.cylc']
    # mimetypes = ['text/x-ini', 'text/inf']

    # Patterns, rules and tokens.
    tokens = {
        'root': [
            # Jinja2 opening braces:  {{  {%  {#
            include('preproc'),

            # Cylc comments:  # ...
            include('comment'),

            # Leading whitespace.
            (r'^[\s\t]+', Text),

            # Cylc headings:  [<heading>]
            (r'([\[]+)', HEADING_TOKEN, 'heading'),

            # Multi-line graph sections:  graph = """ ...
            # NOTE(review): '[\s+]' is a character class (whitespace OR a
            # literal '+'); '(\s+)?' was probably intended — confirm.
            (r'(graph)(\s+)?(=)([\s+])?(\"\"\")',
             bygroups(SETTING_TOKEN, Text, Operator, Text,
                      String.Double), 'multiline-graph'),

            # Inline graph sections:  graph = ...
            # NOTE(review): the whitespace group is tokenised as String here
            # but as Text in the multi-line rule above — confirm intended.
            (r'(graph)(\s+)?(=)', bygroups(SETTING_TOKEN, String,
                                           Operator), 'inline-graph'),

            # Multi-line settings:  key = """ ...
            # NOTE(review): same '[\s+]' character class as above.
            (r'([^=\n]+)(=)([\s+])?(\"\"\")',
             bygroups(SETTING_TOKEN, Operator, Text,
                      String.Double), 'multiline-setting'),

            # Inline settings:  key = ...
            (r'([^=\n]+)(=)', bygroups(SETTING_TOKEN, Operator), 'setting'),

            # Include files
            (r'(%include)( )(.*)', bygroups(Operator, Text, String)),

            # Arbitrary whitespace
            (r'\s', Text)
        ],
        'heading': [
            (r'[\]]+', HEADING_TOKEN, '#pop'),
            include('preproc'),
            include('parameterisation'),
            (r'(\\\n|.)', HEADING_TOKEN),  # Allow line continuation chars.
        ],

        # Cylc comments.
        'comment': [
            # Allow whitespace so this will work for comments following
            # headings.
            # NOTE: Does not highlight `${#`.
            (r'(\s+)?(?<!\$\{)(#.*)', bygroups(Text, Comment.Single))
        ],

        # The value in a key = value pair.
        'setting': [
            include('comment'),
            include('preproc'),
            (r'\\\n', String),
            (r'.', String),
        ],

        # The value in a key = """value""" pair.
        'multiline-setting': [(r'\"\"\"', String.Double, '#pop'),
                              include('comment'),
                              include('preproc'), (r'(\n|.)', String.Double)],

        # Graph strings:  foo => bar & baz
        'graph': [
            include('preproc'),
            include('comment'),
            include('inter-suite-trigger'),
            include('parameterisation'), (r'@\w+', GRAPH_XTRIGGER_TOKEN),
            (r'\w+', GRAPH_TASK_TOKEN), (r'\!\w+', Other), (r'\s', Text),
            (r'=>', Operator), (r'[\&\|]', Operator), (r'[\(\)]', Punctuation),
            (r'\[', Text, 'intercycle-offset'), (r'.', Comment)
        ],
        # Triggering off a task in another suite:  <suite::task>
        'inter-suite-trigger': [
            (
                r'(\<)'
                r'([^\>]+)'  # foreign suite
                r'(::)'
                r'([^\>]+)'  # foreign task
                r'(\>)',
                bygroups(Text, EXTERNAL_SUITE_TOKEN, Text,
                         PARAMETERISED_TASK_TOKEN, Text)),
        ],

        # Parameterised syntax:  <foo=1>
        'parameterisation': [
            (
                r'(\<)'  # Opening greater-than bracket.
                r'(\s?\w+\s?'  # Parameter name (permit whitespace).
                r'(?:[+-=]\s?\w+)?'  # [+-=] for selecting parameters.
                r'\s?'  # Permit whitespace.
                r'(?:'  # BEGIN optional extra parameter groups...
                r'(?:\s?,\s?\w+\s?'  # Comma separated parameters.
                r'(?:[+-=]\s?\w+)?'  # [+-=] for selecting parameters.
                r'\s?)'  # Permit whitespace.
                r'+)?'  # ...END optional extra parameter groups.
                r')(\>)',  # Closing lesser-than bracket.
                bygroups(Text, PARAMETERISED_TASK_TOKEN, Text)),
            # Anything else between angle brackets is an error.
            (r'(\<)(.*)(\>)', bygroups(Text, Error, Text))
        ],

        # Task inter-cycle offset for graphing:  foo[-P1DT1M]
        # Legal formats: POINT, POINT[+-]OFFSET, OFFSET
        'intercycle-offset': [
            include('cycle-point'),
            include('integer-duration'),  # matches a subset of iso8601
            include('iso8601-duration'),
            (r'[\^\$]', INTERCYCLE_OFFSET_TOKEN),
            (r'\]', Text, '#pop')
        ],

        # generic Cylc cycle point:  2000
        'cycle-point': [
            # validating the cycle point as a regex [effectively] requires
            # knowledge of the cycling mode so is not *really* possible,
            # also validating iso8601 datetimes is horrible.
            (r'[\+\-]?[\d\:\-T]+(Z)?\b', INTERCYCLE_OFFSET_TOKEN)
        ],

        # An integer duration:  +P1
        'integer-duration':
        [(r'([+-])?P\d+(?![\w-])', INTERCYCLE_OFFSET_TOKEN)],

        # An ISO8601 duration:  +P1DT1H
        'iso8601-duration': [
            # Basic format.
            (
                r'([+-])?P'
                r'(?![\]\s])'  # Require something to follow.
                r'('

                # Weekly format (ISO8601-1:4.4.4.5):
                r'\d{1,2}W'
                r'|'  # OR

                # Extended Format (ISO8601-1:4.4.4.4):
                r'('
                r'\d{8}T\d{6}'
                r'|'
                r'\d{4}\-\d{2}\-\d{2}T\d{2}\:\d{2}\:\d{2}'
                r')'
                r'|'  # OR

                # Basic format (ISO8601-1:4.4.4.4):
                # ..Year
                r'(\d{1,4}Y)?'
                # ..Month
                r'(\d{1,2}M)?'
                # ..Day
                r'(\d{1,2}D)?'
                r'(T'
                # ..Hours.
                r'(\d{1,2}H)?'
                # ..Minutes.
                r'(\d{1,2}M)?'
                # ..Seconds.
                r'(\d{1,2}S)?'
                r')?'
                r')',
                INTERCYCLE_OFFSET_TOKEN),
        ],

        # Wrapper for multi-line graph strings.
        'multiline-graph': [
            (r'\"\"\"', String.Double, '#pop'),
            include('graph'),
        ],

        # Wrapper for inline graph strings.
        'inline-graph': [(r'\n', Text, '#pop'),
                         include('graph')],
        'empy': [
            (r'#![Ee]mpy', Comment.Hashbang),  # #!empy
            (r'@@', Text),  # @@
            # @(...)
            (EMPY_BLOCK_REGEX % {
                'open': '(',
                'close': ')'
            }, Comment.Preproc),
            # @{...}
            (EMPY_BLOCK_REGEX % {
                'open': '{',
                'close': '}'
            }, Comment.Preproc),
            # @[...]
            (EMPY_BLOCK_REGEX % {
                'open': '[',
                'close': ']'
            }, Comment.Preproc),
            (r'@empy\.[\w]+[^\n]+', Comment.Preproc),  # @empy...
            # NOTE(review): Comment.Multi is not a standard Pygments token
            # (standard is Comment.Multiline); Pygments auto-creates it and
            # styles it as plain Comment — confirm this is intended.
            (r'(\s+)?@#.*', Comment.Multi),  # @# ...
            (r'@[\w.]+', Comment.Preproc)  # @...
        ],
        'jinja2': [
            (r'#![Jj]inja2', Comment.Hashbang),  # #!jinja2
            (r'\{\{((.|\n)+?)(?=\}\})\}\}', Comment.Preproc),  # {{...}}
            (r'\{\%((.|\n)+?)(?=\%\})\%\}', Comment.Preproc),  # {%...%}
            (r'\{\#((.|\n)+?)(?=\#\})\#\}', Comment.Multi),  # {#...#}
        ],
        # Entry point for both preprocessor languages.
        'preproc': [include('empy'), include('jinja2')]
    }
Beispiel #22
0
class PEGLexer(RegexLexer):
    """Pygments Lexer for PEG grammar (.gram) files

    This lexer strips the following elements from the grammar:

        - Meta-tags
        - Variable assignments
        - Actions
        - Lookaheads
        - Rule types
        - Rule options
        - Rules named `invalid_*` or `incorrect_*`
    """

    name = "PEG"
    aliases = ["peg"]
    filenames = ["*.gram"]
    _name = r"([^\W\d]\w*)"
    _text_ws = r"(\s*)"

    tokens = {
        "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
        # Lookaheads (&...) are stripped via bygroups(None).
        "lookaheads": [
            (r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
            (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
            (r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
            (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
        ],
        "metas": [
            (r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
            (r"^(@.*)$", bygroups(None)),
        ],
        "actions": [(r"{(.|\n)+?}", bygroups(None)),],
        "strings": [
            (r"'\w+?'", Keyword),
            (r'"\w+?"', Keyword),
            (r"'\W+?'", Text),
            (r'"\W+?"', Text),
        ],
        # FIX: resolved an unmerged git conflict (<<<<<<< HEAD / >>>>>>> 3.9)
        # that made this module a SyntaxError.  The HEAD variant is kept: it
        # additionally swallows an optional `[type]` annotation before `=`,
        # matching the `(\[.*\])?` group in the rule-name pattern below.
        "variables": [(_name + _text_ws + r"(\[.*\])?" + _text_ws + "(=)", bygroups(None, None, None, None, None),),],
        "invalids": [
            (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
            (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
            (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),),
        ],
        "root": [
            include("invalids"),
            include("ws"),
            include("lookaheads"),
            include("metas"),
            include("actions"),
            include("strings"),
            include("variables"),
            (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,),
            # Rule header:  name [type] (options) :
            (
                r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*" + r"(\(.+\))?" + r"\s*(:)",
                bygroups(Name.Function, None, None, Punctuation),
            ),
            (_name, Name.Function),
            (r"[\||\.|\+|\*|\?]", Operator),
            (r"{|}|\(|\)|\[|\]", Punctuation),
            (r".", Text),
        ],
    }
Beispiel #23
0
class ComponentPascalLexer(RegexLexer):
    """
    For `Component Pascal <http://www.oberon.ch/pdf/CP-Lang.pdf>`_ source code.

    .. versionadded:: 2.1
    """
    name = 'Component Pascal'
    aliases = ['componentpascal', 'cp']
    filenames = ['*.cp', '*.cps']
    mimetypes = ['text/x-component-pascal']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            include('punctuation'),
            include('numliterals'),
            include('strings'),
            include('operators'),
            include('builtins'),
            include('identifiers'),
        ],
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'comments': [
            (r'\(\*([^$].*?)\*\)', Comment.Multiline),
            # TODO: nested comments (* (* ... *) ... (* ... *) *) not supported!
        ],
        'punctuation': [
            (r'[()\[\]{},.:;|]', Punctuation),
        ],
        'numliterals': [
            (r'[0-9A-F]+X\b', Number.Hex),  # char code
            (r'[0-9A-F]+[HL]\b', Number.Hex),  # hexadecimal number
            (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float),  # real number
            (r'[0-9]+\.[0-9]+', Number.Float),  # real number
            (r'[0-9]+', Number.Integer),  # decimal whole number
        ],
        'strings': [
            (r"'[^\n']*'", String),  # single quoted string
            (r'"[^\n"]*"', String),  # double quoted string
        ],
        'operators': [
            # Arithmetic Operators
            (r'[+-]', Operator),
            (r'[*/]', Operator),
            # Relational Operators
            (r'[=#<>]', Operator),
            # Dereferencing Operator
            (r'\^', Operator),
            # Logical AND Operator
            (r'&', Operator),
            # Logical NOT Operator
            (r'~', Operator),
            # Assignment Symbol
            (r':=', Operator),
            # Range Constructor
            (r'\.\.', Operator),
            (r'\$', Operator),
        ],
        'identifiers': [
            (r'([a-zA-Z_$][\w$]*)', Name),
        ],
        'builtins': [
            (words(('ANYPTR', 'ANYREC', 'BOOLEAN', 'BYTE', 'CHAR', 'INTEGER',
                    'LONGINT', 'REAL', 'SET', 'SHORTCHAR', 'SHORTINT',
                    'SHORTREAL'),
                   suffix=r'\b'), Keyword.Type),
            (words(
                ('ABS', 'ABSTRACT', 'ARRAY', 'ASH', 'ASSERT', 'BEGIN', 'BITS',
                 'BY', 'CAP', 'CASE', 'CHR', 'CLOSE', 'CONST', 'DEC', 'DIV',
                 'DO', 'ELSE', 'ELSIF', 'EMPTY', 'END', 'ENTIER', 'EXCL',
                 'EXIT', 'EXTENSIBLE', 'FOR', 'HALT', 'IF', 'IMPORT', 'IN',
                 'INC', 'INCL', 'IS', 'LEN', 'LIMITED', 'LONG', 'LOOP', 'MAX',
                 'MIN', 'MOD', 'MODULE', 'NEW', 'ODD', 'OF', 'OR', 'ORD',
                 'OUT', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
                 'SHORT', 'SHORTCHAR', 'SHORTINT', 'SIZE', 'THEN', 'TYPE',
                 'TO', 'UNTIL', 'VAR', 'WHILE', 'WITH'),
                suffix=r'\b'), Keyword.Reserved),
            (r'(TRUE|FALSE|NIL|INF)\b', Keyword.Constant),
        ]
    }

    def analyse_text(text):
        """The only other lexer using .cp is the C++ one, so we check if for
        a few common Pascal keywords here. Those are unfortunately quite
        common across various business languages as well."""
        result = 0
        if 'BEGIN' in text:
            result += 0.01
        if 'END' in text:
            result += 0.01
        if 'PROCEDURE' in text:
            result += 0.01
        # FIX: this fourth check duplicated the 'END' test above, silently
        # double-counting END.  MODULE is checked instead, as in upstream
        # Pygments — it is mandatory in every Component Pascal unit.
        if 'MODULE' in text:
            result += 0.01

        return result
Beispiel #24
0
class VhdlLexer(RegexLexer):
    """
    For VHDL source code.

    .. versionadded:: 1.5
    """
    name = 'vhdl'
    aliases = ['vhdl']
    filenames = ['*.vhdl', '*.vhd']
    mimetypes = ['text/x-vhdl']
    # VHDL is case-insensitive, hence re.IGNORECASE on every pattern.
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'--.*?$', Comment.Single),
            # Std_logic character literals:  '0', 'Z', ...
            (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            # Attribute names:  signal'event
            (r"'[a-z_]\w*", Name.Attribute),
            (r'[()\[\],.;\']', Punctuation),
            (r'"[^\n\\"]*"', String),
            (r'(library)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
            # `use lib.pkg.all` — highlight the trailing `all` as a keyword.
            (r'(use)(\s+)([a-z_][\w.]*\.)(all)',
             bygroups(Keyword, Text, Name.Namespace, Keyword)),
            (r'(use)(\s+)([a-z_][\w.]*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(std|ieee)(\.[a-z_]\w*)',
             bygroups(Name.Namespace, Name.Namespace)),
            (words(('std', 'ieee', 'work'), suffix=r'\b'), Name.Namespace),
            (r'(entity|component)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Class)),
            # `architecture X of Y is` headers.
            (r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)'
             r'(of)(\s+)([a-z_]\w*)(\s+)(is)',
             bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
                      Name.Class, Text, Keyword)),
            # Labelled process/for statements:  label: process
            (r'([a-z_]\w*)(:)(\s+)(process|for)',
             bygroups(Name.Class, Operator, Text, Keyword)),
            (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),
            include('types'),
            include('keywords'),
            include('numbers'),
            (r'[a-z_]\w*', Name),
        ],
        # Consumes the name (if any) after `end`, up to the closing ';'.
        'endblock': [
            include('keywords'),
            (r'[a-z_]\w*', Name.Class),
            (r'(\s+)', Text),
            (r';', Punctuation, '#pop'),
        ],
        'types': [
            (words(('boolean', 'bit', 'character', 'severity_level', 'integer',
                    'time', 'delay_length', 'natural', 'positive', 'string',
                    'bit_vector', 'file_open_kind', 'file_open_status',
                    'std_ulogic', 'std_ulogic_vector', 'std_logic',
                    'std_logic_vector', 'signed', 'unsigned'),
                   suffix=r'\b'), Keyword.Type),
        ],
        'keywords': [
            (words(
                ('abs', 'access', 'after', 'alias', 'all', 'and',
                 'architecture', 'array', 'assert', 'attribute', 'begin',
                 'block', 'body', 'buffer', 'bus', 'case', 'component',
                 'configuration', 'constant', 'disconnect', 'downto', 'else',
                 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function',
                 'generate', 'generic', 'group', 'guarded', 'if', 'impure',
                 'in', 'inertial', 'inout', 'is', 'label', 'library',
                 'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new',
                 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or',
                 'others', 'out', 'package', 'port', 'postponed', 'procedure',
                 'process', 'pure', 'range', 'record', 'register', 'reject',
                 'rem', 'return', 'rol', 'ror', 'select', 'severity', 'signal',
                 'shared', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to',
                 'transport', 'type', 'units', 'until', 'use', 'variable',
                 'wait', 'when', 'while', 'with', 'xnor', 'xor'),
                suffix=r'\b'), Keyword),
        ],
        'numbers': [
            # Based literals:  16#ff#
            (r'\d{1,2}#[0-9a-f_]+#?', Number.Integer),
            (r'\d+', Number.Integer),
            (r'(\d+\.\d*|\.\d+|\d+)E[+-]?\d+', Number.Float),
            # Bit-string literals:  X"..." O"..." B"..."
            (r'X"[0-9a-f_]+"', Number.Hex),
            (r'O"[0-7_]+"', Number.Oct),
            (r'B"[01_]+"', Number.Bin),
        ],
    }
Beispiel #25
0
class FSharpLexer(RegexLexer):
    """
    For the F# language (version 3.0).

    AAAAACK Strings
    http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/spec.html#_Toc335818775

    .. versionadded:: 1.5
    """

    name = 'FSharp'
    aliases = ['fsharp']
    filenames = ['*.fs', '*.fsi']
    mimetypes = ['text/x-fsharp']

    keywords = [
        'abstract',
        'as',
        'assert',
        'base',
        'begin',
        'class',
        'default',
        'delegate',
        'do!',
        'do',
        'done',
        'downcast',
        'downto',
        'elif',
        'else',
        'end',
        'exception',
        'extern',
        'false',
        'finally',
        'for',
        'function',
        'fun',
        'global',
        'if',
        'inherit',
        'inline',
        'interface',
        'internal',
        'in',
        'lazy',
        'let!',
        'let',
        'match',
        'member',
        'module',
        'mutable',
        'namespace',
        'new',
        'null',
        'of',
        'open',
        'override',
        'private',
        'public',
        'rec',
        'return!',
        'return',
        'select',
        'static',
        'struct',
        'then',
        'to',
        'true',
        'try',
        'type',
        'upcast',
        'use!',
        'use',
        'val',
        'void',
        'when',
        'while',
        'with',
        'yield!',
        'yield',
    ]
    # Reserved words; cannot hurt to color them as keywords too.
    keywords += [
        'atomic',
        'break',
        'checked',
        'component',
        'const',
        'constraint',
        'constructor',
        'continue',
        'eager',
        'event',
        'external',
        'fixed',
        'functor',
        'include',
        'method',
        'mixin',
        'object',
        'parallel',
        'process',
        'protected',
        'pure',
        'sealed',
        'tailcall',
        'trait',
        'virtual',
        'volatile',
    ]
    # Regex fragments for symbolic operators.  FIX: entries containing
    # backslashes are now raw strings — plain '\(' etc. are invalid string
    # escape sequences (deprecated since Python 3.6, a SyntaxWarning from
    # 3.12).  The resulting byte values are identical.
    keyopts = [
        '!=',
        '#',
        '&&',
        '&',
        r'\(',
        r'\)',
        r'\*',
        r'\+',
        ',',
        r'-\.',
        '->',
        '-',
        r'\.\.',
        r'\.',
        '::',
        ':=',
        ':>',
        ':',
        ';;',
        ';',
        '<-',
        r'<\]',
        '<',
        r'>\]',
        '>',
        r'\?\?',
        r'\?',
        r'\[<',
        r'\[\|',
        r'\[',
        r'\]',
        '_',
        '`',
        r'\{',
        r'\|\]',
        r'\|',
        r'\}',
        '~',
        '<@@',
        '<@',
        '=',
        '@>',
        '@@>',
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'or', 'not']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = [
        'sbyte',
        'byte',
        'char',
        'nativeint',
        'unativeint',
        'float32',
        'single',
        'float',
        'double',
        'int8',
        'uint8',
        'int16',
        'uint16',
        'int32',
        'uint32',
        'int64',
        'uint64',
        'decimal',
        'unit',
        'bool',
        'string',
        'list',
        'exn',
        'obj',
        'enum',
    ]

    # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or
    # http://fsharp.org/about/files/spec.pdf for reference.  Good luck.

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbrafv]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\U[0-9a-fA-F]{8}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'///.*?\n', String.Doc),
            (r'//.*?\n', Comment.Single),
            (r'\(\*(?!\))', Comment, 'comment'),
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'\b(open|module)(\s+)([\w.]+)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'\b(let!?)(\s+)(\w+)', bygroups(Keyword, Text, Name.Variable)),
            (r'\b(type)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class)),
            (r'\b(member|override)(\s+)(\w+)(\.)(\w+)',
             bygroups(Keyword, Text, Name, Punctuation, Name.Function)),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n',
             Comment.Preproc),
            (r"[^\W\d][\w']*", Name),
            (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer),
            (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex),
            (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct),
            (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin),
            # FIX: the fractional-part group previously used an unescaped
            # '.', which matched ANY character; '\.' is the intended dot.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)[fFmM]?', Number.Float),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'@?"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Dotted namespace/module paths:  A.B.c
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            # e.g. dictionary index access
            default('#pop'),
        ],
        'comment': [
            (r'[^(*)@"]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            # comments cannot be closed within strings in comments
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'[(*)@]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String),
            include('escape-sequence'),
            (r'\\\n', String),
            (r'\n', String),  # newlines are allowed in any string
            (r'"B?', String, '#pop'),
        ],
        # Verbatim strings:  @"..." ("" escapes a quote)
        'lstring': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'""', String),
            (r'"B?', String, '#pop'),
        ],
        # Triple-quoted strings:  """..."""
        'tqs': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'"""B?', String, '#pop'),
            (r'"', String),
        ],
    }
Beispiel #26
0
class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Token callback for sourcecode/code-block directives: emit the
        directive header tokens, then highlight the indented body with the
        named language's lexer when one is found (and handlecodeblocks is
        on), otherwise emit the body as a plain String."""
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        # Strip the common leading indentation from each line, lex the
        # remainder, and re-insert the indentation as Text via do_insertions
        # so token offsets stay correct.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        # Inside ``...`` literals; the closer must be followed by
        # end-of-string punctuation/whitespace (end_string_suffix).
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        # handlecodeblocks: whether _handle_sourcecode delegates directive
        # bodies to a sub-lexer (default True).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        # Leading ".. " (but not "...") looks like an RST comment/directive.
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        # Returns None (no score) when the heading pattern is absent.
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
Beispiel #27
0
class MapFileLexer(RegexLexer):
    """Lexer for MapServer-style ``.map`` configuration files.

    NOTE(review): relies on module-level ``keywords`` and ``builtins``
    regex/word-list objects defined elsewhere in this file — confirm they
    are in scope before moving this class.
    """
    name = 'mapfile'
    aliases = ['mapfile']
    filenames = ['*.map']

    # Mapfile directives are case-insensitive.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            # Bracketed substitutions like [item] — highlighted as a whole.
            (r'\[.*?\]', Name.Other),
            (r'[{}\[\]();,-.]+', Punctuation),
            # '#' starts a comment that runs to end of line.
            (r'#.*', Comment),
            (r'(AND|OR|NOT|EQ|GT|LT|GE|LE|NE|IN|IEQ)\b', Operator.Word),
            (r'!=|==|<=|>=|=~|&&|\|\||[-~+/*%=<>&^|./\$]', Operator),
            # Raw-string prefixes enter the quote states without escape rules.
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # Backtick-delimited literals (dates/expressions in mapfiles).
            (r'`([^`])*`', Number.Date),
            # Ordinary strings get escape handling combined in.
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
#            (constants, Keyword.Constant),
#            (r"""[]{}:(),;[]""", Punctuation),
            # (r'(MAP)\b', Generic.Heading),
            (keywords, Keyword),
            (builtins, Name.Builtin),
            # Floats before integers so '1.5' is not split as 1 . 5.
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'[0-9]+', Number.Integer)

        ],
        # Double-quoted string body; the escape rule is repeated here so raw
        # strings (which skip 'stringescape') still pass over \" and \\.
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
            include('strings')
        ],
        # Triple-quoted variants additionally allow raw newlines via 'nl'.
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'strings': [
            # printf-style format specifiers inside strings.
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ]
    }
Beispiel #28
0
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    *New in Pygments 0.7.*
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = [
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer',
        'lazy', 'let', 'match', 'method', 'module', 'mutable', 'new', 'object',
        'of', 'open', 'private', 'raise', 'rec', 'sig', 'struct', 'then', 'to',
        'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with'
    ]
    # Regex fragments for symbolic keywords.  Raw strings keep the values
    # byte-identical while avoiding invalid-escape SyntaxWarnings on
    # Python 3.12+ (e.g. '\(' in a non-raw string).
    keyopts = [
        r'!=', r'#', r'&', r'&&', r'\(', r'\)', r'\*', r'\+', r',', r'-',
        r'-\.', r'->', r'\.', r'\.\.', r':', r'::', r':=', r':>', r';', r';;',
        r'<', r'<-', r'=', r'>', r'>]', r'>}', r'\?', r'\?\?', r'\[', r'\[<',
        r'\[>', r'\[\|', r']', r'_', r'`', r'{', r'{<', r'\|', r'\|]', r'}',
        r'~'
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = [
        'unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array'
    ]

    tokens = {
        'escape-sequence': [
            (r'\\[\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            # Capitalized identifier followed by '.' opens a module path.
            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
            (r'\(\*', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            # Number literals.  Radix-prefixed and float forms must be tried
            # BEFORE the plain decimal rule, which would otherwise consume
            # the leading digits (the '0' of '0x2A', the '3' of '3.14').
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Binary),
            # A float needs a literal dot (escaped '.') and/or an exponent;
            # previously the dot was unescaped and the exponent mandatory,
            # so plain '3.14' never matched.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            # Labelled/optional arguments: ~label: and ?label:.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # OCaml comments nest, hence the explicit #push.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),  # line continuation inside a string
            (r'"', String.Double, '#pop'),
        ],
        # Module path after 'Mod.': namespaces until a final class/value name.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
        ],
    }
0
     (r'(and|or|not)', Operator.Word),
     (r'/\*', Comment.Multiline, 'inline-comment'),
     (r'//[^\n]*', Comment.Single),
     (r'\#[a-z0-9]{1,6}', Number.Hex),
     (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)),
     (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)),
     (r'#\{', String.Interpol, 'interpolation'),
     (r'[~^*!&%<>|+=@:,./?-]+', Operator),
     (r'[\[\]()]+', Punctuation),
     (r'"', String.Double, 'string-double'),
     (r"'", String.Single, 'string-single'),
     (r'[a-z_-][\w-]*', Name),
 ],
 'interpolation': [
     (r'\}', String.Interpol, '#pop'),
     include('value'),
 ],
 'selector': [
     (r'[ \t]+', Text),
     (r'\:', Name.Decorator, 'pseudo-class'),
     (r'\.', Name.Class, 'class'),
     (r'\#', Name.Namespace, 'id'),
     (r'[\w-]+', Name.Tag),
     (r'#\{', String.Interpol, 'interpolation'),
     (r'&', Keyword),
     (r'[~^*!&\[\]()<>|+=@:;,./?-]', Operator),
     (r'"', String.Double, 'string-double'),
     (r"'", String.Single, 'string-single'),
 ],
 'string-double': [
     (r'(\\.|#(?=[^\n{])|[^\n"#])+', String.Double),
Beispiel #30
0
     (r"\!(important|default)", Name.Exception),
     (r"(true|false)", Name.Pseudo),
     (r"(and|or|not)", Operator.Word),
     (r"/\*", Comment.Multiline, "inline-comment"),
     (r"//[^\n]*", Comment.Single),
     (r"\#[a-z0-9]{1,6}", Number.Hex),
     (r"(-?\d+)(\%|[a-z]+)?", bygroups(Number.Integer, Keyword.Type)),
     (r"(-?\d*\.\d+)(\%|[a-z]+)?", bygroups(Number.Float, Keyword.Type)),
     (r"#\{", String.Interpol, "interpolation"),
     (r"[~^*!&%<>|+=@:,./?-]+", Operator),
     (r"[\[\]()]+", Punctuation),
     (r'"', String.Double, "string-double"),
     (r"'", String.Single, "string-single"),
     (r"[a-z_-][\w-]*", Name),
 ],
 "interpolation": [(r"\}", String.Interpol, "#pop"), include("value")],
 "selector": [
     (r"[ \t]+", Text),
     (r"\:", Name.Decorator, "pseudo-class"),
     (r"\.", Name.Class, "class"),
     (r"\#", Name.Namespace, "id"),
     (r"[\w-]+", Name.Tag),
     (r"#\{", String.Interpol, "interpolation"),
     (r"&", Keyword),
     (r"[~^*!&\[\]()<>|+=@:;,./?-]", Operator),
     (r'"', String.Double, "string-double"),
     (r"'", String.Single, "string-single"),
 ],
 "string-double": [
     (r'(\\.|#(?=[^\n{])|[^\n"#])+', String.Double),
     (r"#\{", String.Interpol, "interpolation"),
Beispiel #31
0
 def _make_begin_state(compound, _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl, _punct=_punct, _string=_string,
                       _space=_space, _start_label=_start_label,
                       _stoken=_stoken, _token_terminator=_token_terminator,
                       _variable=_variable, _ws=_ws):
     """Build the rule list for a batch-file "begin" state.

     ``compound`` selects the variant used inside a parenthesised compound
     statement, where an unescaped ``)`` closes the block and must also
     terminate keywords/tokens.  The ``_*`` keyword arguments bind the
     module-level regex fragments at definition time so the helper is
     self-contained.  Returns a list of RegexLexer rules.
     """
     # Remainder of a token: strings, variables, or any character that is
     # not a quote/newline/punctuation (nor ')' in compound context).
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     # Rest-of-line patterns honour '^' escapes (which may escape a newline).
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     # Horizontal whitespace, each char optionally preceded by a '^' escape.
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         # Inside '(...)': a lookahead ')' also terminates keywords/tokens,
         # and all follow-up states get the '/compound' flavour.
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     return [
         # NOTE(review): outside a compound, a stray ')' plus the rest of
         # the line is tokenized as Comment.Single (cmd.exe quirk).
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix),
         (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'),
         (r'@+', Punctuation),
         # 'for', 'if', 'rem' followed by the /? help switch.
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         # Plain built-in commands.
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         # 'call :label' vs plain 'call'.
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         # 'for /f' and 'for /l' get dedicated states; bare 'for' -> 'for2'.
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'label%s' % suffix),
         # 'if' with optional /i and 'not' modifiers.
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         # 'set /a' goes to arithmetic mode; other 'set' forms parse the
         # variable name up to the '=' sign.
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'), Keyword),
          'arithmetic%s' % suffix),
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation),
          'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
Beispiel #32
0
class RagelLexer(RegexLexer):
    """
    A pure `Ragel <http://www.complang.org/ragel/>`_ lexer.  Use this for
    fragments of Ragel.  For ``.rl`` files, use RagelEmbeddedLexer instead
    (or one of the language-specific subclasses).

    .. versionadded:: 1.1
    """

    name = 'Ragel'
    aliases = ['ragel']
    filenames = []

    tokens = {
        'whitespace': [(r'\s+', Whitespace)],
        'comments': [
            (r'\#.*$', Comment),
        ],
        'keywords':
        [(r'(access|action|alphtype)\b', Keyword),
         (r'(getkey|write|machine|include)\b', Keyword),
         (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
         (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (r'"(\\\\|\\"|[^"])*"', String),  # double quote string
            (r"'(\\\\|\\'|[^'])*'", String),  # single quote string
            (r'\[(\\\\|\\\]|[^\]])*\]', String),  # square bracket literals
            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex),  # regular expressions
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),  # Join
            (r'\||&|--?', Operator),  # Union, Intersection and Subtraction
            (r'\.|<:|:>>?', Operator),  # Concatenation
            (r':', Operator),  # Label
            (r'->', Operator),  # Epsilon Transition
            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),  # EOF Actions
            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),  # Global Error Actions
            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),  # To-State Actions
            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
            (r'>|@|\$|%', Operator),  # Transition Actions and Priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
            (r'!|\^', Operator),  # Negation
            (r'\(|\)', Operator),  # Grouping
        ],
        # 'literals' is included first so strings/regexes win over operators.
        'root': [
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            (r'\{', Punctuation, 'host'),  # embedded host-language code
            (r'=', Operator),
            (r';', Punctuation),
        ],
        # Host-language code between braces is passed through as Other;
        # nesting is tracked with #push/#pop.
        'host': [
            (
                r'(' + r'|'.join((  # keep host code in largest possible chunks
                    r'[^{}\'"/#]+',  # exclude unsafe characters
                    r'[^\\]\\[{}]',  # allow escaped { or }

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'//.*$\n?',  # single line comment
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'\#.*$\n?',  # ruby comment

                    # regular expression: There's no reason for it to start
                    # with a * and this stops confusion with comments.
                    r'/(?!\*)(\\\\|\\/|[^/])*/',

                    # / is safe now that we've handled regex and javadoc comments
                    r'/',
                )) + r')+',
                Other),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
Beispiel #33
0
        (r'/\*', Comment.Multiline, 'inline-comment'),
        (r'//[^\n]*', Comment.Single),
        (r'\#[a-z0-9]{1,6}', Number.Hex),
        (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)),
        (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'[~^*!&%<>|+=@:,./?-]+', Operator),
        (r'[\[\]()]+', Punctuation),
        (r'"', String.Double, 'string-double'),
        (r"'", String.Single, 'string-single'),
        (r'[a-z_-][\w-]*', Name),
    ],

    'interpolation': [
        (r'\}', String.Interpol, '#pop'),
        include('value'),
    ],

    'selector': [
        (r'[ \t]+', Text),
        (r'\:', Name.Decorator, 'pseudo-class'),
        (r'\.', Name.Class, 'class'),
        (r'\#', Name.Namespace, 'id'),
        (r'[\w-]+', Name.Tag),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'&', Keyword),
        (r'[~^*!&\[\]()<>|+=@:;,./?-]', Operator),
        (r'"', String.Double, 'string-double'),
        (r"'", String.Single, 'string-single'),
    ],
Beispiel #34
0
class CssLexer(RegexLexer):
    """
    For CSS (Cascading Style Sheets).
    """

    name = 'CSS'
    aliases = ['css']
    filenames = ['*.css']
    mimetypes = ['text/css']

    # NOTE(review): this class relies on module-level word lists defined
    # elsewhere in this file: _vendor_prefixes, _css_properties,
    # _functional_notation_keyword_values, _keyword_values,
    # _other_keyword_values, _color_keywords, _all_units.
    tokens = {
        'root': [
            include('basics'),
        ],
        # Selector-level constructs: comments, pseudo-classes, classes,
        # ids, at-rules, element names, strings.
        'basics': [(r'\s+', Text), (r'/\*(?:.|\n)*?\*/', Comment),
                   (r'\{', Punctuation, 'content'),
                   (r'(\:{1,2})([\w-]+)', bygroups(Punctuation,
                                                   Name.Decorator)),
                   (r'(\.)([\w-]+)', bygroups(Punctuation, Name.Class)),
                   (r'(\#)([\w-]+)', bygroups(Punctuation, Name.Namespace)),
                   (r'(@)([\w-]+)', bygroups(Punctuation, Keyword), 'atrule'),
                   (r'[\w-]+', Name.Tag),
                   (r'[~^*!%&$\[\]()<>|+=@:;,./?-]', Operator),
                   (r'"(\\\\|\\"|[^"])*"', String.Double),
                   (r"'(\\\\|\\'|[^'])*'", String.Single)],
        # After '@foo': either a block (atcontent) or a ';'-terminated rule.
        'atrule': [
            (r'\{', Punctuation, 'atcontent'),
            (r';', Punctuation, '#pop'),
            include('basics'),
        ],
        'atcontent': [
            include('basics'),
            # pop both 'atcontent' and 'atrule' at the closing brace
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Inside a declaration block '{ ... }'.
        'content': [
            (r'\s+', Text),
            (r'\}', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'^@.*?$', Comment.Preproc),
            (words(_vendor_prefixes, ), Keyword.Pseudo),
            # Known properties first, then any identifier followed by ':'.
            (r'(' + r'|'.join(_css_properties) + r')(\s*)(\:)',
             bygroups(Keyword, Text, Punctuation), 'value-start'),
            (r'([a-zA-Z_][\w-]*)(\s*)(\:)', bygroups(Name, Text, Punctuation),
             'value-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
        ],
        # The value side of 'property: value;'.
        'value-start': [
            (r'\s+', Text),
            (words(_vendor_prefixes, ), Name.Builtin.Pseudo),
            include('urls'),
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()', bygroups(Name.Function, Punctuation),
             'function-start'),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),
            # for transition-property etc.
            (words(_css_properties, suffix=r'\b'), Keyword),
            (r'\!important', Comment.Preproc),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[~^*!%&<>|+=@:./?-]+', Operator),
            (r'[\[\](),]+', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[a-zA-Z_][\w-]*', Name),
            (r';', Punctuation, '#pop'),
            (r'\}', Punctuation, '#pop:2'),
        ],
        # Arguments of a functional value like rgb(...) or calc(...).
        'function-start': [
            (r'\s+', Text),
            include('urls'),
            (words(_vendor_prefixes, ), Keyword.Pseudo),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),

            # function-start may be entered recursively
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()', bygroups(Name.Function, Punctuation),
             'function-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[*+/-]', Operator),
            (r'[,]', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[a-zA-Z_-]\w*', Name),
            (r'\)', Punctuation, '#pop'),
        ],
        # url(...) with double-, single-, or un-quoted argument.
        'urls': [
            (r'(url)(\()(".*?")(\))',
             bygroups(Name.Builtin, Punctuation, String.Double, Punctuation)),
            (r"(url)(\()('.*?')(\))",
             bygroups(Name.Builtin, Punctuation, String.Single, Punctuation)),
            (r'(url)(\()(.*?)(\))',
             bygroups(Name.Builtin, Punctuation, String.Other, Punctuation)),
        ],
        'numeric-values': [
            # NOTE(review): accepts any of a-z A-Z, not just hex digits —
            # lenient by design upstream; confirm before tightening.
            (r'\#[a-zA-Z0-9]{1,6}', Number.Hex),
            (r'[+\-]?[0-9]*[.][0-9]+', Number.Float, 'numeric-end'),
            (r'[+\-]?[0-9]+', Number.Integer, 'numeric-end'),
        ],
        # Optional unit (px, em, %, ...) after a number.
        'numeric-end': [
            (words(_all_units, suffix=r'\b'), Keyword.Type),
            (r'%', Keyword.Type),
            default('#pop'),
        ],
    }
Beispiel #35
0
class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
    Not for direct use; use TreetopLexer instead.

    .. versionadded:: 1.6
    """

    tokens = {
        # Top level: require lines, module and grammar declarations.
        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        # Inside 'module ... end'; modules may nest (#push).
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        # Inside 'grammar ... end': rules and includes.
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        # Body of a 'rule ... end': parsing expressions.
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        # '{ ... }' blocks of Ruby code inside a rule; nested braces are
        # delegated to the 'ruby' state.
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        # Shared 'end' keyword that closes module/grammar/rule scopes.
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],
    }
Beispiel #36
0
class CypherLexer(RegexLexer):
    """
    For `Cypher Query Language
    <https://neo4j.com/docs/developer-manual/3.3/cypher/>`_

    For the Cypher version in Neo4j 3.3

    .. versionadded:: 2.0
    """
    name = 'Cypher'
    aliases = ['cypher']
    filenames = ['*.cyp', '*.cypher']

    # Cypher keywords are case-insensitive; comments may span whole lines.
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            include('comment'),
            include('keywords'),
            include('clauses'),
            include('relations'),
            include('strings'),
            include('whitespace'),
            include('barewords'),
        ],
        'comment': [
            # NOTE(review): '^.*//.*\n' marks the ENTIRE line as a comment
            # whenever '//' appears anywhere in it, including after code.
            (r'^.*//.*\n', Comment.Single),
        ],
        'keywords': [
            (r'(create|order|match|limit|set|skip|start|return|with|where|'
             r'delete|foreach|not|by|true|false)\b', Keyword),
        ],
        'clauses': [
            # based on https://neo4j.com/docs/cypher-refcard/3.3/
            (r'(all|any|as|asc|ascending|assert|call|case|create|'
             r'create\s+index|create\s+unique|delete|desc|descending|'
             r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|'
             r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|'
             r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|'
             r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|'
             r'remove|return|set|skip|single|start|starts\s+with|then|union|'
             r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|'
             r'with)\b', Keyword),
        ],
        # Relationship patterns: -[...]->, <-[...]-, -[...]-; the bracket
        # contents are re-lexed with this same lexer via using(this).
        'relations': [
            (r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)),
            (r'(<-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'(-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'-->|<--|\[|\]', Operator),
            (r'<|>|<>|=|<=|=>|\(|\)|\||:|,|;', Punctuation),
            (r'[.*{}]', Punctuation),
        ],
        'strings': [
            (r'"(?:\\[tbnrf\'"\\]|[^\\"])*"', String),
            # backtick-quoted identifiers ('' doubling escapes a backtick)
            (r'`(?:``|[^`])+`', Name.Variable),
        ],
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        # Fallback: identifiers and numbers (IGNORECASE makes [a-z] match
        # uppercase starts too).
        'barewords': [
            (r'[a-z]\w*', Name),
            (r'\d+', Number),
        ],
    }
Beispiel #37
0
class NedLexer(RegexLexer):
    """Lexer for OMNeT++ NED (network description) files.

    The keyword sets (channel/module/network, gates/submodules/connections,
    and the distribution/builtin function names) match the NED language.
    """
    name = 'ned'
    # aliases added for consistency with the other lexers in this file, so
    # the lexer can be looked up by name.
    aliases = ['ned']
    filenames = ['*.ned']

    #: optional Comment or Whitespace
    # NOTE(review): _ws and _ws1 appear unused within this class — possibly
    # kept for subclasses or copied from a template; confirm before removing.
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(true|false)\b', Name.Builtin),
            # connection arrows and the '..' range operator
            (r'(<-->|-->|<--|\.\.)', Keyword),
            (r'(bool|double|int|xml)\b', Keyword.Type),
            (r'(inout|input|output)\b', Keyword.Type),
            # floats with exponent before plain floats/ints
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),  # stray comment terminator
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(("channel", "channelinterface", "simple", "module",
                    "network", "moduleinterface"),
                   suffix=r'\b'), Keyword),
            (words(
                ("parameters", "gates", "types", "submodules", "connections"),
                suffix=r'\b'), Keyword),
            (words(("volatile", "allowunconnected", "extends", "for", "if",
                    "import", "like", "package", "property"),
                   suffix=r'\b'), Keyword),
            (words(("sizeof", "const", "default", "ask", "this", "index",
                    "typename", "xmldoc"),
                   suffix=r'\b'), Keyword),
            (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta",
                    "binomial", "cauchy", "ceil", "chi_square", "cos",
                    "erlang_k", "exp", "exponential", "fabs", "floor", "fmod",
                    "gamma_d", "genk_exponential", "genk_intuniform",
                    "genk_normal", "genk_truncnormal", "genk_uniform",
                    "geometric", "hypergeometric", "hypot", "intuniform",
                    "log", "log10", "lognormal", "max", "min", "negbinomial",
                    "normal", "pareto_shifted", "poisson", "pow", "simTime",
                    "sin", "sqrt", "student_t", "tan", "triang", "truncnormal",
                    "uniform", "weibull", "xml", "xmldoc"),
                   suffix=r'\b'), Name.Builtin),
            # raw strings: '\w' in a non-raw string is an invalid escape
            # (SyntaxWarning on Python 3.12+); the pattern value is unchanged.
            (r'@[a-zA-Z_]\w*', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        # A single statement, ended by ';'.
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        # A function body; nested braces tracked with #push/#pop.
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ]
    }
Beispiel #38
0
    def gen_rubystrings_rules():
        """Build the string/symbol/regex lexer states shared by the Ruby lexer.

        Returns a dict mapping state names to rule lists: the entry state
        'strings' plus helper states for simple quoted forms and for each
        %-literal bracket pair (curly, square, paren, angle).
        """
        def intp_regex_callback(self, match, ctx):
            # Emit the opening delimiter, re-lex the body for interpolation
            # in a fresh context, then emit the closing delimiter + flags.
            yield match.start(1), String.Regex, match.group(1)  # begin
            inner = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for offset, tok, val in self.get_tokens_unprocessed(context=inner):
                yield match.start(3) + offset, tok, val
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # Same structure as intp_regex_callback, for %-style strings.
            yield match.start(1), String.Other, match.group(1)
            inner = LexerContext(match.group(3), 0, ['interpolated-string'])
            for offset, tok, val in self.get_tokens_unprocessed(context=inner):
                yield match.start(3) + offset, tok, val
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {
            'strings': [
                # easy ones
                (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
                 r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
                (r":'(\\\\|\\'|[^'])*'", String.Symbol),
                (r"'(\\\\|\\'|[^'])*'", String.Single),
                (r':"', String.Symbol, 'simple-sym'),
                (r'([a-zA-Z_]\w*)(:)(?!:)',
                 bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
                (r'"', String.Double, 'simple-string'),
                (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
            ],
        }

        # double-quoted string, symbol and backtick share the same shape
        for kind, ttype, end in [('string', String.Double, '"'),
                                 ('sym', String.Symbol, '"'),
                                 ('backtick', String.Backtick, '`')]:
            states['simple-' + kind] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings: one state triple per bracket pair
        for lbrace, rbrace, kind in [('\\{', '\\}', 'cb'),
                                     ('\\[', '\\]', 'sb'),
                                     ('\\(', '\\)', 'pa'),
                                     ('<', '>', 'ab')]:
            states[kind + '-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      kind + '-intp-string'))
            states[kind + '-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      kind + '-string'))
            states[kind + '-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      kind + '-regex'))

        # these must come after %<brace>!
        states['strings'].extend([
            # %r regex
            (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ])

        return states
# Beispiel #39 (score: 0) — example separator left over from the code scrape
class QVToLexer(RegexLexer):
    """
    For the `QVT Operational Mapping language <http://www.omg.org/spec/QVT/1.1/>`_.

    Reference for implementing this: «Meta Object Facility (MOF) 2.0
    Query/View/Transformation Specification», Version 1.1 - January 2011
    (http://www.omg.org/spec/QVT/1.1/), see §8.4, «Concrete Syntax» in
    particular.

    Notable tokens assignments:

    - Name.Class is assigned to the identifier following any of the following
      keywords: metamodel, class, exception, primitive, enum, transformation
      or library

    - Name.Function is assigned to the names of mappings and queries

    - Name.Builtin.Pseudo is assigned to the pre-defined variables 'this',
      'self' and 'result'.
    """
    # With obvious borrowings & inspiration from the Java, Python and C lexers

    name = 'QVTO'
    aliases = ['qvto', 'qvt']
    filenames = ['*.qvto']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # single-line comments; an optional 'directive:' marker gets
            # Comment.Preproc so directives stand out
            (r'(--|//)(\s*)(directive:)?(.*)$',
             bygroups(Comment, Comment, Comment.Preproc, Comment)),
            # Uncomment the following if you want to distinguish between
            # '/*' and '/**', à la javadoc
            # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'\\\n', Text),
            (r'(and|not|or|xor|##?)\b', Operator.Word),
            # NOTE(review): the trailing \b here only matches when a word
            # character follows the '=' (e.g. ':=y' but not ':= y') — kept
            # as-is since changing it would alter tokenization; confirm
            # against the upstream QVTO lexer before "fixing".
            (r'(:{1,2}=|[-+]=)\b', Operator.Word),
            (r'(@|<<|>>)\b', Keyword),  # stereotypes
            (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(true|false|unlimited|null)\b', Keyword.Constant),
            (r'(this|self|result)\b', Name.Builtin.Pseudo),
            (r'(var)\b', Keyword.Declaration),
            (r'(from|import)\b', Keyword.Namespace, 'fromimport'),
            (r'(metamodel|class|exception|primitive|enum|transformation|'
             r'library)(\s+)(\w+)',
             bygroups(Keyword.Word, Text, Name.Class)),
            # A former rule r'(exception)(\s+)(\w+)' -> Name.Exception was
            # removed here: it was unreachable dead code, because the rule
            # above already consumes 'exception <ident>' (its alternation
            # contains 'exception'), and the class docstring documents
            # Name.Class for identifiers following 'exception'.
            (r'(main)\b', Name.Function),
            (r'(mapping|helper|query)(\s+)',
             bygroups(Keyword.Declaration, Text), 'operation'),
            (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'),
            (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b',
             Keyword.Type),
            include('keywords'),
            ('"', String, combined('stringescape', 'dqs')),
            ("'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
            # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)',
            # bygroups(Text, Text, Text)),
        ],

        # after 'from'/'import': dotted namespace path, then pop
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop'),
        ],

        # after 'mapping'/'helper'/'query': scan up to the operation name
        # and its opening paren
        'operation': [
            (r'::', Text),
            (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()',
             bygroups(Text, Name.Function, Text, Punctuation), '#pop')
        ],

        # after 'assert': optional severity keyword
        'assert': [
            (r'(warning|error|fatal)\b', Keyword, '#pop'),
            default('#pop'),  # all else: go back
        ],

        'keywords': [
            (words((
                'abstract', 'access', 'any', 'assert', 'blackbox', 'break',
                'case', 'collect', 'collectNested', 'collectOne', 'collectselect',
                'collectselectOne', 'composes', 'compute', 'configuration',
                'constructor', 'continue', 'datatype', 'default', 'derived',
                'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except',
                'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if',
                'implies', 'in', 'inherits', 'init', 'inout', 'intermediate',
                'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn',
                'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map',
                'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out',
                'package', 'population', 'property', 'raise', 'readonly',
                'references', 'refines', 'reject', 'resolve', 'resolveIn',
                'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne',
                'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef',
                'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect',
                'xmap', 'xselect'), suffix=r'\b'), Keyword),
        ],

        # There is no need to distinguish between String.Single and
        # String.Double: 'strings' is factorised for 'dqs' and 'sqs'
        'strings': [
            (r'[^\\\'"\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
        ],
        'stringescape': [
            (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape)
        ],
        'dqs': [  # double-quoted string
            (r'"', String, '#pop'),
            (r'\\\\|\\"', String.Escape),
            include('strings')
        ],
        'sqs': [  # single-quoted string
            (r"'", String, '#pop'),
            (r"\\\\|\\'", String.Escape),
            include('strings')
        ],
        'name': [
            (r'[a-zA-Z_]\w*', Name),
        ],
        # numbers: excerpt taken from the python lexer
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer)
        ],
    }
# Beispiel #40 (score: 0) — example separator left over from the code scrape
    def gen_crystalstrings_rules():
        """Build the string/symbol/regex lexer states for the Crystal lexer.

        Returns a dict mapping state names to rule lists: the entry state
        'strings' plus helper states for simple quoted forms and for each
        %-literal bracket pair (curly, square, paren, angle).
        """
        def intp_regex_callback(self, match, ctx):
            # Emit the opening delimiter, re-lex the body for interpolation
            # in a fresh context, then emit the closing delimiter + flags.
            yield match.start(1), String.Regex, match.group(1)  # begin
            inner = LexerContext(match.group(3), 0, ["interpolated-regex"])
            for offset, tok, val in self.get_tokens_unprocessed(context=inner):
                yield match.start(3) + offset, tok, val
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # Same structure as intp_regex_callback, for %-style strings.
            yield match.start(1), String.Other, match.group(1)
            inner = LexerContext(match.group(3), 0, ["interpolated-string"])
            for offset, tok, val in self.get_tokens_unprocessed(context=inner):
                yield match.start(3) + offset, tok, val
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {
            "strings": [
                (r"\:@{0,2}[a-zA-Z_]\w*[!?]?", String.Symbol),
                (words(CRYSTAL_OPERATORS, prefix=r"\:@{0,2}"), String.Symbol),
                (r":'(\\\\|\\'|[^'])*'", String.Symbol),
                # This allows arbitrary text after '\ for simplicity
                (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
                (r':"', String.Symbol, "simple-sym"),
                # Crystal doesn't have "symbol:"s but this simplifies function args
                (r"([a-zA-Z_]\w*)(:)(?!:)", bygroups(String.Symbol, Punctuation)),
                (r'"', String.Double, "simple-string"),
                (r"(?<!\.)`", String.Backtick, "simple-backtick"),
            ],
        }

        # double-quoted string, symbol and backtick share the same shape;
        # symbols do not interpolate, hence the plain escaped state for them
        for kind, ttype, end in [
            ("string", String.Double, '"'),
            ("sym", String.Symbol, '"'),
            ("backtick", String.Backtick, "`"),
        ]:
            states["simple-" + kind] = [
                include("string-escaped" if kind == "sym" else "string-intp-escaped"),
                (r"[^\\%s#]+" % end, ttype),
                (r"[\\#]", ttype),
                (end, ttype, "#pop"),
            ]

        # braced quoted strings: one state triple per bracket pair
        for ldelim, rdelim, bracecc, kind in [
            ("\\{", "\\}", "{}", "cb"),
            ("\\[", "\\]", "\\[\\]", "sb"),
            ("\\(", "\\)", "()", "pa"),
            ("<", ">", "<>", "ab"),
        ]:
            states[kind + "-intp-string"] = [
                (r"\\[" + ldelim + "]", String.Other),
                (ldelim, String.Other, "#push"),
                (rdelim, String.Other, "#pop"),
                include("string-intp-escaped"),
                (r"[\\#" + bracecc + "]", String.Other),
                (r"[^\\#" + bracecc + "]+", String.Other),
            ]
            states["strings"].append((r"%" + ldelim, String.Other, kind + "-intp-string"))
            states[kind + "-string"] = [
                (r"\\[\\" + bracecc + "]", String.Other),
                (ldelim, String.Other, "#push"),
                (rdelim, String.Other, "#pop"),
                (r"[\\#" + bracecc + "]", String.Other),
                (r"[^\\#" + bracecc + "]+", String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states["strings"].append((r"%[wi]" + ldelim, String.Other, kind + "-string"))
            states[kind + "-regex"] = [
                (r"\\[\\" + bracecc + "]", String.Regex),
                (ldelim, String.Regex, "#push"),
                (rdelim + "[imsx]*", String.Regex, "#pop"),
                include("string-intp"),
                (r"[\\#" + bracecc + "]", String.Regex),
                (r"[^\\#" + bracecc + "]+", String.Regex),
            ]
            states["strings"].append((r"%r" + ldelim, String.Regex, kind + "-regex"))

        # these must come after %<brace>!
        states["strings"].extend([
            # %r regex
            (r"(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)", intp_regex_callback),
            # regular fancy strings with qsw
            (r"(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r"(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)",
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r"^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)",
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r"(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback),
        ])

        return states
# Beispiel #41 (score: 0) — example separator left over from the code scrape
class FishShellLexer(RegexLexer):
    """
    Lexer for Fish shell scripts.

    .. versionadded:: 2.1
    """

    name = 'Fish'
    aliases = ['fish', 'fishshell']
    filenames = ['*.fish', '*.load']
    mimetypes = ['application/x-fish']

    # Rule order matters: within each state the first matching regex wins,
    # so keywords/builtins are tried before generic words and literals.
    tokens = {
        # Entry state: keywords and builtins first, then literals/words,
        # then command substitution and variable expansion.
        'root': [
            include('basic'),
            include('data'),
            include('interp'),
        ],
        # Expansions: $((...)) math, (...) command substitution, $var refs.
        'interp': [
            (r'\$\(\(', Keyword, 'math'),
            (r'\(', Keyword, 'paren'),
            (r'\$#?(\w+|.)', Name.Variable),
        ],
        'basic': [
            # control-flow words and very common commands
            (r'\b(begin|end|if|else|while|break|for|in|return|function|block|'
             r'case|continue|switch|not|and|or|set|echo|exit|pwd|true|false|'
             r'cd|count|test)(\s*)\b', bygroups(Keyword, Text)),
            # fish builtins and shipped helper commands
            (r'\b(alias|bg|bind|breakpoint|builtin|command|commandline|'
             r'complete|contains|dirh|dirs|emit|eval|exec|fg|fish|fish_config|'
             r'fish_indent|fish_pager|fish_prompt|fish_right_prompt|'
             r'fish_update_completions|fishd|funced|funcsave|functions|help|'
             r'history|isatty|jobs|math|mimedb|nextd|open|popd|prevd|psub|'
             r'pushd|random|read|set_color|source|status|trap|type|ulimit|'
             r'umask|vared|fc|getopts|hash|kill|printf|time|wait)\s*\b(?!\.)',
             Name.Builtin),
            # comments run to end of line
            (r'#.*\n', Comment),
            # backslash escapes a single character
            (r'\\[\w\W]', String.Escape),
            # name=... assignment shape
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)),
            (r'[\[\]()=]', Operator),
            # heredoc-style <<WORD ... WORD blocks
            (r'<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
        ],
        'data': [
            # complete double-quoted string with no interpolation inside
            (r'(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\$])*"', String.Double),
            # otherwise enter the 'string' state to handle interpolation
            (r'"', String.Double, 'string'),
            (r"(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r"(?s)'.*?'", String.Single),
            (r';', Punctuation),
            (r'&|\||\^|<|>', Operator),
            (r'\s+', Text),
            (r'\d+(?= |\Z)', Number),
            # bare word: anything not special to the shell
            (r'[^=\s\[\]{}()$"\'`\\<&|;]+', Text),
        ],
        # inside a double-quoted string with possible $ interpolation
        'string': [
            (r'"', String.Double, '#pop'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\$])+', String.Double),
            include('interp'),
        ],
        # inside (...) command substitution: full syntax until ')'
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
        # inside $((...)) math expansion until '))'
        'math': [
            (r'\)\)', Keyword, '#pop'),
            (r'[-+*/%^|&]|\*\*|\|\|', Operator),
            (r'\d+#\d+', Number),
            (r'\d+#(?! )', Number),
            (r'\d+', Number),
            include('root'),
        ],
    }