Example #1
class ScdocLexer(RegexLexer):
    """
    `scdoc` is a simple man page generator for POSIX systems written in C99.
    https://git.sr.ht/~sircmpwn/scdoc

    .. versionadded:: 2.5
    """
    name = 'scdoc'
    aliases = ['scdoc', 'scd']
    filenames = ['*.scd', '*.scdoc']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # comment
            (r'^(;.+\n)', bygroups(Comment)),

            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)(\.+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # underlines
            (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            (r'(\s)(\*[^*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def analyse_text(text):
        """This is very similar to markdown, save for the escape characters
        needed for * and _."""
        result = 0

        if '\\*' in text:
            result += 0.01

        if '\\_' in text:
            result += 0.01

        return result
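
The `analyse_text` hook above feeds Pygments' lexer guessing: escaped `*` and `_` are taken as a weak scdoc signal. A minimal probe (assuming a standard Pygments install where `ScdocLexer` is importable from `pygments.lexers`):

from pygments.lexers import ScdocLexer

# Each escaped markup character found adds 0.01 to the score.
print(ScdocLexer.analyse_text("plain text"))          # 0.0
print(ScdocLexer.analyse_text(r"escaped \* and \_"))  # 0.02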
Example #2
class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    .. versionadded:: 0.10
    """

    name = 'Base Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # recipes (need to allow spaces because of expandtabs)
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            # special variables
            (r'\$[<@$+%?|*]', Keyword),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            (r'(export)(\s+)(?=[\w${}\t -]+\n)', bygroups(Keyword,
                                                          Text), 'export'),
            (r'export\s+', Keyword),
            # assignment
            (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator,
                                                Text), 'block-header'),
            # expansions
            (r'\$\(', Keyword, 'expansion'),
        ],
        'expansion': [
            (r'[^\w$().-]+', Text),
            (r'[\w.-]+', Name.Variable),
            (r'\$', Keyword),
            (r'\(', Keyword, '#push'),
            (r'\)', Keyword, '#pop'),
        ],
        'export': [
            (r'[\w${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        'block-header': [
            (r'[,|]', Punctuation),
            (r'#.*?\n', Comment, '#pop'),
            (r'\\\n', Text),  # line continuation
            (r'\$\(', Keyword, 'expansion'),
            (r'[a-zA-Z_]+', Name),
            (r'\n', Text, '#pop'),
            (r'.', Text),
        ],
    }
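
The `using(BashLexer)` calls above delegate sub-spans to another lexer: tab-indented recipe lines and assignment values are re-lexed as Bash, while targets and operators keep the token types assigned here. A short sketch (assuming `BaseMakefileLexer` is importable from `pygments.lexers`):

from pygments.lexers import BaseMakefileLexer

makefile = "CC := gcc\n\nall: main.o\n\tgcc -o app main.o\n"

# Recipe lines and assignment right-hand sides come back as Bash tokens;
# the target line is matched by the 'targets' rule above.
for token, value in BaseMakefileLexer().get_tokens(makefile):
    print(token, repr(value))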
Example #3
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    .. versionadded:: 0.7
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    tokens = {
        'root': [
            # Cover both (most recent call last) and (innermost last)
            # The optional ^C allows us to catch keyboard interrupt signals.
            (r'^(\^C)?(Traceback.*\n)', bygroups(Text,
                                                 Generic.Traceback), 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            (r'^(    )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment,
                                                Text)),  # for doctests...
            (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name,
                                               Text), '#pop'),
            (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop')
        ],
    }
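
Running the lexer over a small traceback shows the state machine at work: the Traceback header pushes 'intb', frame and source lines are matched inside it, and the final exception line pops back to 'root'. A quick sketch (assuming this class is the `PythonTracebackLexer` exposed by `pygments.lexers`):

from pygments.lexers import PythonTracebackLexer

tb = (
    "Traceback (most recent call last):\n"
    '  File "example.py", line 3, in <module>\n'
    "    1 / 0\n"
    "ZeroDivisionError: division by zero\n"
)

for token, value in PythonTracebackLexer().get_tokens(tb):
    print(token, repr(value))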
Example #4
class GenericAspxLexer(RegexLexer):
    """
    Lexer for ASP.NET pages.
    """

    name = 'aspx-gen'
    filenames = []
    mimetypes = []

    flags = re.DOTALL

    tokens = {
        'root': [
            (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)),
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(XmlLexer), Other, using(XmlLexer))),
            (r'(.+?)(?=<)', using(XmlLexer)),
            (r'.+', using(XmlLexer)),
        ],
    }
Example #5
class CsoundDocumentLexer(RegexLexer):
    """
    For `Csound <https://csound.com>`_ documents.

    .. versionadded:: 2.1
    """

    name = 'Csound Document'
    aliases = ['csound-document', 'csound-csd']
    filenames = ['*.csd']

    # These tokens are based on those in XmlLexer in pygments/lexers/html.py. Making
    # CsoundDocumentLexer a subclass of XmlLexer rather than RegexLexer may seem like a
    # better idea, since Csound Document files look like XML files. However, Csound
    # Documents can contain Csound comments (preceded by //, for example) before and
    # after the root element, unescaped bitwise AND & and less than < operators, etc. In
    # other words, while Csound Document files look like XML files, they may not actually
    # be XML files.
    tokens = {
        'root': [(r'/[*](.|\n)*?[*]/', Comment.Multiline),
                 (r'(?:;|//).*$', Comment.Single), (r'[^/;<]+|/(?!/)', Text),
                 (r'<\s*CsInstruments', Name.Tag, ('orchestra', 'tag')),
                 (r'<\s*CsScore', Name.Tag, ('score', 'tag')),
                 (r'<\s*[Hh][Tt][Mm][Ll]', Name.Tag, ('HTML', 'tag')),
                 (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
                 (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag)],
        'orchestra': [(r'<\s*/\s*CsInstruments\s*>', Name.Tag, '#pop'),
                      (r'(.|\n)+?(?=<\s*/\s*CsInstruments\s*>)',
                       using(CsoundOrchestraLexer))],
        'score':
        [(r'<\s*/\s*CsScore\s*>', Name.Tag, '#pop'),
         (r'(.|\n)+?(?=<\s*/\s*CsScore\s*>)', using(CsoundScoreLexer))],
        'HTML':
        [(r'<\s*/\s*[Hh][Tt][Mm][Ll]\s*>', Name.Tag, '#pop'),
         (r'(.|\n)+?(?=<\s*/\s*[Hh][Tt][Mm][Ll]\s*>)', using(HtmlLexer))],
        'tag': [(r'\s+', Text), (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
                (r'/?\s*>', Name.Tag, '#pop')],
        'attr': [(r'\s+', Text), (r'".*?"', String, '#pop'),
                 (r"'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop')]
    }
Example #6
class DockerLexer(RegexLexer):
    """
    Lexer for `Docker <http://docker.io>`_ configuration files.

    .. versionadded:: 2.0
    """
    name = 'Docker'
    aliases = ['docker', 'dockerfile']
    filenames = ['Dockerfile', '*.docker']
    mimetypes = ['text/x-dockerfile-config']

    _keywords = (r'(?:MAINTAINER|EXPOSE|WORKDIR|USER|STOPSIGNAL)')
    _bash_keywords = (r'(?:RUN|CMD|ENTRYPOINT|ENV|ARG|LABEL|ADD|COPY)')
    _lb = r'(?:\s*\\?\s*)'  # dockerfile line break regex
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        'root': [
            (r'#.*', Comment),
            (r'(FROM)([ \t]*)(\S*)([ \t]*)(?:(AS)([ \t]*)(\S*))?',
             bygroups(Keyword, Text, String, Text, Keyword, Text, String)),
            (r'(ONBUILD)(%s)' % (_lb, ), bygroups(Keyword, using(BashLexer))),
            (r'(HEALTHCHECK)((%s--\w+=\w+%s)*)' % (_lb, _lb),
             bygroups(Keyword, using(BashLexer))),
            (r'(VOLUME|ENTRYPOINT|CMD|SHELL)(%s)(\[.*?\])' % (_lb, ),
             bygroups(Keyword, using(BashLexer), using(JsonLexer))),
            (r'(LABEL|ENV|ARG)((%s\w+=\w+%s)*)' % (_lb, _lb),
             bygroups(Keyword, using(BashLexer))),
            (r'(%s|VOLUME)\b(.*)' % (_keywords), bygroups(Keyword, String)),
            (r'(%s)' % (_bash_keywords, ), Keyword),
            (r'(.*\\\n)*.+', using(BashLexer)),
        ]
    }
Example #7
class DuelLexer(RegexLexer):
    """
    Lexer for Duel Views Engine (formerly JBST) markup with JavaScript code blocks.
    See http://duelengine.org/.
    See http://jsonml.org/jbst/.

    .. versionadded:: 1.4
    """

    name = 'Duel'
    aliases = ['duel', 'jbst', 'jsonml+bst']
    filenames = ['*.duel', '*.jbst']
    mimetypes = ['text/x-duel', 'text/x-jbst']

    flags = re.DOTALL

    tokens = {
        'root': [
            (r'(<%[@=#!:]?)(.*?)(%>)',
             bygroups(Name.Tag, using(JavascriptLexer), Name.Tag)),
            (r'(<%\$)(.*?)(:)(.*?)(%>)',
             bygroups(Name.Tag, Name.Function, Punctuation, String, Name.Tag)),
            (r'(<%--)(.*?)(--%>)',
             bygroups(Name.Tag, Comment.Multiline, Name.Tag)),
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(HtmlLexer), using(JavascriptLexer),
                      using(HtmlLexer))),
            (r'(.+?)(?=<)', using(HtmlLexer)),
            (r'.+', using(HtmlLexer)),
        ],
    }
Example #8
def _objdump_lexer_tokens(asm_lexer):
    """
    Common objdump lexer tokens to wrap an ASM lexer.
    """
    hex_re = r'[0-9A-Za-z]'
    return {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
             bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
             bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            ('(' + hex_re + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            ('(' + hex_re + '+)( )(<)(.*?)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation)),
            # Code line with disassembled instructions
            ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re +
             ' )+)( *\t)([a-zA-Z].*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text,
                      using(asm_lexer))),
            # Code line with ascii
            ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re +
             ' )+)( *)(.*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction
            ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)$',
             bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes
            (r'\t\.\.\.$', Text),
            # Relocation line
            # (With offset)
            (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' +
             hex_re + '+)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant)),
            (r'[^\n]+\n', Other)
        ]
    }
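
Since this is a token-table factory rather than a lexer class, concrete objdump lexers are built by passing in the ASM lexer for the target syntax. A sketch modeled on how Pygments wires up the GAS-based variant (`GasLexer` assumed to be in scope from the same module):

class ObjdumpLexer(RegexLexer):
    """
    For the output of ``objdump -dr``.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    tokens = _objdump_lexer_tokens(GasLexer)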
Example #9
def _make_redirect_state(compound,
                         _core_token_compound=_core_token_compound,
                         _nl=_nl, _punct=_punct, _stoken=_stoken,
                         _string=_string, _space=_space,
                         _variable=_variable, _ws=_ws):
    stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                       (_punct, _string, _variable, _core_token_compound))
    return [
        (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' %
         (_nl, _ws, _nl, _ws),
         bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
        (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
         (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken),
         bygroups(Number.Integer, Punctuation, using(this, state='text')))
    ]
Example #10
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin,
                                            Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }
Example #11
class SingularityLexer(RegexLexer):
    """
    Lexer for `Singularity definition files
    <https://www.sylabs.io/guides/3.0/user-guide/definition_files.html>`_.

    .. versionadded:: 2.6
    """

    name = 'Singularity'
    aliases = ['singularity']
    filenames = ['*.def', 'Singularity']
    flags = re.IGNORECASE | re.MULTILINE | re.DOTALL

    _headers = r'^(\s*)(bootstrap|from|osversion|mirrorurl|include|registry|namespace|includecmd)(:)'
    _section = r'^%(?:pre|post|setup|environment|help|labels|test|runscript|files|startscript)\b'
    _appsect = r'^%app(?:install|help|run|labels|env|test|files)\b'

    tokens = {
        'root': [
            (_section, Generic.Heading, 'script'),
            (_appsect, Generic.Heading, 'script'),
            (_headers, bygroups(Text, Keyword, Text)),
            (r'\s*#.*?\n', Comment),
            (r'\b(([0-9]+\.?[0-9]*)|(\.[0-9]+))\b', Number),
            (r'(?!^\s*%).', Text),
        ],
        'script': [
            (r'(.+?(?=^\s*%))|(.*)', using(BashLexer), '#pop'),
        ],
    }

    def analyse_text(text):
        """This is a quite simple script file, but there are a few keywords
        which seem unique to this language."""
        result = 0
        if re.search(r'\b(?:osversion|includecmd|mirrorurl)\b', text,
                     re.IGNORECASE):
            result += 0.5

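        # _section starts with '^'; slicing off that anchor lets the
        # pattern match a section header anywhere in the sample.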
        if re.search(SingularityLexer._section[1:], text):
            result += 0.49

        return result
Example #12
class PythonTracebackLexer(RegexLexer):
    """
    For Python 3.x tracebacks, with support for chained exceptions.

    .. versionadded:: 1.0

    .. versionchanged:: 2.5
       This is now the default ``PythonTracebackLexer``.  It is still available
       as the alias ``Python3TracebackLexer``.
    """

    name = 'Python Traceback'
    aliases = ['pytb', 'py3tb']
    filenames = ['*.pytb', '*.py3tb']
    mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),  # for doctests...
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Text), '#pop'),
            (r'^([a-zA-Z_][\w.]*)(:?\n)',
             bygroups(Generic.Error, Text), '#pop')
        ],
    }
Example #13
class MxmlLexer(RegexLexer):
    """
    For MXML markup.
    Nested AS3 in <script> tags is highlighted by the appropriate lexer.

    .. versionadded:: 1.1
    """
    flags = re.MULTILINE | re.DOTALL
    name = 'MXML'
    aliases = ['mxml']
    filenames = ['*.mxml']
    mimetypes = ['text/xml', 'application/xml']

    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'(\<\!\[CDATA\[)(.*?)(\]\]\>)',
             bygroups(String, using(ActionScript3Lexer), String)),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }
Example #14
def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct,
                           _string=_string, _variable=_variable, _ws=_ws):
    op = r'=+\-*/!~'
    state = []
    if compound:
        state.append((r'(?=\))', Text, '#pop'))
    state += [
        (r'0[0-7]+', Number.Oct),
        (r'0x[\da-f]+', Number.Hex),
        (r'\d+', Number.Integer),
        (r'[(),]+', Punctuation),
        (r'([%s]|%%|\^\^)+' % op, Operator),
        (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
         (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws,
          r'[^)]' if compound else r'[\w\W]'),
         using(this, state='variable')),
        (r'(?=[\x00|&])', Text, '#pop'),
        include('follow')
    ]
    return state
Example #15
class FortranFixedLexer(RegexLexer):
    """
    Lexer for fixed format Fortran.

    .. versionadded:: 2.1
    """
    name = 'FortranFixed'
    aliases = ['fortranfixed']
    filenames = ['*.f', '*.F']

    flags = re.IGNORECASE

    def _lex_fortran(self, match, ctx=None):
        """Lex a line just as free form fortran without line break."""
        lexer = FortranLexer()
        text = match.group(0) + "\n"
        for index, token, value in lexer.get_tokens_unprocessed(text):
            value = value.replace('\n', '')
            if value != '':
                yield index, token, value

    tokens = {
        'root': [
            (r'[C*].*\n', Comment),
            (r'#.*\n', Comment.Preproc),
            (r' {0,4}!.*\n', Comment),
            (r'(.{5})', Name.Label, 'cont-char'),
            (r'.*\n', using(FortranLexer)),
        ],
        'cont-char': [
            (' ', Text, 'code'),
            ('0', Comment, 'code'),
            ('.', Generic.Strong, 'code'),
        ],
        'code': [
            (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment,
                                          Text), 'root'),
            (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'),
            default('root'),
        ]
    }
Example #16
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True``, the lexer starts highlighting with
        PHP code (i.e. no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from typecode._vendor.pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the PHP documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns;
    # PHP uses a backslash as a namespace separator.
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'<\?(php)?', Comment.Preproc, 'php'),
            (r'[^<]+', Other),
            (r'<', Other)
        ],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter,
                      Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally)\b', Keyword),
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'0b[01]+', Number.Bin),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
                '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
                '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
                '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
                suffix=r'\b'),
             Name.Constant),
        ],
        'classname': [
            (_ident_inner, Name.Class, '#pop')
        ],
        'functionname': [
            include('magicfuncs'),
            (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'[^{$"\\]+', String.Double),
            (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
            (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
             String.Interpol),
            (r'(\{\$\{)(.*?)(\}\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\{)(\$.*?)(\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\$\{)(\S+)(\})',
             bygroups(String.Interpol, Name.Variable, String.Interpol)),
            (r'[${\\]', String.Double)
        ],
    }

    def __init__(self, **options):
        self.funcnamehighlighting = get_bool_opt(
            options, 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(
            options, 'disabledmodules', ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from typecode._vendor.pygments.lexers._php_builtins import MODULES
            for key, value in iteritems(MODULES):
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    def analyse_text(text):
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
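
The options documented in the class docstring are ordinary constructor arguments. A usage sketch (standard Pygments API; the module list passed to `disabledmodules` is illustrative):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import PhpLexer

code = "echo strtoupper($greeting);"

# startinline=True drops the <?php requirement; disabledmodules keeps
# the listed modules' builtins from being highlighted as Name.Builtin.
lexer = PhpLexer(startinline=True, disabledmodules=['unknown', 'Zip'])
print(highlight(code, lexer, TerminalFormatter()))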
Example #17
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human-readable serialization format that's used to represent LLVM's
    machine-specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes whose contents we don't highlight
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
Example #18
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.
        """

        tokens = {
            'statements': [
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
                 String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                (words(('@selector', '@private', '@protected', '@public',
                        '@encode', '@synchronized', '@try', '@throw', '@catch',
                        '@finally', '@end', '@property', '@synthesize',
                        '__bridge', '__bridge_transfer', '__autoreleasing',
                        '__block', '__weak', '__strong', 'weak', 'strong',
                        'copy', 'retain', 'assign', 'unsafe_unretained',
                        'atomic', 'nonatomic', 'readonly', 'readwrite',
                        'setter', 'getter', 'typeof', 'in', 'out', 'inout',
                        'release', 'class', '@dynamic', '@optional',
                        '@required', '@autoreleasepool', '@import'),
                       suffix=r'\b'), Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'),
                       suffix=r'\b'), Keyword.Type),
                (r'@(true|false|YES|NO)\n', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b',
                 Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                inherit,
            ],
            'oc_classname': [
                # interface definition that inherits
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            'oc_forward_classname':
            [(r'([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text),
              'oc_forward_classname'),
             (r'([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class,
                                                     Text), '#pop')],
            'oc_ivars': [
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (
                    r'^([-+])(\s*)'  # method marker
                    r'(\(.*?\))?(\s*)'  # return type
                    r'([a-zA-Z$_][\w$]*:?)',  # begin of method name
                    bygroups(Punctuation, Text, using(this), Text,
                             Name.Function),
                    'method'),
                inherit,
            ],
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            from typecode._vendor.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
Example #19
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #20
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
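
The faux dot is the detail worth dwelling on: it lets one rule span physically wrapped lines. A standalone re sketch (not part of the lexer) showing a single match covering a three-line Haml run:

import re

# " |\n" counts as an ordinary character as long as the following
# line also contains " |", i.e. is still part of the wrapped run.
_dot = r'(?: \|\n(?=.* \|)|.)'

wrapped = "= link_to 'very' + |\n  'long' + |\n  'call' |\n"

m = re.match('([&!]?[=~])(' + _dot + r'*\n)', wrapped)
print(repr(m.group(2)))  # one match spanning all three physical lines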
Example #21
class CypherLexer(RegexLexer):
    """
    For `Cypher Query Language
    <https://neo4j.com/docs/developer-manual/3.3/cypher/>`_

    For the Cypher version in Neo4j 3.3

    .. versionadded:: 2.0
    """
    name = 'Cypher'
    aliases = ['cypher']
    filenames = ['*.cyp', '*.cypher']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            include('comment'),
            include('keywords'),
            include('clauses'),
            include('relations'),
            include('strings'),
            include('whitespace'),
            include('barewords'),
        ],
        'comment': [
            (r'^.*//.*\n', Comment.Single),
        ],
        'keywords': [
            (r'(create|order|match|limit|set|skip|start|return|with|where|'
             r'delete|foreach|not|by|true|false)\b', Keyword),
        ],
        'clauses': [
            # based on https://neo4j.com/docs/cypher-refcard/3.3/
            (r'(all|any|as|asc|ascending|assert|call|case|create|'
             r'create\s+index|create\s+unique|delete|desc|descending|'
             r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|'
             r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|'
             r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|'
             r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|'
             r'remove|return|set|skip|single|start|starts\s+with|then|union|'
             r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|'
             r'with)\b', Keyword),
        ],
        'relations': [
            (r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)),
            (r'(<-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'(-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'-->|<--|\[|\]', Operator),
            (r'<|>|<>|=|<=|=>|\(|\)|\||:|,|;', Punctuation),
            (r'[.*{}]', Punctuation),
        ],
        'strings': [
            (r'"(?:\\[tbnrf\'"\\]|[^\\"])*"', String),
            (r'`(?:``|[^`])+`', Name.Variable),
        ],
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'barewords': [
            (r'[a-z]\w*', Name),
            (r'\d+', Number),
        ],
    }
Example #22
class VCLLexer(RegexLexer):
    """
    For Varnish Configuration Language (VCL).

    .. versionadded:: 2.2
    """
    name = 'VCL'
    aliases = ['vcl']
    filenames = ['*.vcl']
    mimetypes = ['text/x-vclsrc']

    def analyse_text(text):
        # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
        # that this is VCL
        if text.startswith('vcl 4.0;'):
            return 1.0
        # Skip over comments and blank lines
        # This is accurate enough that returning 0.9 is reasonable.
        # Almost no VCL files start without some comments.
        elif '\nvcl 4.0;' in text[:1000]:
            return 0.9

    tokens = {
        'probe': [
            include('whitespace'),
            include('comments'),
            (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'acl': [
            include('whitespace'),
            include('comments'),
            (r'[!/]+', Operator),
            (r';', Punctuation),
            (r'\d+', Number),
            (r'\}', Punctuation, '#pop'),
        ],
        'backend': [
            include('whitespace'),
            (r'(\.probe)(\s*=\s*)(\w+)(;)',
             bygroups(Name.Attribute, Operator, Name.Variable.Global,
                      Punctuation)),
            (r'(\.probe)(\s*=\s*)(\{)',
             bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
            (r'(\.\w+\b)(\s*=\s*)([^;\s]*)(\s*;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'statements': [
            (r'(\d\.)?\d+[sdwhmy]', Literal.Date),
            (r'(\d\.)?\d+ms', Literal.Date),
            (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
             r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
             r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
            (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
             r'miss|fetch|restart)\b', Name.Constant),
            (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b',
             Name.Variable),
            (words(
                ('obj.status', 'req.hash_always_miss', 'beresp.backend',
                 'req.esi_level', 'req.can_gzip', 'beresp.ttl',
                 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity',
                 'req.hash_ignore_busy', 'obj.reason', 'req.xid',
                 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age',
                 'local.ip', 'beresp.uncacheable', 'req.method',
                 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts',
                 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid',
                 'bereq.between_bytes_timeout', 'req.esi',
                 'bereq.first_byte_timeout', 'bereq.method',
                 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status',
                 'beresp.do_gunzip', 'beresp.storage_hint',
                 'resp.is_streaming', 'beresp.do_stream', 'req_top.method',
                 'bereq.backend', 'beresp.backend.name', 'beresp.status',
                 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason',
                 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi',
                 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname',
                 'remote.ip', 'req.backend_hint', 'server.identity',
                 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip',
                 'bereq.uncacheable'),
                suffix=r'\b'), Name.Variable),
            (r'[!%&+*\-,/<.}{>=|~]+', Operator),
            (r'[();]', Punctuation),
            (r'[,]+', Punctuation),
            (words(('hash_data', 'regsub', 'regsuball', 'if', 'else', 'elsif',
                    'elif', 'synth', 'synthetic', 'ban', 'return', 'set',
                    'unset', 'import', 'include', 'new', 'rollback', 'call'),
                   suffix=r'\b'), Keyword),
            (r'storage\.\w+\.\w+\b', Name.Variable),
            (words(('true', 'false')), Name.Builtin),
            (r'\d+\b', Number),
            (r'(backend)(\s+\w+)(\s*\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
            (r'(probe\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
            (r'(acl\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
            (r'(vcl )(4.0)(;)$',
             bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
            (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
             bygroups(Keyword, Name.Function, Punctuation)),
            (r'([a-zA-Z_]\w*)'
             r'(\.)'
             r'([a-zA-Z_]\w*)'
             r'(\s*\(.*\))',
             bygroups(Name.Function, Punctuation, Name.Function, using(this))),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'[^"\n]+', String),  # all other characters
        ],
        'multistring': [
            (r'[^"}]', String),
            (r'"\}', String, '#pop'),
            (r'["}]', String),
        ],
        'whitespace': [
            (r'L?"', String, 'string'),
            (r'\{"', String, 'multistring'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('statements'),
            (r'\s+', Text),
        ],
    }
Example #23
class VhdlLexer(RegexLexer):
    """
    For VHDL source code.

    .. versionadded:: 1.5
    """
    name = 'vhdl'
    aliases = ['vhdl']
    filenames = ['*.vhdl', '*.vhd']
    mimetypes = ['text/x-vhdl']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'--.*?$', Comment.Single),
            (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r"'[a-z_]\w*", Name.Attribute),
            (r'[()\[\],.;\']', Punctuation),
            (r'"[^\n\\"]*"', String),
            (r'(library)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
            (r'(use)(\s+)([a-z_][\w.]*\.)(all)',
             bygroups(Keyword, Text, Name.Namespace, Keyword)),
            (r'(use)(\s+)([a-z_][\w.]*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(std|ieee)(\.[a-z_]\w*)',
             bygroups(Name.Namespace, Name.Namespace)),
            (words(('std', 'ieee', 'work'), suffix=r'\b'), Name.Namespace),
            (r'(entity|component)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Class)),
            (r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)'
             r'(of)(\s+)([a-z_]\w*)(\s+)(is)',
             bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
                      Name.Class, Text, Keyword)),
            (r'([a-z_]\w*)(:)(\s+)(process|for)',
             bygroups(Name.Class, Operator, Text, Keyword)),
            (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),
            include('types'),
            include('keywords'),
            include('numbers'),
            (r'[a-z_]\w*', Name),
        ],
        'endblock': [
            include('keywords'),
            (r'[a-z_]\w*', Name.Class),
            (r'(\s+)', Text),
            (r';', Punctuation, '#pop'),
        ],
        'types': [
            (words(('boolean', 'bit', 'character', 'severity_level', 'integer',
                    'time', 'delay_length', 'natural', 'positive', 'string',
                    'bit_vector', 'file_open_kind', 'file_open_status',
                    'std_ulogic', 'std_ulogic_vector', 'std_logic',
                    'std_logic_vector', 'signed', 'unsigned'),
                   suffix=r'\b'), Keyword.Type),
        ],
        'keywords': [
            (words(
                ('abs', 'access', 'after', 'alias', 'all', 'and',
                 'architecture', 'array', 'assert', 'attribute', 'begin',
                 'block', 'body', 'buffer', 'bus', 'case', 'component',
                 'configuration', 'constant', 'disconnect', 'downto', 'else',
                 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function',
                 'generate', 'generic', 'group', 'guarded', 'if', 'impure',
                 'in', 'inertial', 'inout', 'is', 'label', 'library',
                 'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new',
                 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or',
                 'others', 'out', 'package', 'port', 'postponed', 'procedure',
                 'process', 'pure', 'range', 'record', 'register', 'reject',
                 'rem', 'return', 'rol', 'ror', 'select', 'severity', 'signal',
                 'shared', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to',
                 'transport', 'type', 'units', 'until', 'use', 'variable',
                 'wait', 'when', 'while', 'with', 'xnor', 'xor'),
                suffix=r'\b'), Keyword),
        ],
        'numbers': [
            (r'\d{1,2}#[0-9a-f_]+#?', Number.Integer),
            (r'\d+', Number.Integer),
            (r'(\d+\.\d*|\.\d+|\d+)E[+-]?\d+', Number.Float),
            (r'X"[0-9a-f_]+"', Number.Hex),
            (r'O"[0-7_]+"', Number.Oct),
            (r'B"[01_]+"', Number.Bin),
        ],
    }
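# --- Added usage sketch (not part of the original example) ---------------
# Run a small VHDL fragment through the lexer above and inspect the token
# stream; assumes the usual Pygments imports (re, words, bygroups, using,
# this, token types) from the surrounding module are in scope, as in the
# class definition itself.
_vhdl_demo = "library ieee;\nuse ieee.std_logic_1164.all;\n"
for _tok, _val in VhdlLexer().get_tokens(_vhdl_demo):
    print(_tok, repr(_val))
# Expected (roughly): Token.Keyword 'library', Token.Name.Namespace 'ieee', ...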
Example #24
class RPMSpecLexer(RegexLexer):
    """
    For RPM ``.spec`` files.

    .. versionadded:: 1.6
    """

    name = 'RPMSpec'
    aliases = ['spec']
    filenames = ['*.spec']
    mimetypes = ['text/x-rpm-spec']

    _directives = ('(?:package|prep|build|install|clean|check|pre[a-z]*|'
                   'post[a-z]*|trigger[a-z]*|files)')

    tokens = {
        'root': [
            (r'#.*\n', Comment),
            include('basic'),
        ],
        'description': [
            (r'^(%' + _directives + ')(.*)$',
             bygroups(Name.Decorator, Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'changelog': [
            (r'\*.*\n', Generic.Subheading),
            (r'^(%' + _directives + ')(.*)$',
             bygroups(Name.Decorator, Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            include('interpol'),
            (r'.', String.Double),
        ],
        'basic': [
            include('macro'),
            (r'(?i)^(Name|Version|Release|Epoch|Summary|Group|License|Packager|'
             r'Vendor|Icon|URL|Distribution|Prefix|Patch[0-9]*|Source[0-9]*|'
             r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Suggests|Provides|Conflicts|'
             r'Build[a-z]+|[a-z]+Arch|Auto[a-z]+)(:)(.*)$',
             bygroups(Generic.Heading, Punctuation, using(this))),
            (r'^%description', Name.Decorator, 'description'),
            (r'^%changelog', Name.Decorator, 'changelog'),
            (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text)),
            (r'%(attr|defattr|dir|doc(?:dir)?|setup|config(?:ure)?|'
             r'make(?:install)|ghost|patch[0-9]+|find_lang|exclude|verify)',
             Keyword),
            include('interpol'),
            (r"'.*?'", String.Single),
            (r'"', String.Double, 'string'),
            (r'.', Text),
        ],
        'macro': [
            (r'%define.*\n', Comment.Preproc),
            (r'%\{\!\?.*%define.*\}', Comment.Preproc),
            (r'(%(?:if(?:n?arch)?|else(?:if)?|endif))(.*)$',
             bygroups(Comment.Preproc, Text)),
        ],
        'interpol': [
            (r'%\{?__[a-z_]+\}?', Name.Function),
            (r'%\{?_([a-z_]+dir|[a-z_]+path|prefix)\}?', Keyword.Pseudo),
            (r'%\{\?\w+\}', Name.Variable),
            (r'\$\{?RPM_[A-Z0-9_]+\}?', Name.Variable.Global),
            (r'%\{[a-zA-Z]\w+\}', Keyword.Constant),
        ]
    }
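# --- Added usage sketch (not part of the original example) ---------------
# The 'basic' state tags preamble headers such as "Name:" as Generic.Heading
# and re-lexes the value with using(this), so macros like %{name} inside a
# value are still picked up by the 'interpol' rules. A quick smoke test:
_spec_demo = "Name: hello\nVersion: 1.0\nSource0: %{name}-1.0.tar.gz\n"
for _tok, _val in RPMSpecLexer().get_tokens(_spec_demo):
    print(_tok, repr(_val))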
Example #25
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = [
        '*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc',
        'vimrc', 'gvimrc'
    ]
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            (r'^([ \t:]*)(' + _python +
             r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),
            (r'^\s*".*', Comment),
            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(''|[^\n'])*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        from typecode._vendor.pygments.lexers._vim_builtins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate commands.  In fact,
        'ab[breviate]' is a good example: :ab, :abbre, and :abbreviate are
        all valid ways to call it, so rather than making really awful
        regexps like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w, ))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
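# --- Added illustration (not part of the original example) ---------------
# What is_in() buys over giant optional-group regexps: any prefix of a
# command that is at least as long as its mandatory short form matches.
# Assuming the vendored _vim_builtins import above resolves, this should
# print True, True, True, False:
_vim = VimLexer()
for _w in ('ab', 'abbre', 'abbreviate', 'notacommand'):
    print(_w, _vim.is_in(_w, _vim._cmd))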
Example #26
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml; see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
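# --- Added usage sketch (not part of the original example) ---------------
# PugLexer leans on helpers defined elsewhere in its module (_indentation,
# _starts_block, ScalaLexer), so the simplest way to try it is through the
# lexer registry of a standard Pygments installation rather than from this
# snippet alone:
from pygments.lexers import get_lexer_by_name
_pug = get_lexer_by_name('pug')
for _tok, _val in _pug.get_tokens("div.container\n  p Hello\n"):
    print(_tok, repr(_val))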
Example #27
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('asm', 'auto', 'break', 'case', 'const', 'continue',
                 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                 'if', 'register', 'restricted', 'return', 'sizeof', 'static',
                 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'),
                suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    stdlib_types = set(
        ('size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
         'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
         'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'))
    c99_types = set(
        ('_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
         'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
         'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
         'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
         'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
         'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
         'uintptr_t', 'intmax_t', 'uintmax_t'))
    linux_types = set(
        ('clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
         'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
         'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
         'timer_t', 'uid_t'))

    def __init__(self, **options):
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.platformhighlighting = get_bool_opt(options,
                                                 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
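# --- Added usage sketch (not part of the original example) ---------------
# CFamilyLexer is only a base class, but its options flow through concrete
# subclasses such as pygments.lexers.CLexer, so the type-name promotion in
# get_tokens_unprocessed() can be toggled per instance:
from pygments.lexers import CLexer
_c_src = "size_t n = 0;\n"
for _stdlib in (True, False):
    _first = next(iter(CLexer(stdlibhighlighting=_stdlib).get_tokens(_c_src)))
    print(_stdlib, _first)  # Keyword.Type vs plain Name for 'size_t'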
Example #28
class BatchLexer(RegexLexer):
    """
    Lexer for the DOS/Windows Batch file format.

    .. versionadded:: 0.7
    """
    name = 'Batchfile'
    aliases = ['bat', 'batch', 'dosbatch', 'winbatch']
    filenames = ['*.bat', '*.cmd']
    mimetypes = ['application/x-dos-batch']

    flags = re.MULTILINE | re.IGNORECASE

    _nl = r'\n\x1a'
    _punct = r'&<>|'
    _ws = r'\t\v\f\r ,;=\xa0'
    _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws)
    _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' %
                           (_nl, _ws, _nl, _punct))
    _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl)
    _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws
    _label = r'(?:(?:[^%s%s%s+:^]|\^[%s]?[\w\W])*)' % (_nl, _punct, _ws, _nl)
    _label_compound = (r'(?:(?:[^%s%s%s+:^)]|\^[%s]?[^)])*)' %
                       (_nl, _punct, _ws, _nl))
    _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator
    _opword = r'(?:equ|geq|gtr|leq|lss|neq)'
    _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl)
    _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|'
                 r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|'
                 r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|'
                 r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:'
                 r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' %
                 (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl))
    _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s])+)' % (_nl, _nl, _punct, _ws)
    _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s)])+)' % (_nl, _nl,
                                                                  _punct, _ws)
    _token = r'(?:[%s]+|%s)' % (_punct, _core_token)
    _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound)
    _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
               (_punct, _string, _variable, _core_token))

    def _make_begin_state(compound, _core_token=_core_token,
                          _core_token_compound=_core_token_compound,
                          _keyword_terminator=_keyword_terminator,
                          _nl=_nl, _punct=_punct, _string=_string,
                          _space=_space, _start_label=_start_label,
                          _stoken=_stoken, _token_terminator=_token_terminator,
                          _variable=_variable, _ws=_ws):
        rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                            ')' if compound else '')
        rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
        rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
        set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
        suffix = ''
        if compound:
            _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
            _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
            suffix = '/compound'
        return [
            ((r'\)', Punctuation, '#pop') if compound else
             (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
              Comment.Single)),
            (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
            (_space, using(this, state='text')),
            include('redirect%s' % suffix),
            (r'[%s]+' % _nl, Text),
            (r'\(', Punctuation, 'root/compound'),
            (r'@+', Punctuation),
            (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
             r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
             (_nl, _token_terminator, _space,
              _core_token_compound if compound else _core_token, _nl, _nl),
             bygroups(Keyword, using(this, state='text')),
             'follow%s' % suffix),
            (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
             (_keyword_terminator, rest, _nl, _nl, rest),
             bygroups(Keyword, using(this, state='text')),
             'follow%s' % suffix),
            (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                    'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                    'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                    'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                    'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                    'title', 'type', 'ver', 'verify', 'vol'),
                   suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
            (r'(call)(%s?)(:)' % _space,
             bygroups(Keyword, using(this, state='text'), Punctuation),
             'call%s' % suffix),
            (r'call%s' % _keyword_terminator, Keyword),
            (r'(for%s(?!\^))(%s)(/f%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'), Keyword),
             ('for/f', 'for')),
            (r'(for%s(?!\^))(%s)(/l%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'), Keyword),
             ('for/l', 'for')),
            (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
            (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
             bygroups(Keyword, using(this, state='text'), Punctuation),
             'label%s' % suffix),
            (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
             (_token_terminator, _space, _token_terminator, _space,
              _token_terminator, _space),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), Keyword,
                      using(this, state='text')), ('(?', 'if')),
            (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
             (_token_terminator, _space, _stoken, _keyword_terminator,
              rest_of_line_compound if compound else rest_of_line),
             Comment.Single, 'follow%s' % suffix),
            (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
             bygroups(Keyword, using(this, state='text'), Keyword),
             'arithmetic%s' % suffix),
            (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
             r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
             (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
              ')' if compound else '', _nl, _nl),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), using(this, state='variable'),
                      Punctuation),
             'follow%s' % suffix),
            default('follow%s' % suffix)
        ]

    def _make_follow_state(compound, _label=_label,
                           _label_compound=_label_compound, _nl=_nl,
                           _space=_space, _start_label=_start_label,
                           _token=_token, _token_compound=_token_compound,
                           _ws=_ws):
        suffix = '/compound' if compound else ''
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [
            (r'%s([%s]*)(%s)(.*)' %
             (_start_label, _ws, _label_compound if compound else _label),
             bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
            include('redirect%s' % suffix),
            (r'(?=[%s])' % _nl, Text, '#pop'),
            (r'\|\|?|&&?', Punctuation, '#pop'),
            include('text')
        ]
        return state

    def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct,
                               _string=_string, _variable=_variable, _ws=_ws):
        op = r'=+\-*/!~'
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [
            (r'0[0-7]+', Number.Oct),
            (r'0x[\da-f]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'[(),]+', Punctuation),
            (r'([%s]|%%|\^\^)+' % op, Operator),
            (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
             (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws,
              r'[^)]' if compound else r'[\w\W]'),
             using(this, state='variable')),
            (r'(?=[\x00|&])', Text, '#pop'),
            include('follow')
        ]
        return state

    def _make_call_state(compound, _label=_label,
                         _label_compound=_label_compound):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append((r'(:?)(%s)' % (_label_compound if compound else _label),
                      bygroups(Punctuation, Name.Label), '#pop'))
        return state

    def _make_label_state(compound, _label=_label,
                          _label_compound=_label_compound, _nl=_nl,
                          _punct=_punct, _string=_string, _variable=_variable):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append((r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' %
                      (_label_compound if compound else _label, _string,
                       _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl,
                       _punct, r')' if compound else ''),
                      bygroups(Name.Label, Comment.Single), '#pop'))
        return state

    def _make_redirect_state(compound,
                             _core_token_compound=_core_token_compound,
                             _nl=_nl, _punct=_punct, _stoken=_stoken,
                             _string=_string, _space=_space,
                             _variable=_variable, _ws=_ws):
        stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                           (_punct, _string, _variable, _core_token_compound))
        return [
            (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' %
             (_nl, _ws, _nl, _ws),
             bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
            (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
             (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken),
             bygroups(Number.Integer, Punctuation, using(this, state='text')))
        ]

    tokens = {
        'root': _make_begin_state(False),
        'follow': _make_follow_state(False),
        'arithmetic': _make_arithmetic_state(False),
        'call': _make_call_state(False),
        'label': _make_label_state(False),
        'redirect': _make_redirect_state(False),
        'root/compound': _make_begin_state(True),
        'follow/compound': _make_follow_state(True),
        'arithmetic/compound': _make_arithmetic_state(True),
        'call/compound': _make_call_state(True),
        'label/compound': _make_label_state(True),
        'redirect/compound': _make_redirect_state(True),
        'variable-or-escape': [
            (_variable, Name.Variable),
            (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (_variable, Name.Variable),
            (r'\^!|%%', String.Escape),
            (r'[^"%%^%s]+|[%%^]' % _nl, String.Double),
            default('#pop')
        ],
        'sqstring': [
            include('variable-or-escape'),
            (r'[^%]+|%', String.Single)
        ],
        'bqstring': [
            include('variable-or-escape'),
            (r'[^%]+|%', String.Backtick)
        ],
        'text': [
            (r'"', String.Double, 'string'),
            include('variable-or-escape'),
            (r'[^"%%^%s%s%s\d)]+|.' % (_nl, _punct, _ws), Text)
        ],
        'variable': [
            (r'"', String.Double, 'string'),
            include('variable-or-escape'),
            (r'[^"%%^%s]+|.' % _nl, Name.Variable)
        ],
        'for': [
            (r'(%s)(in)(%s)(\()' % (_space, _space),
             bygroups(using(this, state='text'), Keyword,
                      using(this, state='text'), Punctuation), '#pop'),
            include('follow')
        ],
        'for2': [
            (r'\)', Punctuation),
            (r'(%s)(do%s)' % (_space, _token_terminator),
             bygroups(using(this, state='text'), Keyword), '#pop'),
            (r'[%s]+' % _nl, Text),
            include('follow')
        ],
        'for/f': [
            (r'(")((?:%s|[^"])*?")([%s%s]*)(\))' % (_variable, _nl, _ws),
             bygroups(String.Double, using(this, state='string'), Text,
                      Punctuation)),
            (r'"', String.Double, ('#pop', 'for2', 'string')),
            (r"('(?:%%%%|%s|[\w\W])*?')([%s%s]*)(\))" % (_variable, _nl, _ws),
             bygroups(using(this, state='sqstring'), Text, Punctuation)),
            (r'(`(?:%%%%|%s|[\w\W])*?`)([%s%s]*)(\))' % (_variable, _nl, _ws),
             bygroups(using(this, state='bqstring'), Text, Punctuation)),
            include('for2')
        ],
        'for/l': [
            (r'-?\d+', Number.Integer),
            include('for2')
        ],
        'if': [
            (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' %
             (_token_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Number.Integer), '#pop'),
            (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text'),
                      using(this, state='variable')), '#pop'),
            (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text')), '#pop'),
            (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number),
             bygroups(using(this, state='arithmetic'), Operator.Word,
                      using(this, state='arithmetic')), '#pop'),
            (_stoken, using(this, state='text'), ('#pop', 'if2')),
        ],
        'if2': [
            (r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken),
             bygroups(using(this, state='text'), Operator,
                      using(this, state='text')), '#pop'),
            (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken),
             bygroups(using(this, state='text'), Operator.Word,
                      using(this, state='text')), '#pop')
        ],
        '(?': [
            (_space, using(this, state='text')),
            (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')),
            default('#pop')
        ],
        'else?': [
            (_space, using(this, state='text')),
            (r'else%s' % _token_terminator, Keyword, '#pop'),
            default('#pop')
        ]
    }
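# --- Added note and smoke test (not part of the original example) --------
# Every state exists twice: the plain version and a '/compound' twin built
# by the same _make_*_state factory with compound=True, which treats a bare
# ')' as the end of a parenthesised block. A hedged smoke test:
_bat_demo = 'if exist foo.txt (\n  echo found\n) else (\n  echo missing\n)\n'
for _tok, _val in BatchLexer().get_tokens(_bat_demo):
    print(_tok, repr(_val))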
Example #29
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS are highlighted
    by the appropriate lexers.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>;
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)', bygroups(Punctuation, Text,
                                             Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator,
                                               Text), 'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text,
                                       Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if html_doctype_matches(text):
            return 0.5
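# --- Added usage sketch (not part of the original example) ---------------
# analyse_text() feeds Pygments' guess_lexer(): a document with an HTML
# doctype scores 0.5 here and will typically win the guess when no filename
# is available. Assumes a standard Pygments installation:
from pygments.lexers import guess_lexer
print(guess_lexer('<!DOCTYPE html>\n<html><body>hi</body></html>').name)
# likely prints: HTML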