Example #1
0
 def _make_redirect_state(compound,
                          _core_token_compound=_core_token_compound,
                          _nl=_nl, _punct=_punct, _stoken=_stoken,
                          _string=_string, _space=_space,
                          _variable=_variable, _ws=_ws):
     """Build the lexer state handling redirection operators.

     ``compound`` selects the token pattern used inside compound
     (parenthesized) statements.  The keyword parameters bind the
     module-level regex fragments as defaults so the helper is
     self-contained.
     """
     # Token pattern valid inside a compound statement.
     stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                        (_punct, _string, _variable, _core_token_compound))
     token = stoken_compound if compound else _stoken
     # Handle duplication: e.g. '2>&1'.
     duplicate_handle = (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' %
                         (_nl, _ws, _nl, _ws),
                         bygroups(Number.Integer, Punctuation, Text,
                                  Number.Integer))
     # Redirection to/from a file: '>', '>>' and '<'.
     file_redirect = (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
                      (_nl, _ws, _nl, _space, token),
                      bygroups(Number.Integer, Punctuation,
                               using(this, state='text')))
     return [duplicate_handle, file_redirect]
def _objdump_lexer_tokens(asm_lexer):
    """
    Common objdump lexer tokens to wrap an ASM lexer.
    """
    # NOTE(review): this deliberately matches any alphanumeric character,
    # not only hex digits -- confirm against objdump output before changing.
    hex_re = r'[0-9A-Za-z]'
    addr = '(' + hex_re + '+)'                    # captured address field
    offset_label = '(' + hex_re + r'+:)'          # 'addr:' line prefix
    opcodes = '((?:' + hex_re + hex_re + ' )+)'   # raw opcode byte pairs
    return {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
                bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
                bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            (addr + '( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            (addr + '( )(<)(.*?)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation)),
            # Code line with disassembled instructions
            ('( *)' + offset_label + r'(\t)' + opcodes +
             r'( *\t)([a-zA-Z].*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text,
                         using(asm_lexer))),
            # Code line with ascii
            ('( *)' + offset_label + r'(\t)' + opcodes + '( *)(.*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction
            ('( *)' + offset_label + r'(\t)' + opcodes + '$',
                bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes
            (r'\t\.\.\.$', Text),
            # Relocation line
            # (With offset)
            (r'(\t\t\t)' + offset_label + r'( )([^\t]+)(\t)(.*?)([-+])' +
             '(0x' + hex_re + '+)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            (r'(\t\t\t)' + offset_label + r'( )([^\t]+)(\t)(.*?)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant)),
            (r'[^\n]+\n', Other)
        ]
    }
Example #3
0
 def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct,
                            _string=_string, _variable=_variable, _ws=_ws):
     """Build the lexer state for arithmetic (``set /a``) expressions.

     When ``compound`` is true an extra rule pops the state at a closing
     parenthesis.  The keyword parameters bind the module-level regex
     fragments as defaults.
     """
     operators = r'=+\-*/!~'
     rules = [
         (r'0[0-7]+', Number.Oct),
         (r'0x[\da-f]+', Number.Hex),
         (r'\d+', Number.Integer),
         (r'[(),]+', Punctuation),
         (r'([%s]|%%|\^\^)+' % operators, Operator),
         (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
          (_string, _variable, _nl, operators, _nl, _punct, _ws, _nl, _ws,
           r'[^)]' if compound else r'[\w\W]'),
          using(this, state='variable')),
         (r'(?=[\x00|&])', Text, '#pop'),
         include('follow')
     ]
     if compound:
         # Inside a compound statement, a ')' ends the expression.
         rules.insert(0, (r'(?=\))', Text, '#pop'))
     return rules
Example #4
0
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

# Extra token rules layered on top of a Python lexer for IPython input.
ipython_tokens = [
  # Magic command with arguments, e.g. '%timeit x = 1'; the arguments are
  # highlighted with the Bash lexer.  The argument group is '(.*)' -- the
  # original '(\.*)' matched only a run of literal dots, so real magic
  # arguments were never captured.
  (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                      using(BashLexer), Text)),
  # Magic command without arguments, e.g. '%pwd'.
  (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
  # System shell escape, e.g. '!ls', highlighted with the Bash lexer.
  (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.
Example #5
0
 def _make_begin_state(compound, _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl, _punct=_punct, _string=_string,
                       _space=_space, _start_label=_start_label,
                       _stoken=_stoken, _token_terminator=_token_terminator,
                       _variable=_variable, _ws=_ws):
     """Build the statement-start lexer state for batch files.

     ``compound`` selects the variant used inside parenthesized compound
     statements, where an unescaped ``)`` also terminates keywords and
     tokens.  The keyword parameters bind module-level regex fragments
     as defaults so the helper is self-contained.
     """
     # Text allowed in a 'goto' target, up to a terminator.
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     # Remainder of a line, honoring '^' escapes; the compound variant
     # additionally stops at an unescaped ')'.
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     # Horizontal whitespace (possibly '^'-escaped), captured as a group.
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         # Inside '(...)' a lookahead ')' also terminates keywords/tokens,
         # and the '/compound' variants of the sub-states are entered.
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     return [
         # ')' closes the compound state; at top level a line starting
         # with ')' is treated as a comment.
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         # A label definition is handled by the 'follow' state.
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix),
         (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'),
         (r'@+', Punctuation),
         # 'for', 'if' and 'rem' immediately followed by the '/?' switch.
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         # 'goto' followed by the '/?' switch.
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         # Built-in commands with no special argument handling.
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         # 'call :label' jumps to a label in the current script.
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         # 'for /f' and 'for /l' have dedicated option states; a plain
         # 'for' goes straight to 'for2'.
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         # 'goto' optionally followed by ':' and a label.
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'label%s' % suffix),
         # 'if' with optional '/i' (case-insensitive) and 'not' prefixes.
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         # 'rem' comments swallow the rest of the line (stopping at ')'
         # inside a compound statement).
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         # 'set /a' switches to the arithmetic-expression state.
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'), Keyword),
          'arithmetic%s' % suffix),
         # Plain 'set' (optionally '/p'): variable name up to '='.
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation),
          'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
Example #6
0
class NedLexer(RegexLexer):
    """Lexer for NED network-description files (``*.ned``)."""

    name = 'ned'
    filenames = ['*.ned']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(true|false)\b', Name.Builtin),
            (r'(<-->|-->|<--|\.\.)', Keyword),
            (r'(bool|double|int|xml)\b', Keyword.Type),
            (r'(inout|input|output)\b', Keyword.Type),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(("channel", "channelinterface", "simple", "module",
                    "network", "moduleinterface"),
                   suffix=r'\b'), Keyword),
            (words(
                ("parameters", "gates", "types", "submodules", "connections"),
                suffix=r'\b'), Keyword),
            (words(("volatile", "allowunconnected", "extends", "for", "if",
                    "import", "like", "package", "property"),
                   suffix=r'\b'), Keyword),
            (words(("sizeof", "const", "default", "ask", "this", "index",
                    "typename", "xmldoc"),
                   suffix=r'\b'), Keyword),
            (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta",
                    "binomial", "cauchy", "ceil", "chi_square", "cos",
                    "erlang_k", "exp", "exponential", "fabs", "floor", "fmod",
                    "gamma_d", "genk_exponential", "genk_intuniform",
                    "genk_normal", "genk_truncnormal", "genk_uniform",
                    "geometric", "hypergeometric", "hypot", "intuniform",
                    "log", "log10", "lognormal", "max", "min", "negbinomial",
                    "normal", "pareto_shifted", "poisson", "pow", "simTime",
                    "sin", "sqrt", "student_t", "tan", "triang", "truncnormal",
                    "uniform", "weibull", "xml", "xmldoc"),
                   suffix=r'\b'), Name.Builtin),
            # Raw strings: '\w' in a plain string literal is an invalid
            # escape sequence (a warning on modern Python); the regex text
            # itself is unchanged.
            (r'@[a-zA-Z_]\w*', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # NOTE(review): the two function rules below look inherited from
            # a C-style lexer template -- confirm they are intended for NED.
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ]
    }
Example #7
0
    from pygments.lexer import using, bygroups
    from pygments.token import Punctuation, Other, Generic
    from pygments.styles import get_style_by_name
    # Pygments styles used to render SQL and JSON output respectively.
    SQL_STYLE = get_style_by_name('colorful')
    JSON_STYLE = get_style_by_name('tango')

    class InsideStringJavascriptLexer(JavascriptLexer):
        """Lex JavaScript source that is embedded inside a quoted string."""

        def get_tokens_unprocessed(self, text, stack=('root',)):
            # Strip the surrounding quote characters, then turn escaped
            # newlines into real ones before delegating to the base lexer.
            inner = text[1:-1].replace('\\n', '\n')
            return JavascriptLexer.get_tokens_unprocessed(self, inner, stack)

    mongo_tokens = {}
    mongo_tokens.update(JavascriptLexer.tokens)
    # Copy the 'root' rule list before mutating it: dict.update() copied only
    # a reference, so insert() would otherwise also modify
    # JavascriptLexer.tokens['root'] for every other user of that lexer.
    mongo_tokens['root'] = list(mongo_tokens['root'])
    # Highlight double-quoted 'function ...' strings as JavaScript source.
    mongo_tokens['root'].insert(0, (r'"(function(\\\\|\\"|[^"])*)"',
        using(InsideStringJavascriptLexer)))

    class MongoLexer(JavascriptLexer):
        """JavascriptLexer variant whose 'root' state additionally lexes
        quoted ``"function ..."`` strings as embedded JavaScript."""
        tokens = mongo_tokens

    HAVE_PYGMENTS = True
except ImportError:  # pragma: no cover
    HAVE_PYGMENTS = False


def get_root_controller():
    """Return the root controller of the application."""
    name = config['application_root_module']
    # Import the configured module only if it is not already loaded.
    if name not in sys.modules:
        __import__(name)
    root_module = sys.modules[name]
    return root_module.RootController
Example #8
0
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      .. versionadded:: 0.8
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none':
        # Raw string: '\w' in a plain string literal is an invalid escape
        # sequence (a warning on modern Python); the value is unchanged.
        r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    tokens = {}
    token_variants = True

    # One token set is generated per unicode level; __init__ selects the
    # variant requested by the 'unicodelevel' option.
    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
                 r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|async|await|base|break|by|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|let|lock|new|null|on|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
                 r'descending|from|group|into|orderby|select|thenby|where|'
                 r'join|equals)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
                 r'sbyte|short|string|uint|ulong|ushort|var)\b\??',
                 Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'),
            ]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
Example #9
0
class ActionScript3Lexer(RegexLexer):
    """
    For ActionScript 3 source code.

    .. versionadded:: 0.11
    """

    name = 'ActionScript 3'
    aliases = ['as3', 'actionscript3']
    filenames = ['*.as']
    mimetypes = [
        'application/x-actionscript3', 'text/x-actionscript3',
        'text/actionscript3'
    ]

    # Plain identifier, and an identifier with an optional generic type
    # parameter (e.g. 'Vector.<int>').
    identifier = r'[$a-zA-Z_]\w*'
    typeidentifier = identifier + r'(?:\.<\w+>)?'

    flags = re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'\s+', Text),
            # Function definition: 'function name(' -> parameter state.
            (r'(function\s+)(' + identifier + r')(\s*)(\()',
             bygroups(Keyword.Declaration, Name.Function, Text,
                      Operator), 'funcparams'),
            # Typed declaration: 'var name : Type' / 'const name : Type'.
            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' +
             typeidentifier + r')',
             bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text,
                      Keyword.Type)),
            (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)',
             bygroups(Keyword, Text, Name.Namespace, Text)),
            (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()',
             bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # Regex literal with optional flags.
            (r'/(\\\\|\\[^\\]|[^\\\n])*/[gisx]*', String.Regex),
            (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)),
            (r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
             r'throw|try|catch|with|new|typeof|arguments|instanceof|this|'
             r'switch|import|include|as|is)\b', Keyword),
            (r'(class|public|final|internal|native|override|private|protected|'
             r'static|import|extends|implements|interface|intrinsic|return|super|'
             r'dynamic|function|const|get|namespace|package|set)\b',
             Keyword.Declaration),
            (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b',
             Keyword.Constant),
            (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|'
             r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|'
             r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|'
             r'unescape)\b', Name.Function),
            (identifier, Name),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            # NOTE(review): flags do not include re.IGNORECASE, so this rule
            # matches only lowercase hex digits (0xff but not 0xFF) --
            # confirm whether that is intended.
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'[~^*!%&<>|+=:;,/?\\{}\[\]().-]+', Operator),
        ],
        # Parameter list: '...rest' and 'name : Type' entries.
        'funcparams': [(r'\s+', Text),
                       (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' +
                        typeidentifier + r'|\*)(\s*)',
                        bygroups(Text, Punctuation, Name, Text, Operator, Text,
                                 Keyword.Type, Text), 'defval'),
                       (r'\)', Operator, 'type')],
        # Optional return type annotation after the parameter list.
        'type': [(r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)',
                  bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'),
                 (r'\s+', Text, '#pop:2'),
                 default('#pop:2')],
        # Optional default value for a parameter.
        'defval': [(r'(=)(\s*)([^(),]+)(\s*)(,?)',
                    bygroups(Operator, Text, using(this), Text,
                             Operator), '#pop'), (r',', Operator, '#pop'),
                   default('#pop')]
    }

    def analyse_text(text):
        # Heuristic: 'name : Type' declarations hint at ActionScript 3.
        if re.match(r'\w+\s*:\s*\w', text):
            return 0.3
        return 0
Example #10
0
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_phpbuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._phpbuiltins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]']
    mimetypes = ['text/x-php']

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other),
                 (r'<', Other)],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            # Heredoc strings: <<<NAME ... NAME;
            (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            (r'//.*?\n', Comment),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment),
            (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/?@-]+', Operator),
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|__FILE__|'
             r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|'
             r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this)\b', Keyword),
            # Raw string: in the original non-raw literal '\b' was a literal
            # BACKSPACE character, so this rule could never match real code.
            (r'(true|false|null)\b', Keyword.Constant),
            (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable),
            (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
            # Fixed exponent part '[eE][+-]?[0-9]+': the original
            # '(eE[+-][0-9])?' required the literal text 'eE', a mandatory
            # sign and exactly one digit, so no real exponent ever matched.
            (r"[0-9](\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?|"
             r"0[xX][0-9a-fA-F]+[Ll]?", Number),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'classname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')],
        'functionname': [(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')],
        'string':
        [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double),
         (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape),
         (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?',
          String.Interpol),
         (r'(\{\$\{)(.*?)(\}\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\{)(\$.*?)(\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\$\{)(\S+)(\})',
          bygroups(String.Interpol, Name.Variable, String.Interpol)),
         (r'[${\\]+', String.Double)],
    }

    def __init__(self, **options):
        self.funcnamehighlighting = get_bool_opt(options,
                                                 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(options, 'disabledmodules',
                                            ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from pygments.lexers._phpbuiltins import MODULES
            # items() works on both Python 2 and 3; the original
            # iteritems() is Python-2-only and raises AttributeError on 3.
            for key, value in MODULES.items():
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text, stack):
            # Promote known builtin function names to Name.Builtin.
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    def analyse_text(text):
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        if '?>' in text:
            rv += 0.1
        return rv
Example #11
0
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            # FIX: removed a duplicate (r'[^\S\n]+', Text) rule here -- it was
            # unreachable, being identical to the first rule of this state.
            (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'task|protected', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)',
             bygroups(Keyword.Reserved, Text, Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            # Predefined Ada types (Standard / Ada.* packages).
            (words(
                ('Address', 'Byte', 'Boolean', 'Character', 'Controlled',
                 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type',
                 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer',
                 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive',
                 'Reference_Type', 'Short_Float', 'Short_Integer',
                 'Short_Short_Float', 'Short_Short_Integer', 'String',
                 'Wide_Character', 'Wide_String'),
                suffix=r'\b'), Keyword.Type),
            # FIX: 'or(\s+else)?' -- the trailing '?' was missing, so a lone
            # 'or' (without 'else') was never tokenized as Operator.Word,
            # unlike 'and', which matches Pygments upstream behavior.
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'generic|private', Keyword.Declaration),
            (r'package', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            # Remaining reserved words; must come after the more specific
            # rules above so e.g. 'type'/'package' take their own states.
            (words(
                ('abort', 'abs', 'abstract', 'accept', 'access', 'aliased',
                 'all', 'array', 'at', 'begin', 'body', 'case', 'constant',
                 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif',
                 'end', 'entry', 'exception', 'exit', 'interface', 'for',
                 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of',
                 'or', 'others', 'out', 'overriding', 'pragma', 'protected',
                 'raise', 'range', 'record', 'renames', 'requeue', 'return',
                 'reverse', 'select', 'separate', 'some', 'subtype',
                 'synchronized', 'task', 'tagged', 'terminate', 'then', 'type',
                 'until', 'when', 'while', 'xor'),
                prefix=r'\b',
                suffix=r'\b'), Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            # Based literals like 16#FF#, then floats, then plain integers.
            (r'[0-9_]+#[0-9a-f_\.]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            # Tick attributes such as X'First, X'Range.
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        'end': [
            ('(if|case|record|loop|select)', Keyword.Reserved),
            (r'"[^"]+"|[\w.]+', Name.Function),
            (r'\s+', Text),
            (';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'with|and|use', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            # FIX (cosmetic): '("record_def")' had misleading parentheses
            # around a plain string; a real tuple would need a comma.
            (r'record\b', Keyword.Reserved, 'record_def'),
            (r'(null record)(;)', bygroups(Keyword.Reserved,
                                           Punctuation), '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)',
             bygroups(Keyword.Type, Text, Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            include('root'),
        ],
        'package': [
            ('body', Keyword.Declaration),
            (r'is\s+new|renames', Keyword.Reserved),
            ('is', Keyword.Reserved, '#pop'),
            (';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }
Example #12
0
class FantomLexer(RegexLexer):
    """
    For Fantom source code.

    .. versionadded:: 1.5
    """
    name = 'Fantom'
    aliases = ['fan']
    filenames = ['*.fan']
    mimetypes = ['application/x-fantom']

    # often used regexes
    def s(tmpl):
        # Substitute the shared sub-patterns ($pod, $eos, $id, $type) into a
        # regex template.  FIX: parameter renamed from ``str``, which
        # shadowed the builtin; behavior is unchanged.
        return Template(tmpl).substitute(
            dict(
                pod=r'[\"\w\.]+',
                eos=r'\n|;',
                id=r'[a-zA-Z_]\w*',
                # all chars which can be part of type definition. Starts with
                # either letter, or [ (maps), or | (funcs)
                type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?',
            )
        )

    tokens = {
        'comments': [
            (r'(?s)/\*.*?\*/', Comment.Multiline),  # Multiline
            (r'//.*?\n', Comment.Single),  # Single line
            # TODO: highlight references in fandocs
            (r'\*\*.*?\n', Comment.Special),  # Fandoc
            (r'#.*\n', Comment.Single)  # Shell-style
        ],
        'literals': [
            (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number),  # Duration
            (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)', Number),  # Duration with dot
            (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float),  # Float/Decimal
            (r'\b-?0x[0-9a-fA-F_]+', Number.Hex),  # Hex
            (r'\b-?[\d_]+', Number.Integer),  # Int
            (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),  # Char
            (r'"', Punctuation, 'insideStr'),  # Opening quote
            (r'`', Punctuation, 'insideUri'),  # Opening accent
            (r'\b(true|false|null)\b', Keyword.Constant),  # Bool & null
            (r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)',  # DSL
             bygroups(Name.Namespace, Punctuation, Name.Class,
                      Punctuation, String, Punctuation)),
            (r'(?:(\w+)(::))?(\w+)?(#)(\w+)?',  # Type/slot literal
             bygroups(Name.Namespace, Punctuation, Name.Class,
                      Punctuation, Name.Function)),
            (r'\[,\]', Literal),  # Empty list
            (s(r'($type)(\[,\])'),  # Typed empty list
             bygroups(using(this, state='inType'), Literal)),
            (r'\[:\]', Literal),  # Empty Map
            (s(r'($type)(\[:\])'),
             bygroups(using(this, state='inType'), Literal)),
        ],
        'insideStr': [
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'"', Punctuation, '#pop'),  # Closing quot
            (r'.', String)  # String content
        ],
        'insideUri': [  # TODO: remove copy/paste str/uri
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'`', Punctuation, '#pop'),  # Closing tick
            (r'.', String.Backtick)  # URI content
        ],
        'protectionKeywords': [
            (r'\b(public|protected|private|internal)\b', Keyword),
        ],
        'typeKeywords': [
            (r'\b(abstract|final|const|native|facet|enum)\b', Keyword),
        ],
        'methodKeywords': [
            (r'\b(abstract|native|once|override|static|virtual|final)\b',
             Keyword),
        ],
        'fieldKeywords': [
            (r'\b(abstract|const|final|native|override|static|virtual|'
             r'readonly)\b', Keyword)
        ],
        'otherKeywords': [
            (words((
                'try', 'catch', 'throw', 'finally', 'for', 'if', 'else', 'while',
                'as', 'is', 'isnot', 'switch', 'case', 'default', 'continue',
                'break', 'do', 'return', 'get', 'set'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (r'\b(it|this|super)\b', Name.Builtin.Pseudo),
        ],
        'operators': [
            (r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator)
        ],
        'inType': [
            (r'[\[\]|\->:?]', Punctuation),
            (s(r'$id'), Name.Class),
            default('#pop'),

        ],
        'root': [
            include('comments'),
            include('protectionKeywords'),
            include('typeKeywords'),
            include('methodKeywords'),
            include('fieldKeywords'),
            include('literals'),
            include('otherKeywords'),
            include('operators'),
            (r'using\b', Keyword.Namespace, 'using'),  # Using stmt
            (r'@\w+', Name.Decorator, 'facet'),  # Symbol
            (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class),
             'inheritance'),  # Inheritance list

            # Type var := val
            (s(r'($type)([ \t]+)($id)(\s*)(:=)'),
             bygroups(using(this, state='inType'), Text,
                      Name.Variable, Text, Operator)),

            # var := val
            (s(r'($id)(\s*)(:=)'),
             bygroups(Name.Variable, Text, Operator)),

            # .someId( or ->someId( ###
            (s(r'(\.|(?:\->))($id)(\s*)(\()'),
             bygroups(Operator, Name.Function, Text, Punctuation),
             'insideParen'),

            # .someId  or ->someId
            (s(r'(\.|(?:\->))($id)'),
             bygroups(Operator, Name.Function)),

            # new makeXXX (
            (r'(new)(\s+)(make\w*)(\s*)(\()',
             bygroups(Keyword, Text, Name.Function, Text, Punctuation),
             'insideMethodDeclArgs'),

            # Type name (
            (s(r'($type)([ \t]+)'  # Return type and whitespace
               r'($id)(\s*)(\()'),  # method name + open brace
             bygroups(using(this, state='inType'), Text,
                      Name.Function, Text, Punctuation),
             'insideMethodDeclArgs'),

            # ArgType argName,
            (s(r'($type)(\s+)($id)(\s*)(,)'),
             bygroups(using(this, state='inType'), Text, Name.Variable,
                      Text, Punctuation)),

            # ArgType argName)
            # Covered in 'insideParen' state

            # ArgType argName -> ArgType|
            (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'),
             bygroups(using(this, state='inType'), Text, Name.Variable,
                      Text, Punctuation, Text, using(this, state='inType'),
                      Punctuation)),

            # ArgType argName|
            (s(r'($type)(\s+)($id)(\s*)(\|)'),
             bygroups(using(this, state='inType'), Text, Name.Variable,
                      Text, Punctuation)),

            # Type var
            (s(r'($type)([ \t]+)($id)'),
             bygroups(using(this, state='inType'), Text,
                      Name.Variable)),

            (r'\(', Punctuation, 'insideParen'),
            (r'\{', Punctuation, 'insideBrace'),
            (r'.', Text)
        ],
        'insideParen': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'insideMethodDeclArgs': [
            (r'\)', Punctuation, '#pop'),
            (s(r'($type)(\s+)($id)(\s*)(\))'),
             bygroups(using(this, state='inType'), Text, Name.Variable,
                      Text, Punctuation), '#pop'),
            include('root'),
        ],
        'insideBrace': [
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],
        'inheritance': [
            (r'\s+', Text),  # Whitespace
            (r':|,', Punctuation),
            (r'(?:(\w+)(::))?(\w+)',
             bygroups(Name.Namespace, Punctuation, Name.Class)),
            (r'\{', Punctuation, '#pop')
        ],
        'using': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(\[)(\w+)(\])',
             bygroups(Punctuation, Comment.Special, Punctuation)),  # ffi
            (r'(\")?([\w.]+)(\")?',
             bygroups(Punctuation, Name.Namespace, Punctuation)),  # podname
            (r'::', Punctuation, 'usingClass'),
            default('#pop')
        ],
        'usingClass': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(as)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Name.Class), '#pop:2'),
            (r'[\w$]+', Name.Class),
            default('#pop:2')  # jump out to root state
        ],
        'facet': [
            (r'\s+', Text),
            (r'\{', Punctuation, 'facetFields'),
            default('#pop')
        ],
        'facetFields': [
            include('comments'),
            include('literals'),
            include('operators'),
            (r'\s+', Text),
            (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)),
            (r'\}', Punctuation, '#pop'),
            (r'.', Text)
        ],
    }
Example #13
0
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.

        The base lexer's states are extended via ``inherit``; rules added
        here handle the @-prefixed Objective-C constructs before falling
        back to the C-family rules.
        """

        tokens = {
            'statements': [
                # @-literals: strings, booleans, chars, numbers, boxed
                # expressions, arrays and dictionaries.
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
                 String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                # Objective-C directives and ownership/property qualifiers.
                (words(
                    ('@selector', '@private', '@protected', '@public',
                     '@encode', '@synchronized', '@try', '@throw', '@catch',
                     '@finally', '@end', '@property', '@synthesize',
                     '__bridge', '__bridge_transfer', '__autoreleasing',
                     '__block', '__weak', '__strong', 'weak', 'strong', 'copy',
                     'retain', 'assign', 'unsafe_unretained', 'atomic',
                     'nonatomic', 'readonly', 'readwrite', 'setter', 'getter',
                     'typeof', 'in', 'out', 'inout', 'release', 'class',
                     '@dynamic', '@optional', '@required', '@autoreleasepool'),
                    suffix=r'\b'), Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'),
                       suffix=r'\b'), Keyword.Type),
                (r'@(true|false|YES|NO)\n', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b',
                 Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                # Class declarations leave 'statements' and enter a
                # classname-specific state ('#pop' then push).
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                # Fall through to the base C-family lexer's rules.
                inherit,
            ],
            'oc_classname': [
                # interface definition that inherits
                ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                ('([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text,
                          Punctuation), ('#pop', 'oc_ivars')),
                ('([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            'oc_forward_classname':
            # Comma-separated forward declarations recurse into this state;
            # the last name (optionally followed by ';') pops back out.
            [('([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text),
              'oc_forward_classname'),
             ('([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class,
                                                    Text), '#pop')],
            'oc_ivars': [
                # Instance-variable block: braces nest via #push/#pop.
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (
                    r'^([-+])(\s*)'  # method marker
                    r'(\(.*?\))?(\s*)'  # return type
                    r'([a-zA-Z$_][\w$]*:?)',  # begin of method name
                    bygroups(Punctuation, Text, using(this), Text,
                             Name.Function),
                    'method'),
                inherit,
            ],
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            # Boxed-literal states: parens/brackets nest via an *_inner
            # state; the closing delimiter of the outermost level is Literal.
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            # Score by decreasingly strong Objective-C markers: keywords,
            # @-strings, @-numbers, message-send syntax.
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            # Re-tag names that are known Cocoa interfaces/protocols/
            # primitives as built-ins.
            from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
Example #14
0
class ModelicaLexer(RegexLexer):
    """
    For `Modelica <http://www.modelica.org/>`_ source code.

    .. versionadded:: 1.1
    """
    name = 'Modelica'
    aliases = ['modelica']
    filenames = ['*.mo']
    mimetypes = ['text/x-modelica']

    flags = re.DOTALL | re.MULTILINE

    # A Modelica identifier: either a quoted identifier ('...' with escapes)
    # or a plain word.
    _name = r"(?:'(?:[^\\']|\\.)+'|[a-zA-Z_]\w*)"

    tokens = {
        'whitespace': [
            # \ufeff: tolerate a BOM anywhere whitespace is consumed.
            (r'[\s\ufeff]+', Text),
            (r'//[^\n]*\n?', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'root': [
            include('whitespace'),
            (r'"', String.Double, 'string'),
            (r'[()\[\]{},;]+', Punctuation),
            (r'\.?[*^/+-]|\.|<>|[<>:=]=?', Operator),
            (r'\d+(\.?\d*[eE][-+]?\d+|\.\d*)', Number.Float),
            (r'\d+', Number.Integer),
            # Built-in functions and predefined types.
            (r'(abs|acos|actualStream|array|asin|assert|AssertionLevel|atan|'
             r'atan2|backSample|Boolean|cardinality|cat|ceil|change|Clock|'
             r'Connections|cos|cosh|cross|delay|diagonal|div|edge|exp|'
             r'ExternalObject|fill|floor|getInstanceName|hold|homotopy|'
             r'identity|inStream|integer|Integer|interval|inverse|isPresent|'
             r'linspace|log|log10|matrix|max|min|mod|ndims|noClock|noEvent|'
             r'ones|outerProduct|pre|previous|product|Real|reinit|rem|rooted|'
             r'sample|scalar|semiLinear|shiftSample|sign|sin|sinh|size|skew|'
             r'smooth|spatialDistribution|sqrt|StateSelect|String|subSample|'
             r'sum|superSample|symmetric|tan|tanh|terminal|terminate|time|'
             r'transpose|vector|zeros)\b', Name.Builtin),
            (r'(algorithm|annotation|break|connect|constant|constrainedby|der|'
             r'discrete|each|else|elseif|elsewhen|encapsulated|enumeration|'
             r'equation|exit|expandable|extends|external|firstTick|final|flow|for|if|'
             r'import|impure|in|initial|inner|input|interval|loop|nondiscrete|outer|'
             r'output|parameter|partial|protected|public|pure|redeclare|'
             r'replaceable|return|stream|then|when|while)\b',
             Keyword.Reserved),
            (r'(and|not|or)\b', Operator.Word),
            # Class-like definition keywords push the 'class' state so the
            # following identifier is tokenized as a class name.
            (r'(block|class|connector|end|function|model|operator|package|'
             r'record|type)\b', Keyword.Reserved, 'class'),
            (r'(false|true)\b', Keyword.Constant),
            (r'within\b', Keyword.Reserved, 'package-prefix'),
            (_name, Name)
        ],
        'class': [
            include('whitespace'),
            (r'(function|record)\b', Keyword.Reserved),
            # e.g. 'end if'/'end for': the keyword is not a class name.
            (r'(if|for|when|while)\b', Keyword.Reserved, '#pop'),
            (_name, Name.Class, '#pop'),
            default('#pop')
        ],
        'package-prefix': [
            include('whitespace'),
            (_name, Name.Namespace, '#pop'),
            default('#pop')
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\[\'"?\\abfnrtv]', String.Escape),
            # Embedded HTML (common in annotations) is delegated to the
            # HTML lexer up to the closing </html> tag or end of string.
            (r'(?i)<\s*html\s*>([^\\"]|\\.)+?(<\s*/\s*html\s*>|(?="))',
             using(HtmlLexer)),
            (r'<|\\?[^"\\<]+', String.Double)
        ]
    }
Example #15
0
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes don't highlight inside
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        # Single-value helper states: consume one attribute value on the
        # rest of the line, then fall back to 'llvm_mir' via default('#pop').
        'name': [(r'[^\n]+', Name), default('#pop')],
        'boolean': [(r' *(true|false)', Name.Builtin),
                    default('#pop')],
        'number': [(r' *[0-9]+', Number),
                   default('#pop')],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((.|\n)+)',
             bygroups(using(LlvmMirBodyLexer), Keyword)),
        ],
    }
Example #16
0
class ScdocLexer(RegexLexer):
    """
    `scdoc` is a simple man page generator for POSIX systems written in C99.
    https://git.sr.ht/~sircmpwn/scdoc

    .. versionadded:: 2.5
    """
    name = 'scdoc'
    aliases = ['scdoc', 'scd']
    filenames = ['*.scd', '*.scdoc']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # comment
            (r'^(;.+\n)', bygroups(Comment)),

            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
            bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            # NOTE(review): '(\.+\.)' requires at least two dots; presumably
            # intended to match scdoc's '.' list marker -- confirm upstream.
            (r'^(\s*)(\.+\.)( .+\n)',
            bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block (fenced with ```)
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # underlines
            (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            (r'(\s)(\*[^*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def analyse_text(text):
        """This is very similar to markdown, save for the escape characters
        needed for * and _."""
        result = 0

        # Escaped emphasis markers are a (weak) scdoc-specific hint.
        if '\\*' in text:
            result += 0.01

        if '\\_' in text:
            result += 0.01

        return result
Example #17
0
class SlimLexer(ExtendedRegexLexer):
    """
    For Slim markup.

    Uses the ExtendedRegexLexer callbacks ``_indentation`` and
    ``_starts_block`` (defined elsewhere in this module) to track
    indentation-based block structure, and delegates embedded Ruby
    to ``RubyLexer``.

    .. versionadded:: 2.0
    """

    name = 'Slim'
    aliases = ['slim']
    filenames = ['*.slim']
    mimetypes = ['text/x-slim']

    flags = re.IGNORECASE
    # A content character: either a ' |' line continuation followed by a
    # line that also ends in ' |', or any single character.
    _dot = r'(?: \|\n(?=.* \|)|.)'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # Callback measures leading indentation to open/close blocks.
            (r'[ \t]*', _indentation),
        ],
        'css': [
            # Shortcut class/id syntax (.foo / #bar) implies a tag.
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            # '=' / '==' lines are Ruby output expressions.
            (r'([ \t]*==?)(.*\n)', bygroups(Punctuation,
                                            using(RubyLexer)), 'root'),
            (r'[ \t]+[\w:-]+(?==)', Name.Attribute, 'html-attributes'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'[\w:-]+:[ \t]*\n', Text, 'plain'),
            # '-' lines are Ruby control code.
            (r'(-)(.*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r'\|' + _dot + r'*\n', _starts_block(Text, 'plain'), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'slim-comment-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            (r'[ \t]+\n', Punctuation, '#pop:2'),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # Ruby string interpolation #{...}
            (r'(#\{)(.*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'=', Punctuation),
            # Attribute values are lexed as Ruby expressions.
            (r'"[^"]+"', using(RubyLexer), 'tag'),
            (r'\'[^\']+\'', using(RubyLexer), 'tag'),
            (r'\w+', Text, 'tag'),
        ],
        'slim-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
    }
Example #18
0
    from pygments.token import Punctuation, Other, Generic
    from pygments.styles import get_style_by_name
    SQL_STYLE = get_style_by_name('colorful')
    JSON_STYLE = get_style_by_name('tango')

    class InsideStringJavascriptLexer(JavascriptLexer):
        """Lex JavaScript code embedded in a double-quoted string literal:
        strips the surrounding quotes and unescapes ``\\n`` sequences
        before delegating to the plain JavaScript lexer."""

        def get_tokens_unprocessed(self, text, stack=('root', )):
            inner = text[1:-1].replace('\\n', '\n')
            return JavascriptLexer.get_tokens_unprocessed(self, inner, stack)

    mongo_tokens = {}
    mongo_tokens.update(JavascriptLexer.tokens)
    mongo_tokens['root'].insert(
        0,
        (r'"(function(\\\\|\\"|[^"])*)"', using(InsideStringJavascriptLexer)))

    class MongoLexer(JavascriptLexer):
        # JavascriptLexer whose 'root' state additionally recognizes
        # string-wrapped "function ..." values (built in mongo_tokens) and
        # lexes them as embedded JavaScript.
        tokens = mongo_tokens

    HAVE_PYGMENTS = True
except ImportError:  # pragma: no cover
    HAVE_PYGMENTS = False


def get_root_controller():
    """Return the root controller class of the application.

    Imports the module named by ``config['application_root_module']`` on
    first use and returns its ``RootController`` attribute.
    """
    import importlib

    # import_module is idempotent (it returns the cached module from
    # sys.modules on later calls) and, unlike __import__, returns the tail
    # module of a dotted path directly — no sys.modules lookup needed.
    module = config['application_root_module']
    return importlib.import_module(module).RootController
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    # Hexadecimal part in an hexadecimal integer/floating-point literal.
    # This includes decimal separators matching.
    _hexpart = r'[0-9a-fA-F](\'?[0-9a-fA-F])*'
    # Decimal part in an decimal integer/floating-point literal.
    # This includes decimal separators matching.
    _decpart = r'\d(\'?\d)*'
    # Integer literal suffix (e.g. 'ull' or 'll').
    _intsuffix = r'(([uU][lL]{0,2})|[lL]{1,2}[uU]?)?'

    # Identifier regex with C and C++ Universal Character Name (UCN) support.
    _ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*'
    # Like _ident, but additionally allows '::' so namespaced names match.
    _namespaced_ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)*'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Whitespace),
            (r'[^\S\n]+', Whitespace),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            include('keywords'),
            include('types'),
            # String and character literals, with optional encoding prefix.
            (r'([LuU]|u8)?(")', bygroups(String.Affix, String), 'string'),
            (r"([LuU]|u8)?(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),

            # Hexadecimal floating-point literals (C11, C++17)
            (r'0[xX](' + _hexpart + r'\.' + _hexpart + r'|\.' + _hexpart +
             r'|' + _hexpart + r')[pP][+-]?' + _hexpart + r'[lL]?',
             Number.Float),
            (r'(-)?(' + _decpart + r'\.' + _decpart + r'|\.' + _decpart +
             r'|' + _decpart + r')[eE][+-]?' + _decpart + r'[fFlL]?',
             Number.Float),
            (r'(-)?((' + _decpart + r'\.(' + _decpart + r')?|\.' + _decpart +
             r')[fFlL]?)|(' + _decpart + r'[fFlL])', Number.Float),
            (r'(-)?0[xX]' + _hexpart + _intsuffix, Number.Hex),
            (r'(-)?0[bB][01](\'?[01])*' + _intsuffix, Number.Bin),
            (r'(-)?0(\'?[0-7])+' + _intsuffix, Number.Oct),
            (r'(-)?' + _decpart + _intsuffix, Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'(true|false|NULL)\b', Name.Builtin),
            # Labels like 'done:', but not the first half of a '::' scope.
            (r'(' + _ident + r')(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (_ident, Name)
        ],
        'types': [(words(('int8', 'int16', 'int32', 'int64', 'wchar_t'),
                         prefix=r'__',
                         suffix=r'\b'), Keyword.Reserved),
                  (words(('bool', 'int', 'long', 'float', 'short', 'double',
                          'char', 'unsigned', 'signed', 'void'),
                         suffix=r'\b'), Keyword.Type)],
        'keywords': [
            (r'(struct|union)(\s+)', bygroups(Keyword, Text), 'classname'),
            (words(('asm', 'auto', 'break', 'case', 'const', 'continue',
                    'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                    'if', 'register', 'restricted', 'return', 'sizeof',
                    'struct', 'static', 'switch', 'typedef', 'volatile',
                    'while', 'union', 'thread_local', 'alignas', 'alignof',
                    'static_assert', '_Pragma'),
                   suffix=r'\b'), Keyword),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'based', 'except', 'stdcall', 'cdecl', 'fastcall',
                    'declspec', 'finally', 'try', 'leave', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved)
        ],
        'root': [
            include('whitespace'),
            include('keywords'),
            # functions
            (
                r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
                r'(' + _namespaced_ident + r')'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
                r'(' + _namespaced_ident + r')'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (r'\}', Punctuation),
            (r'[{;]', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            # #include with a quoted or angle-bracketed file name.
            (r'(' + _ws1 + r')(include)(' + _ws1 + r')("[^"]+")([^\n]*)',
             bygroups(using(this), Comment.Preproc, using(this),
                      Comment.PreprocFile, Comment.Single)),
            (r'(' + _ws1 + r')(include)(' + _ws1 + r')(<[^>]+>)([^\n]*)',
             bygroups(using(this), Comment.Preproc, using(this),
                      Comment.PreprocFile, Comment.Single)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),  # escaped newline continues macro
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            # Track nested #if/#endif so the whole disabled region is skipped.
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'classname': [
            (_ident, Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
            default('#pop')
        ]
    }

    # Typedef names from the C standard library headers.
    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    }
    # Fixed-width and pointer-sized integer typedefs introduced by C99.
    c99_types = {
        'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t',
        'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t',
        'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t',
        'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t',
        'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t',
        'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t',
        'uintmax_t'
    }
    # Common platform typedefs (POSIX/Linux headers).
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t'
    }
    # Atomic typedefs from C11 <stdatomic.h>.
    c11_atomic_types = {
        'atomic_bool', 'atomic_char', 'atomic_schar', 'atomic_uchar',
        'atomic_short', 'atomic_ushort', 'atomic_int', 'atomic_uint',
        'atomic_long', 'atomic_ulong', 'atomic_llong', 'atomic_ullong',
        'atomic_char16_t', 'atomic_char32_t', 'atomic_wchar_t',
        'atomic_int_least8_t', 'atomic_uint_least8_t', 'atomic_int_least16_t',
        'atomic_uint_least16_t', 'atomic_int_least32_t',
        'atomic_uint_least32_t', 'atomic_int_least64_t',
        'atomic_uint_least64_t', 'atomic_int_fast8_t', 'atomic_uint_fast8_t',
        'atomic_int_fast16_t', 'atomic_uint_fast16_t', 'atomic_int_fast32_t',
        'atomic_uint_fast32_t', 'atomic_int_fast64_t', 'atomic_uint_fast64_t',
        'atomic_intptr_t', 'atomic_uintptr_t', 'atomic_size_t',
        'atomic_ptrdiff_t', 'atomic_intmax_t', 'atomic_uintmax_t'
    }

    def __init__(self, **options):
        """Read the boolean ``*highlighting`` lexer options (all default True)."""
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.c11highlighting = get_bool_opt(options, 'c11highlighting', True)
        self.platformhighlighting = get_bool_opt(options,
                                                 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Post-process the token stream, promoting plain ``Name`` tokens
        that match a known typedef set to ``Keyword.Type``."""
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.c11highlighting and value in self.c11_atomic_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
class HttpPromptLexer(RegexLexer):
    """Lexer for the http-prompt interactive command language."""

    name = 'HttpPrompt'
    aliases = ['http-prompt']
    filenames = ['*.http-prompt']

    tokens = {
        # Entry state: dispatch on the leading command word.
        'root':
        [(r'\s+', Text), (r'(cd)(\s*)', bygroups(Keyword, Text), 'cd'),
         (r'(rm)(\s*)', bygroups(Keyword, Text), 'rm_option'),
         (r'(httpie|curl)(\s*)', bygroups(Keyword, Text), 'action'),
         (words(HTTP_METHODS, prefix='(?i)', suffix='(?!\S)(\s*)'),
          bygroups(Keyword, Text), combined('redir_out', 'urlpath')),
         (r'(clear)(\s*)', bygroups(Keyword, Text), 'end'),
         (r'(exit)(\s*)', bygroups(Keyword, Text), 'end'),
         (r'(help)(\s)*', bygroups(Keyword, Text), 'end'),
         (r'(env)(\s*)', bygroups(Keyword,
                                  Text), combined('redir_out', 'pipe')),
         (r'(source)(\s*)', bygroups(Keyword, Text), 'file_path'),
         (r'(exec)(\s*)', bygroups(Keyword, Text), 'file_path'),
         (r'(ls)(\s*)', bygroups(Keyword,
                                 Text), combined('redir_out', 'urlpath')),
         (r'', Text, 'concat_mut')],
        'cd':
        string_rules('end'),
        'rm_option': [(r'(\-(?:h|o|b|q))(\s*)', bygroups(Name,
                                                         Text), 'rm_name'),
                      (r'(\*)(\s*)', bygroups(Name, Text), 'end')],
        'rm_name':
        string_rules('end'),
        # Backtick-quoted shell command, lexed as Bash.
        'shell_command': [
            (r'(`)([^`]*)(`)', bygroups(Text, using(BashLexer), Text)),
        ],
        # Trailing '| ...' pipeline, lexed as Bash.
        'pipe': [
            (r'(\s*)(\|)(.*)', bygroups(Text, Operator, using(BashLexer))),
        ],
        # Flags, options and request-mutation arguments (e.g. name=value).
        'concat_mut': [
            (r'$', Text, 'end'),
            (r'\s+', Text),

            # Flag options, such as (--form) and (--json)
            (words(FLAG_OPTIONS, suffix=r'\b'), Name, 'concat_mut'),

            # Options with values, such as (--style=default) and (--pretty all)
            (words(VALUE_OPTIONS,
                   suffix=r'\b'), Name, combined('shell_command',
                                                 'option_op')),
            include('shell_command'),

            # Unquoted or value-quoted request mutation,
            # such as (name="John Doe") and (name=John\ Doe)
            (r'((?:[^\s\'"\\=:]|(?:\\.))*)(:=|:|==|=)',
             bygroups(Name, Operator), combined('shell_command',
                                                'unquoted_mut')),

            # Full single-quoted request mutation, such as ('name=John Doe')
            (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)(:=|:|==|=)",
             bygroups(Text, Name,
                      Operator), combined('shell_command', 'squoted_mut')),

            # Full double-quoted request mutation, such as ("name=John Doe")
            (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)(:=|:|==|=)',
             bygroups(Text, Name,
                      Operator), combined('shell_command', 'dquoted_mut'))
        ],
        'option_op': [
            (r'(\s+|=)', Operator, 'option_value'),
        ],
        'option_value':
        string_rules('#pop:2'),
        'file_path':
        string_rules('end'),
        # Output redirection ('>' or '>>') followed by a file path.
        'redir_out': [(r'(?i)(>>?)(\s*)', bygroups(Operator,
                                                   Text), 'file_path')],
        'unquoted_mut':
        string_rules('#pop'),
        'squoted_mut':
        [(r"((?:[^\r\n'\\]|(?:\\.))+)(')", bygroups(String, Text), '#pop'),
         (r"([^\r\n'\\]|(\\.))+", String, '#pop')],
        'dquoted_mut':
        [(r'((?:[^\r\n"\\]|(?:\\.))+)(")', bygroups(String, Text), '#pop'),
         (r'([^\r\n"\\]|(\\.))+', String, '#pop')],
        'action': [(words(HTTP_METHODS, prefix='(?i)',
                          suffix='(\s*)'), bygroups(Keyword, Text),
                    combined('redir_out', 'pipe', 'urlpath')),
                   (r'', Text, combined('redir_out', 'pipe', 'urlpath'))],
        # URL or path argument, optionally single- or double-quoted.
        'urlpath':
        [(r'https?://([^\s"\'\\]|(\\.))+', String,
          combined('concat_mut', 'redir_out', 'pipe')),
         (r'(")(https?://(?:[^\r\n"\\]|(?:\\.))+)(")',
          bygroups(Text, String,
                   Text), combined('concat_mut', 'redir_out', 'pipe')),
         (r'(")(https?://(?:[^\r\n"\\]|(?:\\.))+)', bygroups(Text, String)),
         (r"(')(https?://(?:[^\r\n'\\]|(?:\\.))+)(')",
          bygroups(Text, String,
                   Text), combined('concat_mut', 'redir_out', 'pipe')),
         (r"(')(https?://(?:[^\r\n'\\]|(?:\\.))+)", bygroups(Text, String)),
         (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)(")', bygroups(Text, String, Text),
          combined('concat_mut', 'redir_out', 'pipe')),
         (r'(")((?:[^\r\n"\\=:]|(?:\\.))+)', bygroups(Text, String)),
         (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)(')", bygroups(Text, String, Text),
          combined('concat_mut', 'redir_out', 'pipe')),
         (r"(')((?:[^\r\n'\\=:]|(?:\\.))+)", bygroups(Text, String)),
         (r'([^\-](?:[^\s"\'\\=:]|(?:\\.))+)(\s+|$)', bygroups(String, Text),
          combined('concat_mut', 'redir_out', 'pipe')),
         (r'', Text, combined('concat_mut', 'redir_out', 'pipe'))],
        'end': [(r'\n', Text, 'root')]
    }
Example #21
0
class AsymptoteLexer(RegexLexer):
    """
    For `Asymptote <http://asymptote.sf.net/>`_ source code.

    .. versionadded:: 1.2
    """
    name = 'Asymptote'
    aliases = ['asy', 'asymptote']
    filenames = ['*.asy']
    mimetypes = ['text/x-asymptote']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
        ],
        'statements': [
            # simple string (TeX friendly)
            (r'"(\\\\|\\"|[^"])*"', String),
            # C style string (with character escapes)
            (r"'", String, 'string'),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
            (r'(and|controls|tension|atleast|curl|if|else|while|for|do|'
             r'return|break|continue|struct|typedef|new|access|import|'
             r'unravel|from|include|quote|static|public|private|restricted|'
             r'this|explicit|true|false|null|cycle|newframe|operator)\b', Keyword),
            # Since an asy-type-name can be also an asy-function-name,
            # in the following we test if the string "  [a-zA-Z]" follows
            # the Keyword.Type.
            # Of course it is not perfect !
            (r'(Braid|FitResult|Label|Legend|TreeNode|abscissa|arc|arrowhead|'
             r'binarytree|binarytreeNode|block|bool|bool3|bounds|bqe|circle|'
             r'conic|coord|coordsys|cputime|ellipse|file|filltype|frame|grid3|'
             r'guide|horner|hsv|hyperbola|indexedTransform|int|inversion|key|'
             r'light|line|linefit|marginT|marker|mass|object|pair|parabola|path|'
             r'path3|pen|picture|point|position|projection|real|revolution|'
             r'scaleT|scientific|segment|side|slice|splitface|string|surface|'
             r'tensionSpecifier|ticklocate|ticksgridT|tickvalues|transform|'
             r'transformation|tree|triangle|trilinear|triple|vector|'
             r'vertex|void)(?=\s+[a-zA-Z])', Keyword.Type),
            # Now the asy-type-name which are not asy-function-name
            # except yours !
            # Perhaps useless
            (r'(Braid|FitResult|TreeNode|abscissa|arrowhead|block|bool|bool3|'
             r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|'
             r'picture|position|real|revolution|slice|splitface|ticksgridT|'
             r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type),
            ('[a-zA-Z_]\w*:(?!:)', Name.Label),
            ('[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
             r'([a-zA-Z_]\w*)'            # method name
             r'(\s*\([^;]*?\))'           # signature
             r'(' + _ws + r')(\{)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations
            (r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
             r'([a-zA-Z_]\w*)'            # method name
             r'(\s*\([^;]*?\))'           # signature
             r'(' + _ws + r')(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r"'", String, '#pop'),
            (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'\n', String),
            (r"[^\\'\n]+", String),  # all other characters
            (r'\\\n', String),       # line continuation
            (r'\\n', String),        # NOTE(review): '\n' escape is already
                                     # matched by the escape rule above —
                                     # this fallback looks redundant; verify
            (r'\\', String),         # stray backslash
        ],
    }

    def get_tokens_unprocessed(self, text):
        """Post-process the stream: promote plain names that appear in the
        Asymptote builtin lists to function/variable tokens."""
        from pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in ASYFUNCNAME:
                token = Name.Function
            elif token is Name and value in ASYVARNAME:
                token = Name.Variable
            yield index, token, value
Example #22
0
class BatchLexer(RegexLexer):
    """
    Lexer for the DOS/Windows Batch file format.

    .. versionadded:: 0.7
    """
    name = 'Batchfile'
    aliases = ['bat', 'batch', 'dosbatch', 'winbatch']
    filenames = ['*.bat', '*.cmd']
    mimetypes = ['application/x-dos-batch']

    flags = re.MULTILINE | re.IGNORECASE

    # Line-terminator characters: newline and the DOS end-of-file byte (0x1A).
    _nl = r'\n\x1a'
    # Punctuation that separates commands and redirections.
    _punct = r'&<>|'
    # Characters cmd.exe treats as horizontal whitespace/argument separators.
    _ws = r'\t\v\f\r ,;=\xa0'
    # Whitespace (including newlines) plus the separator characters.
    _nlws = r'\s\x1a\xa0,;='
    # One or more whitespace characters, each optionally caret-escaped.
    _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws)
    # Lookahead asserting that a keyword ends here.
    _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' %
                           (_nl, _ws, _nl, _punct))
    # Lookahead asserting that a token ends here.
    _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl)
    # Matches up to (and capturing) the ':' that starts a label.
    _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws
    # Label name; the *_compound variant additionally stops at ')'.
    _label = r'(?:(?:[^%s%s+:^]|\^[%s]?[\w\W])*)' % (_nlws, _punct, _nl)
    _label_compound = r'(?:(?:[^%s%s+:^)]|\^[%s]?[^)])*)' % (_nlws, _punct,
                                                             _nl)
    # Octal/hex/decimal number followed by a token terminator.
    _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator
    # Comparison operators accepted by the 'if' command.
    _opword = r'(?:equ|geq|gtr|leq|lss|neq)'
    # Double-quoted string; the closing quote may be omitted at end of line.
    _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl)
    # %var% / !var! references, including substring/substitution syntax.
    _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|'
                 r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|'
                 r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|'
                 r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:'
                 r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' %
                 (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl))
    # Run of unquoted token characters; *_compound also stops at ')'.
    _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s])+)' % (_nl, _nlws, _punct)
    _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s)])+)' % (_nl, _nlws,
                                                                _punct)
    _token = r'(?:[%s]+|%s)' % (_punct, _core_token)
    _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound)
    # "String token": punctuation run, or strings/variables/plain tokens.
    _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
               (_punct, _string, _variable, _core_token))

    def _make_begin_state(compound,
                          _core_token=_core_token,
                          _core_token_compound=_core_token_compound,
                          _keyword_terminator=_keyword_terminator,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _space=_space,
                          _start_label=_start_label,
                          _stoken=_stoken,
                          _token_terminator=_token_terminator,
                          _variable=_variable,
                          _ws=_ws):
        """Build the rule list for the start of a command ('root' states).

        ``compound`` selects the variant used inside parenthesised compound
        statements, where an unescaped ')' closes the block.  The keyword
        defaults snapshot the class-level regex fragments at class-creation
        time so this staticmethod-style helper can run during class body
        evaluation.
        """
        rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                            ')' if compound else '')
        rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
        rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
        set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
        suffix = ''
        if compound:
            # Inside '(...)', a ')' also terminates keywords and tokens.
            _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
            _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
            suffix = '/compound'
        return [
            ((r'\)', Punctuation, '#pop') if compound else
             (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
              Comment.Single)),
            (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
            (_space, using(this, state='text')),
            include('redirect%s' % suffix), (r'[%s]+' % _nl, Text),
            (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation),
            # 'for', 'if' and 'rem' followed by a '/?' help switch.
            (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
             r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
             (_nl, _token_terminator, _space,
              _core_token_compound if compound else _core_token, _nl, _nl),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
             (_keyword_terminator, rest, _nl, _nl, rest),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                    'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                    'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                    'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                    'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                    'title', 'type', 'ver', 'verify', 'vol'),
                   suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
            (r'(call)(%s?)(:)' % _space,
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'call%s' % suffix),
            (r'call%s' % _keyword_terminator, Keyword),
            (r'(for%s(?!\^))(%s)(/f%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/f', 'for')),
            (r'(for%s(?!\^))(%s)(/l%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/l', 'for')),
            (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
            (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'label%s' % suffix),
            (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
             (_token_terminator, _space, _token_terminator, _space,
              _token_terminator, _space),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), Keyword,
                      using(this, state='text')), ('(?', 'if')),
            (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
             (_token_terminator, _space, _stoken, _keyword_terminator,
              rest_of_line_compound if compound else rest_of_line),
             Comment.Single, 'follow%s' % suffix),
            (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), 'arithmetic%s' % suffix),
            (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
             r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
             (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
              ')' if compound else '', _nl, _nl),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), using(this, state='variable'),
                      Punctuation), 'follow%s' % suffix),
            default('follow%s' % suffix)
        ]

    def _make_follow_state(compound,
                           _label=_label,
                           _label_compound=_label_compound,
                           _nl=_nl,
                           _space=_space,
                           _start_label=_start_label,
                           _token=_token,
                           _token_compound=_token_compound,
                           _ws=_ws):
        """Build the rule list for the remainder of a command line
        ('follow' states): labels, redirections and command separators.

        ``compound`` adds a rule that pops on a lookahead ')' so compound
        statements can close.
        """
        suffix = '/compound' if compound else ''
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [
            (r'%s([%s]*)(%s)(.*)' %
             (_start_label, _ws, _label_compound if compound else _label),
             bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
            include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'),
            (r'\|\|?|&&?', Punctuation, '#pop'),
            include('text')
        ]
        return state

    def _make_arithmetic_state(compound,
                               _nl=_nl,
                               _punct=_punct,
                               _string=_string,
                               _variable=_variable,
                               _ws=_ws,
                               _nlws=_nlws):
        """Build the rule list for 'set /a' arithmetic expressions.

        Tokenizes numbers, operators and variable references; ``compound``
        adds a rule that pops on a lookahead ')'.
        """
        op = r'=+\-*/!~'
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [(r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex),
                  (r'\d+', Number.Integer), (r'[(),]+', Punctuation),
                  (r'([%s]|%%|\^\^)+' % op, Operator),
                  (r'(%s|%s|(\^[%s]?)?[^()%s%%\^"%s%s]|\^[%s]?%s)+' %
                   (_string, _variable, _nl, op, _nlws, _punct, _nlws,
                    r'[^)]' if compound else r'[\w\W]'),
                   using(this, state='variable')),
                  (r'(?=[\x00|&])', Text, '#pop'),
                  include('follow')]
        return state

    def _make_call_state(compound,
                         _label=_label,
                         _label_compound=_label_compound):
        """Build the rule list for the target of a ``call`` command.

        Matches an optionally colon-prefixed label; in compound context a
        rule is prepended that pops on a lookahead ')'.
        """
        label_re = _label_compound if compound else _label
        rules = [(r'(:?)(%s)' % label_re,
                  bygroups(Punctuation, Name.Label), '#pop')]
        if compound:
            rules.insert(0, (r'(?=\))', Text, '#pop'))
        return rules

    def _make_label_state(compound,
                          _label=_label,
                          _label_compound=_label_compound,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _variable=_variable):
        """Build the rule list for the target of a ``goto`` command.

        Matches the label and any trailing text (lexed as a comment);
        ``compound`` adds a rule that pops on a lookahead ')'.
        """
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append(
            (r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' %
             (_label_compound if compound else _label, _string, _variable, _nl,
              r'[^)]' if compound else r'[\w\W]', _nl, _punct,
              r')' if compound else ''), bygroups(Name.Label,
                                                  Comment.Single), '#pop'))
        return state

    def _make_redirect_state(compound,
                             _core_token_compound=_core_token_compound,
                             _nl=_nl,
                             _punct=_punct,
                             _stoken=_stoken,
                             _string=_string,
                             _space=_space,
                             _variable=_variable,
                             _nlws=_nlws):
        """Build the rule list for I/O redirections.

        Handles both handle duplication (e.g. ``2>&1``) and redirection to a
        file token; ``compound`` switches the file-token regex to the variant
        that stops at ')'.
        """
        stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                           (_punct, _string, _variable, _core_token_compound))
        return [
            (r'((?:(?<=[%s])\d)?)(>>?&|<&)([%s]*)(\d)' % (_nlws, _nlws),
             bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
            (r'((?:(?<=[%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
             (_nlws, _nl, _space, stoken_compound if compound else _stoken),
             bygroups(Number.Integer, Punctuation, using(this, state='text')))
        ]

    # Token map for the batch-file lexer.  Each core state exists twice:
    # a top-level variant and a '/compound' variant used inside (...)
    # blocks, where an unescaped ')' must terminate the state.
    tokens = {
        'root':
        _make_begin_state(False),
        'follow':
        _make_follow_state(False),
        'arithmetic':
        _make_arithmetic_state(False),
        'call':
        _make_call_state(False),
        'label':
        _make_label_state(False),
        'redirect':
        _make_redirect_state(False),
        'root/compound':
        _make_begin_state(True),
        'follow/compound':
        _make_follow_state(True),
        'arithmetic/compound':
        _make_arithmetic_state(True),
        'call/compound':
        _make_call_state(True),
        'label/compound':
        _make_label_state(True),
        'redirect/compound':
        _make_redirect_state(True),
        # %var% references and caret escapes shared by the string states.
        'variable-or-escape':
        [(_variable, Name.Variable),
         (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)],
        # Double-quoted string; pops at the closing quote or end of line.
        'string': [(r'"', String.Double, '#pop'), (_variable, Name.Variable),
                   (r'\^!|%%', String.Escape),
                   (r'[^"%%^%s]+|[%%^]' % _nl, String.Double),
                   default('#pop')],
        # Single-quoted / backquoted command strings in for /f clauses.
        'sqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Single)],
        'bqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Backtick)],
        'text': [(r'"', String.Double, 'string'),
                 include('variable-or-escape'),
                 (r'[^"%%^%s%s\d)]+|.' % (_nlws, _punct), Text)],
        'variable': [(r'"', String.Double, 'string'),
                     include('variable-or-escape'),
                     (r'[^"%%^%s]+|.' % _nl, Name.Variable)],
        # 'for' handles "in (" ...; 'for2' the body up to "do".
        'for': [(r'(%s)(in)(%s)(\()' % (_space, _space),
                 bygroups(using(this, state='text'), Keyword,
                          using(this, state='text'), Punctuation), '#pop'),
                include('follow')],
        'for2': [(r'\)', Punctuation),
                 (r'(%s)(do%s)' % (_space, _token_terminator),
                  bygroups(using(this, state='text'), Keyword), '#pop'),
                 (r'[%s]+' % _nl, Text),
                 include('follow')],
        # Option strings of "for /f" and numeric ranges of "for /l".
        'for/f': [(r'(")((?:%s|[^"])*?")([%s]*)(\))' % (_variable, _nlws),
                   bygroups(String.Double, using(this, state='string'), Text,
                            Punctuation)),
                  (r'"', String.Double, ('#pop', 'for2', 'string')),
                  (r"('(?:%%%%|%s|[\w\W])*?')([%s]*)(\))" % (_variable, _nlws),
                   bygroups(using(this, state='sqstring'), Text, Punctuation)),
                  (r'(`(?:%%%%|%s|[\w\W])*?`)([%s]*)(\))' % (_variable, _nlws),
                   bygroups(using(this, state='bqstring'), Text, Punctuation)),
                  include('for2')],
        'for/l': [(r'-?\d+', Number.Integer),
                  include('for2')],
        # Conditions of an 'if' statement; 'if2' covers the comparison
        # operators once the left-hand token has been consumed.
        'if': [
            (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' %
             (_token_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Number.Integer), '#pop'),
            (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text'),
                      using(this, state='variable')), '#pop'),
            (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text')), '#pop'),
            (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number),
             bygroups(using(this, state='arithmetic'), Operator.Word,
                      using(this, state='arithmetic')), '#pop'),
            (_stoken, using(this, state='text'), ('#pop', 'if2')),
        ],
        'if2': [(r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken),
                 bygroups(using(this, state='text'), Operator,
                          using(this, state='text')), '#pop'),
                (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken),
                 bygroups(using(this, state='text'), Operator.Word,
                          using(this, state='text')), '#pop')],
        # After an 'if' condition: an optional ( compound then optional else.
        '(?': [(_space, using(this, state='text')),
               (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')),
               default('#pop')],
        'else?': [(_space, using(this, state='text')),
                  (r'else%s' % _token_terminator, Keyword, '#pop'),
                  default('#pop')]
    }
Example #23
0
class NemerleLexer(RegexLexer):
    """
    For `Nemerle <http://nemerle.org>`_ source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

    .. versionadded:: 1.5
    """

    name = 'Nemerle'
    aliases = ['nemerle']
    filenames = ['*.n']
    mimetypes = ['text/x-nemerle']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Identifier regex per `unicodelevel`; keys double as the token-set
    # names generated below.
    levels = {
        'none':
        # FIX: raw string -- '\w' in a plain string is an invalid escape
        # sequence (a warning on current Python, an error in the future).
        r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    # One full token set is built per unicode level; __init__ picks the
    # requested one at instantiation time.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                # splice strings and quasiquotation markers
                (r'\$\s*"', String, 'splice-string'),
                (r'\$\s*<#', String, 'splice-string2'),
                (r'<#', String, 'recursive-string'),
                (r'(<\[)\s*(' + cs_ident + ':)?', Keyword),
                (r'\]\>', Keyword),

                # quasiquotation only
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name,
                                       Punctuation), 'splice-string-content'),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|and|as|base|catch|def|delegate|'
                 r'enum|event|extern|false|finally|'
                 r'fun|implements|interface|internal|'
                 r'is|macro|match|matches|module|mutable|new|'
                 r'null|out|override|params|partial|private|'
                 r'protected|public|ref|sealed|static|'
                 r'syntax|this|throw|true|try|type|typeof|'
                 r'virtual|volatile|when|where|with|'
                 r'assert|assert2|async|break|checked|continue|do|else|'
                 r'ensures|for|foreach|if|late|lock|new|nolate|'
                 r'otherwise|regexp|repeat|requires|return|surroundwith|'
                 r'unchecked|unless|using|while|yield)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort|void|array|list)\b\??',
                 Keyword.Type),
                (r'(:>?)\s*(' + cs_ident + r'\??)',
                 bygroups(Punctuation, Keyword.Type)),
                (r'(class|struct|variant|module)(\s+)',
                 bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [(cs_ident, Name.Class, '#pop')],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ],
            # $"..." interpolated string; $identifier and $(...) splices.
            'splice-string':
            [(r'[^"$]', String), (r'\$' + cs_ident, Name),
             (r'(\$)(\()', bygroups(Name,
                                    Punctuation), 'splice-string-content'),
             (r'\\"', String), (r'"', String, '#pop')],
            # $<#...#> recursive interpolated string (nests via #push).
            'splice-string2':
            [(r'[^#<>$]', String), (r'\$' + cs_ident, Name),
             (r'(\$)(\()', bygroups(Name,
                                    Punctuation), 'splice-string-content'),
             (r'<#', String, '#push'), (r'#>', String, '#pop')],
            'recursive-string': [(r'[^#<>]', String), (r'<#', String, '#push'),
                                 (r'#>', String, '#pop')],
            # Expression inside a $( ... ) splice; parens nest via #push.
            'splice-string-content':
            [(r'if|match', Keyword),
             (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation), (cs_ident, Name),
             (r'\d+', Number), (r'\(', Punctuation, '#push'),
             (r'\)', Punctuation, '#pop')]
        }

    def __init__(self, **options):
        """Select and (lazily) compile the token set for `unicodelevel`."""
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
Example #24
0
 def _make_begin_state(compound,
                       _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl,
                       _punct=_punct,
                       _string=_string,
                       _space=_space,
                       _start_label=_start_label,
                       _stoken=_stoken,
                       _token_terminator=_token_terminator,
                       _variable=_variable,
                       _ws=_ws):
     """Build the rules for the batch lexer's start-of-statement state.

     ``compound`` selects the variant used inside parenthesised
     compound statements ('/compound' sub-states), where an unescaped
     ')' terminates the statement.  The regex fragments are taken from
     the precomputed helpers passed in as default arguments.
     """
     # Remainder of a 'goto' target (strings, variables, plain chars).
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     # Rest of a line, honouring caret escapes; the compound variant
     # additionally stops at an unescaped ')'.
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     # Horizontal whitespace, possibly split by caret-newline escapes.
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         # Keywords/tokens may also be terminated by the closing paren.
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     # Order matters: rules are tried top to bottom by RegexLexer.
     return [
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix), (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation),
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this,
                                  state='text')), 'follow%s' % suffix),
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this,
                                  state='text')), 'follow%s' % suffix),
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'),
                   Punctuation), 'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'),
                   Punctuation), 'label%s' % suffix),
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), 'arithmetic%s' % suffix),
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation), 'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
class ThriftLexer(RegexLexer):
    """
    For `Thrift <https://thrift.apache.org/>`__ interface definitions.

    .. versionadded:: 2.1
    """
    name = 'Thrift'
    aliases = ['thrift']
    filenames = ['*.thrift']
    mimetypes = ['application/x-thrift']

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            # Strings: escape handling is layered in via combined().
            (r'"', String.Double, combined('stringescape', 'dqs')),
            (r'\'', String.Single, combined('stringescape', 'sqs')),
            (r'(namespace)(\s+)',
                bygroups(Keyword.Namespace, Text.Whitespace), 'namespace'),
            (r'(enum|union|struct|service|exception)(\s+)',
                bygroups(Keyword.Declaration, Text.Whitespace), 'class'),
            # Service method definitions: return type, name, '('.
            (r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)'  # return arguments
             r'((?:[^\W\d]|\$)[\w$]*)'                  # method name
             r'(\s*)(\()',                              # signature start
             bygroups(using(this), Name.Function, Text, Operator)),
            include('keywords'),
            include('numbers'),
            (r'[&=]', Operator),
            (r'[:;,{}()<>\[\]]', Punctuation),
            (r'[a-zA-Z_](\.\w|\w)*', Name),
        ],
        'whitespace': [
            (r'\n', Text.Whitespace),
            (r'\s+', Text.Whitespace),
        ],
        # Thrift supports shell-, C++- and C-style comments.
        'comments': [
            (r'#.*$', Comment),
            (r'//.*?\n', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
        ],
        'stringescape': [
            (r'\\([\\nrt"\'])', String.Escape),
        ],
        # Double- and single-quoted string bodies (escapes come from
        # the combined 'stringescape' state).
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'[^\\"\n]+', String.Double),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r'[^\\\'\n]+', String.Single),
        ],
        # '*' is a valid namespace wildcard, hence [a-z*].
        'namespace': [
            (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'keywords': [
            (r'(async|oneway|extends|throws|required|optional)\b', Keyword),
            (r'(true|false)\b', Keyword.Constant),
            (r'(const|typedef)\b', Keyword.Declaration),
            (words((
                'cpp_namespace', 'cpp_include', 'cpp_type', 'java_package',
                'cocoa_prefix', 'csharp_namespace', 'delphi_namespace',
                'php_namespace', 'py_module', 'perl_package',
                'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix',
                'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace',
                'xsd_attrs', 'include'), suffix=r'\b'),
             Keyword.Namespace),
            (words((
                'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
                'string', 'binary', 'map', 'list', 'set', 'slist',
                'senum'), suffix=r'\b'),
             Keyword.Type),
            # Identifiers reserved by the Thrift IDL for target languages.
            (words((
                'BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__',
                '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
                'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin',
                'break', 'case', 'catch', 'class', 'clone', 'continue',
                'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic',
                'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare',
                'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile',
                'ensure', 'except', 'exec', 'finally', 'float', 'for',
                'foreach', 'function', 'global', 'goto', 'if', 'implements',
                'import', 'in', 'inline', 'instanceof', 'interface', 'is',
                'lambda', 'module', 'native', 'new', 'next', 'nil', 'not',
                'or', 'pass', 'public', 'print', 'private', 'protected',
                'raise', 'redo', 'rescue', 'retry', 'register', 'return',
                'self', 'sizeof', 'static', 'super', 'switch', 'synchronized',
                'then', 'this', 'throw', 'transient', 'try', 'undef',
                'unless', 'unsigned', 'until', 'use', 'var', 'virtual',
                'volatile', 'when', 'while', 'with', 'xor', 'yield'),
                prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
        ],
        'numbers': [
            (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float),
            (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
    }
Example #26
0
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        # Indentation drives block structure; _indentation is a callback
        # that dispatches into the right state for each line.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        # Implicit-div shortcuts: .class and #id open a tag.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        # '=' / '~' lines embed Ruby; otherwise fall back to plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            # Conditional and plain HTML comments.
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'),
            # '-' lines run Ruby without output.
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            # {...} and [...] attribute hashes are Ruby code.
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # #{...} Ruby interpolation inside plain text.
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #27
0
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)*'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            # FIX: raw string -- '\s' in a plain string is an invalid
            # escape sequence (warning today, error in future Python).
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            # Floats before ints so the integer rule can't claim a prefix.
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),  # stray comment terminator
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('auto', 'break', 'case', 'const', 'continue', 'default', 'do',
                 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register',
                 'restricted', 'return', 'sizeof', 'static', 'struct',
                 'switch', 'typedef', 'union', 'volatile', 'while'),
                suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void|'
             r'[a-z_][a-z0-9_]*_t)\b', Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            # Labels ('name:'); (?!:) avoids matching '::' in C++ subclasses.
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            # FIX: raw string for the same invalid-'\w'-escape reason.
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')?(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')?(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        # A single statement, popped at ';'.
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        # A function body; braces nest via #push/#pop.
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        # Preprocessor line after the '#'; continuation lines stay inside.
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        # '#if 0' regions are lexed as comments; nested #if's #push.
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    # Common typedef names highlighted as types when the corresponding
    # option (below) is enabled.
    stdlib_types = [
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    ]
    c99_types = [
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t'
    ]

    def __init__(self, **options):
        """Read the stdlib/c99 type-highlighting options (both default on)."""
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Post-process plain Names into Keyword.Type for known typedefs."""
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
            yield index, token, value
Example #28
0
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    # Tag and attribute matching is case-insensitive; DOTALL lets the
    # comment/CDATA/processing-instruction patterns span multiple lines.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--(.|\n)*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script ...>: attributes are lexed in 'tag', then the element
            # body is delegated to the JavaScript lexer in 'script-content'.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('script-content', 'tag')),
            # <style ...>: same scheme, body delegated to the CSS lexer.
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)', bygroups(Punctuation, Text,
                                             Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        # Inside an opening tag: attributes until '>' or '/>' pops back out.
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator,
                                               Text), 'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text,
                                       Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        # An attribute value: quoted string or a bare token up to whitespace/'>'.
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # A matching doctype is strong but not conclusive evidence;
        # returns None (no opinion) otherwise.
        if html_doctype_matches(text):
            return 0.5
Example #29
0
class VhdlLexer(RegexLexer):
    """
    For VHDL source code.

    .. versionadded:: 1.5
    """
    name = 'vhdl'
    aliases = ['vhdl']
    filenames = ['*.vhdl', '*.vhd']
    mimetypes = ['text/x-vhdl']
    # VHDL is case-insensitive; MULTILINE makes $ match at line ends.
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
            (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r"'[a-zA-Z_][a-zA-Z0-9_]*", Name.Attribute),
            (r'[()\[\],.;\']', Punctuation),
            (r'"[^\n\\]*"', String),
            (r'(library)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
            (r'(use)(\s+)([a-zA-Z_][\.a-zA-Z0-9_]*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(entity|component)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Keyword, Text, Name.Class)),
            (r'(architecture|configuration)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)'
             r'(of)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)(is)',
             bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
                      Name.Class, Text, Keyword)),
            # "end <name>;" -- the trailer after 'end' is lexed in 'endblock'.
            (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),
            include('types'),
            include('keywords'),
            include('numbers'),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        # After 'end': keyword or block name, popped at the ';'.
        'endblock': [
            include('keywords'),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class),
            (r'(\s+)', Text),
            (r';', Punctuation, '#pop'),
        ],
        'types': [
            (r'(boolean|bit|character|severity_level|integer|time|delay_length|'
             r'natural|positive|string|bit_vector|file_open_kind|'
             r'file_open_status|std_ulogic|std_ulogic_vector|std_logic|'
             r'std_logic_vector)\b', Keyword.Type),
        ],
        'keywords': [
            (r'(abs|access|after|alias|all|and|'
             r'architecture|array|assert|attribute|begin|block|'
             r'body|buffer|bus|case|component|configuration|'
             r'constant|disconnect|downto|else|elsif|end|'
             r'entity|exit|file|for|function|generate|'
             r'generic|group|guarded|if|impure|in|'
             r'inertial|inout|is|label|library|linkage|'
             r'literal|loop|map|mod|nand|new|'
             r'next|nor|not|null|of|on|'
             r'open|or|others|out|package|port|'
             r'postponed|procedure|process|pure|range|record|'
             r'register|reject|return|rol|ror|select|'
             r'severity|signal|shared|sla|sli|sra|'
             r'srl|subtype|then|to|transport|type|'
             r'units|until|use|variable|wait|when|'
             r'while|with|xnor|xor)\b', Keyword),
        ],
        # Based literals, integers, reals and bit-string literals.
        'numbers': [
            (r'\d{1,2}#[0-9a-fA-F_]+#?', Number.Integer),
            # NOTE(review): the fractional part matches exactly one binary
            # digit -- looks like a missing '+' after the second [0-1_].
            (r'[0-1_]+(\.[0-1_])', Number.Integer),
            (r'\d+', Number.Integer),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            # NOTE(review): hex/binary bit-strings are tagged Number.Oct
            # here, matching the octal rule below -- confirm intended.
            (r'H"[0-9a-fA-F_]+"', Number.Oct),
            (r'O"[0-7_]+"', Number.Oct),
            (r'B"[0-1_]+"', Number.Oct),
        ],
    }
Example #30
0
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        # Leading whitespace drives the block structure; the module-level
        # _indentation callback dispatches into the other states.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        # CSS-style shortcuts: .class and #id start an implicit tag.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        # Either a Scala evaluation marker (=, ~, with optional &/!) or
        # plain text for the rest of the line.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        # One line of content: tag, doctype, comments, Scala code or filter.
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        # After a tag name: {..}/[..] Scala attribute hashes, (..) HTML
        # attributes, self-closing '/', or trim markers < and >.
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        # Literal text with #{...} Scala interpolation.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        # Indented block bodies entered via the _starts_block callbacks.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #31
0
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

# Matches a single line including its trailing newline.
line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']

# Token rules for IPython syntax on top of plain Python: cell magics
# (%%), line magics (%), shell escapes (!, !!) and ?/?? help requests.
ipython_tokens = [
  (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
  (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
  (r"(%%?)(\w+)(\?\??)$",  bygroups(Operator, Keyword, Operator)),
  (r"\b(\?\??)(\s*)$",  bygroups(Operator, Text)),
  (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                       using(BashLexer), Text)),
  (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
  (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
  (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
  (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
  (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
Example #32
0
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    # Faux dot placeholder (see the Scaml lexer for the rationale).
    _dot = r'.'

    tokens = {
        # Leading whitespace drives the block structure; the module-level
        # _indentation callback dispatches into the other states.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        # CSS-style shortcuts: .class and #id start an implicit tag.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        # Either a Scala evaluation marker or plain text for the line.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        # One line of content; unlike Scaml, bare words are tag names and
        # '|' introduces a plain-text line.
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],
        # After a tag name: attribute hashes, HTML attributes, '/', trim
        # markers -- same scheme as the Scaml lexer.
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        # Literal text with #{...} Scala interpolation.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],
        # Indented block bodies entered via the _starts_block callbacks.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #33
0
    # Prepend a rule for backtick-delimited literals, keeping the parent
    # lexer's rules via the pygments `inherit` marker.
    tokens = {"root": [(r"`[^`]*?`", String.Backtick), inherit]}


# Extra 'root' rules merged into the Python lexer: '?' help, $NAME
# environment variables, and the ${..}, $(..), $[..], @(..) xonsh
# sub-language delimiters that switch into pymode/subproc states.
ROOT_TOKENS = [
    (r"\?", Keyword),
    (r"\$\w+", Name.Variable),
    (r"\$\{", Keyword, ("pymode",)),
    (r"\$\(", Keyword, ("subproc",)),
    (r"\$\[", Keyword, ("subproc",)),
    (r"@\(", Keyword, ("pymode",)),
    inherit,
]

# Inside ${...} / @(...): everything up to the closer is lexed as the
# current (Python) lexer via using(this).
PYMODE_TOKENS = [
    (r"(.+)(\))", bygroups(using(this), Keyword), "#pop"),
    (r"(.+)(\})", bygroups(using(this), Keyword), "#pop"),
]

# Inside $(...) / $[...]: contents are lexed in subprocess (shell) mode.
SUBPROC_TOKENS = [
    (r"(.+)(\))", bygroups(using(XonshSubprocLexer), Keyword), "#pop"),
    (r"(.+)(\])", bygroups(using(XonshSubprocLexer), Keyword), "#pop"),
]


class XonshLexer(PythonLexer):
    """Xonsh console lexer for pygments."""

    # Registration metadata; '*xonshrc' also matches dotfiles like
    # '.xonshrc' since there is no leading separator in the pattern.
    name = "Xonsh lexer"
    aliases = ["xonsh", "xsh"]
    filenames = ["*.xsh", "*xonshrc"]
Example #34
0
class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    *New in Pygments 1.1.*
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        # Host-language code is swallowed as Other; only the %% FSM
        # sections are highlighted (single-line or %%{ ... }%% blocks).
        'root': [
            (
                r'(' + r'|'.join((  # keep host code in largest possible chunks
                    r'[^%\'"/#]+',  # exclude unsafe characters
                    r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                    r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression

                    # / is safe now that we've handled regex and javadoc comments
                    r'/',
                )) + r')+',
                Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)',
             bygroups(Punctuation, using(RagelLexer), Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
        ],
        # Inside a %%{ ... }%% block: everything is delegated to the
        # Ragel lexer, chunked so the closing }%% is not consumed.
        'multi-line-fsm': [
            (
                r'(' +
                r'|'.join((  # keep ragel code in largest possible chunks.
                    r'(' + r'|'.join((
                        r'[^}\'"\[/#]',  # exclude unsafe characters
                        r'}(?=[^%]|$)',  # } is okay as long as it's not followed by %
                        r'}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                        r'[^\\][\\][{}]',  # ...and } is okay if it's escaped

                        # allow / if it's preceded with one of these symbols
                        # (ragel EOF actions)
                        r'(>|\$|%|<|@|<>)/',

                        # specifically allow regex followed immediately by *
                        # so it doesn't get mistaken for a comment
                        r'/(?!\*)(\\\\|\\/|[^/])*/\*',

                        # allow / as long as it's not followed by another / or by a *
                        r'/(?=[^/\*]|$)',

                        # We want to match as many of these as we can in one block.
                        # Not sure if we need the + sign here,
                        # does it help performance?
                    )) + r')+',

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                )) + r')+',
                using(RagelLexer)),
            (r'}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        # The '@LANG: indep' marker is definitive; otherwise give a small
        # constant score since *.rl alone already selected this lexer.
        return '@LANG: indep' in text or 0.1
Example #35
0
class GosuLexer(RegexLexer):
    """
    For Gosu source code.

    *New in Pygments 1.5.*
    """

    name = 'Gosu'
    aliases = ['gosu']
    filenames = ['*.gs', '*.gsx', '*.gsp', '*.vark']
    mimetypes = ['text/x-gosu']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # method names
            (
                r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)'  # modifiers etc.
                r'([a-zA-Z_][a-zA-Z0-9_]*)'  # method name
                r'(\s*)(\()',  # signature start
                bygroups(using(this), Name.Function, Text, Operator)),
            (r'[^\S\n]+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator),
            (r'(in|as|typeof|statictypeof|typeis|typeas|if|else|foreach|for|'
             r'index|while|do|continue|break|return|try|catch|finally|this|'
             r'throw|new|switch|case|default|eval|super|outer|classpath|'
             r'using)\b', Keyword),
            (r'(var|delegate|construct|function|private|internal|protected|'
             r'public|abstract|override|final|static|extends|transient|'
             r'implements|represents|readonly)\b', Keyword.Declaration),
            (r'(property\s+)(get|set)?', Keyword.Declaration),
            (r'(boolean|byte|char|double|float|int|long|short|void|block)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)),
            (r'(true|false|null|NaN|Infinity)\b', Keyword.Constant),
            (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Keyword.Declaration, Text, Name.Class)),
            (r'(uses)(\s+)([a-zA-Z0-9_.]+\*?)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            # Opening quote switches into 'string', which in turn supports
            # Gosu string templates (<% %> and ${...}).
            (r'"', String, 'string'),
            (r'(\??[\.#])([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Operator, Name.Attribute)),
            (r'(:)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator,
                                                      Name.Attribute)),
            (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name),
            (r'and|or|not|[\\~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\n', Text)
        ],
        # Shared body of string literals and template files: escapes,
        # template directives, embedded expressions, plain characters.
        'templateText': [(r'(\\<)|(\\\$)', String),
                         (r'(<%@\s+)(extends|params)',
                          bygroups(Operator,
                                   Name.Decorator), 'stringTemplate'),
                         (r'<%!--.*?--%>', Comment.Multiline),
                         (r'(<%)|(<%=)', Operator, 'stringTemplate'),
                         (r'\$\{', Operator, 'stringTemplateShorthand'),
                         (r'.', String)],
        'string': [(r'"', String, '#pop'),
                   include('templateText')],
        # Inside <% ... %>: full Gosu code until the closing %>.
        'stringTemplate': [(r'"', String, 'string'), (r'%>', Operator, '#pop'),
                           include('root')],
        # Inside ${ ... }: full Gosu code with brace nesting.
        'stringTemplateShorthand': [(r'"', String, 'string'),
                                    (r'\{', Operator,
                                     'stringTemplateShorthand'),
                                    (r'\}', Operator, '#pop'),
                                    include('root')],
    }
Example #36
0
class CeylonLexer(RegexLexer):
    """
    For `Ceylon <http://ceylon-lang.org/>`_ source code.

    *New in Pygments 1.6.*
    """

    name = 'Ceylon'
    aliases = ['ceylon']
    filenames = ['*.ceylon']
    mimetypes = ['text/x-ceylon']

    flags = re.MULTILINE | re.DOTALL

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            # method names
            (
                r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)'  # return arguments
                r'([a-zA-Z_][a-zA-Z0-9_]*)'  # method name
                r'(\s*)(\()',  # signature start
                bygroups(using(this), Name.Function, Text, Operator)),
            (r'[^\S\n]+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(variable|shared|abstract|doc|by|formal|actual|late|native)',
             Name.Decorator),
            (r'(break|case|catch|continue|default|else|finally|for|in|'
             r'variable|if|return|switch|this|throw|try|while|is|exists|dynamic|'
             r'nonempty|then|outer|assert)\b', Keyword),
            (r'(abstracts|extends|satisfies|adapts|'
             r'super|given|of|out|assign|'
             r'transient|volatile)\b', Keyword.Declaration),
            (r'(function|value|void)\b', Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(class|interface|object|alias)(\s+)',
             bygroups(Keyword.Declaration, Text), 'class'),
            (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char),
            (r'".*``.*``.*"', String.Interpol),
            (r'(\.)([a-z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)),
            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
            # Numeric literals: '_' group separators, kMGTP / munpf
            # magnitude suffixes, #hex and $binary forms.
            (r'\d{1,3}(_\d{3})+\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float),
            (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?',
             Number.Float),
            (r'[0-9][0-9]*\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float),
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?',
             Number.Float),
            (r'#([0-9a-fA-F]{4})(_[0-9a-fA-F]{4})+', Number.Hex),
            (r'#[0-9a-fA-F]+', Number.Hex),
            (r'\$([01]{4})(_[01]{4})+', Number.Integer),
            (r'\$[01]+', Number.Integer),
            (r'\d{1,3}(_\d{3})+[kMGTP]?', Number.Integer),
            (r'[0-9]+[kMGTP]?', Number.Integer),
            (r'\n', Text)
        ],
        # One identifier after class/interface/object/alias or import.
        'class': [(r'[A-Za-z_][a-zA-Z0-9_]*', Name.Class, '#pop')],
        'import': [(r'[a-z][a-zA-Z0-9_.]*', Name.Namespace, '#pop')],
    }
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    Returns
    -------
    A new lexer class (created via `type`) deriving from the chosen
    Python lexer, with the IPython-specific rules prepended to the
    'root' state of its token table.
    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`.  But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # Cell magics whose bodies are delegated to a dedicated sub-lexer,
    # followed by generic magic / shell-escape / help-syntax rules.
    # (Fixed: the %%capture entry was indented one space short of its
    # siblings; purely cosmetic since it sits inside a list literal.)
    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$",  bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$",  bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    # Prepend so the IPython rules take precedence over plain Python ones.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)
Example #38
0
class KotlinLexer(RegexLexer):
    """
    For `Kotlin <http://confluence.jetbrains.net/display/Kotlin/>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

    *New in Pygments 1.5.*
    """

    name = 'Kotlin'
    aliases = ['kotlin']
    filenames = ['*.kt']
    mimetypes = ['text/x-kotlin']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers,
    # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Maps each `unicodelevel` option value to an identifier regex.
    levels = {
        'none':
        '@?[_a-zA-Z][a-zA-Z0-9_]*',
        'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
                  '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd +
                  uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    # One full token table is built per unicode level; __init__ selects
    # (and lazily compiles) the one matching the chosen option.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in list(levels.items()):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*](.|\n)*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
                 r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
                # NOTE(review): the preprocessor, @"..." verbatim-string and
                # extern-alias rules below look carried over from the C#
                # lexer this was derived from; Kotlin itself has no
                # preprocessor -- confirm intended.
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|break|catch|'
                 r'fun|continue|default|delegate|'
                 r'do|else|enum|extern|false|finally|'
                 r'fixed|for|goto|if|implicit|in|interface|'
                 r'internal|is|lock|null|'
                 r'out|override|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|'
                 r'when|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|val|var)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|'
                 r'short)\b\??', Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(package|using)(\s+)', bygroups(Keyword, Text), 'package'),
                (cs_ident, Name),
            ],
            'class': [(cs_ident, Name.Class, '#pop')],
            'package': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ]
        }

    def __init__(self, **options):
        # Pick the token-table variant for the requested unicode level and
        # compile its regexes on first use; compiled tables are cached in
        # _all_tokens (presumably populated by the RegexLexer machinery --
        # confirm against the base class).
        level = get_choice_opt(options, 'unicodelevel',
                               list(self.tokens.keys()), 'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
Example #39
0
class XonshSubprocLexer(BashLexer):
    """Lexer for xonsh subproc mode."""

    name = 'Xonsh subprocess lexer'

    # Bash rules plus backtick-style highlighting for search paths.
    tokens = {
        'root': [
            (SearchPath, String.Backtick),
            inherit,
        ],
    }


# Extra 'root'-state rules layered onto the Python lexer: the xonsh help
# operator '?', environment variables, and the delimiters that open
# python-mode or subprocess-mode sub-expressions.
ROOT_TOKENS = [
    (r'\?', Keyword),
    (r'\$\w+', Name.Variable),
    (r'\$\{', Keyword, ('pymode', )),
    (r'[\!\$]\(', Keyword, ('subproc', )),
    (r'[\!\$]\[', Keyword, ('subproc', )),
    (r'@\$\(', Keyword, ('subproc', )),
    (r'@\(', Keyword, ('pymode', )),
    inherit,
]

# Rules active inside a python-mode sub-expression: everything up to the
# closing ')' or '}' is re-lexed with this (Python) lexer, then we pop back.
PYMODE_TOKENS = [
    (r'(.+)(\))', bygroups(using(this), Keyword), '#pop'),
    (r'(.+)(\})', bygroups(using(this), Keyword), '#pop'),
]

# Rules active inside a subprocess-mode sub-expression: everything up to the
# closing ')' or ']' is re-lexed with the subprocess (Bash-based) lexer,
# then we pop back to the enclosing state.
SUBPROC_TOKENS = [
    (r'(.+)(\))', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
    (r'(.+)(\])', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
]


class XonshLexer(PythonLexer):
    """Xonsh console lexer for pygments."""

    # Registration metadata used by pygments' lexer lookup machinery.
    # '*xonshrc' (no dot before the glob) also matches dotfiles like
    # '.xonshrc'.
    name = 'Xonsh lexer'
    aliases = ['xonsh', 'xsh']
    filenames = ['*.xsh', '*xonshrc']
Example #40
0
class VCLLexer(RegexLexer):
    """
    For Varnish Configuration Language (VCL).

    .. versionadded:: 2.2
    """
    name = 'VCL'
    aliases = ['vcl']
    filenames = ['*.vcl']
    mimetypes = ['text/x-vclsrc']

    def analyse_text(text):
        # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
        # that this is VCL
        if text.startswith('vcl 4.0;'):
            return 1.0
        # Skip over comments and blank lines
        # This is accurate enough that returning 0.9 is reasonable.
        # Almost no VCL files start without some comments.
        # NOTE: this is a plain substring test, not a regex, so the dot
        # must appear literally — 'vcl 4\.0;' (with a backslash) could
        # never match a real file, which made this branch dead code.
        elif '\nvcl 4.0;' in text[:1000]:
            return 0.9

    tokens = {
        # Body of an inline 'probe { ... }' block: '.attribute = value;'
        # pairs until the closing brace.
        'probe': [
            include('whitespace'),
            include('comments'),
            (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        # Body of an 'acl { ... }' block: addresses, masks and separators.
        'acl': [
            include('whitespace'),
            include('comments'),
            (r'[!/]+', Operator),
            (r';', Punctuation),
            (r'\d+', Number),
            (r'\}', Punctuation, '#pop'),
        ],
        # Body of a 'backend { ... }' block; '.probe' may reference a named
        # probe or open an inline probe sub-block.
        'backend': [
            include('whitespace'),
            (r'(\.probe)(\s*=\s*)(\w+)(;)',
             bygroups(Name.Attribute, Operator, Name.Variable.Global,
                      Punctuation)),
            (r'(\.probe)(\s*=\s*)(\{)',
             bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
            (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        # General VCL statements: durations, built-in subroutines, return
        # actions, request/response variables, operators and declarations.
        'statements': [
            # Durations like '10s', '2.5h' or '300ms'.
            (r'(\d\.)?\d+[sdwhmy]', Literal.Date),
            (r'(\d\.)?\d+ms', Literal.Date),
            # Built-in subroutine names.
            (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
             r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
             r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
            # Return actions usable from the built-in subroutines.
            (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
             r'miss|fetch|restart)\b', Name.Constant),
            # Arbitrary HTTP header variables, e.g. 'req.http.X-Forwarded-For'.
            (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b',
             Name.Variable),
            # The fixed set of non-header VCL variables.
            (words(
                ('obj.status', 'req.hash_always_miss', 'beresp.backend',
                 'req.esi_level', 'req.can_gzip', 'beresp.ttl',
                 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity',
                 'req.hash_ignore_busy', 'obj.reason', 'req.xid',
                 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age',
                 'local.ip', 'beresp.uncacheable', 'req.method',
                 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts',
                 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid',
                 'bereq.between_bytes_timeout', 'req.esi',
                 'bereq.first_byte_timeout', 'bereq.method',
                 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status',
                 'beresp.do_gunzip', 'beresp.storage_hint',
                 'resp.is_streaming', 'beresp.do_stream', 'req_top.method',
                 'bereq.backend', 'beresp.backend.name', 'beresp.status',
                 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason',
                 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi',
                 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname',
                 'remote.ip', 'req.backend_hint', 'server.identity',
                 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip',
                 'bereq.uncacheable'),
                suffix=r'\b'), Name.Variable),
            (r'[!%&+*\-,/<.}{>=|~]+', Operator),
            (r'[();]', Punctuation),
            (r'[,]+', Punctuation),
            # Language keywords and built-in functions.
            (words(('hash_data', 'regsub', 'regsuball', 'if', 'else', 'elsif',
                    'elif', 'synth', 'synthetic', 'ban', 'return', 'set',
                    'unset', 'import', 'include', 'new', 'rollback', 'call'),
                   suffix=r'\b'), Keyword),
            (r'storage\.\w+\.\w+\b', Name.Variable),
            (words(('true', 'false')), Name.Builtin),
            (r'\d+\b', Number),
            # Top-level declarations that open their own lexer state.
            (r'(backend)(\s+\w+)(\s*\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
            (r'(probe\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
            (r'(acl\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
            # Version marker, e.g. 'vcl 4.0;'.
            (r'(vcl )(4.0)(;)$',
             bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
            (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
             bygroups(Keyword, Name.Function, Punctuation)),
            # VMOD function call: 'module.function(args)'.
            (r'([a-zA-Z_]\w*)'
             r'(\.)'
             r'([a-zA-Z_]\w*)'
             r'(\s*\(.*\))',
             bygroups(Name.Function, Punctuation, Name.Function, using(this))),
            # Raw string for the bare-identifier fallback (avoids the
            # invalid '\w' escape warning in a non-raw literal).
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Interior of a '/* ... */' comment; '#push' allows nesting.
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # Entry points for the three comment syntaxes VCL supports.
        'comments': [
            (r'#.*$', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment),
        ],
        # Ordinary double-quoted string (no embedded newlines).
        'string': [
            (r'"', String, '#pop'),
            (r'[^"\n]+', String),  # all other characters
        ],
        # Long string delimited by {" ... "}; may contain quotes/newlines.
        'multistring': [
            (r'[^"}]', String),
            (r'"\}', String, '#pop'),
            (r'["}]', String),
        ],
        # Whitespace plus the tokens that open string states.
        'whitespace': [
            (r'L?"', String, 'string'),
            (r'\{"', String, 'multistring'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('statements'),
            (r'\s+', Text),
        ],
    }