Exemple #1
0
class RPMSpecLexer(RegexLexer):
    """
    For RPM ``.spec`` files.

    .. versionadded:: 1.6
    """

    name = 'RPMSpec'
    aliases = ['spec']
    filenames = ['*.spec']
    mimetypes = ['text/x-rpm-spec']

    _directives = ('(?:package|prep|build|install|clean|check|pre[a-z]*|'
                   'post[a-z]*|trigger[a-z]*|files)')

    tokens = {
        'root': [
            (r'#.*\n', Comment),
            include('basic'),
        ],
        'description': [
            (r'^(%' + _directives + ')(.*)$',
             bygroups(Name.Decorator, Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'changelog': [
            (r'\*.*\n', Generic.Subheading),
            (r'^(%' + _directives + ')(.*)$',
             bygroups(Name.Decorator, Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            include('interpol'),
            (r'.', String.Double),
        ],
        'basic': [
            include('macro'),
            (r'(?i)^(Name|Version|Release|Epoch|Summary|Group|License|Packager|'
             r'Vendor|Icon|URL|Distribution|Prefix|Patch[0-9]*|Source[0-9]*|'
             r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Suggests|Provides|Conflicts|'
             r'Build[a-z]+|[a-z]+Arch|Auto[a-z]+)(:)(.*)$',
             bygroups(Generic.Heading, Punctuation, using(this))),
            (r'^%description', Name.Decorator, 'description'),
            (r'^%changelog', Name.Decorator, 'changelog'),
            (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text)),
            (r'%(attr|defattr|dir|doc(?:dir)?|setup|config(?:ure)?|'
             r'make(?:install)|ghost|patch[0-9]+|find_lang|exclude|verify)',
             Keyword),
            include('interpol'),
            (r"'.*?'", String.Single),
            (r'"', String.Double, 'string'),
            (r'.', Text),
        ],
        'macro': [
            (r'%define.*\n', Comment.Preproc),
            (r'%\{\!\?.*%define.*\}', Comment.Preproc),
            (r'(%(?:if(?:n?arch)?|else(?:if)?|endif))(.*)$',
             bygroups(Comment.Preproc, Text)),
        ],
        'interpol': [
            (r'%\{?__[a-z_]+\}?', Name.Function),
            (r'%\{?_([a-z_]+dir|[a-z_]+path|prefix)\}?', Keyword.Pseudo),
            (r'%\{\?\w+\}', Name.Variable),
            (r'\$\{?RPM_[A-Z0-9_]+\}?', Name.Variable.Global),
            (r'%\{[a-zA-Z]\w+\}', Keyword.Constant),
        ]
    }
Exemple #2
0
class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    .. versionadded:: 2.2
    """
    # Syntax from syntax/sas.vim by James Kidd <*****@*****.**>

    name = 'SAS'
    aliases = ['sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    flags = re.IGNORECASE | re.MULTILINE

    builtins_macros = ("bquote", "nrbquote", "cmpres", "qcmpres", "compstor",
                       "datatyp", "display", "do", "else", "end", "eval",
                       "global", "goto", "if", "index", "input", "keydef",
                       "label", "left", "length", "let", "local", "lowcase",
                       "macro", "mend", "nrquote", "nrstr", "put", "qleft",
                       "qlowcase", "qscan", "qsubstr", "qsysfunc", "qtrim",
                       "quote", "qupcase", "scan", "str", "substr", "superq",
                       "syscall", "sysevalf", "sysexec", "sysfunc", "sysget",
                       "syslput", "sysprod", "sysrc", "sysrput", "then", "to",
                       "trim", "unquote", "until", "upcase", "verify", "while",
                       "window")

    builtins_conditionals = ("do", "if", "then", "else", "end", "until",
                             "while")

    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4", "catname",
        "continue", "datalines", "datalines4", "delete", "delim", "delimiter",
        "display", "dm", "drop", "endsas", "error", "file", "filename",
        "footnote", "format", "goto", "in", "infile", "informat", "input",
        "keep", "label", "leave", "length", "libname", "link", "list",
        "lostcard", "merge", "missing", "modify", "options", "output", "out",
        "page", "put", "redirect", "remove", "rename", "replace", "retain",
        "return", "select", "set", "skip", "startsas", "stop", "title",
        "update", "waitsas", "where", "window", "x", "systask")

    builtins_sql = ("add", "and", "alter", "as", "cascade", "check", "create",
                    "delete", "describe", "distinct", "drop", "foreign",
                    "from", "group", "having", "index", "insert", "into", "in",
                    "key", "like", "message", "modify", "msgtype", "not",
                    "null", "on", "or", "order", "primary", "references",
                    "reset", "restrict", "select", "set", "table", "unique",
                    "update", "validate", "view", "where")

    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc", "attrn",
        "band", "betainv", "blshift", "bnot", "bor", "brshift", "bxor", "byte",
        "cdf", "ceil", "cexist", "cinv", "close", "cnonct", "collate",
        "compbl", "compound", "compress", "cos", "cosh", "css", "curobs", "cv",
        "daccdb", "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb", "depdbsl",
        "depsl", "depsyd", "deptab", "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum", "dread",
        "dropnote", "dsname", "erf", "erfc", "exist", "exp", "fappend",
        "fclose", "fcol", "fdelete", "fetch", "fetchobs", "fexist", "fget",
        "fileexist", "filename", "fileref", "finfo", "finv", "fipname",
        "fipnamel", "fipstate", "floor", "fnonct", "fnote", "fopen",
        "foptname", "foptnum", "fpoint", "fpos", "fput", "fread", "frewind",
        "frlen", "fsep", "fuzz", "fwrite", "gaminv", "gamma", "getoption",
        "getvarc", "getvarn", "hbound", "hms", "hosthelp", "hour", "ibessel",
        "index", "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate", "kurtosis",
        "lag", "lbound", "left", "length", "lgamma", "libname", "libref",
        "log", "log10", "log2", "logpdf", "logpmf", "logsdf", "lowcase", "max",
        "mdy", "mean", "min", "minute", "mod", "month", "mopen", "mort", "n",
        "netpv", "nmiss", "normal", "note", "npv", "open", "ordinal",
        "pathname", "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam", "probhypr",
        "probit", "probnegb", "probnorm", "probt", "put", "putc", "putn",
        "qtr", "quote", "ranbin", "rancau", "ranexp", "rangam", "range",
        "rank", "rannor", "ranpoi", "rantbl", "rantri", "ranuni", "repeat",
        "resolve", "reverse", "rewind", "right", "round", "saving", "scan",
        "sdf", "second", "sign", "sin", "sinh", "skewness", "soundex",
        "spedis", "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod", "sysrc",
        "system", "tan", "tanh", "time", "timepart", "tinv", "tnonct", "today",
        "translate", "tranwrd", "trigamma", "trim", "trimn", "trunc",
        "uniform", "upcase", "uss", "var", "varfmt", "varinfmt", "varlabel",
        "varlen", "varname", "varnum", "varray", "varrayx", "vartype",
        "verify", "vformat", "vformatd", "vformatdx", "vformatn", "vformatnx",
        "vformatw", "vformatwx", "vformatx", "vinarray", "vinarrayx",
        "vinformat", "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel", "vlabelx",
        "vlength", "vlengthx", "vname", "vnamex", "vtype", "vtypex", "weekday",
        "year", "yyq", "zipfips", "zipname", "zipnamel", "zipstate")

    tokens = {
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            (r'.', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        'comments': [
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*(.|\n)*?;', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]', Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        'data': [
            (r'(.|\n)*^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            include('general'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(builtins_sql, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(builtins_conditionals, prefix=r'\b',
                   suffix=r'\b'), Keyword),
            (words(builtins_macros, prefix=r'%', suffix=r'\b'), Name.Builtin),
            (words(builtins_functions, prefix=r'\b',
                   suffix=r'\('), Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-z_]\w{0,31}\.?', Name.Variable),
            (r'%[a-z_]\w{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            ('\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes
            # (r'&', Name.Variable, 'validvar'),
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            (r'&', Name.Variable, 'validvar'),
            (r'[^$&"\\]+', String),
            (r'[$"\\]', String),
        ],
        'validvar': [
            (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
             Number),
        ],
        'special': [
            (r'(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)',
             Keyword.Constant),
        ],
        # 'operators': [
        #     (r'(-|=|<=|>=|<|>|<>|&|!=|'
        #      r'\||\*|\+|\^|/|!|~|~=)', Operator)
        # ],
    }
Exemple #3
0
class QmlLexer(RegexLexer):
    """
    For QML files. See http://doc.qt.digia.com/4.7/qdeclarativeintroduction.html.

    .. versionadded:: 1.6
    """

    # QML is based on javascript, so much of this is taken from the
    # JavascriptLexer above.

    name = 'QML'
    aliases = ['qml', 'qbs']
    filenames = ['*.qml', '*.qbs']
    mimetypes = ['application/x-qml', 'application/x-qt.qbs+qml']

    # pasted from JavascriptLexer, with some additions
    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),

            # QML insertions
            (r'\bid\s*:\s*[A-Za-z][\w.]*', Keyword.Declaration,
             'slashstartsregex'),
            (r'\b[A-Za-z][\w.]*\s*:', Keyword, 'slashstartsregex'),

            # the rest from JavascriptLexer
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
Exemple #4
0
class FloScriptLexer(RegexLexer):
    """
    For `FloScript <https://github.com/ioflo/ioflo>`_ configuration language source code.

    .. versionadded:: 2.4
    """

    name = 'FloScript'
    aliases = ['floscript', 'flo']
    filenames = ['*.flo']

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),

            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(to|by|with|from|per|for|cum|qua|via|as|at|in|of|on|re|is|if|be|into|'
             r'and|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            (r'(load|init|server|logger|log|loggee|first|over|under|next|done|timeout|'
             r'repeat|native|benter|enter|recur|exit|precur|renter|rexit|print|put|inc|'
             r'copy|set|aux|rear|raze|go|let|do|bid|ready|start|stop|run|abort|use|flo|'
             r'give|take)\b', Name.Builtin),
            (r'(frame|framer|house)\b', Keyword),
            ('"', String, 'string'),

            include('name'),
            include('numbers'),
            (r'#.+$', Comment.Singleline),
        ],
        'string': [
            ('[^"]+', String),
            ('"', String, '#pop'),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],

        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
Exemple #5
0
class HexdumpLexer(RegexLexer):
    """
    For typical hex dump output formats by the UNIX and GNU/Linux tools ``hexdump``,
    ``hd``, ``hexcat``, ``od`` and ``xxd``, and the DOS tool ``DEBUG``. For example:

    .. sourcecode:: hexdump

        00000000  7f 45 4c 46 02 01 01 00  00 00 00 00 00 00 00 00  |.ELF............|
        00000010  02 00 3e 00 01 00 00 00  c5 48 40 00 00 00 00 00  |..>......H@.....|

    The specific supported formats are the outputs of:

    * ``hexdump FILE``
    * ``hexdump -C FILE`` -- the `canonical` format used in the example.
    * ``hd FILE`` -- same as ``hexdump -C FILE``.
    * ``hexcat FILE``
    * ``od -t x1z FILE``
    * ``xxd FILE``
    * ``DEBUG.EXE FILE.COM`` and entering ``d`` to the prompt.

    .. versionadded:: 2.1
    """
    name = 'Hexdump'
    aliases = ['hexdump']

    hd = r'[0-9A-Ha-h]'

    tokens = {
        'root': [
            (r'\n', Text),
            include('offset'),
            (r'(' + hd + r'{2})(\-)(' + hd + r'{2})',
             bygroups(Number.Hex, Punctuation, Number.Hex)),
            (hd + r'{2}', Number.Hex),
            (r'(\s{2,3})(\>)(.{16})(\<)$',
             bygroups(Text, Punctuation, String,
                      Punctuation), 'bracket-strings'),
            (r'(\s{2,3})(\|)(.{16})(\|)$',
             bygroups(Text, Punctuation, String,
                      Punctuation), 'piped-strings'),
            (r'(\s{2,3})(\>)(.{1,15})(\<)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'(\s{2,3})(\|)(.{1,15})(\|)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'(\s{2,3})(.{1,15})$', bygroups(Text, String)),
            (r'(\s{2,3})(.{16}|.{20})$', bygroups(Text,
                                                  String), 'nonpiped-strings'),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
        'offset': [
            (r'^(' + hd + '+)(:)', bygroups(Name.Label,
                                            Punctuation), 'offset-mode'),
            (r'^' + hd + '+', Name.Label),
        ],
        'offset-mode': [(r'\s', Text, '#pop'), (hd + '+', Name.Label),
                        (r':', Punctuation)],
        'piped-strings': [
            (r'\n', Text),
            include('offset'),
            (hd + r'{2}', Number.Hex),
            (r'(\s{2,3})(\|)(.{1,16})(\|)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
        'bracket-strings': [
            (r'\n', Text),
            include('offset'),
            (hd + r'{2}', Number.Hex),
            (r'(\s{2,3})(\>)(.{1,16})(\<)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
        'nonpiped-strings': [
            (r'\n', Text),
            include('offset'),
            (r'(' + hd + r'{2})(\-)(' + hd + r'{2})',
             bygroups(Number.Hex, Punctuation, Number.Hex)),
            (hd + r'{2}', Number.Hex),
            (r'(\s{19,})(.{1,20}?)$', bygroups(Text, String)),
            (r'(\s{2,3})(.{1,20})$', bygroups(Text, String)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
    }
Exemple #6
0
class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),
            (r'(<!ELEMENT)(\s+)(\S+)', bygroups(Keyword, Text,
                                                Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)', bygroups(Keyword, Text,
                                                Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)', bygroups(Keyword, Text,
                                               Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)', bygroups(Keyword, Text,
                                                 Name.Tag), 'notation'),
            (
                r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
                bygroups(Keyword, Name.Entity, Text, Keyword)),
            (r'(<!DOCTYPE)(\s+)([^>\s]+)', bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],
        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],
        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        if not looks_like_xml(text) and \
           ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8
Exemple #7
0
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Exemple #8
0
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.x).

    .. versionadded:: 0.10

    .. versionchanged:: 2.5
       This is now the default ``PythonLexer``.  It is still available as the
       alias ``Python3Lexer``.
    """

    name = 'Python'
    aliases = ['python', 'py', 'sage', 'python3', 'py3']
    filenames = [
        '*.py',
        '*.pyw',
        # Jython
        '*.jy',
        # Sage
        '*.sage',
        # SCons
        '*.sc',
        'SConstruct',
        'SConscript',
        # Skylark/Starlark (used by Bazel, Buck, and Pants)
        '*.bzl',
        'BUCK',
        'BUILD',
        'BUILD.bazel',
        'WORKSPACE',
        # Twisted Application infrastructure
        '*.tac',
    ]
    mimetypes = ['text/x-python', 'application/x-python',
                 'text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\![sra])?'                       # conversion
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Text, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Text, String.Affix, String.Doc)),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'\\\n', Text),
            (r'\\', Text),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('expr'),
        ],
        'expr': [
            # raw f-strings
            ('(?i)(rf|fr)(""")',
             bygroups(String.Affix, String.Double), 'tdqf'),
            ("(?i)(rf|fr)(''')",
             bygroups(String.Affix, String.Single), 'tsqf'),
            ('(?i)(rf|fr)(")',
             bygroups(String.Affix, String.Double), 'dqf'),
            ("(?i)(rf|fr)(')",
             bygroups(String.Affix, String.Single), 'sqf'),
            # non-raw f-strings
            ('([fF])(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("([fF])(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('([fF])(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("([fF])(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),
            # raw strings
            ('(?i)(rb|br|r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(rb|br|r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(rb|br|r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(rb|br|r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            (r'[^\S\n]+', Text),
            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(in|is|and|or|not)\b', Operator.Word),
            include('expr-keywords'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('name'),
            include('numbers'),
        ],
        'expr-inside-fstring': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r':', String.Interpol, '#pop'),
            (r'\s+', Text),  # allow new lines
            include('expr'),
        ],
        'expr-inside-fstring-inner': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Text),  # allow new lines
            include('expr'),
        ],
        'expr-keywords': [
            # Based on https://docs.python.org/3/reference/expressions.html
            (words((
                'async for', 'await', 'else', 'for', 'if', 'lambda',
                'yield', 'yield from'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray',
                'bytes', 'chr', 'classmethod', 'compile', 'complex',
                'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter',
                'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr',
                'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass',
                'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview',
                'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print',
                'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr',
                'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple',
                'type', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
                'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
                'RuntimeError', 'RuntimeWarning', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
                'Warning', 'WindowsError', 'ZeroDivisionError',
                # new builtin exceptions from PEP 3151
                'BlockingIOError', 'ChildProcessError', 'ConnectionError',
                'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
                'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
                'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
                'PermissionError', 'ProcessLookupError', 'TimeoutError',
                # others new in Python 3
                'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
                '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
                '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
                '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
                '__ge__', '__get__', '__getattr__', '__getattribute__',
                '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
                '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
                '__imul__', '__index__', '__init__', '__instancecheck__',
                '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
                '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
                '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
                '__new__', '__next__', '__or__', '__pos__', '__pow__',
                '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
                '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
                '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
                '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
                '__sub__', '__subclasscheck__', '__truediv__',
                '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
             Name.Variable.Magic),
        ],
        'numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'name': [
            (r'@' + uni_name, Name.Decorator),
            (r'@', Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        'funcname': [
            include('magicfuncs'),
            (uni_name, Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'\.', Name.Namespace),
            (uni_name, Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
            (r'\.', Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            (uni_name, Name.Namespace),
            default('#pop'),
        ],
        'fstringescape': [
            (r'\{\{', String.Escape),
            (r'\}\}', String.Escape),
            include('stringescape'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?(3(\.\d)?)?')
Exemple #9
0
class Python2Lexer(RegexLexer):
    """
    For `Python 2.x <http://www.python.org>`_ source code.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonLexer``.  ``PythonLexer`` now
       refers to the Python 3 variant.  File name patterns like ``*.py`` have
       been moved to Python 3 as well.
    """

    name = 'Python 2.x'
    aliases = ['python2', 'py2']
    filenames = []  # now taken over by PythonLexer (3.x)
    mimetypes = ['text/x-python2', 'application/x-python2']

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Text, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Text, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
                'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
                '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
                '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
                '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
                '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
                '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
                '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
                '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
                '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
                '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
                '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
                '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
                '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
                '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                '__unicode__', '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
                '__dict__', '__doc__', '__file__', '__func__', '__globals__',
                '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
                '__slots__', '__weakref__'),
                suffix=r'\b'),
             Name.Variable.Magic),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?2(\.\d)?') or \
            'import ' in text[:1000]
Exemple #10
0
class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from typecode._vendor.pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' %
                         (re.escape(unicode_delimiters), re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text,
                      Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this,
                                                         state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (
                r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
                bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and  # has two lines
                p1 * 2 + 1 == p2 and  # they are the same length
                text[p1 + 1] in '-='
                and  # the next line both starts and ends with
                text[p1 + 1] == text[p2 - 1]):  # ...a sufficiently high header
            return 0.5
Exemple #11
0
class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from typecode._vendor.pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph,
                                                      Text)),
            # bold
            # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))',
             bygroups(Text, Generic.Strong, None, Text)),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
Exemple #12
0
class FantomLexer(RegexLexer):
    """
    For Fantom source code.

    .. versionadded:: 1.5
    """
    name = 'Fantom'
    aliases = ['fan']
    filenames = ['*.fan']
    mimetypes = ['application/x-fantom']

    # often used regexes
    def s(str):
        return Template(str).substitute(
            dict(
                pod=r'[\"\w\.]+',
                eos=r'\n|;',
                id=r'[a-zA-Z_]\w*',
                # all chars which can be part of type definition. Starts with
                # either letter, or [ (maps), or | (funcs)
                type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?',
            ))

    tokens = {
        'comments': [
            (r'(?s)/\*.*?\*/', Comment.Multiline),  # Multiline
            (r'//.*?\n', Comment.Single),  # Single line
            # TODO: highlight references in fandocs
            (r'\*\*.*?\n', Comment.Special),  # Fandoc
            (r'#.*\n', Comment.Single)  # Shell-style
        ],
        'literals': [
            (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number),  # Duration
            (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)',
             Number),  # Duration with dot
            (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float),  # Float/Decimal
            (r'\b-?0x[0-9a-fA-F_]+', Number.Hex),  # Hex
            (r'\b-?[\d_]+', Number.Integer),  # Int
            (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),  # Char
            (r'"', Punctuation, 'insideStr'),  # Opening quote
            (r'`', Punctuation, 'insideUri'),  # Opening accent
            (r'\b(true|false|null)\b', Keyword.Constant),  # Bool & null
            (
                r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)',  # DSL
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         String, Punctuation)),
            (
                r'(?:(\w+)(::))?(\w+)?(#)(\w+)?',  # Type/slot literal
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         Name.Function)),
            (r'\[,\]', Literal),  # Empty list
            (
                s(r'($type)(\[,\])'),  # Typed empty list
                bygroups(using(this, state='inType'), Literal)),
            (r'\[:\]', Literal),  # Empty Map
            (s(r'($type)(\[:\])'),
             bygroups(using(this, state='inType'), Literal)),
        ],
        'insideStr': [
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'"', Punctuation, '#pop'),  # Closing quot
            (r'.', String)  # String content
        ],
        'insideUri': [  # TODO: remove copy/paste str/uri
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'`', Punctuation, '#pop'),  # Closing tick
            (r'.', String.Backtick)  # URI content
        ],
        'protectionKeywords': [
            (r'\b(public|protected|private|internal)\b', Keyword),
        ],
        'typeKeywords': [
            (r'\b(abstract|final|const|native|facet|enum)\b', Keyword),
        ],
        'methodKeywords': [
            (r'\b(abstract|native|once|override|static|virtual|final)\b',
             Keyword),
        ],
        'fieldKeywords':
        [(r'\b(abstract|const|final|native|override|static|virtual|'
          r'readonly)\b', Keyword)],
        'otherKeywords': [
            (words(('try', 'catch', 'throw', 'finally', 'for', 'if', 'else',
                    'while', 'as', 'is', 'isnot', 'switch', 'case', 'default',
                    'continue', 'break', 'do', 'return', 'get', 'set'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword),
            (r'\b(it|this|super)\b', Name.Builtin.Pseudo),
        ],
        'operators':
        [(r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator)],
        'inType': [
            (r'[\[\]|\->:?]', Punctuation),
            (s(r'$id'), Name.Class),
            default('#pop'),
        ],
        'root': [
            include('comments'),
            include('protectionKeywords'),
            include('typeKeywords'),
            include('methodKeywords'),
            include('fieldKeywords'),
            include('literals'),
            include('otherKeywords'),
            include('operators'),
            (r'using\b', Keyword.Namespace, 'using'),  # Using stmt
            (r'@\w+', Name.Decorator, 'facet'),  # Symbol
            (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class),
             'inheritance'),  # Inheritance list

            # Type var := val
            (s(r'($type)([ \t]+)($id)(\s*)(:=)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Operator)),

            # var := val
            (s(r'($id)(\s*)(:=)'), bygroups(Name.Variable, Text, Operator)),

            # .someId( or ->someId( ###
            (s(r'(\.|(?:\->))($id)(\s*)(\()'),
             bygroups(Operator, Name.Function, Text,
                      Punctuation), 'insideParen'),

            # .someId  or ->someId
            (s(r'(\.|(?:\->))($id)'), bygroups(Operator, Name.Function)),

            # new makeXXX (
            (r'(new)(\s+)(make\w*)(\s*)(\()',
             bygroups(Keyword, Text, Name.Function, Text,
                      Punctuation), 'insideMethodDeclArgs'),

            # Type name (
            (
                s(r'($type)([ \t]+)'  # Return type and whitespace
                  r'($id)(\s*)(\()'),  # method name + open brace
                bygroups(using(this, state='inType'), Text, Name.Function,
                         Text, Punctuation),
                'insideMethodDeclArgs'),

            # ArgType argName,
            (s(r'($type)(\s+)($id)(\s*)(,)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # ArgType argName)
            # Covered in 'insideParen' state

            # ArgType argName -> ArgType|
            (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'),
             bygroups(using(this, state='inType'), Text,
                      Name.Variable, Text, Punctuation, Text,
                      using(this, state='inType'), Punctuation)),

            # ArgType argName|
            (s(r'($type)(\s+)($id)(\s*)(\|)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # Type var
            (s(r'($type)([ \t]+)($id)'),
             bygroups(using(this, state='inType'), Text, Name.Variable)),
            (r'\(', Punctuation, 'insideParen'),
            (r'\{', Punctuation, 'insideBrace'),
            (r'.', Text)
        ],
        'insideParen': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'insideMethodDeclArgs': [
            (r'\)', Punctuation, '#pop'),
            (s(r'($type)(\s+)($id)(\s*)(\))'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation), '#pop'),
            include('root'),
        ],
        'insideBrace': [
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],
        'inheritance': [
            (r'\s+', Text),  # Whitespace
            (r':|,', Punctuation),
            (r'(?:(\w+)(::))?(\w+)',
             bygroups(Name.Namespace, Punctuation, Name.Class)),
            (r'\{', Punctuation, '#pop')
        ],
        'using': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(\[)(\w+)(\])',
             bygroups(Punctuation, Comment.Special, Punctuation)),  # ffi
            (r'(\")?([\w.]+)(\")?',
             bygroups(Punctuation, Name.Namespace, Punctuation)),  # podname
            (r'::', Punctuation, 'usingClass'),
            default('#pop')
        ],
        'usingClass': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text,
                                         Name.Class), '#pop:2'),
            (r'[\w$]+', Name.Class),
            default('#pop:2')  # jump out to root state
        ],
        'facet': [(r'\s+', Text), (r'\{', Punctuation, 'facetFields'),
                  default('#pop')],
        'facetFields': [
            include('comments'),
            include('literals'),
            include('operators'), (r'\s+', Text),
            (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)),
            (r'\}', Punctuation, '#pop'), (r'.', Text)
        ],
    }
Exemple #13
0
class DLexer(RegexLexer):
    """
    For D source.

    .. versionadded:: 1.2
    """
    name = 'D'
    filenames = ['*.d', '*.di']
    aliases = ['d']
    mimetypes = ['text/x-dsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # (r'\\\n', Text), # line continuations
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nested_comment'),
            # Keywords
            (words(
                ('abstract', 'alias', 'align', 'asm', 'assert', 'auto', 'body',
                 'break', 'case', 'cast', 'catch', 'class', 'const',
                 'continue', 'debug', 'default', 'delegate', 'delete',
                 'deprecated', 'do', 'else', 'enum', 'export', 'extern',
                 'finally', 'final', 'foreach_reverse', 'foreach', 'for',
                 'function', 'goto', 'if', 'immutable', 'import', 'interface',
                 'invariant', 'inout', 'in', 'is', 'lazy', 'mixin', 'module',
                 'new', 'nothrow', 'out', 'override', 'package', 'pragma',
                 'private', 'protected', 'public', 'pure', 'ref', 'return',
                 'scope', 'shared', 'static', 'struct', 'super', 'switch',
                 'synchronized', 'template', 'this', 'throw', 'try', 'typedef',
                 'typeid', 'typeof', 'union', 'unittest', 'version',
                 'volatile', 'while', 'with', '__gshared', '__traits',
                 '__vector', '__parameters'),
                suffix=r'\b'), Keyword),
            (words(('bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char',
                    'creal', 'dchar', 'double', 'float', 'idouble', 'ifloat',
                    'int', 'ireal', 'long', 'real', 'short', 'ubyte', 'ucent',
                    'uint', 'ulong', 'ushort', 'void', 'wchar'),
                   suffix=r'\b'), Keyword.Type),
            (r'(false|true|null)\b', Keyword.Constant),
            (words(('__FILE__', '__MODULE__', '__LINE__', '__FUNCTION__',
                    '__PRETTY_FUNCTION__'
                    '', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__',
                    '__VENDOR__', '__VERSION__'),
                   suffix=r'\b'), Keyword.Pseudo),
            (r'macro\b', Keyword.Reserved),
            (r'(string|wstring|dstring|size_t|ptrdiff_t)\b', Name.Builtin),
            # FloatLiteral
            # -- HexFloat
            (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)'
             r'[pP][+\-]?[0-9_]+[fFL]?[i]?', Number.Float),
            # -- DecimalFloat
            (r'[0-9_]+(\.[0-9_]+[eE][+\-]?[0-9_]+|'
             r'\.[0-9_]*|[eE][+\-]?[0-9_]+)[fFL]?[i]?', Number.Float),
            (r'\.(0|[1-9][0-9_]*)([eE][+\-]?[0-9_]+)?[fFL]?[i]?',
             Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[Bb][01_]+', Number.Bin),
            # -- Octal
            (r'0[0-7_]+', Number.Oct),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
            # -- Decimal
            (r'(0|[1-9][0-9_]*)([LUu]|Lu|LU|uL|UL)?', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\&\w+;|.)'""",
             String.Char),
            # StringLiteral
            # -- WysiwygString
            (r'r"[^"]*"[cwd]?', String),
            # -- AlternateWysiwygString
            (r'`[^`]*`[cwd]?', String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"[cwd]?', String),
            # -- EscapeSequence
            (r"\\(['\"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}"
             r"|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)", String),
            # -- HexString
            (r'x"[0-9a-fA-F_\s]*"[cwd]?', String),
            # -- DelimitedString
            (r'q"\[', String, 'delimited_bracket'),
            (r'q"\(', String, 'delimited_parenthesis'),
            (r'q"<', String, 'delimited_angle'),
            (r'q"\{', String, 'delimited_curly'),
            (r'q"([a-zA-Z_]\w*)\n.*?\n\1"', String),
            (r'q"(.).*?\1"', String),
            # -- TokenString
            (r'q\{', String, 'token_string'),
            # Attributes
            (r'@([a-zA-Z_]\w*)?', Name.Decorator),
            # Tokens
            (r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>='
             r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)'
             r'|[/.&|\-+<>!()\[\]{}?,;:$=*%^~]', Punctuation),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
            # Line
            (r'#line\s.*\n', Comment.Special),
        ],
        'nested_comment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ],
        'token_string': [
            (r'\{', Punctuation, 'token_string_nest'),
            (r'\}', String, '#pop'),
            include('root'),
        ],
        'token_string_nest': [
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],
        'delimited_bracket': [
            (r'[^\[\]]+', String),
            (r'\[', String, 'delimited_inside_bracket'),
            (r'\]"', String, '#pop'),
        ],
        'delimited_inside_bracket': [
            (r'[^\[\]]+', String),
            (r'\[', String, '#push'),
            (r'\]', String, '#pop'),
        ],
        'delimited_parenthesis': [
            (r'[^()]+', String),
            (r'\(', String, 'delimited_inside_parenthesis'),
            (r'\)"', String, '#pop'),
        ],
        'delimited_inside_parenthesis': [
            (r'[^()]+', String),
            (r'\(', String, '#push'),
            (r'\)', String, '#pop'),
        ],
        'delimited_angle': [
            (r'[^<>]+', String),
            (r'<', String, 'delimited_inside_angle'),
            (r'>"', String, '#pop'),
        ],
        'delimited_inside_angle': [
            (r'[^<>]+', String),
            (r'<', String, '#push'),
            (r'>', String, '#pop'),
        ],
        'delimited_curly': [
            (r'[^{}]+', String),
            (r'\{', String, 'delimited_inside_curly'),
            (r'\}"', String, '#pop'),
        ],
        'delimited_inside_curly': [
            (r'[^{}]+', String),
            (r'\{', String, '#push'),
            (r'\}', String, '#pop'),
        ],
    }
Exemple #14
0
class NSISLexer(RegexLexer):
    """
    For `NSIS <http://nsis.sourceforge.net/>`_ scripts.

    .. versionadded:: 1.6
    """
    name = 'NSIS'
    aliases = ['nsis', 'nsi', 'nsh']
    filenames = ['*.nsi', '*.nsh']
    mimetypes = ['text/x-nsis']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'[;#].*\n', Comment),
            (r"'.*?'", String.Single),
            (r'"', String.Double, 'str_double'),
            (r'`', String.Backtick, 'str_backtick'),
            include('macro'),
            include('interpol'),
            include('basic'),
            (r'\$\{[a-z_|][\w|]*\}', Keyword.Pseudo),
            (r'/[a-z_]\w*', Name.Attribute),
            ('.', Text),
        ],
        'basic': [
            (r'(\n)(Function)(\s+)([._a-z][.\w]*)\b',
             bygroups(Text, Keyword, Text, Name.Function)),
            (r'\b([_a-z]\w*)(::)([a-z][a-z0-9]*)\b',
             bygroups(Keyword.Namespace, Punctuation, Name.Function)),
            (r'\b([_a-z]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'(\b[ULS]|\B)([!<>=]?=|\<\>?|\>)\B', Operator),
            (r'[|+-]', Operator),
            (r'\\', Punctuation),
            (r'\b(Abort|Add(?:BrandingImage|Size)|'
             r'Allow(?:RootDirInstall|SkipFiles)|AutoCloseWindow|'
             r'BG(?:Font|Gradient)|BrandingText|BringToFront|Call(?:InstDLL)?|'
             r'(?:Sub)?Caption|ChangeUI|CheckBitmap|ClearErrors|CompletedText|'
             r'ComponentText|CopyFiles|CRCCheck|'
             r'Create(?:Directory|Font|Shortcut)|Delete(?:INI(?:Sec|Str)|'
             r'Reg(?:Key|Value))?|DetailPrint|DetailsButtonText|'
             r'Dir(?:Show|Text|Var|Verify)|(?:Disabled|Enabled)Bitmap|'
             r'EnableWindow|EnumReg(?:Key|Value)|Exch|Exec(?:Shell|Wait)?|'
             r'ExpandEnvStrings|File(?:BufSize|Close|ErrorText|Open|'
             r'Read(?:Byte)?|Seek|Write(?:Byte)?)?|'
             r'Find(?:Close|First|Next|Window)|FlushINI|Function(?:End)?|'
             r'Get(?:CurInstType|CurrentAddress|DlgItem|DLLVersion(?:Local)?|'
             r'ErrorLevel|FileTime(?:Local)?|FullPathName|FunctionAddress|'
             r'InstDirError|LabelAddress|TempFileName)|'
             r'Goto|HideWindow|Icon|'
             r'If(?:Abort|Errors|FileExists|RebootFlag|Silent)|'
             r'InitPluginsDir|Install(?:ButtonText|Colors|Dir(?:RegKey)?)|'
             r'Inst(?:ProgressFlags|Type(?:[GS]etText)?)|Int(?:CmpU?|Fmt|Op)|'
             r'IsWindow|LangString(?:UP)?|'
             r'License(?:BkColor|Data|ForceSelection|LangString|Text)|'
             r'LoadLanguageFile|LockWindow|Log(?:Set|Text)|MessageBox|'
             r'MiscButtonText|Name|Nop|OutFile|(?:Uninst)?Page(?:Ex(?:End)?)?|'
             r'PluginDir|Pop|Push|Quit|Read(?:(?:Env|INI|Reg)Str|RegDWORD)|'
             r'Reboot|(?:Un)?RegDLL|Rename|RequestExecutionLevel|ReserveFile|'
             r'Return|RMDir|SearchPath|Section(?:Divider|End|'
             r'(?:(?:Get|Set)(?:Flags|InstTypes|Size|Text))|Group(?:End)?|In)?|'
             r'SendMessage|Set(?:AutoClose|BrandingImage|Compress(?:ionLevel|'
             r'or(?:DictSize)?)?|CtlColors|CurInstType|DatablockOptimize|'
             r'DateSave|Details(?:Print|View)|Error(?:s|Level)|FileAttributes|'
             r'Font|OutPath|Overwrite|PluginUnload|RebootFlag|ShellVarContext|'
             r'Silent|StaticBkColor)|'
             r'Show(?:(?:I|Uni)nstDetails|Window)|Silent(?:Un)?Install|Sleep|'
             r'SpaceTexts|Str(?:CmpS?|Cpy|Len)|SubSection(?:End)?|'
             r'Uninstall(?:ButtonText|(?:Sub)?Caption|EXEName|Icon|Text)|'
             r'UninstPage|Var|VI(?:AddVersionKey|ProductVersion)|WindowIcon|'
             r'Write(?:INIStr|Reg(:?Bin|DWORD|(?:Expand)?Str)|Uninstaller)|'
             r'XPStyle)\b', Keyword),
            (r'\b(CUR|END|(?:FILE_ATTRIBUTE_)?'
             r'(?:ARCHIVE|HIDDEN|NORMAL|OFFLINE|READONLY|SYSTEM|TEMPORARY)|'
             r'HK(CC|CR|CU|DD|LM|PD|U)|'
             r'HKEY_(?:CLASSES_ROOT|CURRENT_(?:CONFIG|USER)|DYN_DATA|'
             r'LOCAL_MACHINE|PERFORMANCE_DATA|USERS)|'
             r'ID(?:ABORT|CANCEL|IGNORE|NO|OK|RETRY|YES)|'
             r'MB_(?:ABORTRETRYIGNORE|DEFBUTTON[1-4]|'
             r'ICON(?:EXCLAMATION|INFORMATION|QUESTION|STOP)|'
             r'OK(?:CANCEL)?|RETRYCANCEL|RIGHT|SETFOREGROUND|TOPMOST|USERICON|'
             r'YESNO(?:CANCEL)?)|SET|SHCTX|'
             r'SW_(?:HIDE|SHOW(?:MAXIMIZED|MINIMIZED|NORMAL))|'
             r'admin|all|auto|both|bottom|bzip2|checkbox|colored|current|false|'
             r'force|hide|highest|if(?:diff|newer)|lastused|leave|left|'
             r'listonly|lzma|nevershow|none|normal|off|on|pop|push|'
             r'radiobuttons|right|show|silent|silentlog|smooth|textonly|top|'
             r'true|try|user|zlib)\b', Name.Constant),
        ],
        'macro': [
            (r'\!(addincludedir(?:dir)?|addplugindir|appendfile|cd|define|'
             r'delfilefile|echo(?:message)?|else|endif|error|execute|'
             r'if(?:macro)?n?(?:def)?|include|insertmacro|macro(?:end)?|packhdr|'
             r'search(?:parse|replace)|system|tempfilesymbol|undef|verbose|'
             r'warning)\b', Comment.Preproc),
        ],
        'interpol': [
            (r'\$(R?[0-9])', Name.Builtin.Pseudo),    # registers
            (r'\$(ADMINTOOLS|APPDATA|CDBURN_AREA|COOKIES|COMMONFILES(?:32|64)|'
             r'DESKTOP|DOCUMENTS|EXE(?:DIR|FILE|PATH)|FAVORITES|FONTS|HISTORY|'
             r'HWNDPARENT|INTERNET_CACHE|LOCALAPPDATA|MUSIC|NETHOOD|PICTURES|'
             r'PLUGINSDIR|PRINTHOOD|PROFILE|PROGRAMFILES(?:32|64)|QUICKLAUNCH|'
             r'RECENT|RESOURCES(?:_LOCALIZED)?|SENDTO|SM(?:PROGRAMS|STARTUP)|'
             r'STARTMENU|SYSDIR|TEMP(?:LATES)?|VIDEOS|WINDIR|\{NSISDIR\})',
             Name.Builtin),
            (r'\$(CMDLINE|INSTDIR|OUTDIR|LANGUAGE)', Name.Variable.Global),
            (r'\$[a-z_]\w*', Name.Variable),
        ],
        'str_double': [
            (r'"', String, '#pop'),
            (r'\$(\\[nrt"]|\$)', String.Escape),
            include('interpol'),
            (r'.', String.Double),
        ],
        'str_backtick': [
            (r'`', String, '#pop'),
            (r'\$(\\[nrt"]|\$)', String.Escape),
            include('interpol'),
            (r'.', String.Double),
        ],
    }
Exemple #15
0
class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = [
        'text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r', 'text/x-R',
        'text/x-r-history', 'text/x-r-profile'
    ]

    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])', Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])', Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11
Exemple #16
0
class CythonLexer(RegexLexer):
    """
    For Pyrex and `Cython <http://cython.org>`_ source code.

    .. versionadded:: 1.1
    """

    name = 'Cython'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Name, Operator,
                      Name, Punctuation)),
            include('keywords'),
            (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
            # (should actually start a block with only cdefs)
            (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
            (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
            include('builtins'),
            include('backtick'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
                'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
                'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
                'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr',
                'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
                'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit',
                'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
                'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property',
                'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
                'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
                'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned',
                'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError',
                'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
                'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
                'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
                'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
                'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'cdef': [
            (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
            (r'(struct|enum|union|class)\b', Keyword),
            (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)',
             bygroups(Name.Function, Text), '#pop'),
            (r'([a-zA-Z_]\w*)(\s*)(,)',
             bygroups(Name.Function, Text, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            (r'(?=["\'])', Text, '#pop'),
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
Exemple #17
0
class RideLexer(RegexLexer):
    """
    For `Ride <https://docs.wavesplatform.com/en/ride/about-ride.html>`_
    source code.

    .. versionadded:: 2.6
    """

    name = 'Ride'
    aliases = ['ride']
    filenames = ['*.ride']
    mimetypes = ['text/x-ride']

    validName = r'[a-zA-Z_][a-zA-Z0-9_\']*'

    builtinOps = (
        '||', '|', '>=', '>', '==', '!',
        '=', '<=', '<', '::', ':+', ':', '!=', '/',
        '.', '=>', '-', '+', '*', '&&', '%', '++',
    )

    globalVariablesName = (
        'NOALG', 'MD5', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512',
        'SHA3224', 'SHA3256', 'SHA3384', 'SHA3512', 'nil', 'this', 'unit',
        'height', 'lastBlock', 'Buy', 'Sell', 'CEILING', 'FLOOR', 'DOWN',
        'HALFDOWN', 'HALFEVEN', 'HALFUP', 'UP',
    )

    typesName = (
        'Unit', 'Int', 'Boolean', 'ByteVector', 'String', 'Address', 'Alias',
        'Transfer', 'AssetPair', 'DataEntry', 'Order', 'Transaction',
        'GenesisTransaction', 'PaymentTransaction', 'ReissueTransaction',
        'BurnTransaction', 'MassTransferTransaction', 'ExchangeTransaction',
        'TransferTransaction', 'SetAssetScriptTransaction',
        'InvokeScriptTransaction', 'IssueTransaction', 'LeaseTransaction',
        'LeaseCancelTransaction', 'CreateAliasTransaction',
        'SetScriptTransaction', 'SponsorFeeTransaction', 'DataTransaction',
        'WriteSet', 'AttachedPayment', 'ScriptTransfer', 'TransferSet',
        'ScriptResult', 'Invocation', 'Asset', 'BlockInfo', 'Issue', 'Reissue',
        'Burn', 'NoAlg', 'Md5', 'Sha1', 'Sha224', 'Sha256', 'Sha384', 'Sha512',
        'Sha3224', 'Sha3256', 'Sha3384', 'Sha3512', 'BinaryEntry',
        'BooleanEntry', 'IntegerEntry', 'StringEntry', 'List', 'Ceiling',
        'Down', 'Floor', 'HalfDown', 'HalfEven', 'HalfUp', 'Up',
    )

    functionsName = (
        'fraction', 'size', 'toBytes', 'take', 'drop', 'takeRight', 'dropRight',
        'toString', 'isDefined', 'extract', 'throw', 'getElement', 'value',
        'cons', 'toUtf8String', 'toInt', 'indexOf', 'lastIndexOf', 'split',
        'parseInt', 'parseIntValue', 'keccak256', 'blake2b256', 'sha256',
        'sigVerify', 'toBase58String', 'fromBase58String', 'toBase64String',
        'fromBase64String', 'transactionById', 'transactionHeightById',
        'getInteger', 'getBoolean', 'getBinary', 'getString',
        'addressFromPublicKey', 'addressFromString', 'addressFromRecipient',
        'assetBalance', 'wavesBalance', 'getIntegerValue', 'getBooleanValue',
        'getBinaryValue', 'getStringValue', 'addressFromStringValue',
        'assetInfo', 'rsaVerify', 'checkMerkleProof', 'median',
        'valueOrElse', 'valueOrErrorMessage', 'contains', 'log', 'pow',
        'toBase16String', 'fromBase16String', 'blockInfoByHeight',
        'transferTransactionById',
    )

    reservedWords = words((
        'match', 'case', 'else', 'func', 'if',
        'let', 'then', '@Callable', '@Verifier',
    ), suffix=r'\b')

    tokens = {
        'root': [
            # Comments
            (r'#.*', Comment.Single),
            # Whitespace
            (r'\s+', Text),
            # Strings
            (r'"', String, 'doublequote'),
            (r'utf8\'', String, 'utf8quote'),
            (r'base(58|64|16)\'', String, 'singlequote'),
            # Keywords
            (reservedWords, Keyword.Reserved),
            (r'\{-#.*?#-\}', Keyword.Reserved),
            (r'FOLD<\d+>', Keyword.Reserved),
            # Types
            (words(typesName), Keyword.Type),
            # Main
            # (specialName, Keyword.Reserved),
            # Prefix Operators
            (words(builtinOps, prefix=r'\(', suffix=r'\)'), Name.Function),
            # Infix Operators
            (words(builtinOps), Name.Function),
            (words(globalVariablesName), Name.Function),
            (words(functionsName), Name.Function),
            # Numbers
            include('numbers'),
            # Variable Names
            (validName, Name.Variable),
            # Parens
            (r'[,()\[\]{}]', Punctuation),
        ],

        'doublequote': [
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\[nrfvb\\"]', String.Escape),
            (r'[^"]', String),
            (r'"', String, '#pop'),
        ],

        'utf8quote': [
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\[nrfvb\\\']', String.Escape),
            (r'[^\']', String),
            (r'\'', String, '#pop'),
        ],

        'singlequote': [
            (r'[^\']', String),
            (r'\'', String, '#pop'),
        ],

        'numbers': [
            (r'_?\d+', Number.Integer),
        ],
    }
Exemple #18
0
class ElmLexer(RegexLexer):
    """
    For `Elm <http://elm-lang.org/>`_ source code.

    .. versionadded:: 2.1
    """

    name = 'Elm'
    aliases = ['elm']
    filenames = ['*.elm']
    mimetypes = ['text/x-elm']

    validName = r'[a-z_][a-zA-Z0-9_\']*'

    specialName = r'^main '

    builtinOps = (
        '~',
        '||',
        '|>',
        '|',
        '`',
        '^',
        '\\',
        '\'',
        '>>',
        '>=',
        '>',
        '==',
        '=',
        '<~',
        '<|',
        '<=',
        '<<',
        '<-',
        '<',
        '::',
        ':',
        '/=',
        '//',
        '/',
        '..',
        '.',
        '->',
        '-',
        '++',
        '+',
        '*',
        '&&',
        '%',
    )

    reservedWords = words((
        'alias',
        'as',
        'case',
        'else',
        'if',
        'import',
        'in',
        'let',
        'module',
        'of',
        'port',
        'then',
        'type',
        'where',
    ),
                          suffix=r'\b')

    tokens = {
        'root': [

            # Comments
            (r'\{-', Comment.Multiline, 'comment'),
            (r'--.*', Comment.Single),

            # Whitespace
            (r'\s+', Text),

            # Strings
            (r'"', String, 'doublequote'),

            # Modules
            (r'^\s*module\s*', Keyword.Namespace, 'imports'),

            # Imports
            (r'^\s*import\s*', Keyword.Namespace, 'imports'),

            # Shaders
            (r'\[glsl\|.*', Name.Entity, 'shader'),

            # Keywords
            (reservedWords, Keyword.Reserved),

            # Types
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),

            # Main
            (specialName, Keyword.Reserved),

            # Prefix Operators
            (words((builtinOps), prefix=r'\(', suffix=r'\)'), Name.Function),

            # Infix Operators
            (words(builtinOps), Name.Function),

            # Numbers
            include('numbers'),

            # Variable Names
            (validName, Name.Variable),

            # Parens
            (r'[,()\[\]{}]', Punctuation),
        ],
        'comment': [
            (r'-(?!\})', Comment.Multiline),
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[^-}]', Comment.Multiline),
            (r'-\}', Comment.Multiline, '#pop'),
        ],
        'doublequote': [
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\[nrfvb\\"]', String.Escape),
            (r'[^"]', String),
            (r'"', String, '#pop'),
        ],
        'imports': [
            (r'\w+(\.\w+)*', Name.Class, '#pop'),
        ],
        'numbers': [
            (r'_?\d+\.(?=\d+)', Number.Float),
            (r'_?\d+', Number.Integer),
        ],
        'shader': [
            (r'\|(?!\])', Name.Entity),
            (r'\|\]', Name.Entity, '#pop'),
            (r'.*\n', Name.Entity),
        ],
    }
Exemple #19
0
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accomodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Exemple #20
0
class JsgfLexer(RegexLexer):
    """
    For `JSpeech Grammar Format <https://www.w3.org/TR/jsgf/>`_
    grammars.

    .. versionadded:: 2.2
    """
    name = 'JSGF'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            include('comments'),
            include('non-comments'),
        ],
        'comments': [
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*', Comment.Single),
        ],
        'non-comments': [
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Text),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            (r'(<)(NULL|VOID)(>)',
             bygroups(Punctuation, Name.Builtin, Punctuation)),
            (r'<', Punctuation, 'rulename'),
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Text),
            (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text,
                                            Punctuation)),
            (r'[^.>]+', Name.Constant),
        ],
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
             bygroups(Comment.Multiline, Comment.Special,
                      using(this, state='example'))),
            (r'(^\s*\*?\s*)(@\S*)', bygroups(Comment.Multiline,
                                             Comment.Special)),
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],
        'example': [
            (r'\n\s*\*', Comment.Multiline),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
Exemple #21
0
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Exemple #22
0
class CleanLexer(ExtendedRegexLexer):
    """
    Lexer for the general purpose, state-of-the-art, pure and lazy functional
    programming language Clean (http://clean.cs.ru.nl/Clean).

    .. versionadded: 2.2
    """
    name = 'Clean'
    aliases = ['clean']
    filenames = ['*.icl', '*.dcl']

    keywords = ('case', 'ccall', 'class', 'code', 'code inline', 'derive',
                'export', 'foreign', 'generic', 'if', 'in', 'infix', 'infixl',
                'infixr', 'instance', 'let', 'of', 'otherwise', 'special',
                'stdcall', 'where', 'with')

    modulewords = ('implementation', 'definition', 'system')

    lowerId = r'[a-z`][\w`]*'
    upperId = r'[A-Z`][\w`]*'
    funnyId = r'[~@#$%\^?!+\-*<>\\/|&=:]+'
    scoreUpperId = r'_' + upperId
    scoreLowerId = r'_' + lowerId
    moduleId = r'[a-zA-Z_][a-zA-Z0-9_.`]+'
    classId = '|'.join([lowerId, upperId, funnyId])

    tokens = {
        'root': [
            include('comments'),
            include('keywords'),
            include('module'),
            include('import'),
            include('whitespace'),
            include('literals'),
            include('operators'),
            include('delimiters'),
            include('names'),
        ],
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*\n', Comment.Single),
            (r'/\*', Comment.Multi, 'comments.in'),
            (r'/\*\*', Comment.Special, 'comments.in'),
        ],
        'comments.in': [
            (r'\*\/', Comment.Multi, '#pop'),
            (r'/\*', Comment.Multi, '#push'),
            (r'[^*/]+', Comment.Multi),
            (r'\*(?!/)', Comment.Multi),
            (r'/', Comment.Multi),
        ],
        'keywords': [
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
        ],
        'module': [
            (words(modulewords, prefix=r'\b',
                   suffix=r'\b'), Keyword.Namespace),
            (r'\bmodule\b', Keyword.Namespace, 'module.name'),
        ],
        'module.name': [
            include('whitespace'),
            (moduleId, Name.Class, '#pop'),
        ],
        'import': [
            (r'\b(import)\b(\s*)', bygroups(Keyword,
                                            Whitespace), 'import.module'),
            (r'\b(from)\b(\s*)\b(' + moduleId + r')\b(\s*)\b(import)\b',
             bygroups(Keyword, Whitespace, Name.Class, Whitespace,
                      Keyword), 'import.what'),
        ],
        'import.module': [
            (r'\b(qualified)\b(\s*)', bygroups(Keyword, Whitespace)),
            (r'(\s*)\b(as)\b', bygroups(Whitespace, Keyword),
             ('#pop', 'import.module.as')),
            (moduleId, Name.Class),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Punctuation, Whitespace)),
            (r'\s+', Whitespace),
            default('#pop'),
        ],
        'import.module.as': [
            include('whitespace'),
            (lowerId, Name.Class, '#pop'),
            (upperId, Name.Class, '#pop'),
        ],
        'import.what': [
            (r'\b(class)\b(\s+)(' + classId + r')',
             bygroups(Keyword, Whitespace, Name.Class), 'import.what.class'),
            (r'\b(instance)(\s+)(' + classId + r')(\s+)',
             bygroups(Keyword, Whitespace, Name.Class,
                      Whitespace), 'import.what.instance'),
            (r'(::)(\s*)\b(' + upperId + r')\b',
             bygroups(Punctuation, Whitespace,
                      Name.Class), 'import.what.type'),
            (r'\b(generic)\b(\s+)\b(' + lowerId + '|' + upperId + r')\b',
             bygroups(Keyword, Whitespace, Name)),
            include('names'),
            (r'(,)(\s+)', bygroups(Punctuation, Whitespace)),
            (r'$', Whitespace, '#pop'),
            include('whitespace'),
        ],
        'import.what.class': [
            (r',', Punctuation, '#pop'),
            (r'\(', Punctuation, 'import.what.class.members'),
            (r'$', Whitespace, '#pop:2'),
            include('whitespace'),
        ],
        'import.what.class.members': [
            (r',', Punctuation),
            (r'\.\.', Punctuation),
            (r'\)', Punctuation, '#pop'),
            include('names'),
        ],
        'import.what.instance': [
            (r'[,)]', Punctuation, '#pop'),
            (r'\(', Punctuation, 'import.what.instance'),
            (r'$', Whitespace, '#pop:2'),
            include('whitespace'),
            include('names'),
        ],
        'import.what.type': [
            (r',', Punctuation, '#pop'),
            (r'[({]', Punctuation, 'import.what.type.consesandfields'),
            (r'$', Whitespace, '#pop:2'),
            include('whitespace'),
        ],
        'import.what.type.consesandfields': [
            (r',', Punctuation),
            (r'\.\.', Punctuation),
            (r'[)}]', Punctuation, '#pop'),
            include('names'),
        ],
        'literals': [
            (r'\'([^\'\\]|\\(x[\da-fA-F]+|\d+|.))\'', Literal.Char),
            (r'[+~-]?0[0-7]+\b', Number.Oct),
            (r'[+~-]?\d+\.\d+(E[+-]?\d+)?', Number.Float),
            (r'[+~-]?\d+\b', Number.Integer),
            (r'[+~-]?0x[\da-fA-F]+\b', Number.Hex),
            (r'True|False', Literal),
            (r'"', String.Double, 'literals.stringd'),
        ],
        'literals.stringd': [
            (r'[^\\"\n]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Double),
            (r'[$\n]', Error, '#pop'),
        ],
        'operators': [
            (r'[-~@#$%\^?!+*<>\\/|&=:.]+', Operator),
            (r'\b_+\b', Operator),
        ],
        'delimiters': [
            (r'[,;(){}\[\]]', Punctuation),
            (r'(\')([\w`.]+)(\')',
             bygroups(Punctuation, Name.Class, Punctuation)),
        ],
        'names': [
            (lowerId, Name),
            (scoreLowerId, Name),
            (funnyId, Name.Function),
            (upperId, Name.Class),
            (scoreUpperId, Name.Class),
        ]
    }
Exemple #23
0
class SmaliLexer(RegexLexer):
    """
    For `Smali <http://code.google.com/p/smali/>`_ (Android/Dalvik) assembly
    code.

    .. versionadded:: 1.6
    """
    name = 'Smali'
    aliases = ['smali']
    filenames = ['*.smali']
    mimetypes = ['text/smali']

    tokens = {
        'root': [
            include('comment'),
            include('label'),
            include('field'),
            include('method'),
            include('class'),
            include('directive'),
            include('access-modifier'),
            include('instruction'),
            include('literal'),
            include('punctuation'),
            include('type'),
            include('whitespace')
        ],
        'directive': [
            (r'^[ \t]*\.(class|super|implements|field|subannotation|annotation|'
             r'enum|method|registers|locals|array-data|packed-switch|'
             r'sparse-switch|catchall|catch|line|parameter|local|prologue|'
             r'epilogue|source)', Keyword),
            (r'^[ \t]*\.end (field|subannotation|annotation|method|array-data|'
             'packed-switch|sparse-switch|parameter|local)', Keyword),
            (r'^[ \t]*\.restart local', Keyword),
        ],
        'access-modifier': [
            (r'(public|private|protected|static|final|synchronized|bridge|'
             r'varargs|native|abstract|strictfp|synthetic|constructor|'
             r'declared-synchronized|interface|enum|annotation|volatile|'
             r'transient)', Keyword),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
        ],
        'instruction': [
            (r'\b[vp]\d+\b', Name.Builtin),  # registers
            (r'\b[a-z][A-Za-z0-9/-]+\s+', Text),  # instructions
        ],
        'literal': [
            (r'".*"', String),
            (r'0x[0-9A-Fa-f]+t?', Number.Hex),
            (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'[0-9]+L?', Number.Integer),
        ],
        'field': [
            (r'(\$?\b)([\w$]*)(:)',
             bygroups(Punctuation, Name.Variable, Punctuation)),
        ],
        'method': [
            (r'<(?:cl)?init>', Name.Function),  # constructor
            (r'(\$?\b)([\w$]*)(\()',
             bygroups(Punctuation, Name.Function, Punctuation)),
        ],
        'label': [
            (r':\w+', Name.Label),
        ],
        'class': [
            # class names in the form Lcom/namespace/ClassName;
            # I only want to color the ClassName part, so the namespace part is
            # treated as 'Text'
            (r'(L)((?:[\w$]+/)*)([\w$]+)(;)',
             bygroups(Keyword.Type, Text, Name.Class, Text)),
        ],
        'punctuation': [
            (r'->', Punctuation),
            (r'[{},():=.-]', Punctuation),
        ],
        'type': [
            (r'[ZBSCIJFDV\[]+', Keyword.Type),
        ],
        'comment': [
            (r'#.*?\n', Comment),
        ],
    }

    def analyse_text(text):
        score = 0
        if re.search(r'^\s*\.class\s', text, re.MULTILINE):
            score += 0.5
            if re.search(
                    r'\b((check-cast|instance-of|throw-verification-error'
                    r')\b|(-to|add|[ais]get|[ais]put|and|cmpl|const|div|'
                    r'if|invoke|move|mul|neg|not|or|rem|return|rsub|shl|'
                    r'shr|sub|ushr)[-/])|{|}', text, re.MULTILINE):
                score += 0.3
        if re.search(
                r'(\.(catchall|epilogue|restart local|prologue)|'
                r'\b(array-data|class-change-error|declared-synchronized|'
                r'(field|inline|vtable)@0x[0-9a-fA-F]|generic-error|'
                r'illegal-class-access|illegal-field-access|'
                r'illegal-method-access|instantiation-error|no-error|'
                r'no-such-class|no-such-field|no-such-method|'
                r'packed-switch|sparse-switch))\b', text, re.MULTILINE):
            score += 0.6
        return score
Exemple #24
0
class SmalltalkLexer(RegexLexer):
    """
    For `Smalltalk <http://www.smalltalk.org/>`_ syntax.
    Contributed by Stefan Matthias Aust.
    Rewritten by Nils Winter.

    .. versionadded:: 0.10
    """
    name = 'Smalltalk'
    filenames = ['*.st']
    aliases = ['smalltalk', 'squeak', 'st']
    mimetypes = ['text/x-smalltalk']

    tokens = {
        'root': [
            (r'(<)(\w+:)(.*?)(>)', bygroups(Text, Keyword, Text, Text)),
            include('squeak fileout'),
            include('whitespaces'),
            include('method definition'),
            (r'(\|)([\w\s]*)(\|)', bygroups(Operator, Name.Variable, Operator)),
            include('objects'),
            (r'\^|\:=|\_', Operator),
            # temporaries
            (r'[\]({}.;!]', Text),
        ],
        'method definition': [
            # Not perfect can't allow whitespaces at the beginning and the
            # without breaking everything
            (r'([a-zA-Z]+\w*:)(\s*)(\w+)',
             bygroups(Name.Function, Text, Name.Variable)),
            (r'^(\b[a-zA-Z]+\w*\b)(\s*)$', bygroups(Name.Function, Text)),
            (r'^([-+*/\\~<>=|&!?,@%]+)(\s*)(\w+)(\s*)$',
             bygroups(Name.Function, Text, Name.Variable, Text)),
        ],
        'blockvariables': [
            include('whitespaces'),
            (r'(:)(\s*)(\w+)',
             bygroups(Operator, Text, Name.Variable)),
            (r'\|', Operator, '#pop'),
            default('#pop'),  # else pop
        ],
        'literals': [
            (r"'(''|[^'])*'", String, 'afterobject'),
            (r'\$.', String.Char, 'afterobject'),
            (r'#\(', String.Symbol, 'parenth'),
            (r'\)', Text, 'afterobject'),
            (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number, 'afterobject'),
        ],
        '_parenth_helper': [
            include('whitespaces'),
            (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number),
            (r'[-+*/\\~<>=|&#!?,@%\w:]+', String.Symbol),
            # literals
            (r"'(''|[^'])*'", String),
            (r'\$.', String.Char),
            (r'#*\(', String.Symbol, 'inner_parenth'),
        ],
        'parenth': [
            # This state is a bit tricky since
            # we can't just pop this state
            (r'\)', String.Symbol, ('root', 'afterobject')),
            include('_parenth_helper'),
        ],
        'inner_parenth': [
            (r'\)', String.Symbol, '#pop'),
            include('_parenth_helper'),
        ],
        'whitespaces': [
            # skip whitespace and comments
            (r'\s+', Text),
            (r'"(""|[^"])*"', Comment),
        ],
        'objects': [
            (r'\[', Text, 'blockvariables'),
            (r'\]', Text, 'afterobject'),
            (r'\b(self|super|true|false|nil|thisContext)\b',
             Name.Builtin.Pseudo, 'afterobject'),
            (r'\b[A-Z]\w*(?!:)\b', Name.Class, 'afterobject'),
            (r'\b[a-z]\w*(?!:)\b', Name.Variable, 'afterobject'),
            (r'#("(""|[^"])*"|[-+*/\\~<>=|&!?,@%]+|[\w:]+)',
             String.Symbol, 'afterobject'),
            include('literals'),
        ],
        'afterobject': [
            (r'! !$', Keyword, '#pop'),  # squeak chunk delimiter
            include('whitespaces'),
            (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)',
             Name.Builtin, '#pop'),
            (r'\b(new\b(?!:))', Name.Builtin),
            (r'\:=|\_', Operator, '#pop'),
            (r'\b[a-zA-Z]+\w*:', Name.Function, '#pop'),
            (r'\b[a-zA-Z]+\w*', Name.Function),
            (r'\w+:?|[-+*/\\~<>=|&!?,@%]+', Name.Function, '#pop'),
            (r'\.', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'[\])}]', Text),
            (r'[\[({]', Text, '#pop'),
        ],
        'squeak fileout': [
            # Squeak fileout format (optional)
            (r'^"(""|[^"])*"!', Keyword),
            (r"^'(''|[^'])*'!", Keyword),
            (r'^(!)(\w+)( commentStamp: )(.*?)( prior: .*?!\n)(.*?)(!)',
                bygroups(Keyword, Name.Class, Keyword, String, Keyword, Text, Keyword)),
            (r"^(!)(\w+(?: class)?)( methodsFor: )('(?:''|[^'])*')(.*?!)",
                bygroups(Keyword, Name.Class, Keyword, String, Keyword)),
            (r'^(\w+)( subclass: )(#\w+)'
             r'(\s+instanceVariableNames: )(.*?)'
             r'(\s+classVariableNames: )(.*?)'
             r'(\s+poolDictionaries: )(.*?)'
             r'(\s+category: )(.*?)(!)',
                bygroups(Name.Class, Keyword, String.Symbol, Keyword, String, Keyword,
                         String, Keyword, String, Keyword, String, Keyword)),
            (r'^(\w+(?: class)?)(\s+instanceVariableNames: )(.*?)(!)',
                bygroups(Name.Class, Keyword, String, Keyword)),
            (r'(!\n)(\].*)(! !)$', bygroups(Keyword, Text, Keyword)),
            (r'! !$', Keyword),
        ],
    }
Exemple #25
0
class BibTeXLexer(ExtendedRegexLexer):
    """
    A lexer for BibTeX bibliography data format.

    .. versionadded:: 2.2
    """

    name = 'BibTeX'
    aliases = ['bib', 'bibtex']
    filenames = ['*.bib']
    mimetypes = ["text/x-bibtex"]
    flags = re.IGNORECASE

    ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
    IDENTIFIER = '[{}][{}]*'.format('a-z_' + ALLOWED_CHARS, r'\w' + ALLOWED_CHARS)

    def open_brace_callback(self, match, ctx):
        opening_brace = match.group()
        ctx.opening_brace = opening_brace
        yield match.start(), Punctuation, opening_brace
        ctx.pos = match.end()

    def close_brace_callback(self, match, ctx):
        closing_brace = match.group()
        if (
            ctx.opening_brace == '{' and closing_brace != '}' or
            ctx.opening_brace == '(' and closing_brace != ')'
        ):
            yield match.start(), Error, closing_brace
        else:
            yield match.start(), Punctuation, closing_brace
        del ctx.opening_brace
        ctx.pos = match.end()

    tokens = {
        'root': [
            include('whitespace'),
            ('@comment', Comment),
            ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')),
            ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')),
            ('@' + IDENTIFIER, Name.Class,
             ('closing-brace', 'command-body', 'opening-brace')),
            ('.+', Comment),
        ],
        'opening-brace': [
            include('whitespace'),
            (r'[{(]', open_brace_callback, '#pop'),
        ],
        'closing-brace': [
            include('whitespace'),
            (r'[})]', close_brace_callback, '#pop'),
        ],
        'command-body': [
            include('whitespace'),
            (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
        ],
        'fields': [
            include('whitespace'),
            (',', Punctuation, 'field'),
            default('#pop'),
        ],
        'field': [
            include('whitespace'),
            (IDENTIFIER, Name.Attribute, ('value', '=')),
            default('#pop'),
        ],
        '=': [
            include('whitespace'),
            ('=', Punctuation, '#pop'),
        ],
        'value': [
            include('whitespace'),
            (IDENTIFIER, Name.Variable),
            ('"', String, 'quoted-string'),
            (r'\{', String, 'braced-string'),
            (r'[\d]+', Number),
            ('#', Punctuation),
            default('#pop'),
        ],
        'quoted-string': [
            (r'\{', String, 'braced-string'),
            ('"', String, '#pop'),
            (r'[^\{\"]+', String),
        ],
        'braced-string': [
            (r'\{', String, '#push'),
            (r'\}', String, '#pop'),
            (r'[^\{\}]+', String),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
    }
Exemple #26
0
class DevicetreeLexer(RegexLexer):
    """
    Lexer for `Devicetree <https://www.devicetree.org/>`_ files.

    .. versionadded:: 2.7
    """

    name = 'Devicetree'
    aliases = ['devicetree', 'dts']
    filenames = ['*.dts', '*.dtsi']
    mimetypes = ['text/x-c']

    #: optional Whitespace or /*...*/ style comment
    _ws = r'\s*(?:/[*][^*/]*?[*]/\s*)*'

    tokens = {
        'macro': [
            # Include preprocessor directives (C style):
            (r'(#include)(' + _ws + r')([^\n]+)',
             bygroups(Comment.Preproc, Comment.Multiline,
                      Comment.PreprocFile)),
            # Define preprocessor directives (C style):
            (r'(#define)(' + _ws + r')([^\n]+)',
             bygroups(Comment.Preproc, Comment.Multiline, Comment.Preproc)),
            # devicetree style with file:
            (r'(/[^*/{]+/)(' + _ws + r')("[^\n{]+")',
             bygroups(Comment.Preproc, Comment.Multiline,
                      Comment.PreprocFile)),
            # devicetree style with property:
            (r'(/[^*/{]+/)(' + _ws + r')([^\n;{]*)([;]?)',
             bygroups(Comment.Preproc, Comment.Multiline, Comment.Preproc,
                      Punctuation)),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimeter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'([^\s{}/*]*)(\s*)(:)', bygroups(Name.Label, Text, Punctuation)),
            (words(('compatible', 'model', 'phandle', 'status',
                    '#address-cells', '#size-cells', 'reg', 'virtual-reg',
                    'ranges', 'dma-ranges', 'device_type', 'name'),
                   suffix=r'\b'), Keyword.Reserved),
            (r'([~!%^&*+=|?:<>/#-])', Operator),
            (r'[()\[\]{},.]', Punctuation),
            (r'[a-zA-Z_][\w-]*(?=(?:\s*,\s*[a-zA-Z_][\w-]*|(?:' + _ws +
             r'))*\s*[=;])', Name),
            (r'[a-zA-Z_]\w*', Name.Attribute),
        ],
        'root': [
            include('whitespace'),
            include('macro'),

            # Nodes
            (r'([^/*@\s&]+|/)(@?)([0-9a-fA-F,]*)(' + _ws + r')(\{)',
             bygroups(Name.Function, Operator, Number.Integer,
                      Comment.Multiline, Punctuation), 'node'),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation, '#pop'),
        ],
        'node': [
            include('whitespace'),
            include('macro'),
            (r'([^/*@\s&]+|/)(@?)([0-9a-fA-F,]*)(' + _ws + r')(\{)',
             bygroups(Name.Function, Operator, Number.Integer,
                      Comment.Multiline, Punctuation), '#push'),
            include('statements'),
            (r'\};', Punctuation, '#pop'),
            (';', Punctuation),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
    }
Exemple #27
0
class XQueryLexer(ExtendedRegexLexer):
    """
    An XQuery lexer, parsing a stream and outputting the tokens needed to
    highlight xquery code.

    .. versionadded:: 1.4
    """
    name = 'XQuery'
    aliases = ['xquery', 'xqy', 'xq', 'xql', 'xqm']
    filenames = ['*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm']
    mimetypes = ['text/xquery', 'application/xquery']

    xquery_parse_state = []

    # FIX UNICODE LATER
    # ncnamestartchar = (
    #    r"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|"
    #    r"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|"
    #    r"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|"
    #    r"[\u10000-\uEFFFF]"
    # )
    ncnamestartchar = r"(?:[A-Z]|_|[a-z])"
    # FIX UNICODE LATER
    # ncnamechar = ncnamestartchar + (r"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|"
    #                                 r"[\u203F-\u2040]")
    ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])"
    ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar)
    pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])"
    pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])"
    pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar)
    prefixedname = "%s:%s" % (ncname, ncname)
    unprefixedname = ncname
    qname = "(?:%s|%s)" % (prefixedname, unprefixedname)

    entityref = r'(?:&(?:lt|gt|amp|quot|apos|nbsp);)'
    charref = r'(?:&#[0-9]+;|&#x[0-9a-fA-F]+;)'

    stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")'
    stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')"

    # FIX UNICODE LATER
    # elementcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|'
    #                       r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
    elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]'
    # quotattrcontentchar = (r'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|'
    #                        r'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
    quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]'
    # aposattrcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|'
    #                        r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
    aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]'

    # CHAR elements - fix the above elementcontentchar, quotattrcontentchar,
    #                 aposattrcontentchar
    # x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

    flags = re.DOTALL | re.MULTILINE | re.UNICODE

    def punctuation_root_callback(lexer, match, ctx):
        yield match.start(), Punctuation, match.group(1)
        # transition to root always - don't pop off stack
        ctx.stack = ['root']
        ctx.pos = match.end()

    def operator_root_callback(lexer, match, ctx):
        yield match.start(), Operator, match.group(1)
        # transition to root always - don't pop off stack
        ctx.stack = ['root']
        ctx.pos = match.end()

    def popstate_tag_callback(lexer, match, ctx):
        yield match.start(), Name.Tag, match.group(1)
        if lexer.xquery_parse_state:
            ctx.stack.append(lexer.xquery_parse_state.pop())
        ctx.pos = match.end()

    def popstate_xmlcomment_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append(lexer.xquery_parse_state.pop())
        ctx.pos = match.end()

    def popstate_kindtest_callback(lexer, match, ctx):
        yield match.start(), Punctuation, match.group(1)
        next_state = lexer.xquery_parse_state.pop()
        if next_state == 'occurrenceindicator':
            if re.match("[?*+]+", match.group(2)):
                yield match.start(), Punctuation, match.group(2)
                ctx.stack.append('operator')
                ctx.pos = match.end()
            else:
                ctx.stack.append('operator')
                ctx.pos = match.end(1)
        else:
            ctx.stack.append(next_state)
            ctx.pos = match.end(1)

    def popstate_callback(lexer, match, ctx):
        yield match.start(), Punctuation, match.group(1)
        # if we have run out of our state stack, pop whatever is on the pygments
        # state stack
        if len(lexer.xquery_parse_state) == 0:
            ctx.stack.pop()
            if not ctx.stack:
                # make sure we have at least the root state on invalid inputs
                ctx.stack = ['root']
        elif len(ctx.stack) > 1:
            ctx.stack.append(lexer.xquery_parse_state.pop())
        else:
            # i don't know if i'll need this, but in case, default back to root
            ctx.stack = ['root']
        ctx.pos = match.end()

    def pushstate_element_content_starttag_callback(lexer, match, ctx):
        yield match.start(), Name.Tag, match.group(1)
        lexer.xquery_parse_state.append('element_content')
        ctx.stack.append('start_tag')
        ctx.pos = match.end()

    def pushstate_cdata_section_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('cdata_section')
        lexer.xquery_parse_state.append(ctx.state.pop)
        ctx.pos = match.end()

    def pushstate_starttag_callback(lexer, match, ctx):
        yield match.start(), Name.Tag, match.group(1)
        lexer.xquery_parse_state.append(ctx.state.pop)
        ctx.stack.append('start_tag')
        ctx.pos = match.end()

    def pushstate_operator_order_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        ctx.stack = ['root']
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_operator_map_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        ctx.stack = ['root']
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_operator_root_validate(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        ctx.stack = ['root']
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_operator_root_validate_withmode(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Keyword, match.group(3)
        ctx.stack = ['root']
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_operator_processing_instruction_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('processing_instruction')
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_element_content_processing_instruction_callback(
            lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('processing_instruction')
        lexer.xquery_parse_state.append('element_content')
        ctx.pos = match.end()

    def pushstate_element_content_cdata_section_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('cdata_section')
        lexer.xquery_parse_state.append('element_content')
        ctx.pos = match.end()

    def pushstate_operator_cdata_section_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('cdata_section')
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_element_content_xmlcomment_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('xml_comment')
        lexer.xquery_parse_state.append('element_content')
        ctx.pos = match.end()

    def pushstate_operator_xmlcomment_callback(lexer, match, ctx):
        yield match.start(), String.Doc, match.group(1)
        ctx.stack.append('xml_comment')
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    def pushstate_kindtest_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('kindtest')
        ctx.stack.append('kindtest')
        ctx.pos = match.end()

    def pushstate_operator_kindtestforpi_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('operator')
        ctx.stack.append('kindtestforpi')
        ctx.pos = match.end()

    def pushstate_operator_kindtest_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('operator')
        ctx.stack.append('kindtest')
        ctx.pos = match.end()

    def pushstate_occurrenceindicator_kindtest_callback(lexer, match, ctx):
        yield match.start(), Name.Tag, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('occurrenceindicator')
        ctx.stack.append('kindtest')
        ctx.pos = match.end()

    def pushstate_operator_starttag_callback(lexer, match, ctx):
        yield match.start(), Name.Tag, match.group(1)
        lexer.xquery_parse_state.append('operator')
        ctx.stack.append('start_tag')
        ctx.pos = match.end()

    def pushstate_operator_root_callback(lexer, match, ctx):
        yield match.start(), Punctuation, match.group(1)
        lexer.xquery_parse_state.append('operator')
        ctx.stack = ['root']
        ctx.pos = match.end()

    def pushstate_operator_root_construct_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('operator')
        ctx.stack = ['root']
        ctx.pos = match.end()

    def pushstate_root_callback(lexer, match, ctx):
        yield match.start(), Punctuation, match.group(1)
        cur_state = ctx.stack.pop()
        lexer.xquery_parse_state.append(cur_state)
        ctx.stack = ['root']
        ctx.pos = match.end()

    def pushstate_operator_attribute_callback(lexer, match, ctx):
        yield match.start(), Name.Attribute, match.group(1)
        ctx.stack.append('operator')
        ctx.pos = match.end()

    def pushstate_operator_callback(lexer, match, ctx):
        yield match.start(), Keyword, match.group(1)
        yield match.start(), Text, match.group(2)
        yield match.start(), Punctuation, match.group(3)
        lexer.xquery_parse_state.append('operator')
        ctx.pos = match.end()

    tokens = {
        'comment': [
            # xquery comments
            (r'(:\))', Comment, '#pop'),
            (r'(\(:)', Comment, '#push'),
            (r'[^:)]', Comment),
            (r'([^:)]|:|\))', Comment),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'operator': [
            include('whitespace'),
            (r'(\})', popstate_callback),
            (r'\(:', Comment, 'comment'),
            (r'(\{)', pushstate_root_callback),
            (r'then|else|external|at|div|except', Keyword, 'root'),
            (r'order by', Keyword, 'root'),
            (r'group by', Keyword, 'root'),
            (r'is|mod|order\s+by|stable\s+order\s+by', Keyword, 'root'),
            (r'and|or', Operator.Word, 'root'),
            (r'(eq|ge|gt|le|lt|ne|idiv|intersect|in)(?=\b)', Operator.Word,
             'root'),
            (r'return|satisfies|to|union|where|count|preserve\s+strip',
             Keyword, 'root'),
            (r'(>=|>>|>|<=|<<|<|-|\*|!=|\+|\|\||\||:=|=|!)',
             operator_root_callback),
            (r'(::|:|;|\[|//|/|,)', punctuation_root_callback),
            (r'(castable|cast)(\s+)(as)\b', bygroups(Keyword, Text,
                                                     Keyword), 'singletype'),
            (r'(instance)(\s+)(of)\b', bygroups(Keyword, Text,
                                                Keyword), 'itemtype'),
            (r'(treat)(\s+)(as)\b', bygroups(Keyword, Text,
                                             Keyword), 'itemtype'),
            (r'(case)(\s+)(' + stringdouble + ')',
             bygroups(Keyword, Text, String.Double), 'itemtype'),
            (r'(case)(\s+)(' + stringsingle + ')',
             bygroups(Keyword, Text, String.Single), 'itemtype'),
            (r'(case|as)\b', Keyword, 'itemtype'),
            (r'(\))(\s*)(as)', bygroups(Punctuation, Text,
                                        Keyword), 'itemtype'),
            (r'\$', Name.Variable, 'varname'),
            (r'(for|let|previous|next)(\s+)(\$)',
             bygroups(Keyword, Text, Name.Variable), 'varname'),
            (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)',
             bygroups(Keyword, Text, Keyword, Text, Keyword, Text,
                      Name.Variable), 'varname'),
            # (r'\)|\?|\]', Punctuation, '#push'),
            (r'\)|\?|\]', Punctuation),
            (r'(empty)(\s+)(greatest|least)', bygroups(Keyword, Text,
                                                       Keyword)),
            (r'ascending|descending|default', Keyword, '#push'),
            (r'(allowing)(\s+)(empty)', bygroups(Keyword, Text, Keyword)),
            (r'external', Keyword),
            (r'(start|when|end)', Keyword, 'root'),
            (r'(only)(\s+)(end)', bygroups(Keyword, Text, Keyword), 'root'),
            (r'collation', Keyword, 'uritooperator'),

            # eXist specific XQUF
            (r'(into|following|preceding|with)', Keyword, 'root'),

            # support for current context on rhs of Simple Map Operator
            (r'\.', Operator),

            # finally catch all string literals and stay in operator state
            (stringdouble, String.Double),
            (stringsingle, String.Single),
            (r'(catch)(\s*)', bygroups(Keyword, Text), 'root'),
        ],
        'uritooperator': [
            (stringdouble, String.Double, '#pop'),
            (stringsingle, String.Single, '#pop'),
        ],
        'namespacedecl': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (r'(at)(\s+)(' + stringdouble + ')',
             bygroups(Keyword, Text, String.Double)),
            (r"(at)(\s+)(" + stringsingle + ')',
             bygroups(Keyword, Text, String.Single)),
            (stringdouble, String.Double),
            (stringsingle, String.Single),
            (r',', Punctuation),
            (r'=', Operator),
            (r';', Punctuation, 'root'),
            (ncname, Name.Namespace),
        ],
        'namespacekeyword': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (stringdouble, String.Double, 'namespacedecl'),
            (stringsingle, String.Single, 'namespacedecl'),
            (r'inherit|no-inherit', Keyword, 'root'),
            (r'namespace', Keyword, 'namespacedecl'),
            (r'(default)(\s+)(element)', bygroups(Keyword, Text, Keyword)),
            (r'preserve|no-preserve', Keyword),
            (r',', Punctuation),
        ],
        'annotationname':
        [(r'\(:', Comment, 'comment'), (qname, Name.Decorator),
         (r'(\()(' + stringdouble + ')', bygroups(Punctuation, String.Double)),
         (r'(\()(' + stringsingle + ')', bygroups(Punctuation, String.Single)),
         (r'(\,)(\s+)(' + stringdouble + ')',
          bygroups(Punctuation, Text, String.Double)),
         (r'(\,)(\s+)(' + stringsingle + ')',
          bygroups(Punctuation, Text, String.Single)), (r'\)', Punctuation),
         (r'(\s+)(\%)', bygroups(Text, Name.Decorator), 'annotationname'),
         (r'(\s+)(variable)(\s+)(\$)',
          bygroups(Text, Keyword.Declaration, Text, Name.Variable), 'varname'),
         (r'(\s+)(function)(\s+)', bygroups(Text, Keyword.Declaration,
                                            Text), 'root')],
        'varname': [
            (r'\(:', Comment, 'comment'),
            (r'(' + qname + r')(\()?', bygroups(Name,
                                                Punctuation), 'operator'),
        ],
        'singletype': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (ncname + r'(:\*)', Name.Variable, 'operator'),
            (qname, Name.Variable, 'operator'),
        ],
        'itemtype': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (r'\$', Name.Variable, 'varname'),
            (r'(void)(\s*)(\()(\s*)(\))',
             bygroups(Keyword, Text, Punctuation, Text,
                      Punctuation), 'operator'),
            (r'(element|attribute|schema-element|schema-attribute|comment|text|'
             r'node|binary|document-node|empty-sequence)(\s*)(\()',
             pushstate_occurrenceindicator_kindtest_callback),
            # Marklogic specific type?
            (r'(processing-instruction)(\s*)(\()',
             bygroups(Keyword, Text,
                      Punctuation), ('occurrenceindicator', 'kindtestforpi')),
            (r'(item)(\s*)(\()(\s*)(\))(?=[*+?])',
             bygroups(Keyword, Text, Punctuation, Text,
                      Punctuation), 'occurrenceindicator'),
            (r'(\(\#)(\s*)', bygroups(Punctuation, Text), 'pragma'),
            (r';', Punctuation, '#pop'),
            (r'then|else', Keyword, '#pop'),
            (r'(at)(\s+)(' + stringdouble + ')',
             bygroups(Keyword, Text, String.Double), 'namespacedecl'),
            (r'(at)(\s+)(' + stringsingle + ')',
             bygroups(Keyword, Text, String.Single), 'namespacedecl'),
            (r'except|intersect|in|is|return|satisfies|to|union|where|count',
             Keyword, 'root'),
            (r'and|div|eq|ge|gt|le|lt|ne|idiv|mod|or', Operator.Word, 'root'),
            (r':=|=|,|>=|>>|>|\[|\(|<=|<<|<|-|!=|\|\||\|', Operator, 'root'),
            (r'external|at', Keyword, 'root'),
            (r'(stable)(\s+)(order)(\s+)(by)',
             bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'),
            (r'(castable|cast)(\s+)(as)', bygroups(Keyword, Text,
                                                   Keyword), 'singletype'),
            (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword)),
            (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)),
            (r'(case)(\s+)(' + stringdouble + ')',
             bygroups(Keyword, Text, String.Double), 'itemtype'),
            (r'(case)(\s+)(' + stringsingle + ')',
             bygroups(Keyword, Text, String.Single), 'itemtype'),
            (r'case|as', Keyword, 'itemtype'),
            (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
            (ncname + r':\*', Keyword.Type, 'operator'),
            (r'(function|map|array)(\()', bygroups(Keyword.Type, Punctuation)),
            (qname, Keyword.Type, 'occurrenceindicator'),
        ],
        'kindtest': [
            (r'\(:', Comment, 'comment'),
            (r'\{', Punctuation, 'root'),
            (r'(\))([*+?]?)', popstate_kindtest_callback),
            (r'\*', Name, 'closekindtest'),
            (qname, Name, 'closekindtest'),
            (r'(element|schema-element)(\s*)(\()',
             pushstate_kindtest_callback),
        ],
        'kindtestforpi': [
            (r'\(:', Comment, 'comment'),
            (r'\)', Punctuation, '#pop'),
            (ncname, Name.Variable),
            (stringdouble, String.Double),
            (stringsingle, String.Single),
        ],
        'closekindtest': [
            (r'\(:', Comment, 'comment'),
            (r'(\))', popstate_callback),
            (r',', Punctuation),
            (r'(\{)', pushstate_operator_root_callback),
            (r'\?', Punctuation),
        ],
        'xml_comment': [
            (r'(-->)', popstate_xmlcomment_callback),
            (r'[^-]{1,2}', Literal),
            (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]',
             Literal),
        ],
        'processing_instruction': [
            (r'\s+', Text, 'processing_instruction_content'),
            (r'\?>', String.Doc, '#pop'),
            (pitarget, Name),
        ],
        'processing_instruction_content': [
            (r'\?>', String.Doc, '#pop'),
            (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]',
             Literal),
        ],
        'cdata_section': [
            (r']]>', String.Doc, '#pop'),
            (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]',
             Literal),
        ],
        'start_tag': [
            include('whitespace'),
            (r'(/>)', popstate_tag_callback),
            (r'>', Name.Tag, 'element_content'),
            (r'"', Punctuation, 'quot_attribute_content'),
            (r"'", Punctuation, 'apos_attribute_content'),
            (r'=', Operator),
            (qname, Name.Tag),
        ],
        'quot_attribute_content': [
            (r'"', Punctuation, 'start_tag'),
            (r'(\{)', pushstate_root_callback),
            (r'""', Name.Attribute),
            (quotattrcontentchar, Name.Attribute),
            (entityref, Name.Attribute),
            (charref, Name.Attribute),
            (r'\{\{|\}\}', Name.Attribute),
        ],
        'apos_attribute_content': [
            (r"'", Punctuation, 'start_tag'),
            (r'\{', Punctuation, 'root'),
            (r"''", Name.Attribute),
            (aposattrcontentchar, Name.Attribute),
            (entityref, Name.Attribute),
            (charref, Name.Attribute),
            (r'\{\{|\}\}', Name.Attribute),
        ],
        'element_content': [
            (r'</', Name.Tag, 'end_tag'),
            (r'(\{)', pushstate_root_callback),
            (r'(<!--)', pushstate_element_content_xmlcomment_callback),
            (r'(<\?)',
             pushstate_element_content_processing_instruction_callback),
            (r'(<!\[CDATA\[)',
             pushstate_element_content_cdata_section_callback),
            (r'(<)', pushstate_element_content_starttag_callback),
            (elementcontentchar, Literal),
            (entityref, Literal),
            (charref, Literal),
            (r'\{\{|\}\}', Literal),
        ],
        'end_tag': [
            include('whitespace'),
            (r'(>)', popstate_tag_callback),
            (qname, Name.Tag),
        ],
        'xmlspace_decl': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (r'preserve|strip', Keyword, '#pop'),
        ],
        'declareordering': [
            (r'\(:', Comment, 'comment'),
            include('whitespace'),
            (r'ordered|unordered', Keyword, '#pop'),
        ],
        'xqueryversion': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (stringdouble, String.Double),
            (stringsingle, String.Single),
            (r'encoding', Keyword),
            (r';', Punctuation, '#pop'),
        ],
        'pragma': [
            (qname, Name.Variable, 'pragmacontents'),
        ],
        'pragmacontents': [
            (r'#\)', Punctuation, 'operator'),
            (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]',
             Literal),
            (r'(\s+)', Text),
        ],
        'occurrenceindicator': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),
            (r'\*|\?|\+', Operator, 'operator'),
            (r':=', Operator, 'root'),
            default('operator'),
        ],
        'option': [
            include('whitespace'),
            (qname, Name.Variable, '#pop'),
        ],
        'qname_braren': [
            include('whitespace'),
            (r'(\{)', pushstate_operator_root_callback),
            (r'(\()', Punctuation, 'root'),
        ],
        'element_qname': [
            (qname, Name.Variable, 'root'),
        ],
        'attribute_qname': [
            (qname, Name.Variable, 'root'),
        ],
        'root': [
            include('whitespace'),
            (r'\(:', Comment, 'comment'),

            # handle operator state
            # order on numbers matters - handle most complex first
            (r'\d+(\.\d*)?[eE][+-]?\d+', Number.Float, 'operator'),
            (r'(\.\d+)[eE][+-]?\d+', Number.Float, 'operator'),
            (r'(\.\d+|\d+\.\d*)', Number.Float, 'operator'),
            (r'(\d+)', Number.Integer, 'operator'),
            (r'(\.\.|\.|\))', Punctuation, 'operator'),
            (r'(declare)(\s+)(construction)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'operator'),
            (r'(declare)(\s+)(default)(\s+)(order)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text,
                      Keyword.Declaration), 'operator'),
            (r'(declare)(\s+)(context)(\s+)(item)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text,
                      Keyword.Declaration), 'operator'),
            (ncname + r':\*', Name, 'operator'),
            (r'\*:' + ncname, Name.Tag, 'operator'),
            (r'\*', Name.Tag, 'operator'),
            (stringdouble, String.Double, 'operator'),
            (stringsingle, String.Single, 'operator'),
            (r'(\}|\])', popstate_callback),

            # NAMESPACE DECL
            (r'(declare)(\s+)(default)(\s+)(collation)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text,
                      Keyword.Declaration)),
            (r'(module|declare)(\s+)(namespace)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'namespacedecl'),
            (r'(declare)(\s+)(base-uri)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'namespacedecl'),

            # NAMESPACE KEYWORD
            (r'(declare)(\s+)(default)(\s+)(element|function)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text,
                      Keyword.Declaration), 'namespacekeyword'),
            (r'(import)(\s+)(schema|module)',
             bygroups(Keyword.Pseudo, Text,
                      Keyword.Pseudo), 'namespacekeyword'),
            (r'(declare)(\s+)(copy-namespaces)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'namespacekeyword'),

            # VARNAMEs
            (r'(for|let|some|every)(\s+)(\$)',
             bygroups(Keyword, Text, Name.Variable), 'varname'),
            (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)',
             bygroups(Keyword, Text, Keyword, Text, Keyword, Text,
                      Name.Variable), 'varname'),
            (r'\$', Name.Variable, 'varname'),
            (r'(declare)(\s+)(variable)(\s+)(\$)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text,
                      Name.Variable), 'varname'),

            # ANNOTATED GLOBAL VARIABLES AND FUNCTIONS
            (r'(declare)(\s+)(\%)',
             bygroups(Keyword.Declaration, Text,
                      Name.Decorator), 'annotationname'),

            # ITEMTYPE
            (r'(\))(\s+)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
            (r'(element|attribute|schema-element|schema-attribute|comment|'
             r'text|node|document-node|empty-sequence)(\s+)(\()',
             pushstate_operator_kindtest_callback),
            (r'(processing-instruction)(\s+)(\()',
             pushstate_operator_kindtestforpi_callback),
            (r'(<!--)', pushstate_operator_xmlcomment_callback),
            (r'(<\?)', pushstate_operator_processing_instruction_callback),
            (r'(<!\[CDATA\[)', pushstate_operator_cdata_section_callback),

            # (r'</', Name.Tag, 'end_tag'),
            (r'(<)', pushstate_operator_starttag_callback),
            (r'(declare)(\s+)(boundary-space)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'xmlspace_decl'),
            (r'(validate)(\s+)(lax|strict)',
             pushstate_operator_root_validate_withmode),
            (r'(validate)(\s*)(\{)', pushstate_operator_root_validate),
            (r'(typeswitch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
            (r'(switch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
            (r'(element|attribute|namespace)(\s*)(\{)',
             pushstate_operator_root_construct_callback),
            (r'(document|text|processing-instruction|comment)(\s*)(\{)',
             pushstate_operator_root_construct_callback),
            # ATTRIBUTE
            (r'(attribute)(\s+)(?=' + qname + r')', bygroups(Keyword, Text),
             'attribute_qname'),
            # ELEMENT
            (r'(element)(\s+)(?=' + qname + r')', bygroups(Keyword, Text),
             'element_qname'),
            # PROCESSING_INSTRUCTION
            (r'(processing-instruction|namespace)(\s+)(' + ncname +
             r')(\s*)(\{)',
             bygroups(Keyword, Text, Name.Variable, Text,
                      Punctuation), 'operator'),
            (r'(declare|define)(\s+)(function)',
             bygroups(Keyword.Declaration, Text, Keyword.Declaration)),
            (r'(\{|\[)', pushstate_operator_root_callback),
            (r'(unordered|ordered)(\s*)(\{)',
             pushstate_operator_order_callback),
            (r'(map|array)(\s*)(\{)', pushstate_operator_map_callback),
            (r'(declare)(\s+)(ordering)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'declareordering'),
            (r'(xquery)(\s+)(version)',
             bygroups(Keyword.Pseudo, Text, Keyword.Pseudo), 'xqueryversion'),
            (r'(\(#)(\s*)', bygroups(Punctuation, Text), 'pragma'),

            # sometimes return can occur in root state
            (r'return', Keyword),
            (r'(declare)(\s+)(option)',
             bygroups(Keyword.Declaration, Text,
                      Keyword.Declaration), 'option'),

            # URI LITERALS - single and double quoted
            (r'(at)(\s+)(' + stringdouble + ')', String.Double, 'namespacedecl'
             ),
            (r'(at)(\s+)(' + stringsingle + ')', String.Single,
             'namespacedecl'),
            (r'(ancestor-or-self|ancestor|attribute|child|descendant-or-self)(::)',
             bygroups(Keyword, Punctuation)),
            (r'(descendant|following-sibling|following|parent|preceding-sibling'
             r'|preceding|self)(::)', bygroups(Keyword, Punctuation)),
            (r'(if)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
            (r'then|else', Keyword),

            # eXist specific XQUF
            (r'(update)(\s*)(insert|delete|replace|value|rename)',
             bygroups(Keyword, Text, Keyword)),
            (r'(into|following|preceding|with)', Keyword),

            # Marklogic specific
            (r'(try)(\s*)', bygroups(Keyword, Text), 'root'),
            (r'(catch)(\s*)(\()(\$)',
             bygroups(Keyword, Text, Punctuation, Name.Variable), 'varname'),
            (r'(@' + qname + ')', Name.Attribute, 'operator'),
            (r'(@' + ncname + ')', Name.Attribute, 'operator'),
            (r'@\*:' + ncname, Name.Attribute, 'operator'),
            (r'@\*', Name.Attribute, 'operator'),
            (r'(@)', Name.Attribute, 'operator'),
            (r'//|/|\+|-|;|,|\(|\)', Punctuation),

            # STANDALONE QNAMES
            (qname + r'(?=\s*\{)', Name.Tag, 'qname_braren'),
            (qname + r'(?=\s*\([^:])', Name.Function, 'qname_braren'),
            (r'(' + qname + ')(#)([0-9]+)',
             bygroups(Name.Function, Keyword.Type, Number.Integer)),
            (qname, Name.Tag, 'operator'),
        ]
    }
Exemple #28
0
class PraatLexer(RegexLexer):
    """
    For `Praat <http://www.praat.org>`_ scripts.

    .. versionadded:: 2.1
    """

    name = 'Praat'
    aliases = ['praat']
    filenames = ['*.praat', '*.proc', '*.psc']

    keywords = (
        'if', 'then', 'else', 'elsif', 'elif', 'endif', 'fi', 'for', 'from', 'to',
        'endfor', 'endproc', 'while', 'endwhile', 'repeat', 'until', 'select', 'plus',
        'minus', 'demo', 'assert', 'stopwatch', 'nocheck', 'nowarn', 'noprogress',
        'editor', 'endeditor', 'clearinfo',
    )

    functions_string = (
        'backslashTrigraphsToUnicode', 'chooseDirectory', 'chooseReadFile',
        'chooseWriteFile', 'date', 'demoKey', 'do', 'environment', 'extractLine',
        'extractWord', 'fixed', 'info', 'left', 'mid', 'percent', 'readFile', 'replace',
        'replace_regex', 'right', 'selected', 'string', 'unicodeToBackslashTrigraphs',
    )

    functions_numeric = (
        'abs', 'appendFile', 'appendFileLine', 'appendInfo', 'appendInfoLine', 'arccos',
        'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'barkToHertz',
        'beginPause', 'beginSendPraat', 'besselI', 'besselK', 'beta', 'beta2',
        'binomialP', 'binomialQ', 'boolean', 'ceiling', 'chiSquareP', 'chiSquareQ',
        'choice', 'comment', 'cos', 'cosh', 'createDirectory', 'deleteFile',
        'demoClicked', 'demoClickedIn', 'demoCommandKeyPressed',
        'demoExtraControlKeyPressed', 'demoInput', 'demoKeyPressed',
        'demoOptionKeyPressed', 'demoShiftKeyPressed', 'demoShow', 'demoWaitForInput',
        'demoWindowTitle', 'demoX', 'demoY', 'differenceLimensToPhon', 'do', 'editor',
        'endPause', 'endSendPraat', 'endsWith', 'erb', 'erbToHertz', 'erf', 'erfc',
        'exitScript', 'exp', 'extractNumber', 'fileReadable', 'fisherP', 'fisherQ',
        'floor', 'gaussP', 'gaussQ', 'hertzToBark', 'hertzToErb', 'hertzToMel',
        'hertzToSemitones', 'imax', 'imin', 'incompleteBeta', 'incompleteGammaP', 'index',
        'index_regex', 'integer', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ',
        'invGaussQ', 'invSigmoid', 'invStudentQ', 'length', 'ln', 'lnBeta', 'lnGamma',
        'log10', 'log2', 'max', 'melToHertz', 'min', 'minusObject', 'natural', 'number',
        'numberOfColumns', 'numberOfRows', 'numberOfSelected', 'objectsAreIdentical',
        'option', 'optionMenu', 'pauseScript', 'phonToDifferenceLimens', 'plusObject',
        'positive', 'randomBinomial', 'randomGauss', 'randomInteger', 'randomPoisson',
        'randomUniform', 'real', 'readFile', 'removeObject', 'rindex', 'rindex_regex',
        'round', 'runScript', 'runSystem', 'runSystem_nocheck', 'selectObject',
        'selected', 'semitonesToHertz', 'sentence', 'sentencetext', 'sigmoid', 'sin', 'sinc',
        'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP',
        'studentQ', 'tan', 'tanh', 'text', 'variableExists', 'word', 'writeFile', 'writeFileLine',
        'writeInfo', 'writeInfoLine',
    )

    functions_array = (
        'linear', 'randomGauss', 'randomInteger', 'randomUniform', 'zero',
    )

    objects = (
        'Activation', 'AffineTransform', 'AmplitudeTier', 'Art', 'Artword',
        'Autosegment', 'BarkFilter', 'BarkSpectrogram', 'CCA', 'Categories',
        'Cepstrogram', 'Cepstrum', 'Cepstrumc', 'ChebyshevSeries', 'ClassificationTable',
        'Cochleagram', 'Collection', 'ComplexSpectrogram', 'Configuration', 'Confusion',
        'ContingencyTable', 'Corpus', 'Correlation', 'Covariance',
        'CrossCorrelationTable', 'CrossCorrelationTables', 'DTW', 'DataModeler',
        'Diagonalizer', 'Discriminant', 'Dissimilarity', 'Distance', 'Distributions',
        'DurationTier', 'EEG', 'ERP', 'ERPTier', 'EditCostsTable', 'EditDistanceTable',
        'Eigen', 'Excitation', 'Excitations', 'ExperimentMFC', 'FFNet', 'FeatureWeights',
        'FileInMemory', 'FilesInMemory', 'Formant', 'FormantFilter', 'FormantGrid',
        'FormantModeler', 'FormantPoint', 'FormantTier', 'GaussianMixture', 'HMM',
        'HMM_Observation', 'HMM_ObservationSequence', 'HMM_State', 'HMM_StateSequence',
        'Harmonicity', 'ISpline', 'Index', 'Intensity', 'IntensityTier', 'IntervalTier',
        'KNN', 'KlattGrid', 'KlattTable', 'LFCC', 'LPC', 'Label', 'LegendreSeries',
        'LinearRegression', 'LogisticRegression', 'LongSound', 'Ltas', 'MFCC', 'MSpline',
        'ManPages', 'Manipulation', 'Matrix', 'MelFilter', 'MelSpectrogram',
        'MixingMatrix', 'Movie', 'Network', 'Object', 'OTGrammar', 'OTHistory', 'OTMulti',
        'PCA', 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo',
        'Pitch', 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial',
        'PowerCepstrogram', 'PowerCepstrum', 'Procrustes', 'RealPoint', 'RealTier',
        'ResultsMFC', 'Roots', 'SPINET', 'SSCP', 'SVD', 'Salience', 'ScalarProduct',
        'Similarity', 'SimpleString', 'SortedSetOfString', 'Sound', 'Speaker',
        'Spectrogram', 'Spectrum', 'SpectrumTier', 'SpeechSynthesizer', 'SpellingChecker',
        'Strings', 'StringsIndex', 'Table', 'TableOfReal', 'TextGrid', 'TextInterval',
        'TextPoint', 'TextTier', 'Tier', 'Transition', 'VocalTract', 'VocalTractTier',
        'Weight', 'WordList',
    )

    variables_numeric = (
        'macintosh', 'windows', 'unix', 'praatVersion', 'pi', 'e', 'undefined',
    )

    variables_string = (
        'praatVersion', 'tab', 'shellDirectory', 'homeDirectory',
        'preferencesDirectory', 'newline', 'temporaryDirectory',
        'defaultDirectory',
    )

    object_attributes = (
        'ncol', 'nrow', 'xmin', 'ymin', 'xmax', 'ymax', 'nx', 'ny', 'dx', 'dy',
    )

    tokens = {
        'root': [
            (r'(\s+)(#.*?$)',  bygroups(Text, Comment.Single)),
            (r'^#.*?$',        Comment.Single),
            (r';[^\n]*',       Comment.Single),
            (r'\s+',           Text),

            (r'\bprocedure\b', Keyword,       'procedure_definition'),
            (r'\bcall\b',      Keyword,       'procedure_call'),
            (r'@',             Name.Function, 'procedure_call'),

            include('function_call'),

            (words(keywords, suffix=r'\b'), Keyword),

            (r'(\bform\b)(\s+)([^\n]+)',
             bygroups(Keyword, Text, String), 'old_form'),

            (r'(print(?:line|tab)?|echo|exit|asserterror|pause|send(?:praat|socket)|'
             r'include|execute|system(?:_nocheck)?)(\s+)',
             bygroups(Keyword, Text), 'string_unquoted'),

            (r'(goto|label)(\s+)(\w+)', bygroups(Keyword, Text, Name.Label)),

            include('variable_name'),
            include('number'),

            (r'"', String, 'string'),

            (words((objects), suffix=r'(?=\s+\S+\n)'), Name.Class, 'string_unquoted'),

            (r'\b[A-Z]', Keyword, 'command'),
            (r'(\.{3}|[)(,])', Punctuation),
        ],
        'command': [
            (r'( ?[\w()-]+ ?)', Keyword),

            include('string_interpolated'),

            (r'\.{3}', Keyword, ('#pop', 'old_arguments')),
            (r':', Keyword, ('#pop', 'comma_list')),
            (r'\s', Text, '#pop'),
        ],
        'procedure_call': [
            (r'\s+', Text),
            (r'([\w.]+)(:|\s*\()',
             bygroups(Name.Function, Text), '#pop'),
            (r'([\w.]+)', Name.Function, ('#pop', 'old_arguments')),
        ],
        'procedure_definition': [
            (r'\s', Text),
            (r'([\w.]+)(\s*?[(:])',
             bygroups(Name.Function, Text), '#pop'),
            (r'([\w.]+)([^\n]*)',
             bygroups(Name.Function, Text), '#pop'),
        ],
        'function_call': [
            (words(functions_string, suffix=r'\$(?=\s*[:(])'), Name.Function, 'function'),
            (words(functions_array, suffix=r'#(?=\s*[:(])'),   Name.Function, 'function'),
            (words(functions_numeric, suffix=r'(?=\s*[:(])'),  Name.Function, 'function'),
        ],
        'function': [
            (r'\s+',   Text),
            (r':',     Punctuation, ('#pop', 'comma_list')),
            (r'\s*\(', Punctuation, ('#pop', 'comma_list')),
        ],
        'comma_list': [
            (r'(\s*\n\s*)(\.{3})', bygroups(Text, Punctuation)),

            (r'(\s*[])\n])', Text, '#pop'),

            (r'\s+', Text),
            (r'"',   String, 'string'),
            (r'\b(if|then|else|fi|endif)\b', Keyword),

            include('function_call'),
            include('variable_name'),
            include('operator'),
            include('number'),

            (r'[()]', Text),
            (r',', Punctuation),
        ],
        'old_arguments': [
            (r'\n', Text, '#pop'),

            include('variable_name'),
            include('operator'),
            include('number'),

            (r'"', String, 'string'),
            (r'[^\n]', Text),
        ],
        'number': [
            (r'\n', Text, '#pop'),
            (r'\b\d+(\.\d*)?([eE][-+]?\d+)?%?', Number),
        ],
        'object_reference': [
            include('string_interpolated'),
            (r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin),

            (words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'),

            (r'\$', Name.Builtin),
            (r'\[', Text, '#pop'),
        ],
        'variable_name': [
            include('operator'),
            include('number'),

            (words(variables_string,  suffix=r'\$'), Name.Variable.Global),
            (words(variables_numeric,
             suffix=r'(?=[^a-zA-Z0-9_."\'$#\[:(]|\s|^|$)'),
             Name.Variable.Global),

            (words(objects, prefix=r'\b', suffix=r"(_)"),
             bygroups(Name.Builtin, Name.Builtin),
             'object_reference'),

            (r'\.?_?[a-z][\w.]*(\$|#)?', Text),
            (r'[\[\]]', Punctuation, 'comma_list'),

            include('string_interpolated'),
        ],
        'operator': [
            (r'([+\/*<>=!-]=?|[&*|][&*|]?|\^|<>)',       Operator),
            (r'(?<![\w.])(and|or|not|div|mod)(?![\w.])', Operator.Word),
        ],
        'string_interpolated': [
            (r'\'[_a-z][^\[\]\'":]*(\[([\d,]+|"[\w,]+")\])?(:[0-9]+)?\'',
             String.Interpol),
        ],
        'string_unquoted': [
            (r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),

            (r'\n',       Text,            '#pop'),
            (r'\s',       Text),

            include('string_interpolated'),

            (r"'",        String),
            (r"[^'\n]+",  String),
        ],
        'string': [
            (r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),

            (r'"',          String,          '#pop'),

            include('string_interpolated'),

            (r"'",          String),
            (r'[^\'"\n]+',  String),
        ],
        'old_form': [
            (r'(\s+)(#.*?$)',  bygroups(Text, Comment.Single)),
            (r'\s+', Text),

            (r'(optionmenu|choice)([ \t]+\S+:[ \t]+)',
             bygroups(Keyword, Text), 'number'),

            (r'(option|button)([ \t]+)',
             bygroups(Keyword, Text), 'string_unquoted'),

            (r'(sentence|text)([ \t]+\S+)',
             bygroups(Keyword, Text), 'string_unquoted'),

            (r'(word)([ \t]+\S+[ \t]*)(\S+)?([ \t]+.*)?',
             bygroups(Keyword, Text, String, Text)),

            (r'(boolean)(\s+\S+\s*)(0|1|"?(?:yes|no)"?)',
             bygroups(Keyword, Text, Name.Variable)),

            # Ideally processing of the number would happend in the 'number'
            # but that doesn't seem to work
            (r'(real|natural|positive|integer)([ \t]+\S+[ \t]*)([+-]?)(\d+(?:\.\d*)?'
             r'(?:[eE][-+]?\d+)?%?)',
             bygroups(Keyword, Text, Operator, Number)),

            (r'(comment)(\s+)',
             bygroups(Keyword, Text), 'string_unquoted'),

            (r'\bendform\b', Keyword, '#pop'),
        ]
    }
Exemple #29
0
class SlimLexer(ExtendedRegexLexer):
    """
    For Slim markup.

    .. versionadded:: 2.0
    """

    name = 'Slim'
    aliases = ['slim']
    filenames = ['*.slim']
    mimetypes = ['text/x-slim']

    flags = re.IGNORECASE
    _dot = r'(?: \|\n(?=.* \|)|.)'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'([ \t]*==?)(.*\n)', bygroups(Punctuation,
                                            using(RubyLexer)), 'root'),
            (r'[ \t]+[\w:-]+(?==)', Name.Attribute, 'html-attributes'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'[\w:-]+:[ \t]*\n', Text, 'plain'),
            (r'(-)(.*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r'\|' + _dot + r'*\n', _starts_block(Text, 'plain'), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'slim-comment-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            (r'[ \t]+\n', Punctuation, '#pop:2'),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(.*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'=', Punctuation),
            (r'"[^"]+"', using(RubyLexer), 'tag'),
            (r'\'[^\']+\'', using(RubyLexer), 'tag'),
            (r'\w+', Text, 'tag'),
        ],
        'slim-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
    }
Exemple #30
0
class CssLexer(RegexLexer):
    """
    For CSS (Cascading Style Sheets).
    """

    name = 'CSS'
    aliases = ['css']
    filenames = ['*.css']
    mimetypes = ['text/css']

    tokens = {
        'root': [
            include('basics'),
        ],
        'basics': [(r'\s+', Text), (r'/\*(?:.|\n)*?\*/', Comment),
                   (r'\{', Punctuation, 'content'),
                   (r'(\:{1,2})([\w-]+)', bygroups(Punctuation,
                                                   Name.Decorator)),
                   (r'(\.)([\w-]+)', bygroups(Punctuation, Name.Class)),
                   (r'(\#)([\w-]+)', bygroups(Punctuation, Name.Namespace)),
                   (r'(@)([\w-]+)', bygroups(Punctuation, Keyword), 'atrule'),
                   (r'[\w-]+', Name.Tag),
                   (r'[~^*!%&$\[\]()<>|+=@:;,./?-]', Operator),
                   (r'"(\\\\|\\"|[^"])*"', String.Double),
                   (r"'(\\\\|\\'|[^'])*'", String.Single)],
        'atrule': [
            (r'\{', Punctuation, 'atcontent'),
            (r';', Punctuation, '#pop'),
            include('basics'),
        ],
        'atcontent': [
            include('basics'),
            (r'\}', Punctuation, '#pop:2'),
        ],
        'content': [
            (r'\s+', Text),
            (r'\}', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'^@.*?$', Comment.Preproc),
            (words(_vendor_prefixes, ), Keyword.Pseudo),
            (r'(' + r'|'.join(_css_properties) + r')(\s*)(\:)',
             bygroups(Keyword, Text, Punctuation), 'value-start'),
            (r'([a-zA-Z_][\w-]*)(\s*)(\:)', bygroups(Name, Text, Punctuation),
             'value-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
        ],
        'value-start': [
            (r'\s+', Text),
            (words(_vendor_prefixes, ), Name.Builtin.Pseudo),
            include('urls'),
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()', bygroups(Name.Function, Punctuation),
             'function-start'),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),
            # for transition-property etc.
            (words(_css_properties, suffix=r'\b'), Keyword),
            (r'\!important', Comment.Preproc),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[~^*!%&<>|+=@:./?-]+', Operator),
            (r'[\[\](),]+', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[a-zA-Z_][\w-]*', Name),
            (r';', Punctuation, '#pop'),
            (r'\}', Punctuation, '#pop:2'),
        ],
        'function-start': [
            (r'\s+', Text),
            include('urls'),
            (words(_vendor_prefixes, ), Keyword.Pseudo),
            (words(_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_other_keyword_values, suffix=r'\b'), Keyword.Constant),
            (words(_color_keywords, suffix=r'\b'), Keyword.Constant),

            # function-start may be entered recursively
            (r'(' + r'|'.join(_functional_notation_keyword_values) + r')(\()',
             bygroups(Name.Builtin, Punctuation), 'function-start'),
            (r'([a-zA-Z_][\w-]+)(\()', bygroups(Name.Function, Punctuation),
             'function-start'),
            (r'/\*(?:.|\n)*?\*/', Comment),
            include('numeric-values'),
            (r'[*+/-]', Operator),
            (r'[,]', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[a-zA-Z_-]\w*', Name),
            (r'\)', Punctuation, '#pop'),
        ],
        'urls': [
            (r'(url)(\()(".*?")(\))',
             bygroups(Name.Builtin, Punctuation, String.Double, Punctuation)),
            (r"(url)(\()('.*?')(\))",
             bygroups(Name.Builtin, Punctuation, String.Single, Punctuation)),
            (r'(url)(\()(.*?)(\))',
             bygroups(Name.Builtin, Punctuation, String.Other, Punctuation)),
        ],
        'numeric-values': [
            (r'\#[a-zA-Z0-9]{1,6}', Number.Hex),
            (r'[+\-]?[0-9]*[.][0-9]+', Number.Float, 'numeric-end'),
            (r'[+\-]?[0-9]+', Number.Integer, 'numeric-end'),
        ],
        'numeric-end': [
            (words(_all_units, suffix=r'\b'), Keyword.Type),
            (r'%', Keyword.Type),
            default('#pop'),
        ],
    }