Ejemplo n.º 1
0
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    #: optional Comment or Whitespace
    string = r'"(\\"|[^"])*"'
    char = r'[a-zA-Z$._0-9@]'
    identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)'
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'), (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text)
        ],
        'directive-args': [(identifier, Name.Constant), (string, String),
                           ('@' + identifier, Name.Attribute),
                           (number, Number.Integer),
                           (r'[\r\n]+', Text, '#pop'),
                           (r'#.*?$', Comment, '#pop'),
                           include('punctuation'),
                           include('whitespace')],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)(' + identifier + ')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)(' + identifier + ')([-+])(' + number + ')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            # Numeric constants
            ('$' + number, Number.Integer),
            (r'[\r\n]+', Text, '#pop'),
            (r'#.*?$', Comment, '#pop'),
            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [(r'\n', Text), (r'\s+', Text), (r'#.*?\n', Comment)],
        'punctuation': [(r'[-*,.():]+', Punctuation)]
    }

    def analyse_text(text):
        return re.match(r'^\.\w+', text, re.M)
Ejemplo n.º 2
0
class BooLexer(RegexLexer):
    """
    For `Boo <http://boo.codehaus.org/>`_ source code.
    """

    name = 'Boo'
    aliases = ['boo']
    filenames = ['*.boo']
    mimetypes = ['text/x-boo']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|//).*$', Comment.Single),
            (r'/[*]', Comment.Multiline, 'comment'),
            (r'[]{}:(),.;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'/(\\\\|\\/|[^/\s])/', String.Regex),
            (r'@/(\\\\|\\/|[^/])*/', String.Regex),
            (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
            (r'(as|abstract|callable|constructor|destructor|do|import|'
             r'enum|event|final|get|interface|internal|of|override|'
             r'partial|private|protected|public|return|set|static|'
             r'struct|transient|virtual|yield|super|and|break|cast|'
             r'continue|elif|else|ensure|except|for|given|goto|if|in|'
             r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|'
             r'while|from|as)\b', Keyword),
            (r'def(?=\s+\(.*?\))', Keyword),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|'
             r'assert|checked|enumerate|filter|getter|len|lock|map|'
             r'matrix|max|min|normalArrayIndexing|print|property|range|'
             r'rawArrayIndexing|required|typeof|unchecked|using|'
             r'yieldAll|zip)\b', Name.Builtin),
            ('"""(\\\\|\\"|.*?)"""', String.Double),
            ('"(\\\\|\\"|[^"]*?)"', String.Double),
            ("'(\\\\|\\'|[^']*?)'", String.Single),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            (r'[0-9][0-9\.]*(m|ms|d|h|s)', Number),
            (r'0\d+', Number.Oct),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer),
        ],
        'comment': [('/[*]', Comment.Multiline, '#push'),
                    ('[*]/', Comment.Multiline, '#pop'),
                    ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline)],
        'funcname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')],
        'classname': [('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')],
        'namespace': [('[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop')]
    }
Ejemplo n.º 3
0
class ObjdumpLexer(RegexLexer):
    """
    For the output of 'objdump -dr'
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    hex = r'[0-9A-Za-z]'

    tokens = {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
             bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
             bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            ('(' + hex + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            ('(' + hex + '+)( )(<)(.*?)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation)),
            # Code line with disassembled instructions
            ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex +
             ' )+)( *\t)([a-zA-Z].*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text,
                      using(GasLexer))),
            # Code line with ascii
            ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)( *)(.*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction
            ('( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)$',
             bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes
            ('\t\.\.\.$', Text),
            # Relocation line
            # (With offset)
            ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex +
             '+)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            ('(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant)),
            ('[^\n]+\n', Other)
        ]
    }
Ejemplo n.º 4
0
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*'
    hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (
        r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
        r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
        r'EXPORT|LIBRARY|MODULE')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            include('whitespace'), (r'^\s*%', Comment.Preproc, 'preproc'),
            (identifier + ':', Name.Label),
            (r'(%s)(\s+)(equ)' % identifier,
             bygroups(Name.Constant, Keyword.Declaration,
                      Keyword.Declaration), 'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text)
        ],
        'instruction-args': [(string, String), (hexn, Number.Hex),
                             (octn, Number.Oct), (binn, Number),
                             (floatn, Number.Float), (decn, Number.Integer),
                             include('punctuation'), (register, Name.Builtin),
                             (identifier, Name.Variable),
                             (r'[\r\n]+', Text, '#pop'),
                             include('whitespace')],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [(r'\n', Text), (r'[ \t]+', Text),
                       (r';.*', Comment.Single)],
        'punctuation': [(r'[,():\[\]]+', Punctuation),
                        (r'[&|^<>+*/%~-]+', Operator),
                        (r'[$]+', Keyword.Constant), (wordop, Operator.Word),
                        (type, Keyword.Type)],
    }
Ejemplo n.º 5
0
class GenericAspxLexer(RegexLexer):
    """
    Lexer for ASP.NET pages.
    """

    name = 'aspx-gen'
    filenames = []
    mimetypes = []

    flags = re.DOTALL

    tokens = {
        'root': [
            (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)),
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(XmlLexer), Other, using(XmlLexer))),
            (r'(.+?)(?=<)', using(XmlLexer)),
            (r'.+', using(XmlLexer)),
        ],
    }
Ejemplo n.º 6
0
class MatlabLexer(RegexLexer):
    """
    For Matlab (or GNU Octave) source code.
    Contributed by Ken Schutte <*****@*****.**>.

    *New in Pygments 0.10.*
    """
    name = 'Matlab'
    aliases = ['matlab', 'octave']
    filenames = ['*.m']
    mimetypes = ['text/matlab']

    #
    # These lists are generated automatically.
    # Run the following in bash shell:
    #
    # for f in elfun specfun elmat; do
    #   echo -n "$f = "
    #   matlab -nojvm -r "help $f;exit;" | perl -ne \
    #   'push(@c,$1) if /^    (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}'
    # done
    #
    # elfun: Elementary math functions
    # specfun: Special Math functions
    # elmat: Elementary matrices and matrix manipulation
    #
    # taken from Matlab version 7.4.0.336 (R2007a)
    #
    elfun = ["sin","sind","sinh","asin","asind","asinh","cos","cosd","cosh",
             "acos","acosd","acosh","tan","tand","tanh","atan","atand","atan2",
             "atanh","sec","secd","sech","asec","asecd","asech","csc","cscd",
             "csch","acsc","acscd","acsch","cot","cotd","coth","acot","acotd",
             "acoth","hypot","exp","expm1","log","log1p","log10","log2","pow2",
             "realpow","reallog","realsqrt","sqrt","nthroot","nextpow2","abs",
             "angle","complex","conj","imag","real","unwrap","isreal","cplxpair",
             "fix","floor","ceil","round","mod","rem","sign"]
    specfun = ["airy","besselj","bessely","besselh","besseli","besselk","beta",
               "betainc","betaln","ellipj","ellipke","erf","erfc","erfcx",
               "erfinv","expint","gamma","gammainc","gammaln","psi","legendre",
               "cross","dot","factor","isprime","primes","gcd","lcm","rat",
               "rats","perms","nchoosek","factorial","cart2sph","cart2pol",
               "pol2cart","sph2cart","hsv2rgb","rgb2hsv"]
    elmat = ["zeros","ones","eye","repmat","rand","randn","linspace","logspace",
             "freqspace","meshgrid","accumarray","size","length","ndims","numel",
             "disp","isempty","isequal","isequalwithequalnans","cat","reshape",
             "diag","blkdiag","tril","triu","fliplr","flipud","flipdim","rot90",
             "find","end","sub2ind","ind2sub","bsxfun","ndgrid","permute",
             "ipermute","shiftdim","circshift","squeeze","isscalar","isvector",
             "ans","eps","realmax","realmin","pi","i","inf","nan","isnan",
             "isinf","isfinite","j","why","compan","gallery","hadamard","hankel",
             "hilb","invhilb","magic","pascal","rosser","toeplitz","vander",
             "wilkinson"]

    tokens = {
        'root': [
            # line starting with '!' is sent as a system command.  not sure what
            # label to use...
            (r'^!.*', String.Other),
            (r'%.*$', Comment),
            (r'^\s*function', Keyword, 'deffunc'),

            # from 'iskeyword' on version 7.11 (R2010):
            (r'(break|case|catch|classdef|continue|else|elseif|end|enumerated|'
             r'events|for|function|global|if|methods|otherwise|parfor|'
             r'persistent|properties|return|spmd|switch|try|while)\b', Keyword),

            ("(" + "|".join(elfun+specfun+elmat) + r')\b',  Name.Builtin),

            # operators:
            (r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator),
            # operators requiring escape for re:
            (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),

            # punctuation:
            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
            (r'=|:|;', Punctuation),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w\)\]])\'', Operator),

            (r'(?<![\w\)\]])\'', String, 'string'),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'.', Text),
        ],
        'string': [
            (r'[^\']*\'', String, '#pop')
        ],
        'deffunc': [
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation,
                      Text.Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Text.Whitespace), '#pop'),
        ],
    }

    def analyse_text(text):
        if re.match('^\s*%', text, re.M): # comment
            return 0.9
        elif re.match('^!\w+', text, re.M): # system cmd
            return 0.9
        return 0.1
Ejemplo n.º 7
0
class MuPADLexer(RegexLexer):
    """
    A `MuPAD <http://www.mupad.com>`_ lexer.
    Contributed by Christopher Creutzig <*****@*****.**>.

    *New in Pygments 0.8.*
    """
    name = 'MuPAD'
    aliases = ['mupad']
    filenames = ['*.mu']

    tokens = {
      'root' : [
        (r'//.*?$', Comment.Single),
        (r'/\*', Comment.Multiline, 'comment'),
        (r'"(?:[^"\\]|\\.)*"', String),
        (r'\(|\)|\[|\]|\{|\}', Punctuation),
        (r'''(?x)\b(?:
            next|break|end|
            axiom|end_axiom|category|end_category|domain|end_domain|inherits|
            if|%if|then|elif|else|end_if|
            case|of|do|otherwise|end_case|
            while|end_while|
            repeat|until|end_repeat|
            for|from|to|downto|step|end_for|
            proc|local|option|save|begin|end_proc|
            delete|frame
          )\b''', Keyword),
        (r'''(?x)\b(?:
            DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR|
            DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT|
            DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC|
            DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR
          )\b''', Name.Class),
        (r'''(?x)\b(?:
            PI|EULER|E|CATALAN|
            NIL|FAIL|undefined|infinity|
            TRUE|FALSE|UNKNOWN
          )\b''',
          Name.Constant),
        (r'\b(?:dom|procname)\b', Name.Builtin.Pseudo),
        (r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator),
        (r'''(?x)\b(?:
            and|or|not|xor|
            assuming|
            div|mod|
            union|minus|intersect|in|subset
          )\b''',
          Operator.Word),
        (r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number),
        #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin),
        (r'''(?x)
          ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
          (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''',
          bygroups(Name.Function, Punctuation)),
        (r'''(?x)
          (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
          (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable),
        (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
        (r'\.[0-9]+(?:e[0-9]+)?', Number),
        (r'.', Text)
      ],
      'comment' : [
        (r'[^*/]', Comment.Multiline),
        (r'/\*', Comment.Multiline, '#push'),
        (r'\*/', Comment.Multiline, '#pop'),
        (r'[*/]', Comment.Multiline)
      ]
    }
Ejemplo n.º 8
0
class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    *New in Pygments 1.1.*

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    _id = r'[A-Za-z][A-Za-z_0-9]*'
    _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*'
    _RULE_REF = r'[a-z][A-Za-z_0-9]*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + ')(\s*)({)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(?:(,)(\s*)(' + _id + '))+',
             bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation),
            'action'),
            (r'(scope)(\s+)(' + _id + ')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + ')(\s*)({)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + ')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join(( # keep host code in largest possible chunks
                r'[^\${}\'"/\\]+', # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"', # double quote string
                r"'(\\\\|\\'|[^'])*'", # single quote string
                r'//.*$\n?', # single line comment
                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'{', Punctuation, '#push'),
            (r'}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join(( # keep host code in largest possible chunks.
                r'[^\$\[\]\'"/]+', # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"', # double quote string
                r"'(\\\\|\\'|[^'])*'", # single quote string
                r'//.*$\n?', # single line comment
                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),


            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
Ejemplo n.º 9
0
class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    *New in Pygments 1.1.*
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (
                r'(' + r'|'.join((  # keep host code in largest possible chunks
                    r'[^%\'"/#]+',  # exclude unsafe characters
                    r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                    r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression

                    # / is safe now that we've handled regex and javadoc comments
                    r'/',
                )) + r')+',
                Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)',
             bygroups(Punctuation, using(RagelLexer), Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (
                r'(' +
                r'|'.join((  # keep ragel code in largest possible chunks.
                    r'(' + r'|'.join((
                        r'[^}\'"\[/#]',  # exclude unsafe characters
                        r'}(?=[^%]|$)',  # } is okay as long as it's not followed by %
                        r'}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                        r'[^\\][\\][{}]',  # ...and } is okay if it's escaped

                        # allow / if it's preceded with one of these symbols
                        # (ragel EOF actions)
                        r'(>|\$|%|<|@|<>)/',

                        # specifically allow regex followed immediately by *
                        # so it doesn't get mistaken for a comment
                        r'/(?!\*)(\\\\|\\/|[^/])*/\*',

                        # allow / as long as it's not followed by another / or by a *
                        r'/(?=[^/\*]|$)',

                        # We want to match as many of these as we can in one block.
                        # Not sure if we need the + sign here,
                        # does it help performance?
                    )) + r')+',

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                )) + r')+',
                using(RagelLexer)),
            (r'}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text or 0.1
Ejemplo n.º 10
0
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      *New in Pygments 0.8.*
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers,
    # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none':
        '@?[_a-zA-Z][a-zA-Z0-9_]*',
        'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
                  '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd +
                  uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
        'full': ('@?(?:_|[^' +
                 _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) +
                 '])' + '[^' + _escape(
                     uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                   'Pc', 'Cf', 'Mn', 'Mc')) + ']*'),
    }

    tokens = {}
    token_variants = True

    for levelname, cs_ident in levels.items():
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*](.|\n)*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(\\\\|\\"|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
                 r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|base|break|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|lock|new|null|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort)\b\??', Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [(cs_ident, Name.Class, '#pop')],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
Ejemplo n.º 11
0
class VbNetLexer(RegexLexer):
    """
    For
    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba']  # (?)

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            (r'\n', Text),
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc),
            (r'[\(\){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            (r'(?<!\.)(AddHandler|Alias|'
             r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|'
             r'CDec|CDbl|CInt|CLng|CObj|Continue|CSByte|CShort|'
             r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|'
             r'Default|Delegate|DirectCast|Do|Each|Else|ElseIf|'
             r'EndIf|Erase|Error|Event|Exit|False|Finally|For|'
             r'Friend|Get|Global|GoSub|GoTo|Handles|If|'
             r'Implements|Inherits|Interface|'
             r'Let|Lib|Loop|Me|MustInherit|'
             r'MustOverride|MyBase|MyClass|Narrowing|New|Next|'
             r'Not|Nothing|NotInheritable|NotOverridable|Of|On|'
             r'Operator|Option|Optional|Overloads|Overridable|'
             r'Overrides|ParamArray|Partial|Private|Protected|'
             r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|'
             r'Return|Select|Set|Shadows|Shared|Single|'
             r'Static|Step|Stop|SyncLock|Then|'
             r'Throw|To|True|Try|TryCast|Wend|'
             r'Using|When|While|Widening|With|WithEvents|'
             r'WriteOnly)\b', Keyword),
            (r'(?<!\.)End\b', Keyword, 'end'),
            (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
            (r'(?<!\.)(Function|Sub|Property)(\s+)', bygroups(Keyword, Text),
             'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)', bygroups(Keyword, Text),
             'classname'),
            (r'(?<!\.)(Module|Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>]', Operator),
            ('"', String, 'string'),
            ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name),
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'_\n', Text),  # Line continuation
        ],
        'string': [
            (r'""', String),
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        'dim': [
            (r'[a-z_][a-z0-9_]*', Name.Variable, '#pop'),
            (r'', Text, '#pop'),  # any other syntax
        ],
        'funcname': [
            (r'[a-z_][a-z0-9_]*', Name.Function, '#pop'),
        ],
        'classname': [
            (r'[a-z_][a-z0-9_]*', Name.Class, '#pop'),
        ],
        'namespace': [
            (r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'end': [
            (r'\s+', Text),
            (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b',
             Keyword, '#pop'),
            (r'', Text, '#pop'),
        ]
    }
Ejemplo n.º 12
0
class VerilogLexer(RegexLexer):
    """
    For verilog source code with preprocessor directives.

    *New in Pygments 1.4.*
    """
    name = 'verilog'
    aliases = ['v']
    filenames = ['*.v', '*.sv']
    mimetypes = ['text/x-verilog']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            (r'^\s*`define', Comment.Preproc, 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'[{}#@]', Punctuation),
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'([0-9]+)|(\'h)[0-9a-fA-F]+', Number.Hex),
            (r'([0-9]+)|(\'b)[0-1]+', Number.Hex),  # should be binary
            (r'([0-9]+)|(\'d)[0-9]+', Number.Integer),
            (r'([0-9]+)|(\'o)[0-7]+', Number.Oct),
            (r'\'[01xz]', Number),
            (r'\d+[Ll]?', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.;\']', Punctuation),
            (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant),
            (r'^\s*(package)(\s+)', bygroups(Keyword.Namespace, Text)),
            (r'^\s*(import)(\s+)', bygroups(Keyword.Namespace,
                                            Text), 'import'),
            (r'(always|always_comb|always_ff|always_latch|and|assign|automatic|'
             r'begin|break|buf|bufif0|bufif1|case|casex|casez|cmos|const|'
             r'continue|deassign|default|defparam|disable|do|edge|else|end|endcase|'
             r'endfunction|endgenerate|endselectedModulee|endpackage|endprimitive|endspecify|'
             r'endtable|endtask|enum|event|final|for|force|forever|fork|function|'
             r'generate|genvar|highz0|highz1|if|initial|inout|input|'
             r'integer|join|large|localparam|macroselectedModulee|medium|selectedModulee|'
             r'nand|negedge|nmos|nor|not|notif0|notif1|or|output|packed|'
             r'parameter|pmos|posedge|primitive|pull0|pull1|pulldown|pullup|rcmos|'
             r'ref|release|repeat|return|rnmos|rpmos|rtran|rtranif0|'
             r'rtranif1|scalared|signed|small|specify|specparam|strength|'
             r'string|strong0|strong1|struct|table|task|'
             r'tran|tranif0|tranif1|type|typedef|'
             r'unsigned|var|vectored|void|wait|weak0|weak1|while|'
             r'xnor|xor)\b', Keyword),
            (r'(`accelerate|`autoexpand_vectornets|`celldefine|`default_nettype|'
             r'`else|`elsif|`endcelldefine|`endif|`endprotect|`endprotected|'
             r'`expand_vectornets|`ifdef|`ifndef|`include|`noaccelerate|`noexpand_vectornets|'
             r'`noremove_gatenames|`noremove_netnames|`nounconnected_drive|'
             r'`protect|`protected|`remove_gatenames|`remove_netnames|`resetall|'
             r'`timescale|`unconnected_drive|`undef)\b', Comment.Preproc),
            (r'(\$cits|\$citstoreal|\$citstoshortreal|\$countdrivers|\$display|\$fclose|'
             r'\$fdisplay|\$finish|\$floor|\$fmonitor|\$fopen|\$fstrobe|\$fwrite|'
             r'\$getpattern|\$history|\$incsave|\$input|\$itor|\$key|\$list|\$log|'
             r'\$monitor|\$monitoroff|\$monitoron|\$nokey|\$nolog|\$printtimescale|'
             r'\$random|\$readmemb|\$readmemh|\$realtime|\$realtobits|\$reset|\$reset_count|'
             r'\$reset_value|\$restart|\$rtoi|\$save|\$scale|\$scope|\$shortrealtobits|'
             r'\$showscopes|\$showvariables|\$showvars|\$sreadmemb|\$sreadmemh|'
             r'\$stime|\$stop|\$strobe|\$time|\$timeformat|\$write)\b',
             Name.Builtin),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(byte|shortint|int|longint|interger|time|'
             r'bit|logic|reg|'
             r'supply0|supply1|tri|triand|trior|tri0|tri1|trireg|uwire|wire|wand|wor'
             r'shortreal|real|realtime)\b', Keyword.Type),
            ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'classname': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'import': [(r'[a-zA-Z0-9_:]+\*?', Name.Namespace, '#pop')]
    }

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            # Convention: mark all upper case names as constants
            if token is Name:
                if value.isupper():
                    token = Name.Constant
            yield index, token, value
Ejemplo n.º 13
0
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    *New in Pygments 0.9.*
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl']
    mimetypes = ['text/x-erlang']

    keywords = [
        'after',
        'begin',
        'case',
        'catch',
        'cond',
        'end',
        'fun',
        'if',
        'let',
        'of',
        'query',
        'receive',
        'try',
        'when',
    ]

    builtins = [  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code',
        'delete_selectedModulee', 'demonitor', 'disconnect_node', 'display',
        'element', 'erase', 'exit', 'float', 'float_to_list', 'fun_info',
        'fun_to_list', 'function_exported', 'garbage_collect', 'get',
        'get_keys', 'group_leader', 'hash', 'hd', 'integer_to_list',
        'iolist_to_binary', 'iolist_size', 'is_atom', 'is_binary',
        'is_bitstring', 'is_boolean', 'is_builtin', 'is_float', 'is_function',
        'is_integer', 'is_list', 'is_number', 'is_pid', 'is_port',
        'is_process_alive', 'is_record', 'is_reference', 'is_tuple', 'length',
        'link', 'list_to_atom', 'list_to_binary', 'list_to_bitstring',
        'list_to_existing_atom', 'list_to_float', 'list_to_integer',
        'list_to_pid', 'list_to_tuple', 'load_selectedModulee',
        'localtime_to_universaltime', 'make_tuple', 'md5', 'md5_final',
        'md5_update', 'memory', 'selectedModulee_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info',
        'purge_selectedModulee', 'put', 'read_timer', 'ref_to_list',
        'register', 'resume_process', 'round', 'send', 'send_after',
        'send_nosuspend', 'set_cookie', 'setelement', 'size', 'spawn',
        'spawn_link', 'spawn_monitor', 'spawn_opt', 'split_binary',
        'start_timer', 'statistics', 'suspend_process', 'system_flag',
        'system_info', 'system_monitor', 'system_profile', 'term_to_binary',
        'tl', 'trace', 'trace_delivered', 'trace_info', 'trace_pattern',
        'trunc', 'tuple_size', 'tuple_to_list', 'universaltime_to_localtime',
        'unlink', 'unregister', 'whereis'
    ]

    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
    word_operators = [
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div',
        'not', 'or', 'orelse', 'rem', 'xor'
    ]

    atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'

    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'

    macro_re = r'(?:' + variable_re + r'|' + atom_re + r')'

    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            ('(' + '|'.join(keywords) + r')\b', Keyword),
            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
            ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            (r'(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            (r'^(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            (r'[+-]?\d+', Number.Integer),
            (r'[+-]?\d+.\d+', Number.Float),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?' + macro_re, Name.Constant),
            (r'\$(?:' + escape_re + r'|\\[ %]|[^\\])', String.Char),
            (r'#' + atom_re + r'(:?\.' + atom_re + r')?', Name.Label),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
    }
Ejemplo n.º 14
0
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    *New in Pygments 0.8.*
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    reserved = [
        'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if',
        'in', 'infix[lr]?', 'instance', 'let', 'newtype', 'of', 'then', 'type',
        'where', '_'
    ]
    ascii = [
        'NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK', 'BEL', 'BS', 'HT', 'LF',
        'VT', 'FF', 'CR', 'S[OI]', 'DLE', 'DC[1-4]', 'NAK', 'SYN', 'ETB',
        'CAN', 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL'
    ]

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            #(r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bselectedModulee\b', Keyword.Reserved, 'selectedModulee'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r'[_a-z][\w\']*', Name),
            (r'[A-Z][\w\']*', Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function
             ),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)',
             Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*',
             Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text,
                      Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'selectedModulee': [
            (r'\s+', Text),
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'{-', Comment.Multiline, '#push'),
            (r'-}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
Ejemplo n.º 15
0
class CommonLispLexer(RegexLexer):
    """
    A Common Lisp lexer.

    *New in Pygments 0.9.*
    """
    name = 'Common Lisp'
    aliases = ['common-lisp', 'cl']
    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']

    flags = re.IGNORECASE | re.MULTILINE

    ### couple of useful regexes

    # characters that are not macro-characters and can be used to begin a symbol
    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
    constituent = nonmacro + '|[#.:]'
    terminated = r'(?=[ "()\'\n,;`])'  # whitespace or terminating macro characters

    ### symbol token, reverse-engineered from hyperspec
    # Take a deep breath...
    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)

    def __init__(self, **options):
        from plushcms.syntaxhighlight.pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
            BUILTIN_TYPES, BUILTIN_CLASSES
        self.builtin_function = BUILTIN_FUNCTIONS
        self.special_forms = SPECIAL_FORMS
        self.macros = MACROS
        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
        self.declarations = DECLARATIONS
        self.builtin_types = BUILTIN_TYPES
        self.builtin_classes = BUILTIN_CLASSES
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        for index, token, value in RegexLexer.get_tokens_unprocessed(
                self, text, stack):
            if token is Name.Variable:
                if value in self.builtin_function:
                    yield index, Name.Builtin, value
                    continue
                if value in self.special_forms:
                    yield index, Keyword, value
                    continue
                if value in self.macros:
                    yield index, Name.Builtin, value
                    continue
                if value in self.lambda_list_keywords:
                    yield index, Keyword, value
                    continue
                if value in self.declarations:
                    yield index, Keyword, value
                    continue
                if value in self.builtin_types:
                    yield index, Keyword.Type, value
                    continue
                if value in self.builtin_classes:
                    yield index, Name.Class, value
                    continue
            yield index, token, value

    tokens = {
        'root' : [
            ('', Text, 'body'),
        ],
        'multiline-comment' : [
            (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19)
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[^|#]+', Comment.Multiline),
            (r'[|#]', Comment.Multiline),
        ],
        'commented-form' : [
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop'),
            (r'[^()]+', Comment.Preproc),
        ],
        'body' : [
            # whitespace
            (r'\s+', Text),

            # single-line comment
            (r';.*$', Comment.Single),

            # multi-line comment
            (r'#\|', Comment.Multiline, 'multiline-comment'),

            # encoding comment (?)
            (r'#\d*Y.*$', Comment.Special),

            # strings and characters
            (r'"(\\.|[^"\\])*"', String),
            # quoting
            (r":" + symbol, String.Symbol),
            (r"'" + symbol, String.Symbol),
            (r"'", Operator),
            (r"`", Operator),

            # decimal numbers
            (r'[-+]?\d+\.?' + terminated, Number.Integer),
            (r'[-+]?\d+/\d+' + terminated, Number),
            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \
                + terminated, Number.Float),

            # sharpsign strings and characters
            (r"#\\." + terminated, String.Char),
            (r"#\\" + symbol, String.Char),

            # vector
            (r'#\(', Operator, 'body'),

            # bitstring
            (r'#\d*\*[01]*', Literal.Other),

            # uninterned symbol
            (r'#:' + symbol, String.Symbol),

            # read-time and load-time evaluation
            (r'#[.,]', Operator),

            # function shorthand
            (r'#\'', Name.Function),

            # binary rational
            (r'#[bB][+-]?[01]+(/[01]+)?', Number),

            # octal rational
            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),

            # hex rational
            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),

            # radix rational
            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),

            # complex
            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),

            # array
            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # structure
            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # path
            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),

            # reference
            (r'#\d+=', Operator),
            (r'#\d+#', Operator),

            # read-time comment
            (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'),

            # read-time conditional
            (r'#[+-]', Operator),

            # special operators that should have been parsed already
            (r'(,@|,|\.)', Operator),

            # special constants
            (r'(t|nil)' + terminated, Name.Constant),

            # functions and variables
            (r'\*' + symbol + '\*', Name.Variable.Global),
            (symbol, Name.Variable),

            # parentheses
            (r'\(', Punctuation, 'body'),
            (r'\)', Punctuation, '#pop'),
        ],
    }