Example #1
def do_indent(level):
    # Print paragraphs of indentation level >= {level} as String.Doc,
    # ignoring blank lines. Then return to 'root' state.
    return [
        (_rx_indent(level), String.Doc),
        (r'\s*\n', Text),
        default('#pop:2')
    ]
    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'<', r'>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

        for term, name in triquotes:
            states['sigils'] += [
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                    (name + '-end', name + '-intp')),
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                    (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, token, interpol=False)

        return states
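    # How the generated table is consumed is not shown in this snippet; in
    # Pygments' ElixirLexer the returned mapping is presumably merged into the
    # class-level token table (an assumption about the enclosing module), e.g.
    #     tokens.update(gen_elixir_sigil_rules())
    # so each terminator pair contributes its entry rules under 'sigils' plus
    # its own '<name>-intp' and '<name>-no-intp' states.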
Example #3
class Perl6Lexer(ExtendedRegexLexer):
    """
    For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code.

    .. versionadded:: 2.0
    """

    name = 'Perl6'
    aliases = ['perl6', 'pl6', 'raku']
    filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
                 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
                 '*.rakutest', '*.rakudoc']
    mimetypes = ['text/x-perl6', 'application/x-perl6']
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    PERL6_IDENTIFIER_RANGE = r"['\w:-]"

    PERL6_KEYWORDS = (
        #Phasers
        'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
        'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
        #Keywords
        'anon','augment','but','class','constant','default','does','else',
        'elsif','enum','for','gather','given','grammar','has','if','import',
        'is','let','loop','made','make','method','module','multi','my','need',
        'orwith','our','proceed','proto','repeat','require','return',
        'return-rw','returns','role','rule','state','sub','submethod','subset',
        'succeed','supersede','token','try','unit','unless','until','use',
        'when','while','with','without',
        #Traits
        'export','native','repr','required','rw','symbol',
    )

    PERL6_BUILTINS = (
        'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
        'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
        'actions','add','add_attribute','add_enum_value','add_fallback',
        'add_method','add_parent','add_private_method','add_role','add_trustee',
        'adverb','after','all','allocate','allof','allowed','alternative-names',
        'annotations','antipair','antipairs','any','anyof','app_lifetime',
        'append','arch','archname','args','arity','Array','asec','asech','asin',
        'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
        'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
        'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
        'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
        'await','backtrace','Bag','BagHash','bail-out','base','basename',
        'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
        'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
        'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
        'bytes','cache','callframe','calling-package','CALL-ME','callsame',
        'callwith','can','cancel','candidates','cando','can-ok','canonpath',
        'caps','caption','Capture','cas','catdir','categorize','categorize-list',
        'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
        'chars','chdir','child','child-name','child-typename','chmod','chomp',
        'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
        'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
        'column','comb','combinations','command','comment','compiler','Complex',
        'compose','compose_type','composer','condition','config',
        'configure_destroy','configure_type_checking','conj','connect',
        'constraints','construct','contains','contents','copy','cos','cosec',
        'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
        'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
        'Date','DateTime','day','daycount','day-of-month','day-of-week',
        'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
        'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
        'denominator','desc','DESTROY','destroyers','devnull','diag',
        'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
        'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
        'eager','earlier','elems','emit','enclosing','encode','encoder',
        'encoding','end','ends-with','enum_from_value','enum_value_list',
        'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
        'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
        'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
        'expmod','extension','f','fail','fails-like','fc','feature','file',
        'filename','find_method','find_method_qualified','finish','first','flat',
        'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
        'freeze','from','from-list','from-loop','from-posix','full',
        'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
        'grep','handle','handled','handles','hardware','has_accessor','Hash',
        'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
        'illegal','im','in','indent','index','indices','indir','infinite',
        'infix','infix:<+>','infix:<->','install_method_cache','Instant',
        'instead','Int','int-bounds','interval','in-timezone','invalid-str',
        'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
        'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
        'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
        'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
        'is-win','item','iterator','join','keep','kept','KERNELnames','key',
        'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
        'lazy','lc','leading','level','like','line','lines','link','List',
        'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
        'made','MAIN','make','Map','match','max','maxpairs','merge','message',
        'method','method_table','methods','migrate','min','minmax','minpairs',
        'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
        'move','mro','msb','multi','multiness','my','name','named','named_names',
        'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
        'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
        'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
        'nodemap','nok','none','norm','not','note','now','nude','Num',
        'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
        'ok','old','on-close','one','on-switch','open','opened','operation',
        'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
        'package','package-kind','package-name','packages','pair','pairs',
        'pairup','parameter','params','parent','parent-name','parents','parse',
        'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
        'payload','peer-host','peer-port','periods','perl','permutations','phaser',
        'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
        'polymod','pop','pos','positional','posix','postfix','postmatch',
        'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
        'print','printf','print-nl','print-to','private','private_method_table',
        'proc','produce','Promise','prompt','protect','pull-one','push',
        'push-all','push-at-least','push-exactly','push-until-lazy','put',
        'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
        're','read','readchars','readonly','ready','Real','reallocate','reals',
        'reason','rebless','receive','recv','redispatcher','redo','reduce',
        'rel2abs','relative','release','rename','repeated','replacement',
        'report','reserved','resolve','restore','result','resume','rethrow',
        'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
        'roll','rootdir','roots','rotate','rotor','round','roundrobin',
        'routine-type','run','rwx','s','samecase','samemark','samewith','say',
        'schedule-on','scheduler','scope','sec','sech','second','seek','self',
        'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
        'SetHash','set-instruments','setup_finalization','shape','share','shell',
        'shift','sibling','sigil','sign','signal','signals','signature','sin',
        'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
        'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
        'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
        'sort','source','source-package','spawn','SPEC','splice','split',
        'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
        'start','started','starts-with','status','stderr','stdout','Str',
        'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
        'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
        'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
        'target','target-name','tc','tclc','tell','then','throttle','throw',
        'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
        'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
        'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
        'type_captures','typename','uc','udp','uncaught_handler','unimatch',
        'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
        'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
        'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
        'variable','verbose-config','version','VMnames','volume','vow','w','wait',
        'warn','watch','watch-path','week','weekday-of-month','week-number',
        'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
        'whole-second','WHY','wordcase','words','workaround','wrap','write',
        'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',
    )

    PERL6_BUILTIN_CLASSES = (
        #Booleans
        'False','True',
        #Classes
        'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
        'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
        'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
        'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
        'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
        'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
        'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
        'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
        'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
        'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
        'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
        'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
        'Lock','Lock::Async','long','longlong','Macro','Map','Match',
        'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
        'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
        'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
        'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
        'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
        'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
        'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
        'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
        'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
        'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
        'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
        'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
        'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
        'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
        'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
        'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
        'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
        'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
        'Version','VM','Whatever','WhateverCode','WrapHandle'
    )

    PERL6_OPERATORS = (
        'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
        'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
        'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
        '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
        '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
        'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
        '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
        '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
        '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
        'not', '<==', '==>', '<<==', '==>>','unicmp',
    )

    # Perl 6 has a *lot* of possible bracketing characters;
    # this list was lifted from STD.pm6 (https://github.com/perl6/std)
    PERL6_BRACKETS = {
        u'\u0028': u'\u0029', u'\u003c': u'\u003e', u'\u005b': u'\u005d',
        u'\u007b': u'\u007d', u'\u00ab': u'\u00bb', u'\u0f3a': u'\u0f3b',
        u'\u0f3c': u'\u0f3d', u'\u169b': u'\u169c', u'\u2018': u'\u2019',
        u'\u201a': u'\u2019', u'\u201b': u'\u2019', u'\u201c': u'\u201d',
        u'\u201e': u'\u201d', u'\u201f': u'\u201d', u'\u2039': u'\u203a',
        u'\u2045': u'\u2046', u'\u207d': u'\u207e', u'\u208d': u'\u208e',
        u'\u2208': u'\u220b', u'\u2209': u'\u220c', u'\u220a': u'\u220d',
        u'\u2215': u'\u29f5', u'\u223c': u'\u223d', u'\u2243': u'\u22cd',
        u'\u2252': u'\u2253', u'\u2254': u'\u2255', u'\u2264': u'\u2265',
        u'\u2266': u'\u2267', u'\u2268': u'\u2269', u'\u226a': u'\u226b',
        u'\u226e': u'\u226f', u'\u2270': u'\u2271', u'\u2272': u'\u2273',
        u'\u2274': u'\u2275', u'\u2276': u'\u2277', u'\u2278': u'\u2279',
        u'\u227a': u'\u227b', u'\u227c': u'\u227d', u'\u227e': u'\u227f',
        u'\u2280': u'\u2281', u'\u2282': u'\u2283', u'\u2284': u'\u2285',
        u'\u2286': u'\u2287', u'\u2288': u'\u2289', u'\u228a': u'\u228b',
        u'\u228f': u'\u2290', u'\u2291': u'\u2292', u'\u2298': u'\u29b8',
        u'\u22a2': u'\u22a3', u'\u22a6': u'\u2ade', u'\u22a8': u'\u2ae4',
        u'\u22a9': u'\u2ae3', u'\u22ab': u'\u2ae5', u'\u22b0': u'\u22b1',
        u'\u22b2': u'\u22b3', u'\u22b4': u'\u22b5', u'\u22b6': u'\u22b7',
        u'\u22c9': u'\u22ca', u'\u22cb': u'\u22cc', u'\u22d0': u'\u22d1',
        u'\u22d6': u'\u22d7', u'\u22d8': u'\u22d9', u'\u22da': u'\u22db',
        u'\u22dc': u'\u22dd', u'\u22de': u'\u22df', u'\u22e0': u'\u22e1',
        u'\u22e2': u'\u22e3', u'\u22e4': u'\u22e5', u'\u22e6': u'\u22e7',
        u'\u22e8': u'\u22e9', u'\u22ea': u'\u22eb', u'\u22ec': u'\u22ed',
        u'\u22f0': u'\u22f1', u'\u22f2': u'\u22fa', u'\u22f3': u'\u22fb',
        u'\u22f4': u'\u22fc', u'\u22f6': u'\u22fd', u'\u22f7': u'\u22fe',
        u'\u2308': u'\u2309', u'\u230a': u'\u230b', u'\u2329': u'\u232a',
        u'\u23b4': u'\u23b5', u'\u2768': u'\u2769', u'\u276a': u'\u276b',
        u'\u276c': u'\u276d', u'\u276e': u'\u276f', u'\u2770': u'\u2771',
        u'\u2772': u'\u2773', u'\u2774': u'\u2775', u'\u27c3': u'\u27c4',
        u'\u27c5': u'\u27c6', u'\u27d5': u'\u27d6', u'\u27dd': u'\u27de',
        u'\u27e2': u'\u27e3', u'\u27e4': u'\u27e5', u'\u27e6': u'\u27e7',
        u'\u27e8': u'\u27e9', u'\u27ea': u'\u27eb', u'\u2983': u'\u2984',
        u'\u2985': u'\u2986', u'\u2987': u'\u2988', u'\u2989': u'\u298a',
        u'\u298b': u'\u298c', u'\u298d': u'\u298e', u'\u298f': u'\u2990',
        u'\u2991': u'\u2992', u'\u2993': u'\u2994', u'\u2995': u'\u2996',
        u'\u2997': u'\u2998', u'\u29c0': u'\u29c1', u'\u29c4': u'\u29c5',
        u'\u29cf': u'\u29d0', u'\u29d1': u'\u29d2', u'\u29d4': u'\u29d5',
        u'\u29d8': u'\u29d9', u'\u29da': u'\u29db', u'\u29f8': u'\u29f9',
        u'\u29fc': u'\u29fd', u'\u2a2b': u'\u2a2c', u'\u2a2d': u'\u2a2e',
        u'\u2a34': u'\u2a35', u'\u2a3c': u'\u2a3d', u'\u2a64': u'\u2a65',
        u'\u2a79': u'\u2a7a', u'\u2a7d': u'\u2a7e', u'\u2a7f': u'\u2a80',
        u'\u2a81': u'\u2a82', u'\u2a83': u'\u2a84', u'\u2a8b': u'\u2a8c',
        u'\u2a91': u'\u2a92', u'\u2a93': u'\u2a94', u'\u2a95': u'\u2a96',
        u'\u2a97': u'\u2a98', u'\u2a99': u'\u2a9a', u'\u2a9b': u'\u2a9c',
        u'\u2aa1': u'\u2aa2', u'\u2aa6': u'\u2aa7', u'\u2aa8': u'\u2aa9',
        u'\u2aaa': u'\u2aab', u'\u2aac': u'\u2aad', u'\u2aaf': u'\u2ab0',
        u'\u2ab3': u'\u2ab4', u'\u2abb': u'\u2abc', u'\u2abd': u'\u2abe',
        u'\u2abf': u'\u2ac0', u'\u2ac1': u'\u2ac2', u'\u2ac3': u'\u2ac4',
        u'\u2ac5': u'\u2ac6', u'\u2acd': u'\u2ace', u'\u2acf': u'\u2ad0',
        u'\u2ad1': u'\u2ad2', u'\u2ad3': u'\u2ad4', u'\u2ad5': u'\u2ad6',
        u'\u2aec': u'\u2aed', u'\u2af7': u'\u2af8', u'\u2af9': u'\u2afa',
        u'\u2e02': u'\u2e03', u'\u2e04': u'\u2e05', u'\u2e09': u'\u2e0a',
        u'\u2e0c': u'\u2e0d', u'\u2e1c': u'\u2e1d', u'\u2e20': u'\u2e21',
        u'\u3008': u'\u3009', u'\u300a': u'\u300b', u'\u300c': u'\u300d',
        u'\u300e': u'\u300f', u'\u3010': u'\u3011', u'\u3014': u'\u3015',
        u'\u3016': u'\u3017', u'\u3018': u'\u3019', u'\u301a': u'\u301b',
        u'\u301d': u'\u301e', u'\ufd3e': u'\ufd3f', u'\ufe17': u'\ufe18',
        u'\ufe35': u'\ufe36', u'\ufe37': u'\ufe38', u'\ufe39': u'\ufe3a',
        u'\ufe3b': u'\ufe3c', u'\ufe3d': u'\ufe3e', u'\ufe3f': u'\ufe40',
        u'\ufe41': u'\ufe42', u'\ufe43': u'\ufe44', u'\ufe47': u'\ufe48',
        u'\ufe59': u'\ufe5a', u'\ufe5b': u'\ufe5c', u'\ufe5d': u'\ufe5e',
        u'\uff08': u'\uff09', u'\uff1c': u'\uff1e', u'\uff3b': u'\uff3d',
        u'\uff5b': u'\uff5d', u'\uff5f': u'\uff60', u'\uff62': u'\uff63',
    }

    def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
        if boundary_regex_fragment is None:
            return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \
                suffix + r')\b'
        else:
            return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
                r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \
                boundary_regex_fragment + r')'
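    # Illustration (not part of the original source): with the Perl 6 identifier
    # fragment, the second branch builds a lookaround-guarded alternation, e.g.
    #     _build_word_match(('if', 'else'), PERL6_IDENTIFIER_RANGE)
    # produces roughly  (?<!['\w:-])(if|else)(?!['\w:-])  so keywords only match
    # when they are not embedded in a larger identifier.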

    def brackets_callback(token_class):
        def callback(lexer, match, context):
            groups = match.groupdict()
            opening_chars = groups['delimiter']
            n_chars = len(opening_chars)
            adverbs = groups.get('adverbs')

            closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0])
            text = context.text

            if closer is None:  # it's not a mirrored character, which means we
                                # just need to look for the next occurrence

                end_pos = text.find(opening_chars, match.start('delimiter') + n_chars)
            else:   # we need to look for the corresponding closing character,
                    # keep nesting in mind
                closing_chars = closer * n_chars
                nesting_level = 1

                search_pos = match.start('delimiter')

                while nesting_level > 0:
                    next_open_pos = text.find(opening_chars, search_pos + n_chars)
                    next_close_pos = text.find(closing_chars, search_pos + n_chars)

                    if next_close_pos == -1:
                        next_close_pos = len(text)
                        nesting_level = 0
                    elif next_open_pos != -1 and next_open_pos < next_close_pos:
                        nesting_level += 1
                        search_pos = next_open_pos
                    else:  # next_close_pos < next_open_pos
                        nesting_level -= 1
                        search_pos = next_close_pos

                end_pos = next_close_pos

            if end_pos < 0:     # if we didn't find a closer, just highlight the
                                # rest of the text in this class
                end_pos = len(text)

            if adverbs is not None and re.search(r':to\b', adverbs):
                heredoc_terminator = text[match.start('delimiter') + n_chars:end_pos]
                end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) +
                                        r'\s*$', text[end_pos:], re.MULTILINE)

                if end_heredoc:
                    end_pos += end_heredoc.end()
                else:
                    end_pos = len(text)

            yield match.start(), token_class, text[match.start():end_pos + n_chars]
            context.pos = end_pos + n_chars

        return callback

    def opening_brace_callback(lexer, match, context):
        stack = context.stack

        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()

        # if we encounter an opening brace and we're one level
        # below a token state, it means we need to increment
        # the nesting level for braces so we know later when
        # we should return to the token rules.
        if len(stack) > 2 and stack[-2] == 'token':
            context.perl6_token_nesting_level += 1

    def closing_brace_callback(lexer, match, context):
        stack = context.stack

        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()

        # if we encounter a free closing brace and we're one level
        # below a token state, it means we need to check the nesting
        # level to see if we need to return to the token state.
        if len(stack) > 2 and stack[-2] == 'token':
            context.perl6_token_nesting_level -= 1
            if context.perl6_token_nesting_level == 0:
                stack.pop()

    def embedded_perl6_callback(lexer, match, context):
        context.perl6_token_nesting_level = 1
        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()
        context.stack.append('root')

    # If you're modifying these rules, be careful if you need to process '{' or '}'
    # characters. We have special logic for processing these characters (due to the fact
    # that you can nest Perl 6 code in regex blocks), so if you need to process one of
    # them, make sure you also process the corresponding one!
    tokens = {
        'common': [
            (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
             brackets_callback(Comment.Multiline)),
            (r'#[^\n]*$', Comment.Single),
            (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
            (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
            (r'^=.*?\n\s*?\n', Comment.Multiline),
            (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
             bygroups(Keyword, Name), 'token-sym-brackets'),
            (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
             bygroups(Keyword, Name), 'pre-token'),
            # deal with a special case in the Perl 6 grammar (role q { ... })
            (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
            (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
            (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
             Name.Builtin),
            (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
            # copied from PerlLexer
            (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable),
            (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            (r'::\?\w+', Name.Variable.Global),
            (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable.Global),
            (r'\$(?:<.*?>)+', Name.Variable),
            (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
             r'(?P=first_char)*)', brackets_callback(String)),
            # copied from PerlLexer
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'm\w+(?=\()', Name),
            (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
             r'(?P=first_char)*)', brackets_callback(String.Regex)),
            (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
             String.Regex),
            (r'<[^\s=].*?\S>', String),
            (_build_word_match(PERL6_OPERATORS), Operator),
            (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        ],
        'root': [
            include('common'),
            (r'\{', opening_brace_callback),
            (r'\}', closing_brace_callback),
            (r'.+?', Text),
        ],
        'pre-token': [
            include('common'),
            (r'\{', Text, ('#pop', 'token')),
            (r'.+?', Text),
        ],
        'token-sym-brackets': [
            (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
             brackets_callback(Name), ('#pop', 'pre-token')),
            default(('#pop', 'pre-token')),
        ],
        'token': [
            (r'\}', Text, '#pop'),
            (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
            # make sure that quotes in character classes aren't treated as strings
            (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
            # make sure that '#' characters in quotes aren't treated as comments
            (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
            (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
            (r'#.*?$', Comment.Single),
            (r'\{', embedded_perl6_callback),
            ('.+?', String.Regex),
        ],
    }

    def analyse_text(text):
        def strip_pod(lines):
            in_pod = False
            stripped_lines = []

            for line in lines:
                if re.match(r'^=(?:end|cut)', line):
                    in_pod = False
                elif re.match(r'^=\w+', line):
                    in_pod = True
                elif not in_pod:
                    stripped_lines.append(line)

            return stripped_lines

        # XXX handle block comments
        lines = text.splitlines()
        lines = strip_pod(lines)
        text = '\n'.join(lines)

        if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
            return True

        saw_perl_decl = False
        rating = 0.0

        # check for my/our/has declarations
        if re.search(r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                     r"+\s+)?[$@%&(]", text):
            rating = 0.8
            saw_perl_decl = True

        for line in lines:
            line = re.sub('#.*', '', line)
            if re.match(r'^\s*$', line):
                continue

            # match v6; use v6; use v6.0; use v6.0.0;
            if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
                return True
            # match class, module, role, enum, grammar declarations
            class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
            if class_decl:
                if saw_perl_decl or class_decl.group('scope') is not None:
                    return True
                rating = 0.05
                continue
            break

        return rating

    def __init__(self, **options):
        super(Perl6Lexer, self).__init__(**options)
        self.encoding = options.get('encoding', 'utf-8')
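A minimal usage sketch for the lexer above (added for illustration; it assumes a
standard Pygments installation where Perl6Lexer is importable from
pygments.lexers, which is not shown in the snippet itself):

from pygments import highlight
from pygments.lexers import Perl6Lexer
from pygments.formatters import TerminalFormatter

# The q{...} literal exercises brackets_callback: nested braces are tracked,
# so the whole quoted span should come back as a single String token.
code = 'my $s = q{outer {inner} outer}; say $s;'
print(highlight(code, Perl6Lexer(), TerminalFormatter()))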
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('asm', 'auto', 'break', 'case', 'const', 'continue',
                 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                 'if', 'register', 'restricted', 'return', 'sizeof', 'static',
                 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'),
                suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    }
    c99_types = {
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t'
    }
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t'
    }

    def __init__(self, **options):
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.platformhighlighting = get_bool_opt(options,
                                                 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
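The get_tokens_unprocessed override above promotes plain Name tokens to
Keyword.Type when they appear in one of the configured type sets. A small check
through a concrete subclass (CLexer is the C subclass shipped with Pygments;
going through it is an assumption, since only the base class is shown here):

from pygments.lexers import CLexer
from pygments.token import Keyword

# With stdlibhighlighting left at its default of True, standard library
# typedefs should come back as Keyword.Type rather than plain Name.
tokens = list(CLexer().get_tokens('size_t n; ptrdiff_t d;'))
assert (Keyword.Type, 'size_t') in tokens
assert (Keyword.Type, 'ptrdiff_t') in tokens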
Example #5
class ValaLexer(RegexLexer):
    """
    For Vala source code with preprocessor directives.

    .. versionadded:: 1.1
    """
    name = 'Vala'
    aliases = ['vala', 'vapi']
    filenames = ['*.vala', '*.vapi']
    mimetypes = ['text/x-vala']

    tokens = {
        'whitespace': [
            (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            (r'[L@]?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(?s)""".*?"""', String),  # verbatim strings
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])',
             bygroups(Punctuation, Name.Decorator, Punctuation)),
            # TODO: "correctly" parse complex code attributes
            (r'(\[)(CCode|(?:Integer|Floating)Type)',
             bygroups(Punctuation, Name.Decorator)),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('as', 'base', 'break', 'case', 'catch', 'construct',
                 'continue', 'default', 'delete', 'do', 'else', 'enum',
                 'finally', 'for', 'foreach', 'get', 'if', 'in', 'is', 'lock',
                 'new', 'out', 'params', 'return', 'set', 'sizeof', 'switch',
                 'this', 'throw', 'try', 'typeof', 'while', 'yield'),
                suffix=r'\b'), Keyword),
            (words(('abstract', 'const', 'delegate', 'dynamic', 'ensures',
                    'extern', 'inline', 'internal', 'override', 'owned',
                    'private', 'protected', 'public', 'ref', 'requires',
                    'signal', 'static', 'throws', 'unowned', 'var', 'virtual',
                    'volatile', 'weak', 'yields'),
                   suffix=r'\b'), Keyword.Declaration),
            (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace,
                                                 Text), 'namespace'),
            (r'(class|errordomain|interface|struct)(\s+)',
             bygroups(Keyword.Declaration, Text), 'class'),
            (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)),
            # void is an actual keyword, others are in glib-2.0.vapi
            (words(
                ('void', 'bool', 'char', 'double', 'float', 'int', 'int8',
                 'int16', 'int32', 'int64', 'long', 'short', 'size_t',
                 'ssize_t', 'string', 'time_t', 'uchar', 'uint', 'uint8',
                 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'),
                suffix=r'\b'), Keyword.Type),
            (r'(true|false|null)\b', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'class': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')],
        'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')],
    }
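A quick way to exercise the lexer above, assuming a standard Pygments
installation in which the 'vala' alias declared here resolves to this class:

from pygments.lexers import get_lexer_by_name

lexer = get_lexer_by_name('vala')
# Print the (token type, text) pairs for a one-line sample.
for tokentype, value in lexer.get_tokens('int answer = 42;  // the answer\n'):
    print(tokentype, repr(value))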
Example #6
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    # Hexadecimal part in a hexadecimal integer/floating-point literal.
    # This includes matching the digit separators.
    _hexpart = r'[0-9a-fA-F](\'?[0-9a-fA-F])*'
    # Decimal part in a decimal integer/floating-point literal.
    # This includes matching the digit separators.
    _decpart = r'\d(\'?\d)*'
    # Integer literal suffix (e.g. 'ull' or 'll').
    _intsuffix = r'(([uU][lL]{0,2})|[lL]{1,2}[uU]?)?'

    # Identifier regex with C and C++ Universal Character Name (UCN) support.
    _ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'([LuU]|u8)?(")', bygroups(String.Affix, String), 'string'),
            (r"([LuU]|u8)?(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),

            # Hexadecimal floating-point literals (C11, C++17)
            (r'0[xX](' + _hexpart + r'\.' + _hexpart + r'|\.' + _hexpart +
             r'|' + _hexpart + r')[pP][+-]?' + _hexpart + r'[lL]?',
             Number.Float),
            (r'(-)?(' + _decpart + r'\.' + _decpart + r'|\.' + _decpart +
             r'|' + _decpart + r')[eE][+-]?' + _decpart + r'[fFlL]?',
             Number.Float),
            (r'(-)?((' + _decpart + r'\.(' + _decpart + r')?|\.' + _decpart +
             r')[fFlL]?)|(' + _decpart + r'[fFlL])', Number.Float),
            (r'(-)?0[xX]' + _hexpart + _intsuffix, Number.Hex),
            (r'(-)?0[bB][01](\'?[01])*' + _intsuffix, Number.Bin),
            (r'(-)?0(\'?[0-7])+' + _intsuffix, Number.Oct),
            (r'(-)?' + _decpart + _intsuffix, Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'(struct|union)(\s+)', bygroups(Keyword, Text), 'classname'),
            (words(('asm', 'auto', 'break', 'case', 'const', 'continue',
                    'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                    'if', 'register', 'restricted', 'return', 'sizeof',
                    'struct', 'static', 'switch', 'typedef', 'volatile',
                    'while', 'union', 'thread_local', 'alignas', 'alignof',
                    'static_assert', '_Pragma'),
                   suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'(' + _ident + r')(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (_ident, Name)
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:' + _ident + r'(?:[&*\s])+))'  # return arguments
                r'(' + _ident + r')'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:' + _ident + r'(?:[&*\s])+))'  # return arguments
                r'(' + _ident + r')'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (r'\}', Punctuation),
            (r'[{;]', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')("[^"]+")([^\n]*)',
             bygroups(Comment.Preproc, using(this), Comment.PreprocFile,
                      Comment.Single)),
            (r'(include)(' + _ws1 + r')(<[^>]+>)([^\n]*)',
             bygroups(Comment.Preproc, using(this), Comment.PreprocFile,
                      Comment.Single)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'classname': [
            (_ident, Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
            default('#pop')
        ]
    }

    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    }
    c99_types = {
        'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t',
        'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t',
        'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t',
        'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t',
        'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t',
        'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t',
        'uintmax_t'
    }
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t'
    }
    c11_atomic_types = {
        'atomic_bool', 'atomic_char', 'atomic_schar', 'atomic_uchar',
        'atomic_short', 'atomic_ushort', 'atomic_int', 'atomic_uint',
        'atomic_long', 'atomic_ulong', 'atomic_llong', 'atomic_ullong',
        'atomic_char16_t', 'atomic_char32_t', 'atomic_wchar_t',
        'atomic_int_least8_t', 'atomic_uint_least8_t', 'atomic_int_least16_t',
        'atomic_uint_least16_t', 'atomic_int_least32_t',
        'atomic_uint_least32_t', 'atomic_int_least64_t',
        'atomic_uint_least64_t', 'atomic_int_fast8_t', 'atomic_uint_fast8_t',
        'atomic_int_fast16_t', 'atomic_uint_fast16_t', 'atomic_int_fast32_t',
        'atomic_uint_fast32_t', 'atomic_int_fast64_t', 'atomic_uint_fast64_t',
        'atomic_intptr_t', 'atomic_uintptr_t', 'atomic_size_t',
        'atomic_ptrdiff_t', 'atomic_intmax_t', 'atomic_uintmax_t'
    }

    def __init__(self, **options):
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.c11highlighting = get_bool_opt(options, 'c11highlighting', True)
        self.platformhighlighting = get_bool_opt(options,
                                                 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.c11highlighting and value in self.c11_atomic_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
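The c11highlighting option added in this variant can be toggled per instance.
A sketch, again going through the concrete CLexer subclass (an assumption) and
assuming a Pygments version that ships the C11 atomic type set shown above:

from pygments.lexers import CLexer
from pygments.token import Keyword, Name

sample = 'atomic_int counter;'
with_c11 = list(CLexer().get_tokens(sample))
without_c11 = list(CLexer(c11highlighting=False).get_tokens(sample))
# Enabled (the default): 'atomic_int' is reported as Keyword.Type.
print((Keyword.Type, 'atomic_int') in with_c11)      # expected: True
# Disabled: it falls back to a plain Name token.
print((Name, 'atomic_int') in without_c11)           # expected: True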
Example #7
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns;
    # PHP uses a backslash as a namespace separator.
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'<\?(php)?', Comment.Preproc, 'php'),
            (r'[^<]+', Other),
            (r'<', Other)
        ],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter,
                      Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally)\b', Keyword),
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'0b[01]+', Number.Bin),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
                '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
                '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
                '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
                suffix=r'\b'),
             Name.Constant),
        ],
        'classname': [
            (_ident_inner, Name.Class, '#pop')
        ],
        'functionname': [
            include('magicfuncs'),
            (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'[^{$"\\]+', String.Double),
            (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
            (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
             String.Interpol),
            (r'(\{\$\{)(.*?)(\}\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\{)(\$.*?)(\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\$\{)(\S+)(\})',
             bygroups(String.Interpol, Name.Variable, String.Interpol)),
            (r'[${\\]', String.Double)
        ],
    }

    def __init__(self, **options):
        self.funcnamehighlighting = get_bool_opt(
            options, 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(
            options, 'disabledmodules', ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from pygments.lexers._php_builtins import MODULES
            for key, value in iteritems(MODULES):
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    def analyse_text(text):
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
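
A minimal usage sketch, not part of the listing above and assuming only a stock Pygments install, showing how the options handled in __init__ are usually passed in through the alias-based lexer lookup:

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name

php_snippet = 'echo strlen("hello");'

# startinline=True because the snippet has no opening <?php tag;
# disabledmodules suppresses builtin-function highlighting for the listed modules.
lexer = get_lexer_by_name('php', startinline=True,
                          funcnamehighlighting=True,
                          disabledmodules=['unknown'])
print(highlight(php_snippet, lexer, HtmlFormatter()))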
Exemple #8
0
class CythonLexer(RegexLexer):
    """
    For Pyrex and `Cython <http://cython.org>`_ source code.

    .. versionadded:: 1.1
    """

    name = 'Cython'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Name, Operator, Name,
                      Punctuation)),
            include('keywords'),
            (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
            # (should actually start a block with only cdefs)
            (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
            (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
            include('builtins'),
            include('backtick'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words(('assert', 'break', 'by', 'continue', 'ctypedef', 'del',
                    'elif', 'else', 'except', 'except?', 'exec', 'finally',
                    'for', 'fused', 'gil', 'global', 'if', 'include', 'lambda',
                    'nogil', 'pass', 'print', 'raise', 'return', 'try',
                    'while', 'yield', 'as', 'with'),
                   suffix=r'\b'), Keyword),
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            (words(
                ('__import__', 'abs', 'all', 'any', 'apply', 'basestring',
                 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable',
                 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex',
                 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset',
                 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input',
                 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                 'list', 'locals', 'long', 'map', 'max', 'min', 'next',
                 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range',
                 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round',
                 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str',
                 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode',
                 'unsigned', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
             r')\b', Name.Builtin.Pseudo),
            (words(
                ('ArithmeticError', 'AssertionError', 'AttributeError',
                 'BaseException', 'DeprecationWarning', 'EOFError',
                 'EnvironmentError', 'Exception', 'FloatingPointError',
                 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                 'KeyboardInterrupt', 'LookupError', 'MemoryError',
                 'NameError', 'NotImplemented', 'NotImplementedError',
                 'OSError', 'OverflowError', 'OverflowWarning',
                 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
                 'RuntimeWarning', 'StandardError', 'StopIteration',
                 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                 'TabError', 'TypeError', 'UnboundLocalError',
                 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
                 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
                 'ValueError', 'Warning', 'ZeroDivisionError'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Name.Exception),
        ],
        'numbers': [(r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
                    (r'0\d+', Number.Oct), (r'0[xX][a-fA-F0-9]+', Number.Hex),
                    (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer)],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            ('[a-zA-Z_]\w*', Name),
        ],
        'funcname': [('[a-zA-Z_]\w*', Name.Function, '#pop')],
        'cdef': [
            (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
            (r'(struct|enum|union|class)\b', Keyword),
            (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)', bygroups(Name.Function,
                                                          Text), '#pop'),
            (r'([a-zA-Z_]\w*)(\s*)(,)',
             bygroups(Name.Function, Text, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            (r'(?=["\'])', Text, '#pop'),
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [('[a-zA-Z_]\w*', Name.Class, '#pop')],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape':
        [(r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
          r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)],
        'strings': [
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [(r'\n', String)],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n',
             String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n",
             String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'tdqs': [(r'"""', String, '#pop'),
                 include('strings'),
                 include('nl')],
        'tsqs': [(r"'''", String, '#pop'),
                 include('strings'),
                 include('nl')],
    }
Exemple #9
0
class JavascriptLexer(RegexLexer):
    """
    For JavaScript source code.
    """

    name = 'JavaScript'
    aliases = ['js', 'javascript']
    filenames = [
        '*.js',
        '*.jsm',
    ]
    mimetypes = [
        'application/javascript',
        'application/x-javascript',
        'text/x-javascript',
        'text/javascript',
    ]

    flags = re.DOTALL | re.UNICODE | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            (r'\A#! ?/.*?\n', Comment.Hashbang),  # recognized by node.js
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            (JS_IDENT, Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
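
A small, hedged check of the 'slashstartsregex' idea used throughout this lexer: a leading slash is lexed as a regex literal only where an expression is expected, and as plain division otherwise (this uses the installed Pygments class of the same name):

from pygments.lexers import JavascriptLexer
from pygments.token import String

def token_pairs(code):
    return list(JavascriptLexer().get_tokens(code))

regex_case = token_pairs('var re = /ab+c/gi;\n')   # '/' in expression position
division_case = token_pairs('total = a / b;\n')    # '/' after a name: operator

assert any(tok is String.Regex for tok, _ in regex_case)
assert not any(tok is String.Regex for tok, _ in division_case)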
Exemple #10
0
class TypeScriptLexer(RegexLexer):
    """
    For `TypeScript <http://typescriptlang.org/>`_ source code.

    .. versionadded:: 1.6
    """
    name = 'TypeScript'
    aliases = ['ts', 'typescript']
    filenames = ['*.ts', '*.tsx']
    mimetypes = ['text/x-typescript']
    flags = re.DOTALL | re.MULTILINE
    # Higher priority than the TypoScriptLexer, as TypeScript is far more
    # common these days
    priority = 0.5
    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'tag': [
            (r'\s+', Text),
            (r'[,\|]', Punctuation),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator,
                                               Text), 'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text,
                                       Punctuation), '#pop'),
        ],
        'attr': [("{", Punctuation, 'root'), ('".*?"', String, '#pop'),
                 ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'),
                 default('#pop')],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'(<)([\w:.-]+)', bygroups(Punctuation, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            ("{", Punctuation, 'root'),
            ("}", Punctuation, '#pop'),
            (r'[(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[)\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|of|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            # Match stuff like: module name {...}
            (r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)',
             bygroups(Keyword.Reserved, Text, Name.Other,
                      Text), 'slashstartsregex'),
            # Match variable type keywords
            (r'\b(string|bool|number)\b', Keyword.Type),
            # Match stuff like: constructor
            (r'\b(constructor|declare|interface|as|AS)\b', Keyword.Reserved),
            # Match stuff like: super(argument, list)
            (r'(super)(\s*)(\([\w,?.$\s]+\s*\))',
             bygroups(Keyword.Reserved, Text), 'slashstartsregex'),
            # Match stuff like: function() {...}
            (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'),
            # Match stuff like: (function: return type)
            (r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)',
             bygroups(Name.Other, Text, Keyword.Type)),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'`', String.Backtick, 'interp'),
            # Match stuff like: Decorators
            (r'@\w+', Keyword.Declaration),
        ],
        # The 'interp*' rules match those in JavascriptLexer. Changes made
        # there should be reflected here as well.
        'interp': [
            (r'`', String.Backtick, '#pop'),
            (r'\\.', String.Backtick),
            (r'\$\{', String.Interpol, 'interp-inside'),
            (r'\$', String.Backtick),
            (r'[^`\\$]+', String.Backtick),
        ],
        'interp-inside': [
            # TODO: should this include single-line comments and allow nesting strings?
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
    }
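
A hedged sketch of what the 'interp' / 'interp-inside' states buy you for backtick template literals (again using the installed Pygments class; exact token values can vary slightly between versions):

from pygments.lexers import TypeScriptLexer
from pygments.token import String

tokens = list(TypeScriptLexer().get_tokens('let s = `sum: ${1 + 2}`;\n'))
assert any(tok is String.Interpol for tok, _ in tokens)   # the ${ ... } markers
assert any(tok is String.Backtick for tok, _ in tokens)   # the literal text parts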
Exemple #11
0
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            (r'[^\S\n]+', Text),
            (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'task|protected', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)',
             bygroups(Keyword.Reserved, Text, Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (words(
                ('Address', 'Byte', 'Boolean', 'Character', 'Controlled',
                 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type',
                 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer',
                 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive',
                 'Reference_Type', 'Short_Float', 'Short_Integer',
                 'Short_Short_Float', 'Short_Short_Integer', 'String',
                 'Wide_Character', 'Wide_String'),
                suffix=r'\b'), Keyword.Type),
            (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word),
            (r'generic|private', Keyword.Declaration),
            (r'package', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (words(
                ('abort', 'abs', 'abstract', 'accept', 'access', 'aliased',
                 'all', 'array', 'at', 'begin', 'body', 'case', 'constant',
                 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif',
                 'end', 'entry', 'exception', 'exit', 'interface', 'for',
                 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of',
                 'or', 'others', 'out', 'overriding', 'pragma', 'protected',
                 'raise', 'range', 'record', 'renames', 'requeue', 'return',
                 'reverse', 'select', 'separate', 'subtype', 'synchronized',
                 'task', 'tagged', 'terminate', 'then', 'type', 'until',
                 'when', 'while', 'xor'),
                prefix=r'\b',
                suffix=r'\b'), Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        'end': [
            ('(if|case|record|loop|select)', Keyword.Reserved),
            ('"[^"]+"|[\w.]+', Name.Function),
            ('\s+', Text),
            (';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'with|and|use', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, ('record_def')),
            (r'(null record)(;)', bygroups(Keyword.Reserved,
                                           Punctuation), '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)',
             bygroups(Keyword.Type, Text, Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            include('root'),
        ],
        'package': [
            ('body', Keyword.Declaration),
            ('is\s+new|renames', Keyword.Reserved),
            ('is', Keyword.Reserved, '#pop'),
            (';', Punctuation, '#pop'),
            ('\(', Punctuation, 'package_instantiation'),
            ('([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }
Exemple #12
0
class CoffeeScriptLexer(RegexLexer):
    """
    For `CoffeeScript`_ source code.

    .. _CoffeeScript: http://coffeescript.org

    .. versionadded:: 1.3
    """

    name = 'CoffeeScript'
    aliases = ['coffee-script', 'coffeescript', 'coffee']
    filenames = ['*.coffee']
    mimetypes = ['text/coffeescript']

    flags = re.DOTALL
    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'###[^#].*?###', Comment.Multiline),
            (r'#(?!##[^#]).*?\n', Comment.Single),
        ],
        'multilineregex': [
            (r'[^/#]+', String.Regex),
            (r'///([gim]+\b|\B)', String.Regex, '#pop'),
            (r'#\{', String.Interpol, 'interpoling_string'),
            (r'[/#]', String.Regex),
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'///', String.Regex, ('#pop', 'multilineregex')),
            (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            default('#pop'),
        ],
        'root': [
            # this next expr leads to infinite loops root -> slashstartsregex
            # (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|'
             r'\|\||\\(?=\n)|'
             r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'(?:\([^()]*\))?\s*[=-]>', Name.Function),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(?<![.$])(for|own|in|of|while|until|'
             r'loop|break|return|continue|'
             r'switch|when|then|if|unless|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|super|'
             r'extends|this|class|by)\b', Keyword, 'slashstartsregex'),
            (r'(?<![.$])(true|false|yes|no|on|off|null|'
             r'NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'eval|isFinite|isNaN|parseFloat|parseInt|document|window)\b',
             Name.Builtin),
            (r'[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable,
             'slashstartsregex'),
            (r'@[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable.Instance,
             'slashstartsregex'),
            (r'@', Name.Other, 'slashstartsregex'),
            (r'@?[$a-zA-Z_][\w$]*', Name.Other, 'slashstartsregex'),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            ('"""', String, 'tdqs'),
            ("'''", String, 'tsqs'),
            ('"', String, 'dqs'),
            ("'", String, 'sqs'),
        ],
        'strings': [
            (r'[^#\\\'"]+', String),
            # note that all coffee script strings are multi-line.
            # hashmarks, quotes and backslashes must be parsed one at a time
        ],
        'interpoling_string': [(r'\}', String.Interpol, "#pop"),
                               include('root')],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\.|\'', String),  # double-quoted strings don't need ' escapes
            (r'#\{', String.Interpol, "interpoling_string"),
            (r'#', String),
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r'#|\\.|"',
             String),  # single quoted strings don't need " escapses
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            (r'\\.|\'|"', String),  # no need to escape quotes in triple-string
            (r'#\{', String.Interpol, "interpoling_string"),
            (r'#', String),
            include('strings'),
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            (r'#|\\.|\'|"',
             String),  # no need to escape quotes in triple-strings
            include('strings')
        ],
    }
Exemple #13
0
class ObjectiveJLexer(RegexLexer):
    """
    For Objective-J source code with preprocessor directives.

    .. versionadded:: 1.3
    """

    name = 'Objective-J'
    aliases = ['objective-j', 'objectivej', 'obj-j', 'objj']
    filenames = ['*.j']
    mimetypes = ['text/x-objective-j']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)*'

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'root': [
            include('whitespace'),

            # function definition
            (r'^(' + _ws + r'[+-]' + _ws + r')([(a-zA-Z_].*?[^(])(' + _ws +
             r'\{)',
             bygroups(using(this), using(this, state='function_signature'),
                      using(this))),

            # class definition
            (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
             'classname'),
            (r'(@class|@protocol)(\s*)', bygroups(Keyword,
                                                  Text), 'forward_classname'),
            (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)),
            include('statements'),
            ('[{()}]', Punctuation),
            (';', Punctuation),
        ],
        'whitespace': [
            (r'(@import)(\s+)("(?:\\\\|\\"|[^"])*")',
             bygroups(Comment.Preproc, Text, String.Double)),
            (r'(@import)(\s+)(<(?:\\\\|\\>|[^>])*>)',
             bygroups(Comment.Preproc, Text, String.Double)),
            (r'(#(?:include|import))(\s+)("(?:\\\\|\\"|[^"])*")',
             bygroups(Comment.Preproc, Text, String.Double)),
            (r'(#(?:include|import))(\s+)(<(?:\\\\|\\>|[^>])*>)',
             bygroups(Comment.Preproc, Text, String.Double)),
            (r'#if\s+0', Comment.Preproc, 'if0'),
            (r'#', Comment.Preproc, 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'<!--', Comment),
        ],
        'slashstartsregex': [
            include('whitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop'),
        ],
        'badregex': [
            (r'\n', Text, '#pop'),
        ],
        'statements': [
            (r'(L|@)?"', String, 'string'),
            (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|'
             r'else|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'prototype|__proto__)\b', Keyword, 'slashstartsregex'),
            (r'(var|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(@selector|@private|@protected|@public|@encode|'
             r'@synchronized|@try|@throw|@catch|@finally|@end|@property|'
             r'@synthesize|@dynamic|@for|@accessors|new)\b', Keyword),
            (r'(int|long|float|short|double|char|unsigned|signed|void|'
             r'id|BOOL|bool|boolean|IBOutlet|IBAction|SEL|@outlet|@action)\b',
             Keyword.Type),
            (r'(self|super)\b', Name.Builtin),
            (r'(TRUE|YES|FALSE|NO|Nil|nil|NULL)\b', Keyword.Constant),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(ABS|ASIN|ACOS|ATAN|ATAN2|SIN|COS|TAN|EXP|POW|CEIL|FLOOR|ROUND|'
             r'MIN|MAX|RAND|SQRT|E|LN2|LN10|LOG2E|LOG10E|PI|PI2|PI_2|SQRT1_2|'
             r'SQRT2)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            (r'([$a-zA-Z_]\w*)(' + _ws + r')(?=\()',
             bygroups(Name.Function, using(this))),
            (r'[$a-zA-Z_]\w*', Name),
        ],
        'classname': [
            # interface definition that inherits
            (r'([a-zA-Z_]\w*)(' + _ws + r':' + _ws + r')([a-zA-Z_]\w*)?',
             bygroups(Name.Class, using(this), Name.Class), '#pop'),
            # interface definition for a category
            (r'([a-zA-Z_]\w*)(' + _ws + r'\()([a-zA-Z_]\w*)(\))',
             bygroups(Name.Class, using(this), Name.Label, Text), '#pop'),
            # simple interface / implementation
            (r'([a-zA-Z_]\w*)', Name.Class, '#pop'),
        ],
        'forward_classname': [
            (r'([a-zA-Z_]\w*)(\s*,\s*)', bygroups(Name.Class, Text), '#push'),
            (r'([a-zA-Z_]\w*)(\s*;?)', bygroups(Name.Class, Text), '#pop'),
        ],
        'function_signature': [
            include('whitespace'),

            # start of a selector w/ parameters
            (
                r'(\(' + _ws + r')'  # open paren
                r'([a-zA-Z_]\w+)'  # return type
                r'(' + _ws + r'\)' + _ws + r')'  # close paren
                r'([$a-zA-Z_]\w+' + _ws + r':)',  # function name
                bygroups(using(this), Keyword.Type, using(this),
                         Name.Function),
                'function_parameters'),

            # no-param function
            (
                r'(\(' + _ws + r')'  # open paren
                r'([a-zA-Z_]\w+)'  # return type
                r'(' + _ws + r'\)' + _ws + r')'  # close paren
                r'([$a-zA-Z_]\w+)',  # function name
                bygroups(using(this), Keyword.Type, using(this),
                         Name.Function),
                "#pop"),

            # no return type given, start of a selector w/ parameters
            (
                r'([$a-zA-Z_]\w+' + _ws + r':)',  # function name
                bygroups(Name.Function),
                'function_parameters'),

            # no return type given, no-param function
            (
                r'([$a-zA-Z_]\w+)',  # function name
                bygroups(Name.Function),
                "#pop"),
            default('#pop'),
        ],
        'function_parameters': [
            include('whitespace'),

            # parameters
            (
                r'(\(' + _ws + ')'  # open paren
                r'([^)]+)'  # type
                r'(' + _ws + r'\)' + _ws + r')'  # close paren
                r'([$a-zA-Z_]\w+)',  # param name
                bygroups(using(this), Keyword.Type, using(this), Text)),

            # one piece of a selector name
            (
                r'([$a-zA-Z_]\w+' + _ws + r':)',  # function name
                Name.Function),

            # smallest possible selector piece
            (r'(:)', Name.Function),

            # var args
            (r'(,' + _ws + r'\.\.\.)', using(this)),

            # param name
            (r'([$a-zA-Z_]\w+)', Text),
        ],
        'expression': [
            (r'([$a-zA-Z_]\w*)(\()', bygroups(Name.Function, Punctuation)),
            (r'(\))', Punctuation, "#pop"),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    def analyse_text(text):
        if re.search('^\s*@import\s+[<"]', text, re.MULTILINE):
            # special directive found in most Objective-J files
            return True
        return False
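
A hedged illustration of how analyse_text() feeds lexer guessing (the result can vary with the installed lexer set):

from pygments.lexers import guess_lexer

snippet = '@import <Foundation/CPObject.j>\n\n@implementation Demo : CPObject\n@end\n'
print(guess_lexer(snippet).name)  # typically: Objective-J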
Exemple #14
0
class LassoLexer(RegexLexer):
    """
    For `Lasso <http://www.lassosoft.com/>`_ source code, covering both Lasso 9
    syntax and LassoScript for Lasso 8.6 and earlier. For Lasso embedded in
    HTML, use the `LassoHtmlLexer`.

    Additional options accepted:

    `builtinshighlighting`
        If given and ``True``, highlight builtin types, traits, methods, and
        members (default: ``True``).
    `requiredelimiters`
        If given and ``True``, only highlight code between delimiters as Lasso
        (default: ``False``).

    .. versionadded:: 1.6
    """

    name = 'Lasso'
    aliases = ['lasso', 'lassoscript']
    filenames = ['*.lasso', '*.lasso[89]']
    alias_filenames = ['*.incl', '*.inc', '*.las']
    mimetypes = ['text/x-lasso']
    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE

    tokens = {
        'root': [
            (r'^#!.+lasso9\b', Comment.Preproc, 'lasso'),
            (r'\[no_square_brackets\]', Comment.Preproc, 'nosquarebrackets'),
            (r'\[noprocess\]', Comment.Preproc, ('delimiters', 'noprocess')),
            (r'\[', Comment.Preproc, ('delimiters', 'squarebrackets')),
            (r'<\?(LassoScript|lasso|=)', Comment.Preproc, ('delimiters',
                                                            'anglebrackets')),
            (r'<(!--.*?-->)?', Other, 'delimiters'),
            (r'\s+', Other),
            default(('delimiters', 'lassofile')),
        ],
        'delimiters': [
            (r'\[no_square_brackets\]', Comment.Preproc, 'nosquarebrackets'),
            (r'\[noprocess\]', Comment.Preproc, 'noprocess'),
            (r'\[', Comment.Preproc, 'squarebrackets'),
            (r'<\?(LassoScript|lasso|=)', Comment.Preproc, 'anglebrackets'),
            (r'<(!--.*?-->)?', Other),
            (r'[^[<]+', Other),
        ],
        'nosquarebrackets': [
            (r'<\?(LassoScript|lasso|=)', Comment.Preproc, 'anglebrackets'),
            (r'<', Other),
            (r'[^<]+', Other),
        ],
        'noprocess': [
            (r'\[/noprocess\]', Comment.Preproc, '#pop'),
            (r'\[', Other),
            (r'[^[]', Other),
        ],
        'squarebrackets': [
            (r'\]', Comment.Preproc, '#pop'),
            include('lasso'),
        ],
        'anglebrackets': [
            (r'\?>', Comment.Preproc, '#pop'),
            include('lasso'),
        ],
        'lassofile': [
            (r'\]|\?>', Comment.Preproc, '#pop'),
            include('lasso'),
        ],
        'whitespacecomments': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*\*!.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
        ],
        'lasso': [
            # whitespace/comments
            include('whitespacecomments'),

            # literals
            (r'\d*\.\d+(e[+-]?\d+)?', Number.Float),
            (r'0x[\da-f]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'([+-]?)(infinity|NaN)\b', bygroups(Operator, Number)),
            (r"'", String.Single, 'singlestring'),
            (r'"', String.Double, 'doublestring'),
            (r'`[^`]*`', String.Backtick),

            # names
            (r'\$[a-z_][\w.]*', Name.Variable),
            (r'#([a-z_][\w.]*|\d+)', Name.Variable.Instance),
            (r"(\.)('[a-z_][\w.]*')",
             bygroups(Name.Builtin.Pseudo, Name.Variable.Class)),
            (r"(self)(\s*->\s*)('[a-z_][\w.]*')",
             bygroups(Name.Builtin.Pseudo, Operator, Name.Variable.Class)),
            (r'(\.\.?)([a-z_][\w.]*(=(?!=))?)',
             bygroups(Name.Builtin.Pseudo, Name.Other.Member)),
            (r'(->\\?\s*|&\s*)([a-z_][\w.]*(=(?!=))?)',
             bygroups(Operator, Name.Other.Member)),
            (r'(self|inherited)\b', Name.Builtin.Pseudo),
            (r'-[a-z_][\w.]*', Name.Attribute),
            (r'::\s*[a-z_][\w.]*', Name.Label),
            (r'(error_(code|msg)_\w+|Error_AddError|Error_ColumnRestriction|'
             r'Error_DatabaseConnectionUnavailable|Error_DatabaseTimeout|'
             r'Error_DeleteError|Error_FieldRestriction|Error_FileNotFound|'
             r'Error_InvalidDatabase|Error_InvalidPassword|'
             r'Error_InvalidUsername|Error_ModuleNotFound|'
             r'Error_NoError|Error_NoPermission|Error_OutOfMemory|'
             r'Error_ReqColumnMissing|Error_ReqFieldMissing|'
             r'Error_RequiredColumnMissing|Error_RequiredFieldMissing|'
             r'Error_UpdateError)\b', Name.Exception),

            # definitions
            (r'(define)(\s+)([a-z_][\w.]*)(\s*=>\s*)(type|trait|thread)\b',
             bygroups(Keyword.Declaration, Text, Name.Class, Operator,
                      Keyword)),
            (r'(define)(\s+)([a-z_][\w.]*)(\s*->\s*)([a-z_][\w.]*=?|[-+*/%])',
             bygroups(Keyword.Declaration, Text, Name.Class, Operator,
                      Name.Function), 'signature'),
            (r'(define)(\s+)([a-z_][\w.]*)',
             bygroups(Keyword.Declaration, Text, Name.Function), 'signature'),
            (r'(public|protected|private|provide)(\s+)(([a-z_][\w.]*=?|[-+*/%])'
             r'(?=\s*\())', bygroups(Keyword, Text,
                                     Name.Function), 'signature'),
            (r'(public|protected|private|provide)(\s+)([a-z_][\w.]*)',
             bygroups(Keyword, Text, Name.Function)),

            # keywords
            (r'(true|false|none|minimal|full|all|void)\b', Keyword.Constant),
            (r'(local|var|variable|global|data(?=\s))\b', Keyword.Declaration),
            (r'(array|date|decimal|duration|integer|map|pair|string|tag|xml|'
             r'null|bytes|list|queue|set|stack|staticarray|tie)\b',
             Keyword.Type),
            (r'([a-z_][\w.]*)(\s+)(in)\b', bygroups(Name, Text, Keyword)),
            (r'(let|into)(\s+)([a-z_][\w.]*)', bygroups(Keyword, Text, Name)),
            (r'require\b', Keyword, 'requiresection'),
            (r'(/?)(Namespace_Using)\b',
             bygroups(Punctuation, Keyword.Namespace)),
            (r'(/?)(Cache|Database_Names|Database_SchemaNames|'
             r'Database_TableNames|Define_Tag|Define_Type|Email_Batch|'
             r'Encode_Set|HTML_Comment|Handle|Handle_Error|Header|If|Inline|'
             r'Iterate|LJAX_Target|Link|Link_CurrentAction|Link_CurrentGroup|'
             r'Link_CurrentRecord|Link_Detail|Link_FirstGroup|'
             r'Link_FirstRecord|Link_LastGroup|Link_LastRecord|Link_NextGroup|'
             r'Link_NextRecord|Link_PrevGroup|Link_PrevRecord|Log|Loop|'
             r'NoProcess|Output_None|Portal|Private|Protect|Records|Referer|'
             r'Referrer|Repeating|ResultSet|Rows|Search_Args|Search_Arguments|'
             r'Select|Sort_Args|Sort_Arguments|Thread_Atomic|Value_List|While|'
             r'Abort|Case|Else|If_Empty|If_False|If_Null|If_True|Loop_Abort|'
             r'Loop_Continue|Loop_Count|Params|Params_Up|Return|Return_Value|'
             r'Run_Children|SOAP_DefineTag|SOAP_LastRequest|SOAP_LastResponse|'
             r'Tag_Name|ascending|average|by|define|descending|do|equals|'
             r'frozen|group|handle_failure|import|in|into|join|let|match|max|'
             r'min|on|order|parent|protected|provide|public|require|returnhome|'
             r'skip|split_thread|sum|take|thread|to|trait|type|where|with|'
             r'yield|yieldhome)\b', bygroups(Punctuation, Keyword)),

            # other
            (r',', Punctuation, 'commamember'),
            (r'(and|or|not)\b', Operator.Word),
            (r'([a-z_][\w.]*)(\s*::\s*[a-z_][\w.]*)?(\s*=(?!=))',
             bygroups(Name, Name.Label, Operator)),
            (r'(/?)([\w.]+)', bygroups(Punctuation, Name.Other)),
            (r'(=)(n?bw|n?ew|n?cn|lte?|gte?|n?eq|n?rx|ft)\b',
             bygroups(Operator, Operator.Word)),
            (r':=|[-+*/%=<>&|!?\\]+', Operator),
            (r'[{}():;,@^]', Punctuation),
        ],
        'singlestring': [
            (r"'", String.Single, '#pop'),
            (r"[^'\\]+", String.Single),
            include('escape'),
            (r"\\", String.Single),
        ],
        'doublestring': [
            (r'"', String.Double, '#pop'),
            (r'[^"\\]+', String.Double),
            include('escape'),
            (r'\\', String.Double),
        ],
        'escape': [
            (r'\\(U[\da-f]{8}|u[\da-f]{4}|x[\da-f]{1,2}|[0-7]{1,3}|:[^:]+:|'
             r'[abefnrtv?"\'\\]|$)', String.Escape),
        ],
        'signature': [
            (r'=>', Operator, '#pop'),
            (r'\)', Punctuation, '#pop'),
            (r'[(,]', Punctuation, 'parameter'),
            include('lasso'),
        ],
        'parameter': [
            (r'\)', Punctuation, '#pop'),
            (r'-?[a-z_][\w.]*', Name.Attribute, '#pop'),
            (r'\.\.\.', Name.Builtin.Pseudo),
            include('lasso'),
        ],
        'requiresection': [
            (r'(([a-z_][\w.]*=?|[-+*/%])(?=\s*\())', Name, 'requiresignature'),
            (r'(([a-z_][\w.]*=?|[-+*/%])(?=(\s*::\s*[\w.]+)?\s*,))', Name),
            (r'[a-z_][\w.]*=?|[-+*/%]', Name, '#pop'),
            (r'::\s*[a-z_][\w.]*', Name.Label),
            (r',', Punctuation),
            include('whitespacecomments'),
        ],
        'requiresignature': [
            (r'(\)(?=(\s*::\s*[\w.]+)?\s*,))', Punctuation, '#pop'),
            (r'\)', Punctuation, '#pop:2'),
            (r'-?[a-z_][\w.]*', Name.Attribute),
            (r'::\s*[a-z_][\w.]*', Name.Label),
            (r'\.\.\.', Name.Builtin.Pseudo),
            (r'[(,]', Punctuation),
            include('whitespacecomments'),
        ],
        'commamember': [
            (r'(([a-z_][\w.]*=?|[-+*/%])'
             r'(?=\s*(\(([^()]*\([^()]*\))*[^)]*\)\s*)?(::[\w.\s]+)?=>))',
             Name.Function, 'signature'),
            include('whitespacecomments'),
            default('#pop'),
        ],
    }

    def __init__(self, **options):
        self.builtinshighlighting = get_bool_opt(options,
                                                 'builtinshighlighting', True)
        self.requiredelimiters = get_bool_opt(options, 'requiredelimiters',
                                              False)

        self._builtins = set()
        self._members = set()
        if self.builtinshighlighting:
            from pygments.lexers._lasso_builtins import BUILTINS, MEMBERS
            for key, value in iteritems(BUILTINS):
                self._builtins.update(value)
            for key, value in iteritems(MEMBERS):
                self._members.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        if self.requiredelimiters:
            stack.append('delimiters')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if (token is Name.Other and value.lower() in self._builtins
                    or token is Name.Other.Member
                    and value.lower().rstrip('=') in self._members):
                yield index, Name.Builtin, value
                continue
            yield index, token, value

    def analyse_text(text):
        rv = 0.0
        if 'bin/lasso9' in text:
            rv += 0.8
        if re.search(r'<\?lasso', text, re.I):
            rv += 0.4
        if re.search(r'local\(', text, re.I):
            rv += 0.4
        return rv
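
A hedged usage sketch for the two options documented in the class docstring, again via the alias-based lookup:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

lasso_page = "plain text outside the delimiters\n<?lasso 'hello world' ?>\n"

# requiredelimiters=True: text outside [...] / <?lasso ... ?> stays plain Other;
# builtinshighlighting toggles the builtin/member lookup done in __init__.
lexer = get_lexer_by_name('lasso', requiredelimiters=True,
                          builtinshighlighting=True)
print(highlight(lasso_page, lexer, TerminalFormatter()))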
Exemple #15
0
class TypeScriptLexer(RegexLexer):
    """
    For `TypeScript <http://typescriptlang.org/>`_ source code.

    .. versionadded:: 1.6
    """

    name = 'TypeScript'
    aliases = ['ts']
    filenames = ['*.ts']
    mimetypes = ['text/x-typescript']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration,
             'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            # Match stuff like: module name {...}
            (r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)',
             bygroups(Keyword.Reserved, Text, Name.Other,
                      Text), 'slashstartsregex'),
            # Match variable type keywords
            (r'\b(string|bool|number)\b', Keyword.Type),
            # Match stuff like: constructor
            (r'\b(constructor|declare|interface|as|AS)\b', Keyword.Reserved),
            # Match stuff like: super(argument, list)
            (r'(super)(\s*)(\([\w,?.$\s]+\s*\))',
             bygroups(Keyword.Reserved, Text), 'slashstartsregex'),
            # Match stuff like: function() {...}
            (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'),
            # Match stuff like: (function: return type)
            (r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)',
             bygroups(Name.Other, Text, Keyword.Type)),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
Exemple #16
0
class NedLexer(RegexLexer):
    name = 'ned'
    filenames = ['*.ned']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(true|false)\b', Name.Builtin),
            (r'(<-->|-->|<--|\.\.)', Keyword),
            (r'(bool|double|int|xml)\b', Keyword.Type),
            (r'(inout|input|output)\b', Keyword.Type),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(("channel", "channelinterface", "simple", "module",
                    "network", "moduleinterface"),
                   suffix=r'\b'), Keyword),
            (words(
                ("parameters", "gates", "types", "submodules", "connections"),
                suffix=r'\b'), Keyword),
            (words(("volatile", "allowunconnected", "extends", "for", "if",
                    "import", "like", "package", "property"),
                   suffix=r'\b'), Keyword),
            (words(("sizeof", "const", "default", "ask", "this", "index",
                    "typename", "xmldoc"),
                   suffix=r'\b'), Keyword),
            (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta",
                    "binomial", "cauchy", "ceil", "chi_square", "cos",
                    "erlang_k", "exp", "exponential", "fabs", "floor", "fmod",
                    "gamma_d", "genk_exponential", "genk_intuniform",
                    "genk_normal", "genk_truncnormal", "genk_uniform",
                    "geometric", "hypergeometric", "hypot", "intuniform",
                    "log", "log10", "lognormal", "max", "min", "negbinomial",
                    "normal", "pareto_shifted", "poisson", "pow", "simTime",
                    "sin", "sqrt", "student_t", "tan", "triang", "truncnormal",
                    "uniform", "weibull", "xml", "xmldoc"),
                   suffix=r'\b'), Name.Builtin),
            ('@[a-zA-Z_]\w*', Name.Builtin),
            ('[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ]
    }
Example #17
0
class ReachLexer(RegexLexer):
    """
    For Reach source code.
    """

    name = 'Reach'
    aliases = ['rsh', 'reach']
    filenames = ['*.rsh']

    # TODO?
    mimetypes = [
        'application/javascript', 'application/x-javascript',
        'text/x-javascript', 'text/javascript'
    ]

    flags = re.DOTALL | re.UNICODE | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gimuy]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'root': [
            (r'\A#! ?/.*?\n', Comment.Hashbang),  # recognized by node.js
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'(\.\d+|[0-9]+\.[0-9]*)([eE][-+]?[0-9]+)?', Number.Float),
            (r'0[bB][01]+', Number.Bin),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'\.\.\.', Punctuation),
            (r'=>', Operator),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator,
             'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (
                r'(for|in|while|do|break|return|continue|match|switch|case|'
                r'default|if|else|'
                r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|'
                # Reach ones
                r'interact|commit|exit|only|each|race|fork|paySpec|'
                r'parallelReduce|when|timeout|timeRemaining|throwTimeout|'
                r'publish|pay|declassify|transfer|'
                r'invariant|assert|require|assume|possible|unknowable|forall|'
                r'this|of)\b',
                Keyword,
                'slashstartsregex'),
            (
                r'(var|let|with|function|'
                # reach ones
                r'export|const|import|from|as'
                r')\b',
                Keyword.Declaration,
                'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|debugger|double|enum|'
             r'extends|final|float|goto|implements|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (
                r'(Array|Boolean|Date|Error|Function|Foldable|Math|netscape|'
                r'Number|Object|Packages|RegExp|String|Promise|Proxy|sun|decodeURI|'
                r'decodeURIComponent|encodeURI|encodeURIComponent|'
                r'Error|eval|isFinite|isNaN|isSafeInteger|parseFloat|parseInt|'
                # The reach ones
                r'UInt|Int|FixedPoint|Interval|IntervalType|Reach|App|Fun|'
                r'Null|Bool|Bytes|Address|Token|Tuple|Struct|Participant|'
                r'ParticipantClass|View|Data|Digest|Map|Set|Refine|Anybody|'
                r'deployMode|verifyArithmetic|verifyPerConnector|connectors|'
                r'ETH|ALGO|'
                r'deploy|balance|digest|implies|ensure|hasRandom|'
                r'makeCommitment|checkCommitment|closeTo|lastConsensusTime|'
                r'remote|'
                r'and|or|add|sub|mul|div|mod|lt|le|gt|ge|lsh|rsh|band|bior|bxor|eq|neq|'
                r'polyEq|polyNeq|typeEq|intEq|ite|typeOf|isType|is|'
                r'array|makeEnum|'
                r'document|this|window)\b',
                Name.Builtin),
            (JS_IDENT, Name.Other),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'`', String.Backtick, 'interp'),
        ],
        'interp': [
            (r'`', String.Backtick, '#pop'),
            (r'\\\\', String.Backtick),
            (r'\\`', String.Backtick),
            (r'\$\{', String.Interpol, 'interp-inside'),
            (r'\$', String.Backtick),
            (r'[^`\\$]+', String.Backtick),
        ],
        'interp-inside': [
            # TODO: should this include single-line comments and allow nesting strings?
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        # (\\\\|\\`|[^`])*`', String.Backtick),
    }
Example #18
0
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.
    """

    name = 'Python'
    aliases = ['python', 'py', 'sage']
    filenames = [
        '*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'
    ]
    mimetypes = ['text/x-python', 'application/x-python']

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Text, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Text, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace,
                                               Text), 'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace,
                                                 Text), 'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words(('assert', 'break', 'continue', 'del', 'elif', 'else',
                    'except', 'exec', 'finally', 'for', 'global', 'if',
                    'lambda', 'pass', 'print', 'raise', 'return', 'try',
                    'while', 'yield', 'yield from', 'as', 'with'),
                   suffix=r'\b'), Keyword),
        ],
        'builtins': [
            (words(
                ('__import__', 'abs', 'all', 'any', 'apply', 'basestring',
                 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable',
                 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex',
                 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset',
                 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input',
                 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                 'list', 'locals', 'long', 'map', 'max', 'min', 'next',
                 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range',
                 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round',
                 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str',
                 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars',
                 'xrange', 'zip'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words(
                ('ArithmeticError', 'AssertionError', 'AttributeError',
                 'BaseException', 'DeprecationWarning', 'EOFError',
                 'EnvironmentError', 'Exception', 'FloatingPointError',
                 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                 'KeyboardInterrupt', 'LookupError', 'MemoryError',
                 'NameError', 'NotImplemented', 'NotImplementedError',
                 'OSError', 'OverflowError', 'OverflowWarning',
                 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
                 'RuntimeWarning', 'StandardError', 'StopIteration',
                 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                 'TabError', 'TypeError', 'UnboundLocalError',
                 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
                 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
                 'ValueError', 'VMSError', 'Warning', 'WindowsError',
                 'ZeroDivisionError'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Name.Exception),
        ],
        'magicfuncs': [
            (words(
                ('__abs__', '__add__', '__and__', '__call__', '__cmp__',
                 '__coerce__', '__complex__', '__contains__', '__del__',
                 '__delattr__', '__delete__', '__delitem__', '__delslice__',
                 '__div__', '__divmod__', '__enter__', '__eq__', '__exit__',
                 '__float__', '__floordiv__', '__ge__', '__get__',
                 '__getattr__', '__getattribute__', '__getitem__',
                 '__getslice__', '__gt__', '__hash__', '__hex__', '__iadd__',
                 '__iand__', '__idiv__', '__ifloordiv__', '__ilshift__',
                 '__imod__', '__imul__', '__index__', '__init__',
                 '__instancecheck__', '__int__', '__invert__', '__iop__',
                 '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__',
                 '__itruediv__', '__ixor__', '__le__', '__len__', '__long__',
                 '__lshift__', '__lt__', '__missing__', '__mod__', '__mul__',
                 '__ne__', '__neg__', '__new__', '__nonzero__', '__oct__',
                 '__op__', '__or__', '__pos__', '__pow__', '__radd__',
                 '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                 '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__',
                 '__rmul__', '__rop__', '__ror__', '__rpow__', '__rrshift__',
                 '__rshift__', '__rsub__', '__rtruediv__', '__rxor__',
                 '__set__', '__setattr__', '__setitem__', '__setslice__',
                 '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                 '__unicode__', '__xor__'),
                suffix=r'\b'), Name.Function.Magic),
        ],
        'magicvars': [
            (words(
                ('__bases__', '__class__', '__closure__', '__code__',
                 '__defaults__', '__dict__', '__doc__', '__file__', '__func__',
                 '__globals__', '__metaclass__', '__module__', '__mro__',
                 '__name__', '__self__', '__slots__', '__weakref__'),
                suffix=r'\b'), Name.Variable.Magic),
        ],
        'numbers': [(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
                    (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
                    (r'0[0-7]+j?', Number.Oct), (r'0[bB][01]+', Number.Bin),
                    (r'0[xX][a-fA-F0-9]+', Number.Hex),
                    (r'\d+L', Number.Integer.Long),
                    (r'\d+j?', Number.Integer)],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')],
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        'stringescape':
        [(r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
          r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)],
        'strings-single':
        innerstring_rules(String.Single),
        'strings-double':
        innerstring_rules(String.Double),
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [(r'"""', String.Double, '#pop'),
                 include('strings-double'), (r'\n', String.Double)],
        'tsqs': [(r"'''", String.Single, '#pop'),
                 include('strings-single'), (r'\n', String.Single)],
    }

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?(2(\.\d)?)?') or \
            'import ' in text[:1000]
Example #19
0
class NotmuchLexer(RegexLexer):
    """
    For `Notmuch <https://notmuchmail.org/>`_ email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
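A minimal usage sketch for the `body_lexer` option documented above, assuming the class is registered under the ``notmuch`` alias as in stock Pygments; the dump string below is a hypothetical notmuch-show style message, not real output:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

# Hypothetical dump in the notmuch text format; "\f" is the form feed that
# the rules above expect before message{, header{, body{ and part{.
dump = (
    "\fmessage{ id:msg@example.org depth:0 filename:/tmp/mail\n"
    "\fheader{\n"
    "Subject: hello\n"
    "\fheader}\n"
    "\fbody{\n"
    "\fpart{ ID: 1, Content-type: text/plain\n"
    "print('hi')\n"
    "\fpart}\n"
    "\fbody}\n"
    "\fmessage}\n"
)

# body_lexer forces message bodies to be highlighted as Python instead of
# letting guess_lexer() pick a lexer from the body content.
lexer = get_lexer_by_name('notmuch', body_lexer='python')
print(highlight(dump, lexer, TerminalFormatter()))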
Example #20
0
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high-level language aimed
    at creating C extensions for PHP.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
Example #21
0
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human-readable serialization format used to represent LLVM's
    machine-specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes whose contents we don't highlight
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
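A short sketch of driving this lexer, assuming the ``llvm-mir`` alias is registered as in stock Pygments; the ``mir`` string is a hypothetical two-document input where the ``--- |`` block is delegated to LlvmLexer and the ``---`` document is handled by the MIR rules:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

# Hypothetical .mir contents: an embedded LLVM-IR document followed by an
# MIR document whose "body:" block is delegated to LlvmMirBodyLexer.
mir = (
    "--- |\n"
    "  define i32 @f() { ret i32 0 }\n"
    "...\n"
    "---\n"
    "name:            f\n"
    "alignment:       4\n"
    "tracksRegLiveness: true\n"
    "body:             |\n"
    "  bb.0:\n"
    "    RET 0\n"
    "...\n"
)

print(highlight(mir, get_lexer_by_name('llvm-mir'), TerminalFormatter()))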
Example #22
0
class GDScriptLexer(RegexLexer):
    """
    For `GDScript <https://www.godotengine.org>`_ source code.
    """

    name = 'GDScript'
    aliases = ['gdscript', 'gd']
    filenames = ['*.gd']
    mimetypes = ['text/x-gdscript', 'application/x-gdscript']

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Text, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Text, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|]', Operator),
            include('keywords'),
            (r'(func)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            include('builtins'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'do', 'var', 'const', 'extends', 'is', 'export', 'onready', 'tool',
                'static', 'setget', 'signal', 'breakpoint', 'switch', 'case',
                'assert', 'break', 'continue', 'elif', 'else', 'for', 'if',
                'pass', 'return', 'while', 'match'), suffix=r'\b'),
             Keyword),
        ],
        'builtins': [
            (words((
                'Color8', 'abs', 'acos', 'asin', 'assert', 'atan', 'atan2',
                'bytes2var', 'ceil', 'clamp', 'convert', 'cos', 'cosh',
                'db2linear', 'decimals', 'dectime', 'deg2rad', 'dict2inst',
                'ease', 'exp', 'floor', 'fmod', 'fposmod', 'funcref', 'hash',
                'inst2dict', 'instance_from_id', 'is_inf', 'is_nan', 'lerp',
                'linear2db', 'load', 'log', 'max', 'min', 'nearest_po2', 'pow',
                'preload', 'print', 'print_stack', 'printerr', 'printraw',
                'prints', 'printt', 'rad2deg', 'rand_range', 'rand_seed',
                'randf', 'randi', 'randomize', 'range', 'round', 'seed', 'sign',
                'sin', 'sinh', 'sqrt', 'stepify', 'str', 'str2var', 'tan',
                'tanh', 'type_exist', 'typeof', 'var2bytes', 'var2str',
                'weakref', 'yield'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|false|true'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'null', 'bool', 'int', 'float', 'String', 'Vector2', 'Vector3',
                'Matrix32', 'Array', 'ByteArray', 'IntArray', 'FloatArray',
                'StringArray', 'Vector2Array', 'Vector3Array', 'ColorArray',
                'Plane', 'Quat', 'AABB', 'Matrix3', 'Transform', 'Color',
                'Image', 'NodePath', 'RID', 'Object', 'InputEvent', 'Rect2'
            ), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin.Type),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+j?', Number.Integer)
        ],
        'name': [
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }
Example #23
0
class ThriftLexer(RegexLexer):
    """
    For `Thrift <https://thrift.apache.org/>`__ interface definitions.

    .. versionadded:: 2.1
    """
    name = 'Thrift'
    aliases = ['thrift']
    filenames = ['*.thrift']
    mimetypes = ['application/x-thrift']

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            (r'"', String.Double, combined('stringescape', 'dqs')),
            (r'\'', String.Single, combined('stringescape', 'sqs')),
            (r'(namespace)(\s+)', bygroups(Keyword.Namespace,
                                           Text.Whitespace), 'namespace'),
            (r'(enum|union|struct|service|exception)(\s+)',
             bygroups(Keyword.Declaration, Text.Whitespace), 'class'),
            (
                r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)'  # return arguments
                r'((?:[^\W\d]|\$)[\w$]*)'  # method name
                r'(\s*)(\()',  # signature start
                bygroups(using(this), Name.Function, Text, Operator)),
            include('keywords'),
            include('numbers'),
            (r'[&=]', Operator),
            (r'[:;,{}()<>\[\]]', Punctuation),
            (r'[a-zA-Z_](\.\w|\w)*', Name),
        ],
        'whitespace': [
            (r'\n', Text.Whitespace),
            (r'\s+', Text.Whitespace),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'//.*?\n', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
        ],
        'stringescape': [
            (r'\\([\\nrt"\'])', String.Escape),
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'[^\\"\n]+', String.Double),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r'[^\\\'\n]+', String.Single),
        ],
        'namespace': [
            (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'keywords': [
            (r'(async|oneway|extends|throws|required|optional)\b', Keyword),
            (r'(true|false)\b', Keyword.Constant),
            (r'(const|typedef)\b', Keyword.Declaration),
            (words(('cpp_namespace', 'cpp_include', 'cpp_type', 'java_package',
                    'cocoa_prefix', 'csharp_namespace', 'delphi_namespace',
                    'php_namespace', 'py_module', 'perl_package',
                    'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix',
                    'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace',
                    'xsd_attrs', 'include'),
                   suffix=r'\b'), Keyword.Namespace),
            (words(
                ('void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
                 'string', 'binary', 'map', 'list', 'set', 'slist', 'senum'),
                suffix=r'\b'), Keyword.Type),
            (words(
                ('BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__',
                 '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
                 'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin',
                 'break', 'case', 'catch', 'class', 'clone', 'continue',
                 'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic',
                 'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare',
                 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile',
                 'ensure', 'except', 'exec', 'finally', 'float', 'for',
                 'foreach', 'function', 'global', 'goto', 'if', 'implements',
                 'import', 'in', 'inline', 'instanceof', 'interface', 'is',
                 'lambda', 'module', 'native', 'new', 'next', 'nil', 'not',
                 'or', 'pass', 'public', 'print', 'private', 'protected',
                 'raise', 'redo', 'rescue', 'retry', 'register', 'return',
                 'self', 'sizeof', 'static', 'super', 'switch', 'synchronized',
                 'then', 'this', 'throw', 'transient', 'try', 'undef',
                 'unless', 'unsigned', 'until', 'use', 'var', 'virtual',
                 'volatile', 'when', 'while', 'with', 'xor', 'yield'),
                prefix=r'\b',
                suffix=r'\b'), Keyword.Reserved),
        ],
        'numbers': [
            (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)',
             Number.Float),
            (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
    }
Example #24
0
class CppLexer(CFamilyLexer):
    """
    For C++ source code with preprocessor directives.

    Additional options accepted:

    `stdlibhighlighting`
        Highlight common types found in the C/C++ standard library (e.g. `size_t`).
        (default: ``True``).

    `c99highlighting`
        Highlight common types found in the C99 standard library (e.g. `int8_t`).
        Actually, this includes all fixed-width integer types.
        (default: ``True``).

    `c11highlighting`
        Highlight atomic types found in the C11 standard library (e.g. `atomic_bool`).
        (default: ``True``).

    `platformhighlighting`
        Highlight common types found in the platform SDK headers (e.g. `clockid_t` on Linux).
        (default: ``True``).
    """
    name = 'C++'
    aliases = ['cpp', 'c++']
    filenames = [
        '*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx',
        '*.C', '*.H', '*.cp', '*.CPP'
    ]
    mimetypes = ['text/x-c++hdr', 'text/x-c++src']
    priority = 0.1

    tokens = {
        'statements': [
            (r'(class|concept|typename)(\s+)', bygroups(Keyword,
                                                        Text), 'classname'),
            (words(
                ('catch', 'const_cast', 'delete', 'dynamic_cast', 'explicit',
                 'export', 'friend', 'mutable', 'namespace', 'new', 'operator',
                 'private', 'protected', 'public', 'reinterpret_cast', 'class',
                 'restrict', 'static_cast', 'template', 'this', 'throw',
                 'throws', 'try', 'typeid', 'using', 'virtual', 'constexpr',
                 'nullptr', 'concept', 'decltype', 'noexcept', 'override',
                 'final', 'constinit', 'consteval', 'co_await', 'co_return',
                 'co_yield', 'requires', 'import', 'module', 'typename'),
                suffix=r'\b'), Keyword),
            (r'char(16_t|32_t|8_t)\b', Keyword.Type),
            (r'(enum)(\s+)', bygroups(Keyword, Text), 'enumname'),

            # C++11 raw strings
            (r'((?:[LuU]|u8)?R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")',
             bygroups(String.Affix, String, String.Delimiter, String.Delimiter,
                      String, String.Delimiter, String)),
            inherit,
        ],
        'root': [
            inherit,
            # C++ Microsoft-isms
            (words(('virtual_inheritance', 'uuidof', 'super',
                    'single_inheritance', 'multiple_inheritance', 'interface',
                    'event'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            # Offload C++ extensions, http://offload.codeplay.com/
            (r'__(offload|blockingoffload|outer)\b', Keyword.Pseudo),
        ],
        'enumname': [
            include('whitespace'),
            # 'enum class' and 'enum struct' C++11 support
            (words(('class', 'struct'), suffix=r'\b'), Keyword),
            (CFamilyLexer._ident, Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
            default('#pop')
        ]
    }

    def analyse_text(text):
        if re.search('#include <[a-z_]+>', text):
            return 0.2
        if re.search('using namespace ', text):
            return 0.4
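A brief sketch showing how the options listed in the docstring are passed; they are ordinary keyword arguments to the lexer constructor (assuming the stock Pygments ``CppLexer``):

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import CppLexer

code = (
    "#include <cstdint>\n"
    "int8_t add(int8_t a, int8_t b) { return a + b; }\n"
)

# Disable C99 fixed-width type highlighting; keep the other defaults.
lexer = CppLexer(c99highlighting=False, stdlibhighlighting=True)
print(highlight(code, lexer, HtmlFormatter()))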
Example #25
0
class FactorLexer(RegexLexer):
    """
    Lexer for the `Factor <http://factorcode.org>`_ language.

    .. versionadded:: 1.4
    """
    name = 'Factor'
    aliases = ['factor']
    filenames = ['*.factor']
    mimetypes = ['text/x-factor']

    flags = re.MULTILINE | re.UNICODE

    builtin_kernel = words(
        ('-rot', '2bi', '2bi@', '2bi*', '2curry', '2dip', '2drop', '2dup',
         '2keep', '2nip', '2over', '2tri', '2tri@', '2tri*', '3bi', '3curry',
         '3dip', '3drop', '3dup', '3keep', '3tri', '4dip', '4drop', '4dup',
         '4keep', '<wrapper>', '=', '>boolean', 'clone', '?', '?execute',
         '?if', 'and', 'assert', 'assert=', 'assert?', 'bi', 'bi-curry',
         'bi-curry@', 'bi-curry*', 'bi@', 'bi*', 'boa', 'boolean', 'boolean?',
         'both?', 'build', 'call', 'callstack', 'callstack>array',
         'callstack?', 'clear', '(clone)', 'compose', 'compose?', 'curry',
         'curry?', 'datastack', 'die', 'dip', 'do', 'drop', 'dup', 'dupd',
         'either?', 'eq?', 'equal?', 'execute', 'hashcode', 'hashcode*',
         'identity-hashcode', 'identity-tuple', 'identity-tuple?', 'if', 'if*',
         'keep', 'loop', 'most', 'new', 'nip', 'not', 'null', 'object', 'or',
         'over', 'pick', 'prepose', 'retainstack', 'rot', 'same?', 'swap',
         'swapd', 'throw', 'tri', 'tri-curry', 'tri-curry@', 'tri-curry*',
         'tri@', 'tri*', 'tuple', 'tuple?', 'unless', 'unless*', 'until',
         'when', 'when*', 'while', 'with', 'wrapper', 'wrapper?', 'xor'),
        suffix=r'\s')

    builtin_assocs = words(
        ('2cache', '<enum>', '>alist', '?at', '?of', 'assoc', 'assoc-all?',
         'assoc-any?', 'assoc-clone-like', 'assoc-combine', 'assoc-diff',
         'assoc-diff!', 'assoc-differ', 'assoc-each', 'assoc-empty?',
         'assoc-filter', 'assoc-filter!', 'assoc-filter-as', 'assoc-find',
         'assoc-hashcode', 'assoc-intersect', 'assoc-like', 'assoc-map',
         'assoc-map-as', 'assoc-partition', 'assoc-refine', 'assoc-size',
         'assoc-stack', 'assoc-subset?', 'assoc-union', 'assoc-union!',
         'assoc=', 'assoc>map', 'assoc?', 'at', 'at+', 'at*', 'cache',
         'change-at', 'clear-assoc', 'delete-at', 'delete-at*', 'enum',
         'enum?', 'extract-keys', 'inc-at', 'key?', 'keys', 'map>assoc',
         'maybe-set-at', 'new-assoc', 'of', 'push-at', 'rename-at', 'set-at',
         'sift-keys', 'sift-values', 'substitute', 'unzip', 'value-at',
         'value-at*', 'value?', 'values', 'zip'),
        suffix=r'\s')

    builtin_combinators = words(
        ('2cleave', '2cleave>quot', '3cleave', '3cleave>quot', '4cleave',
         '4cleave>quot', 'alist>quot', 'call-effect', 'case', 'case-find',
         'case>quot', 'cleave', 'cleave>quot', 'cond', 'cond>quot',
         'deep-spread>quot', 'execute-effect', 'linear-case-quot', 'no-case',
         'no-case?', 'no-cond', 'no-cond?', 'recursive-hashcode',
         'shallow-spread>quot', 'spread', 'to-fixed-point', 'wrong-values',
         'wrong-values?'),
        suffix=r'\s')

    builtin_math = words(
        ('-', '/', '/f', '/i', '/mod', '2/', '2^', '<', '<=', '<fp-nan>', '>',
         '>=', '>bignum', '>fixnum', '>float', '>integer', '(all-integers?)',
         '(each-integer)', '(find-integer)', '*', '+', '?1+', 'abs', 'align',
         'all-integers?', 'bignum', 'bignum?', 'bit?', 'bitand', 'bitnot',
         'bitor', 'bits>double', 'bits>float', 'bitxor', 'complex', 'complex?',
         'denominator', 'double>bits', 'each-integer', 'even?', 'find-integer',
         'find-last-integer', 'fixnum', 'fixnum?', 'float', 'float>bits',
         'float?', 'fp-bitwise=', 'fp-infinity?', 'fp-nan-payload', 'fp-nan?',
         'fp-qnan?', 'fp-sign', 'fp-snan?', 'fp-special?', 'if-zero',
         'imaginary-part', 'integer', 'integer>fixnum',
         'integer>fixnum-strict', 'integer?', 'log2', 'log2-expects-positive',
         'log2-expects-positive?', 'mod', 'neg', 'neg?', 'next-float',
         'next-power-of-2', 'number', 'number=', 'number?', 'numerator',
         'odd?', 'out-of-fixnum-range', 'out-of-fixnum-range?', 'power-of-2?',
         'prev-float', 'ratio', 'ratio?', 'rational', 'rational?', 'real',
         'real-part', 'real?', 'recip', 'rem', 'sgn', 'shift', 'sq', 'times',
         'u<', 'u<=', 'u>', 'u>=', 'unless-zero', 'unordered?', 'when-zero',
         'zero?'),
        suffix=r'\s')

    builtin_sequences = words(
        ('1sequence', '2all?', '2each', '2map', '2map-as', '2map-reduce',
         '2reduce', '2selector', '2sequence', '3append', '3append-as', '3each',
         '3map', '3map-as', '3sequence', '4sequence', '<repetition>',
         '<reversed>', '<slice>', '?first', '?last', '?nth', '?second',
         '?set-nth', 'accumulate', 'accumulate!', 'accumulate-as', 'all?',
         'any?', 'append', 'append!', 'append-as', 'assert-sequence',
         'assert-sequence=', 'assert-sequence?', 'binary-reduce',
         'bounds-check', 'bounds-check?', 'bounds-error', 'bounds-error?',
         'but-last', 'but-last-slice', 'cartesian-each', 'cartesian-map',
         'cartesian-product', 'change-nth', 'check-slice', 'check-slice-error',
         'clone-like', 'collapse-slice', 'collector', 'collector-for',
         'concat', 'concat-as', 'copy', 'count', 'cut', 'cut-slice', 'cut*',
         'delete-all', 'delete-slice', 'drop-prefix', 'each', 'each-from',
         'each-index', 'empty?', 'exchange', 'filter', 'filter!', 'filter-as',
         'find', 'find-from', 'find-index', 'find-index-from', 'find-last',
         'find-last-from', 'first', 'first2', 'first3', 'first4', 'flip',
         'follow', 'fourth', 'glue', 'halves', 'harvest', 'head', 'head-slice',
         'head-slice*', 'head*', 'head?', 'if-empty', 'immutable',
         'immutable-sequence', 'immutable-sequence?', 'immutable?', 'index',
         'index-from', 'indices', 'infimum', 'infimum-by', 'insert-nth',
         'interleave', 'iota', 'iota-tuple', 'iota-tuple?', 'join', 'join-as',
         'last', 'last-index', 'last-index-from', 'length', 'lengthen', 'like',
         'longer', 'longer?', 'longest', 'map', 'map!', 'map-as', 'map-find',
         'map-find-last', 'map-index', 'map-integers', 'map-reduce', 'map-sum',
         'max-length', 'member-eq?', 'member?', 'midpoint@', 'min-length',
         'mismatch', 'move', 'new-like', 'new-resizable', 'new-sequence',
         'non-negative-integer-expected', 'non-negative-integer-expected?',
         'nth', 'nths', 'pad-head', 'pad-tail', 'padding', 'partition', 'pop',
         'pop*', 'prefix', 'prepend', 'prepend-as', 'produce', 'produce-as',
         'product', 'push', 'push-all', 'push-either', 'push-if', 'reduce',
         'reduce-index', 'remove', 'remove!', 'remove-eq', 'remove-eq!',
         'remove-nth', 'remove-nth!', 'repetition', 'repetition?',
         'replace-slice', 'replicate', 'replicate-as', 'rest', 'rest-slice',
         'reverse', 'reverse!', 'reversed', 'reversed?', 'second', 'selector',
         'selector-for', 'sequence', 'sequence-hashcode', 'sequence=',
         'sequence?', 'set-first', 'set-fourth', 'set-last', 'set-length',
         'set-nth', 'set-second', 'set-third', 'short', 'shorten', 'shorter',
         'shorter?', 'shortest', 'sift', 'slice', 'slice-error',
         'slice-error?', 'slice?', 'snip', 'snip-slice', 'start', 'start*',
         'subseq', 'subseq?', 'suffix', 'suffix!', 'sum', 'sum-lengths',
         'supremum', 'supremum-by', 'surround', 'tail', 'tail-slice',
         'tail-slice*', 'tail*', 'tail?', 'third', 'trim', 'trim-head',
         'trim-head-slice', 'trim-slice', 'trim-tail', 'trim-tail-slice',
         'unclip', 'unclip-last', 'unclip-last-slice', 'unclip-slice',
         'unless-empty', 'virtual-exemplar', 'virtual-sequence',
         'virtual-sequence?', 'virtual@', 'when-empty'),
        suffix=r'\s')

    builtin_namespaces = words(
        ('+@', 'change', 'change-global', 'counter', 'dec', 'get',
         'get-global', 'global', 'inc', 'init-namespaces', 'initialize',
         'is-global', 'make-assoc', 'namespace', 'namestack', 'off', 'on',
         'set', 'set-global', 'set-namestack', 'toggle', 'with-global',
         'with-scope', 'with-variable', 'with-variables'),
        suffix=r'\s')

    builtin_arrays = words(
        ('1array', '2array', '3array', '4array', '<array>', '>array', 'array',
         'array?', 'pair', 'pair?', 'resize-array'),
        suffix=r'\s')

    builtin_io = words(
        ('(each-stream-block-slice)', '(each-stream-block)',
         '(stream-contents-by-block)', '(stream-contents-by-element)',
         '(stream-contents-by-length-or-block)', '(stream-contents-by-length)',
         '+byte+', '+character+', 'bad-seek-type', 'bad-seek-type?', 'bl',
         'contents', 'each-block', 'each-block-size', 'each-block-slice',
         'each-line', 'each-morsel', 'each-stream-block',
         'each-stream-block-slice', 'each-stream-line', 'error-stream',
         'flush', 'input-stream', 'input-stream?', 'invalid-read-buffer',
         'invalid-read-buffer?', 'lines', 'nl', 'output-stream',
         'output-stream?', 'print', 'read', 'read-into', 'read-partial',
         'read-partial-into', 'read-until', 'read1', 'readln', 'seek-absolute',
         'seek-absolute?', 'seek-end', 'seek-end?', 'seek-input',
         'seek-output', 'seek-relative', 'seek-relative?', 'stream-bl',
         'stream-contents', 'stream-contents*', 'stream-copy', 'stream-copy*',
         'stream-element-type', 'stream-flush', 'stream-length',
         'stream-lines', 'stream-nl', 'stream-print', 'stream-read',
         'stream-read-into', 'stream-read-partial', 'stream-read-partial-into',
         'stream-read-partial-unsafe', 'stream-read-unsafe',
         'stream-read-until', 'stream-read1', 'stream-readln', 'stream-seek',
         'stream-seekable?', 'stream-tell', 'stream-write', 'stream-write1',
         'tell-input', 'tell-output', 'with-error-stream',
         'with-error-stream*', 'with-error>output',
         'with-input-output+error-streams', 'with-input-output+error-streams*',
         'with-input-stream', 'with-input-stream*', 'with-output-stream',
         'with-output-stream*', 'with-output>error',
         'with-output+error-stream', 'with-output+error-stream*',
         'with-streams', 'with-streams*', 'write', 'write1'),
        suffix=r'\s')

    builtin_strings = words(('1string', '<string>', '>string', 'resize-string',
                             'string', 'string?'),
                            suffix=r'\s')

    builtin_vectors = words(
        ('1vector', '<vector>', '>vector', '?push', 'vector', 'vector?'),
        suffix=r'\s')

    builtin_continuations = words(
        ('<condition>', '<continuation>', '<restart>', 'attempt-all',
         'attempt-all-error', 'attempt-all-error?', 'callback-error-hook',
         'callcc0', 'callcc1', 'cleanup', 'compute-restarts', 'condition',
         'condition?', 'continuation', 'continuation?', 'continue',
         'continue-restart', 'continue-with', 'current-continuation', 'error',
         'error-continuation', 'error-in-thread', 'error-thread', 'ifcc',
         'ignore-errors', 'in-callback?', 'original-error', 'recover',
         'restart', 'restart?', 'restarts', 'rethrow', 'rethrow-restarts',
         'return', 'return-continuation', 'thread-error-hook',
         'throw-continue', 'throw-restarts', 'with-datastack', 'with-return'),
        suffix=r'\s')

    tokens = {
        'root': [
            # factor allows a file to start with a shebang
            (r'#!.*$', Comment.Preproc),
            default('base'),
        ],
        'base': [
            (r'\s+', Text),

            # defining words
            (r'((?:MACRO|MEMO|TYPED)?:[:]?)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function)),
            (r'(M:[:]?)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Function)),
            (r'(C:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Class)),
            (r'(GENERIC:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)),
            (r'(HOOK:|GENERIC#)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Function)),
            (r'\(\s', Name.Function, 'stackeffect'),
            (r';\s', Keyword),

            # imports and namespaces
            (r'(USING:)(\s+)', bygroups(Keyword.Namespace, Text), 'vocabs'),
            (r'(USE:|UNUSE:|IN:|QUALIFIED:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(QUALIFIED-WITH:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text,
                      Name.Namespace)),
            (r'(FROM:|EXCLUDE:)(\s+)(\S+)(\s+=>\s)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text), 'words'),
            (r'(RENAME:)(\s+)(\S+)(\s+)(\S+)(\s+=>\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Namespace, Text, Name.Function)),
            (r'(ALIAS:|TYPEDEF:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Function)),
            (r'(DEFER:|FORGET:|POSTPONE:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function)),

            # tuples and classes
            (r'(TUPLE:|ERROR:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class), 'slots'),
            (r'(TUPLE:|ERROR:|BUILTIN:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class), 'slots'),
            (r'(MIXIN:|UNION:|INTERSECTION:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class)),
            (r'(PREDICATE:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class)),
            (r'(C:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Class)),
            (r'(INSTANCE:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class)),
            (r'(SLOT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)),
            (r'(SINGLETON:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'SINGLETONS:', Keyword, 'classes'),

            # other syntax
            (r'(CONSTANT:|SYMBOL:|MAIN:|HELP:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function)),
            (r'SYMBOLS:\s', Keyword, 'words'),
            (r'SYNTAX:\s', Keyword),
            (r'ALIEN:\s', Keyword),
            (r'(STRUCT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'(FUNCTION:)(\s+\S+\s+)(\S+)(\s+\(\s+[^)]+\)\s)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text)),
            (r'(FUNCTION-ALIAS:)(\s+)(\S+)(\s+\S+\s+)(\S+)(\s+\(\s+[^)]+\)\s)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Function, Text)),

            # vocab.private
            (r'(?:<PRIVATE|PRIVATE>)\s', Keyword.Namespace),

            # strings
            (r'"""\s+(?:.|\n)*?\s+"""', String),
            (r'"(?:\\\\|\\"|[^"])*"', String),
            (r'\S+"\s+(?:\\\\|\\"|[^"])*"', String),
            (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char),

            # comments
            (r'!\s+.*$', Comment),
            (r'#!\s+.*$', Comment),
            (r'/\*\s+(?:.|\n)*?\s\*/\s', Comment),

            # boolean constants
            (r'[tf]\s', Name.Constant),

            # symbols and literals
            (r'[\\$]\s+\S+', Name.Constant),
            (r'M\\\s+\S+\s+\S+', Name.Constant),

            # numbers
            (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s',
             Number),
            (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number),
            (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s',
             Number),
            (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s',
             Number),
            (r'0b[01]+\s', Number.Bin),
            (r'0o[0-7]+\s', Number.Oct),
            (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number),
            (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s',
             Number),

            # keywords
            (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s',
             Keyword),

            # builtins
            (builtin_kernel, Name.Builtin),
            (builtin_assocs, Name.Builtin),
            (builtin_combinators, Name.Builtin),
            (builtin_math, Name.Builtin),
            (builtin_sequences, Name.Builtin),
            (builtin_namespaces, Name.Builtin),
            (builtin_arrays, Name.Builtin),
            (builtin_io, Name.Builtin),
            (builtin_strings, Name.Builtin),
            (builtin_vectors, Name.Builtin),
            (builtin_continuations, Name.Builtin),

            # everything else is text
            (r'\S+', Text),
        ],
        'stackeffect': [
            (r'\s+', Text),
            (r'\(\s+', Name.Function, 'stackeffect'),
            (r'\)\s', Name.Function, '#pop'),
            (r'--\s', Name.Function),
            (r'\S+', Name.Variable),
        ],
        'slots': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)',
             bygroups(Text, Name.Variable, Text)),
            (r'\S+', Name.Variable),
        ],
        'vocabs': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Namespace),
        ],
        'classes': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Class),
        ],
        'words': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Function),
        ],
    }
Example #26
0
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix',
        'infixr',
        'infixl',
        'type',
        'cotype',
        'rectype',
        'alias',
        'struct',
        'con',
        'fun',
        'function',
        'val',
        'var',
        'external',
        'if',
        'then',
        'else',
        'elif',
        'return',
        'match',
        'private',
        'public',
        'module',
        'import',
        'as',
        'include',
        'inline',
        'rec',
        'try',
        'yield',
        'enum',
        'interface',
        'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type',
        'cotype',
        'rectype',
        'alias',
        'struct',
        'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall',
        'exists',
        'some',
        'with',
    ]

    # builtin names and special names
    builtin = [
        'for',
        'while',
        'repeat',
        'foreach',
        'foreach-indexed',
        'error',
        'catch',
        'finally',
        'cs',
        'js',
        'file',
        'ref',
        'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text,
                                                  tokenTypeDef), 'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?',
             bygroups(Keyword, Text, tokenTypeDef), 'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) + r'(\s+)([a-z]\w*)?',
             bygroups(Keyword, Text, tokenTypeDef), 'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [(r'=', Keyword), include('type')],

        # type started by struct
        'struct-type': [(r'(?=\((?!,*\)))', Punctuation, '#pop'),
                        include('type')],

        # type started by colon
        'type': [(r'[(\[<]', tokenType, 'type-nested'),
                 include('type-content')],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))', Keyword,
             '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)', bygroups(Name.Namespace,
                                                     tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)', bygroups(Name.Namespace,
                                                     tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [(r'\n\s*#.*$', Comment.Preproc), (r'\s+', Text),
                       (r'/\*', Comment.Multiline, 'comment'),
                       (r'//.*$', Comment.Single)],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }
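A short sketch of driving the lexer above through the standard Pygments token API, assuming it is the KokaLexer shipped with Pygments (importable from pygments.lexers); the Koka snippet is only illustrative input:

from pygments.lexers import KokaLexer

code = 'fun square( x : int ) : int { x * x }\n'
for ttype, value in KokaLexer().get_tokens(code):
    print(ttype, repr(value))
# The ':' after a parameter switches into the 'type' state, so 'int' is emitted
# with the tokenType token (Name.Attribute) rather than as a plain Name.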
Exemple #27
0
class BatchLexer(RegexLexer):
    """
    Lexer for the DOS/Windows Batch file format.

    .. versionadded:: 0.7
    """
    name = 'Batchfile'
    aliases = ['bat', 'batch', 'dosbatch', 'winbatch']
    filenames = ['*.bat', '*.cmd']
    mimetypes = ['application/x-dos-batch']

    flags = re.MULTILINE | re.IGNORECASE

    _nl = r'\n\x1a'
    _punct = r'&<>|'
    _ws = r'\t\v\f\r ,;=\xa0'
    _nlws = r'\s\x1a\xa0,;='
    _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws)
    _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' %
                           (_nl, _ws, _nl, _punct))
    _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl)
    _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws
    _label = r'(?:(?:[^%s%s+:^]|\^[%s]?[\w\W])*)' % (_nlws, _punct, _nl)
    _label_compound = r'(?:(?:[^%s%s+:^)]|\^[%s]?[^)])*)' % (_nlws, _punct,
                                                             _nl)
    _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator
    _opword = r'(?:equ|geq|gtr|leq|lss|neq)'
    _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl)
    _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|'
                 r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|'
                 r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|'
                 r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:'
                 r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' %
                 (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl))
    _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s])+)' % (_nl, _nlws, _punct)
    _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s)])+)' % (_nl, _nlws,
                                                                _punct)
    _token = r'(?:[%s]+|%s)' % (_punct, _core_token)
    _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound)
    _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
               (_punct, _string, _variable, _core_token))

    def _make_begin_state(compound,
                          _core_token=_core_token,
                          _core_token_compound=_core_token_compound,
                          _keyword_terminator=_keyword_terminator,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _space=_space,
                          _start_label=_start_label,
                          _stoken=_stoken,
                          _token_terminator=_token_terminator,
                          _variable=_variable,
                          _ws=_ws):
        rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                            ')' if compound else '')
        rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
        rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
        set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
        suffix = ''
        if compound:
            _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
            _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
            suffix = '/compound'
        return [
            ((r'\)', Punctuation, '#pop') if compound else
             (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
              Comment.Single)),
            (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
            (_space, using(this, state='text')),
            include('redirect%s' % suffix), (r'[%s]+' % _nl, Text),
            (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation),
            (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
             r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
             (_nl, _token_terminator, _space,
              _core_token_compound if compound else _core_token, _nl, _nl),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
             (_keyword_terminator, rest, _nl, _nl, rest),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                    'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                    'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                    'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                    'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                    'title', 'type', 'ver', 'verify', 'vol'),
                   suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
            (r'(call)(%s?)(:)' % _space,
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'call%s' % suffix),
            (r'call%s' % _keyword_terminator, Keyword),
            (r'(for%s(?!\^))(%s)(/f%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/f', 'for')),
            (r'(for%s(?!\^))(%s)(/l%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/l', 'for')),
            (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
            (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'label%s' % suffix),
            (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
             (_token_terminator, _space, _token_terminator, _space,
              _token_terminator, _space),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), Keyword,
                      using(this, state='text')), ('(?', 'if')),
            (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
             (_token_terminator, _space, _stoken, _keyword_terminator,
              rest_of_line_compound if compound else rest_of_line),
             Comment.Single, 'follow%s' % suffix),
            (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), 'arithmetic%s' % suffix),
            (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
             r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
             (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
              ')' if compound else '', _nl, _nl),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), using(this, state='variable'),
                      Punctuation), 'follow%s' % suffix),
            default('follow%s' % suffix)
        ]

    def _make_follow_state(compound,
                           _label=_label,
                           _label_compound=_label_compound,
                           _nl=_nl,
                           _space=_space,
                           _start_label=_start_label,
                           _token=_token,
                           _token_compound=_token_compound,
                           _ws=_ws):
        suffix = '/compound' if compound else ''
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [
            (r'%s([%s]*)(%s)(.*)' %
             (_start_label, _ws, _label_compound if compound else _label),
             bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
            include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'),
            (r'\|\|?|&&?', Punctuation, '#pop'),
            include('text')
        ]
        return state

    def _make_arithmetic_state(compound,
                               _nl=_nl,
                               _punct=_punct,
                               _string=_string,
                               _variable=_variable,
                               _ws=_ws,
                               _nlws=_nlws):
        op = r'=+\-*/!~'
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [(r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex),
                  (r'\d+', Number.Integer), (r'[(),]+', Punctuation),
                  (r'([%s]|%%|\^\^)+' % op, Operator),
                  (r'(%s|%s|(\^[%s]?)?[^()%s%%\^"%s%s]|\^[%s]?%s)+' %
                   (_string, _variable, _nl, op, _nlws, _punct, _nlws,
                    r'[^)]' if compound else r'[\w\W]'),
                   using(this, state='variable')),
                  (r'(?=[\x00|&])', Text, '#pop'),
                  include('follow')]
        return state

    def _make_call_state(compound,
                         _label=_label,
                         _label_compound=_label_compound):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append((r'(:?)(%s)' % (_label_compound if compound else _label),
                      bygroups(Punctuation, Name.Label), '#pop'))
        return state

    def _make_label_state(compound,
                          _label=_label,
                          _label_compound=_label_compound,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _variable=_variable):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append(
            (r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' %
             (_label_compound if compound else _label, _string, _variable, _nl,
              r'[^)]' if compound else r'[\w\W]', _nl, _punct,
              r')' if compound else ''), bygroups(Name.Label,
                                                  Comment.Single), '#pop'))
        return state

    def _make_redirect_state(compound,
                             _core_token_compound=_core_token_compound,
                             _nl=_nl,
                             _punct=_punct,
                             _stoken=_stoken,
                             _string=_string,
                             _space=_space,
                             _variable=_variable,
                             _nlws=_nlws):
        stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                           (_punct, _string, _variable, _core_token_compound))
        return [
            (r'((?:(?<=[%s])\d)?)(>>?&|<&)([%s]*)(\d)' % (_nlws, _nlws),
             bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
            (r'((?:(?<=[%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
             (_nlws, _nl, _space, stoken_compound if compound else _stoken),
             bygroups(Number.Integer, Punctuation, using(this, state='text')))
        ]

    tokens = {
        'root':
        _make_begin_state(False),
        'follow':
        _make_follow_state(False),
        'arithmetic':
        _make_arithmetic_state(False),
        'call':
        _make_call_state(False),
        'label':
        _make_label_state(False),
        'redirect':
        _make_redirect_state(False),
        'root/compound':
        _make_begin_state(True),
        'follow/compound':
        _make_follow_state(True),
        'arithmetic/compound':
        _make_arithmetic_state(True),
        'call/compound':
        _make_call_state(True),
        'label/compound':
        _make_label_state(True),
        'redirect/compound':
        _make_redirect_state(True),
        'variable-or-escape':
        [(_variable, Name.Variable),
         (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)],
        'string': [(r'"', String.Double, '#pop'), (_variable, Name.Variable),
                   (r'\^!|%%', String.Escape),
                   (r'[^"%%^%s]+|[%%^]' % _nl, String.Double),
                   default('#pop')],
        'sqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Single)],
        'bqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Backtick)],
        'text': [(r'"', String.Double, 'string'),
                 include('variable-or-escape'),
                 (r'[^"%%^%s%s\d)]+|.' % (_nlws, _punct), Text)],
        'variable': [(r'"', String.Double, 'string'),
                     include('variable-or-escape'),
                     (r'[^"%%^%s]+|.' % _nl, Name.Variable)],
        'for': [(r'(%s)(in)(%s)(\()' % (_space, _space),
                 bygroups(using(this, state='text'), Keyword,
                          using(this, state='text'), Punctuation), '#pop'),
                include('follow')],
        'for2': [(r'\)', Punctuation),
                 (r'(%s)(do%s)' % (_space, _token_terminator),
                  bygroups(using(this, state='text'), Keyword), '#pop'),
                 (r'[%s]+' % _nl, Text),
                 include('follow')],
        'for/f': [(r'(")((?:%s|[^"])*?")([%s]*)(\))' % (_variable, _nlws),
                   bygroups(String.Double, using(this, state='string'), Text,
                            Punctuation)),
                  (r'"', String.Double, ('#pop', 'for2', 'string')),
                  (r"('(?:%%%%|%s|[\w\W])*?')([%s]*)(\))" % (_variable, _nlws),
                   bygroups(using(this, state='sqstring'), Text, Punctuation)),
                  (r'(`(?:%%%%|%s|[\w\W])*?`)([%s]*)(\))' % (_variable, _nlws),
                   bygroups(using(this, state='bqstring'), Text, Punctuation)),
                  include('for2')],
        'for/l': [(r'-?\d+', Number.Integer),
                  include('for2')],
        'if': [
            (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' %
             (_token_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Number.Integer), '#pop'),
            (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text'),
                      using(this, state='variable')), '#pop'),
            (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text')), '#pop'),
            (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number),
             bygroups(using(this, state='arithmetic'), Operator.Word,
                      using(this, state='arithmetic')), '#pop'),
            (_stoken, using(this, state='text'), ('#pop', 'if2')),
        ],
        'if2': [(r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken),
                 bygroups(using(this, state='text'), Operator,
                          using(this, state='text')), '#pop'),
                (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken),
                 bygroups(using(this, state='text'), Operator.Word,
                          using(this, state='text')), '#pop')],
        '(?': [(_space, using(this, state='text')),
               (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')),
               default('#pop')],
        'else?': [(_space, using(this, state='text')),
                  (r'else%s' % _token_terminator, Keyword, '#pop'),
                  default('#pop')]
    }
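To see the compound/non-compound state pairs in action, feed a small script with a parenthesised block through the lexer; a sketch assuming the class above is the BatchLexer importable from pygments.lexers:

from pygments.lexers import BatchLexer

script = '@echo off\nif exist out.txt (\n  del out.txt\n)\n'
for ttype, value in BatchLexer().get_tokens(script):
    print(ttype, repr(value))
# The '(' after the if condition enters 'root/compound', so everything up to the
# matching ')' is lexed by the */compound variants of the states defined above.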
Exemple #28
0
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.
        """

        tokens = {
            'statements': [
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                (words((
                    '@selector', '@private', '@protected', '@public', '@encode',
                    '@synchronized', '@try', '@throw', '@catch', '@finally',
                    '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer',
                    '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong',
                    'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic',
                    'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in',
                    'out', 'inout', 'release', 'class', '@dynamic', '@optional',
                    '@required', '@autoreleasepool'), suffix=r'\b'),
                 Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'),
                 Keyword.Type),
                (r'@(true|false|YES|NO)\n', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                inherit,
            ],
            'oc_classname': [
                # interface definition that inherits
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            'oc_forward_classname': [
                (r'([a-zA-Z$_][\w$]*)(\s*,\s*)',
                 bygroups(Name.Class, Text), 'oc_forward_classname'),
                (r'([a-zA-Z$_][\w$]*)(\s*;?)',
                 bygroups(Name.Class, Text), '#pop')
            ],
            'oc_ivars': [
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (r'^([-+])(\s*)'                         # method marker
                 r'(\(.*?\))?(\s*)'                      # return type
                 r'([a-zA-Z$_][\w$]*:?)',        # begin of method name
                 bygroups(Punctuation, Text, using(this),
                          Text, Name.Function),
                 'method'),
                inherit,
            ],
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
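Since this class is generated on top of a C-family base lexer, it is normally reached through the Objective-C lexer that Pygments exports rather than instantiated directly; a brief sketch, assuming that class is available as pygments.lexers.ObjectiveCLexer and using an illustrative snippet:

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import ObjectiveCLexer

code = '@interface Foo : NSObject\n@property (nonatomic, copy) NSString *name;\n@end\n'
# '@interface' pushes 'oc_classname', which tags Foo and NSObject as Name.Class;
# NSString should be promoted to Name.Builtin.Pseudo by get_tokens_unprocessed
# above, provided it appears in the Cocoa builtin lists.
print(highlight(code, ObjectiveCLexer(), HtmlFormatter()))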
Exemple #30
0
class CoqLexer(RegexLexer):
    """
    For the `Coq <http://coq.inria.fr/>`_ theorem prover.

    .. versionadded:: 1.5
    """

    name = 'Coq'
    aliases = ['coq']
    filenames = ['*.v']
    mimetypes = ['text/x-coq']

    keywords1 = (
        # Vernacular commands
        'Section',
        'Module',
        'End',
        'Require',
        'Import',
        'Export',
        'Variable',
        'Variables',
        'Parameter',
        'Parameters',
        'Axiom',
        'Hypothesis',
        'Hypotheses',
        'Notation',
        'Local',
        'Tactic',
        'Reserved',
        'Scope',
        'Open',
        'Close',
        'Bind',
        'Delimit',
        'Definition',
        'Let',
        'Ltac',
        'Fixpoint',
        'CoFixpoint',
        'Morphism',
        'Relation',
        'Implicit',
        'Arguments',
        'Set',
        'Unset',
        'Contextual',
        'Strict',
        'Prenex',
        'Implicits',
        'Inductive',
        'CoInductive',
        'Record',
        'Structure',
        'Canonical',
        'Coercion',
        'Theorem',
        'Lemma',
        'Corollary',
        'Proposition',
        'Fact',
        'Remark',
        'Example',
        'Proof',
        'Goal',
        'Save',
        'Qed',
        'Defined',
        'Hint',
        'Resolve',
        'Rewrite',
        'View',
        'Search',
        'Show',
        'Print',
        'Printing',
        'All',
        'Graph',
        'Projections',
        'inside',
        'outside',
        'Check',
    )
    keywords2 = (
        # Gallina
        'forall',
        'exists',
        'exists2',
        'fun',
        'fix',
        'cofix',
        'struct',
        'match',
        'end',
        'in',
        'return',
        'let',
        'if',
        'is',
        'then',
        'else',
        'for',
        'of',
        'nosimpl',
        'with',
        'as',
    )
    keywords3 = (
        # Sorts
        'Type',
        'Prop',
    )
    keywords4 = (
        # Tactics
        'pose',
        'set',
        'move',
        'case',
        'elim',
        'apply',
        'clear',
        'hnf',
        'intro',
        'intros',
        'generalize',
        'rename',
        'pattern',
        'after',
        'destruct',
        'induction',
        'using',
        'refine',
        'inversion',
        'injection',
        'rewrite',
        'congr',
        'unlock',
        'compute',
        'ring',
        'field',
        'replace',
        'fold',
        'unfold',
        'change',
        'cutrewrite',
        'simpl',
        'have',
        'suff',
        'wlog',
        'suffices',
        'without',
        'loss',
        'nat_norm',
        'assert',
        'cut',
        'trivial',
        'revert',
        'bool_congr',
        'nat_congr',
        'symmetry',
        'transitivity',
        'auto',
        'split',
        'left',
        'right',
        'autorewrite',
        'tauto',
    )
    keywords5 = (
        # Terminators
        'by',
        'done',
        'exact',
        'reflexivity',
        'tauto',
        'romega',
        'omega',
        'assumption',
        'solve',
        'contradiction',
        'discriminate',
    )
    keywords6 = (
        # Control
        'do',
        'last',
        'first',
        'try',
        'idtac',
        'repeat',
    )
    # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
    # 'downto', 'else', 'end', 'exception', 'external', 'false',
    # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
    # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
    # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
    # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
    # 'type', 'val', 'virtual', 'when', 'while', 'with'
    keyopts = (
        '!=',
        '#',
        '&',
        '&&',
        r'\(',
        r'\)',
        r'\*',
        r'\+',
        ',',
        '-',
        r'-\.',
        '->',
        r'\.',
        r'\.\.',
        ':',
        '::',
        ':=',
        ':>',
        ';',
        ';;',
        '<',
        '<-',
        '<->',
        '=',
        '>',
        '>]',
        r'>\}',
        r'\?',
        r'\?\?',
        r'\[',
        r'\[<',
        r'\[>',
        r'\[\|',
        ']',
        '_',
        '`',
        r'\{',
        r'\{<',
        r'\|',
        r'\|]',
        r'\}',
        '~',
        '=>',
        r'/\\',
        r'\\/',
        u'Π',
        u'λ',
    )
    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list',
                  'array')

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\(\*', Comment, 'comment'),
            (words(keywords1, prefix=r'\b', suffix=r'\b'), Keyword.Namespace),
            (words(keywords2, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords3, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(keywords4, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords5, prefix=r'\b', suffix=r'\b'), Keyword.Pseudo),
            (words(keywords6, prefix=r'\b', suffix=r'\b'), Keyword.Reserved),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'\d[\d_]*', Number.Integer),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^"]+', String.Double),
            (r'""', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [(r'\s+', Text), (r'\.', Punctuation),
                   (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
                   (r'[A-Z][\w\']*', Name.Class, '#pop'),
                   (r'[a-z][a-z0-9_\']*', Name, '#pop'),
                   default('#pop')],
    }

    def analyse_text(text):
        if text.startswith('(*'):
            return True
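The 'comment' state pushes itself on '(*', so nested Coq comments stay balanced; a small sketch, assuming the class is the CoqLexer importable from pygments.lexers:

from pygments.lexers import CoqLexer

code = '(* outer (* nested *) still a comment *) Lemma foo : True.\n'
for ttype, value in CoqLexer().get_tokens(code):
    print(ttype, repr(value))
# The inner '(*' re-enters 'comment' via '#push', so the text stays Comment until
# the second '*)'; 'Lemma' then matches keywords1 and comes out as Keyword.Namespace.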
Exemple #31
0
class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby', 'duby']
    filenames = [
        '*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby',
        'Gemfile'
    ]
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        # okay, this is the hardest part of parsing Ruby...
        # match: 1 = <<[-~]?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)  # <<[-~]?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        heredocstack.append((match.group(1) in ('<<-', '<<~'), match.group(3)))

        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), String.Delimiter, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_rubystrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'([a-zA-Z_]\w*)(:)(?!:)',
             bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-' + name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name + '-intp-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append(
                (r'%[QWx]?' + lbrace, String.Other, name + '-intp-string'))
            states[name + '-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append(
                (r'%[qsw]' + lbrace, String.Other, name + '-string'))
            states[name + '-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append(
                (r'%r' + lbrace, String.Regex, name + '-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([\W_])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([\W_]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'\A#!.+?$', Comment.Hashbang),
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end.*?$', Comment.Multiline),
            # keywords
            (words(
                ('BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'defined?',
                 'do', 'else', 'elsif', 'end', 'ensure', 'for', 'if', 'in',
                 'next', 'redo', 'rescue', 'raise', 'retry', 'return', 'super',
                 'then', 'undef', 'unless', 'until', 'when', 'while', 'yield'),
                suffix=r'\b'), Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_]\w*'
             r'(?:::[a-zA-Z_]\w*)*)', bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (words(('initialize', 'new', 'loop', 'include', 'extend', 'raise',
                    'attr_reader', 'attr_writer', 'attr_accessor', 'attr',
                    'catch', 'throw', 'private', 'module_function', 'public',
                    'protected', 'true', 'false', 'nil'),
                   suffix=r'\b'), Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            (words(('autoload', 'block_given', 'const_defined', 'eql', 'equal',
                    'frozen', 'include', 'instance_of', 'is_a', 'iterator',
                    'kind_of', 'method_defined', 'nil',
                    'private_method_defined', 'protected_method_defined',
                    'public_method_defined', 'respond_to', 'tainted'),
                   suffix=r'\?'), Name.Builtin),
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            (words(
                ('Array', 'Float', 'Integer', 'String', '__id__', '__send__',
                 'abort', 'ancestors', 'at_exit', 'autoload', 'binding',
                 'callcc', 'caller', 'catch', 'chomp', 'chop', 'class_eval',
                 'class_variables', 'clone', 'const_defined?', 'const_get',
                 'const_missing', 'const_set', 'constants', 'display', 'dup',
                 'eval', 'exec', 'exit', 'extend', 'fail', 'fork', 'format',
                 'freeze', 'getc', 'gets', 'global_variables', 'gsub', 'hash',
                 'id', 'included_modules', 'inspect', 'instance_eval',
                 'instance_method', 'instance_methods',
                 'instance_variable_get', 'instance_variable_set',
                 'instance_variables', 'lambda', 'load', 'local_variables',
                 'loop', 'method', 'method_missing', 'methods', 'module_eval',
                 'name', 'object_id', 'open', 'p', 'print', 'printf',
                 'private_class_method', 'private_instance_methods',
                 'private_methods', 'proc', 'protected_instance_methods',
                 'protected_methods', 'public_class_method',
                 'public_instance_methods', 'public_methods', 'putc', 'puts',
                 'raise', 'rand', 'readline', 'readlines', 'require', 'scan',
                 'select', 'self', 'send', 'set_trace_func',
                 'singleton_methods', 'sleep', 'split', 'sprintf', 'srand',
                 'sub', 'syscall', 'system', 'taint', 'test', 'throw', 'to_a',
                 'to_s', 'trace_var', 'trap', 'untaint', 'untrace_var',
                 'warn'),
                prefix=r'(?<!\.)',
                suffix=r'\b'), Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<[-~]?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<[-~]?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or files)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text,
                                            String.Regex), 'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (
                r'\?(\\[MC]-)*'  # modifiers
                r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
                r'(?!\w)',
                String.Char),
            (r'[A-Z]\w+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(RUBY_OPERATORS,
                   prefix=r'(\.|::)'), bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_]\w*[!?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [(r'\(', Punctuation, 'defexpr'),
                     (r'(?:([a-zA-Z_]\w*)(\.))?'
                      r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
                      r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
                      bygroups(Name.Class, Operator, Name.Function), '#pop'),
                     default('#pop')],
        'classname': [(r'\(', Punctuation, 'defexpr'),
                      (r'<<', Operator, '#pop'),
                      (r'[A-Z_]\w*', Name.Class, '#pop'),
                      default('#pop')],
        'defexpr': [(r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
                    (r'\(', Operator, '#push'),
                    include('root')],
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [(r'#\{', String.Interpol, 'in-intp'),
                        (r'#@@?[a-zA-Z_]\w*', String.Interpol),
                        (r'#\$[a-zA-Z_]\w*', String.Interpol)],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
             String.Escape)
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        'end-part': [(r'.+', Comment.Preproc, '#pop')]
    }
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        return shebang_matches(text, r'ruby(1\.\d)?')
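# --- Added usage sketch (not part of the lexer above; assumes Pygments is
# installed). It roughly shows the behaviour described in the number-rule
# comment: the "?" after a number is consumed as an operator, so the ternary
# in the comment's own example still tokenizes sensibly.
from pygments.lexers import RubyLexer

for token_type, value in RubyLexer().get_tokens('x>=0?n[x]:""'):
    print(token_type, repr(value))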
Exemple #32
0
 def _make_begin_state(compound, _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl, _punct=_punct, _string=_string,
                       _space=_space, _start_label=_start_label,
                       _stoken=_stoken, _token_terminator=_token_terminator,
                       _variable=_variable, _ws=_ws):
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     return [
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix),
         (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'),
         (r'@+', Punctuation),
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'label%s' % suffix),
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'), Keyword),
          'arithmetic%s' % suffix),
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation),
          'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
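# --- Added illustration (all names here are invented for this sketch, not the
# real BatchLexer constants). It shows the pattern _make_begin_state relies on:
# one factory returns two nearly identical rule lists, keyed by a "/compound"
# suffix, and only the compound variant stops at a closing ")".
from pygments.lexer import RegexLexer, default
from pygments.token import Keyword, Punctuation, Text


def _make_demo_state(compound):
    """Return the rule list for either 'root' or 'root/compound'."""
    suffix = '/compound' if compound else ''
    rules = [
        (r'\(', Punctuation, 'root/compound'),    # enter a (...) block
        (r'echo\b', Keyword, 'args%s' % suffix),  # keyword pushes a suffixed state
        (r'\s+', Text),
        (r'\S', Text),
    ]
    if compound:
        rules.insert(0, (r'\)', Punctuation, '#pop'))  # leave the (...) block
    return rules


class DemoBatchLexer(RegexLexer):
    name = 'DemoBatch'
    tokens = {
        'root': _make_demo_state(False),
        'root/compound': _make_demo_state(True),
        # the "/compound" args state must not swallow the closing ")"
        'args': [(r'[^\n]+', Text), default('#pop')],
        'args/compound': [(r'[^\n)]+', Text), default('#pop')],
    }


# e.g.: for tok in DemoBatchLexer().get_tokens('(echo hi) echo bye'): print(tok)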
Exemple #33
0
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    .. versionadded:: 0.10
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (
                r'\{'
                r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
                r'(\![sra])?'  # conversion
                r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
                r'\}',
                String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]
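    # (Added note, not in the original lexer: with the interpolation rules in
    # innerstring_rules above, a literal like "%(name)s = {0:>8.2f}" gets
    # String.Interpol for both the old-style %-directive and the new-style
    # format field, while the text between them keeps the enclosing string's
    # token type.)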

    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (words(('assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'),
               suffix=r'\b'), Keyword),
        (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
    ]
    tokens['builtins'] = [
        (words(('__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray',
                'bytes', 'chr', 'classmethod', 'cmp', 'compile', 'complex',
                'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                'filter', 'float', 'format', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max',
                'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord',
                'pow', 'print', 'property', 'range', 'repr', 'reversed',
                'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
                'str', 'sum', 'super', 'tuple', 'type', 'vars', 'zip'),
               prefix=r'(?<!\.)',
               suffix=r'\b'), Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
        (
            words(
                (
                    'ArithmeticError',
                    'AssertionError',
                    'AttributeError',
                    'BaseException',
                    'BufferError',
                    'BytesWarning',
                    'DeprecationWarning',
                    'EOFError',
                    'EnvironmentError',
                    'Exception',
                    'FloatingPointError',
                    'FutureWarning',
                    'GeneratorExit',
                    'IOError',
                    'ImportError',
                    'ImportWarning',
                    'IndentationError',
                    'IndexError',
                    'KeyError',
                    'KeyboardInterrupt',
                    'LookupError',
                    'MemoryError',
                    'NameError',
                    'NotImplementedError',
                    'OSError',
                    'OverflowError',
                    'PendingDeprecationWarning',
                    'ReferenceError',
                    'ResourceWarning',
                    'RuntimeError',
                    'RuntimeWarning',
                    'StopIteration',
                    'SyntaxError',
                    'SyntaxWarning',
                    'SystemError',
                    'SystemExit',
                    'TabError',
                    'TypeError',
                    'UnboundLocalError',
                    'UnicodeDecodeError',
                    'UnicodeEncodeError',
                    'UnicodeError',
                    'UnicodeTranslateError',
                    'UnicodeWarning',
                    'UserWarning',
                    'ValueError',
                    'VMSError',
                    'Warning',
                    'WindowsError',
                    'ZeroDivisionError',
                    # new builtin exceptions from PEP 3151
                    'BlockingIOError',
                    'ChildProcessError',
                    'ConnectionError',
                    'BrokenPipeError',
                    'ConnectionAbortedError',
                    'ConnectionRefusedError',
                    'ConnectionResetError',
                    'FileExistsError',
                    'FileNotFoundError',
                    'InterruptedError',
                    'IsADirectoryError',
                    'NotADirectoryError',
                    'PermissionError',
                    'ProcessLookupError',
                    'TimeoutError'),
                prefix=r'(?<!\.)',
                suffix=r'\b'),
            Name.Exception),
    ]
    tokens['magicfuncs'] = [
        (words(
            ('__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
             '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
             '__call__', '__complex__', '__contains__', '__del__',
             '__delattr__', '__delete__', '__delitem__', '__dir__',
             '__divmod__', '__enter__', '__eq__', '__exit__', '__float__',
             '__floordiv__', '__format__', '__ge__', '__get__', '__getattr__',
             '__getattribute__', '__getitem__', '__gt__', '__hash__',
             '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__',
             '__imatmul__', '__imod__', '__import__', '__imul__', '__index__',
             '__init__', '__instancecheck__', '__int__', '__invert__',
             '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__',
             '__itruediv__', '__ixor__', '__le__', '__len__',
             '__length_hint__', '__lshift__', '__lt__', '__matmul__',
             '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
             '__new__', '__next__', '__or__', '__pos__', '__pow__',
             '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
             '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
             '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
             '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
             '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
             '__sub__', '__subclasscheck__', '__truediv__', '__xor__'),
            suffix=r'\b'), Name.Function.Magic),
    ]
    tokens['magicvars'] = [
        (words(('__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'),
               suffix=r'\b'), Name.Variable.Magic),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@\w+', Name.Decorator),
        (r'@', Operator),  # new matrix multiplication operator
        (uni_name, Name),
    ]
    tokens['funcname'] = [(uni_name, Name.Function, '#pop')]
    tokens['classname'] = [(uni_name, Name.Class, '#pop')]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        default('#pop')  # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        default('#pop'),
    ]
    tokens['strings-single'] = innerstring_rules(String.Single)
    tokens['strings-double'] = innerstring_rules(String.Double)

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?3(\.\d)?')
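# --- Added usage sketch (assumes Pygments is installed). analyse_text above is
# what lexer guessing consults: a "#!/usr/bin/env python3" shebang should make
# this lexer score highest among the candidates.
from pygments.lexers import guess_lexer

print(guess_lexer("#!/usr/bin/env python3\nprint('hi')\n").name)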
Exemple #34
0
    'string-single': [
        (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Single),
        (r'#\{', String.Interpol, 'interpolation'),
        (r"'", String.Single, '#pop'),
    ],

    'string-url': [
        (r'(\\#|#(?=[^\n{])|[^\n#)])+', String.Other),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'\)', String.Other, '#pop'),
    ],

    'pseudo-class': [
        (r'[\w-]+', Name.Decorator),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'class': [
        (r'[\w-]+', Name.Class),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'id': [
        (r'[\w-]+', Name.Namespace),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],

    'for': [
Exemple #35
0
    "string-double": [
        (r'(\\.|#(?=[^\n{])|[^\n"#])+', String.Double),
        (r"#\{", String.Interpol, "interpolation"),
        (r'"', String.Double, "#pop"),
    ],
    "string-single": [
        (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Double),
        (r"#\{", String.Interpol, "interpolation"),
        (r"'", String.Double, "#pop"),
    ],
    "string-url": [
        (r"(\\#|#(?=[^\n{])|[^\n#)])+", String.Other),
        (r"#\{", String.Interpol, "interpolation"),
        (r"\)", String.Other, "#pop"),
    ],
    "pseudo-class": [(r"[\w-]+", Name.Decorator), (r"#\{", String.Interpol, "interpolation"), default("#pop")],
    "class": [(r"[\w-]+", Name.Class), (r"#\{", String.Interpol, "interpolation"), default("#pop")],
    "id": [(r"[\w-]+", Name.Namespace), (r"#\{", String.Interpol, "interpolation"), default("#pop")],
    "for": [(r"(from|to|through)", Operator.Word), include("value")],
}
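# --- Added usage sketch (assumes Pygments is installed and that the states
# above back a Sass/SCSS-style lexer such as pygments.lexers.ScssLexer): the
# string states hand control to 'interpolation' on '#{' and pop back on the
# closing quote.
from pygments.lexers import ScssLexer

for token_type, value in ScssLexer().get_tokens('a { content: "x#{$y}z"; }'):
    print(token_type, repr(value))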


def _indentation(lexer, match, ctx):
    indentation = match.group(0)
    yield match.start(), Text, indentation
    ctx.last_indentation = indentation
    ctx.pos = match.end()

    if (
        hasattr(ctx, "block_state")
        and ctx.block_state