def do_indent(level):
    # Print paragraphs of indentation level >= {level} as String.Doc,
    # ignoring blank lines. Then return to 'root' state.
    return [
        (_rx_indent(level), String.Doc),
        (r'\s*\n', Text),
        default('#pop:2')
    ]
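
# Illustrative sketch (not part of the original module): do_indent() above
# relies on an _rx_indent() helper defined elsewhere.  A hypothetical minimal
# version could simply match any line indented by at least `level` levels of
# spaces; the real helper may also handle tabs and configurable tab widths.
def _rx_indent_sketch(level, indent_width=4):
    # hypothetical: one indentation level == `indent_width` spaces
    return r'^ {%d,}.*\n' % (level * indent_width)
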
def gen_elixir_sigil_rules():
    # all valid sigil terminators (excluding heredocs)
    terminators = [
        (r'\{', r'\}', 'cb'),
        (r'\[', r'\]', 'sb'),
        (r'\(', r'\)', 'pa'),
        (r'<', r'>', 'ab'),
        (r'/', r'/', 'slas'),
        (r'\|', r'\|', 'pipe'),
        ('"', '"', 'quot'),
        ("'", "'", 'apos'),
    ]

    # heredocs have slightly different rules
    triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

    token = String.Other
    states = {'sigils': []}

    for term, name in triquotes:
        states['sigils'] += [
            (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
             (name + '-end', name + '-intp')),
            (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
             (name + '-end', name + '-no-intp')),
        ]

        states[name + '-end'] = [
            (r'[a-zA-Z]+', token, '#pop'),
            default('#pop'),
        ]
        states[name + '-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ]
        states[name + '-no-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_no_interpol'),
        ]

    for lterm, rterm, name in terminators:
        states['sigils'] += [
            (r'~[a-z]' + lterm, token, name + '-intp'),
            (r'~[A-Z]' + lterm, token, name + '-no-intp'),
        ]
        states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token)
        states[name + '-no-intp'] = \
            gen_elixir_sigstr_rules(rterm, token, interpol=False)

    return states
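
# Usage sketch (illustrative, not part of the original code): pushing a small
# made-up Elixir snippet with two sigils through the ElixirLexer shipped with
# Pygments, to see the String.Other tokens the generated sigil states produce.
def _demo_elixir_sigil_tokens():
    from pygments.lexers import ElixirLexer
    code = '~r/hello (world)?/i <> ~S(no #{interpolation} here)\n'
    for token_type, value in ElixirLexer().get_tokens(code):
        print(token_type, repr(value))
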
class Perl6Lexer(ExtendedRegexLexer): """ For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code. .. versionadded:: 2.0 """ name = 'Perl6' aliases = ['perl6', 'pl6', 'raku'] filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod', '*.rakutest', '*.rakudoc'] mimetypes = ['text/x-perl6', 'application/x-perl6'] flags = re.MULTILINE | re.DOTALL | re.UNICODE PERL6_IDENTIFIER_RANGE = r"['\w:-]" PERL6_KEYWORDS = ( #Phasers 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST', 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO', #Keywords 'anon','augment','but','class','constant','default','does','else', 'elsif','enum','for','gather','given','grammar','has','if','import', 'is','let','loop','made','make','method','module','multi','my','need', 'orwith','our','proceed','proto','repeat','require','return', 'return-rw','returns','role','rule','state','sub','submethod','subset', 'succeed','supersede','token','try','unit','unless','until','use', 'when','while','with','without', #Traits 'export','native','repr','required','rw','symbol', ) PERL6_BUILTINS = ( 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos', 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action', 'actions','add','add_attribute','add_enum_value','add_fallback', 'add_method','add_parent','add_private_method','add_role','add_trustee', 'adverb','after','all','allocate','allof','allowed','alternative-names', 'annotations','antipair','antipairs','any','anyof','app_lifetime', 'append','arch','archname','args','arity','Array','asec','asech','asin', 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2', 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch', 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc', 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth', 'await','backtrace','Bag','BagHash','bail-out','base','basename', 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr', 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool', 'bool-only','bounds','break','Bridge','broken','BUILD','build-date', 'bytes','cache','callframe','calling-package','CALL-ME','callsame', 'callwith','can','cancel','candidates','cando','can-ok','canonpath', 'caps','caption','Capture','cas','catdir','categorize','categorize-list', 'catfile','catpath','cause','ceiling','cglobal','changed','Channel', 'chars','chdir','child','child-name','child-typename','chmod','chomp', 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup', 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate', 'column','comb','combinations','command','comment','compiler','Complex', 'compose','compose_type','composer','condition','config', 'configure_destroy','configure_type_checking','conj','connect', 'constraints','construct','contains','contents','copy','cos','cosec', 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores', 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d', 'Date','DateTime','day','daycount','day-of-month','day-of-week', 'day-of-year','days-in-month','declaration','decode','decoder','deepmap', 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS', 'denominator','desc','DESTROY','destroyers','devnull','diag', 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames', 'do','does','does-ok','done','done-testing','duckmap','dynamic','e', 
'eager','earlier','elems','emit','enclosing','encode','encoder', 'encoding','end','ends-with','enum_from_value','enum_value_list', 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE', 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY', 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage', 'expmod','extension','f','fail','fails-like','fc','feature','file', 'filename','find_method','find_method_qualified','finish','first','flat', 'flatmap','flip','floor','flunk','flush','fmt','format','formatter', 'freeze','from','from-list','from-loop','from-posix','full', 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs', 'grep','handle','handled','handles','hardware','has_accessor','Hash', 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id', 'illegal','im','in','indent','index','indices','indir','infinite', 'infix','infix:<+>','infix:<->','install_method_cache','Instant', 'instead','Int','int-bounds','interval','in-timezone','invalid-str', 'invert','invocant','IO','IO::Notification.watch-path','is_trusted', 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply', 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year', 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting', 'is-win','item','iterator','join','keep','kept','KERNELnames','key', 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later', 'lazy','lc','leading','level','like','line','lines','link','List', 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb', 'made','MAIN','make','Map','match','max','maxpairs','merge','message', 'method','method_table','methods','migrate','min','minmax','minpairs', 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month', 'move','mro','msb','multi','multiness','my','name','named','named_names', 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type', 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle', 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out', 'nodemap','nok','none','norm','not','note','now','nude','Num', 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes', 'ok','old','on-close','one','on-switch','open','opened','operation', 'optional','ord','ords','orig','os-error','osname','out-buffer','pack', 'package','package-kind','package-name','packages','pair','pairs', 'pairup','parameter','params','parent','parent-name','parents','parse', 'parse-base','parsefile','parse-names','parts','pass','path','path-sep', 'payload','peer-host','peer-port','periods','perl','permutations','phaser', 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll', 'polymod','pop','pos','positional','posix','postfix','postmatch', 'precomp-ext','precomp-target','pred','prefix','prematch','prepend', 'print','printf','print-nl','print-to','private','private_method_table', 'proc','produce','Promise','prompt','protect','pull-one','push', 'push-all','push-at-least','push-exactly','push-until-lazy','put', 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw', 're','read','readchars','readonly','ready','Real','reallocate','reals', 'reason','rebless','receive','recv','redispatcher','redo','reduce', 'rel2abs','relative','release','rename','repeated','replacement', 'report','reserved','resolve','restore','result','resume','rethrow', 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish', 'roll','rootdir','roots','rotate','rotor','round','roundrobin', 
'routine-type','run','rwx','s','samecase','samemark','samewith','say', 'schedule-on','scheduler','scope','sec','sech','second','seek','self', 'send','Set','set_hidden','set_name','set_package','set_rw','set_value', 'SetHash','set-instruments','setup_finalization','shape','share','shell', 'shift','sibling','sigil','sign','signal','signals','signature','sin', 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one', 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp', 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port', 'sort','source','source-package','spawn','SPEC','splice','split', 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable', 'start','started','starts-with','status','stderr','stdout','Str', 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst', 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum', 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap', 'target','target-name','tc','tclc','tell','then','throttle','throw', 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix', 'total','trailing','trans','tree','trim','trim-leading','trim-trailing', 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type', 'type_captures','typename','uc','udp','uncaught_handler','unimatch', 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival', 'univals','unlike','unlink','unlock','unpack','unpolar','unshift', 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR', 'variable','verbose-config','version','VMnames','volume','vow','w','wait', 'warn','watch','watch-path','week','weekday-of-month','week-number', 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO', 'whole-second','WHY','wordcase','words','workaround','wrap','write', 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest', ) PERL6_BUILTIN_CLASSES = ( #Booleans 'False','True', #Classes 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace', 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf', 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code', 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler', 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding', 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant', 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles', 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path', 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32', 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec', 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32', 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List', 'Lock','Lock::Async','long','longlong','Macro','Map','Match', 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW', 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer', 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance', 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer', 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash', 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64', 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block', 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator', 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading', 
'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc', 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat', 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler', 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip', 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier', 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry', 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage', 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler', 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable', 'Version','VM','Whatever','WhateverCode','WrapHandle' ) PERL6_OPERATORS = ( 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^', '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', 'not', '<==', '==>', '<<==', '==>>','unicmp', ) # Perl 6 has a *lot* of possible bracketing characters # this list was lifted from STD.pm6 (https://github.com/perl6/std) PERL6_BRACKETS = { u'\u0028': u'\u0029', u'\u003c': u'\u003e', u'\u005b': u'\u005d', u'\u007b': u'\u007d', u'\u00ab': u'\u00bb', u'\u0f3a': u'\u0f3b', u'\u0f3c': u'\u0f3d', u'\u169b': u'\u169c', u'\u2018': u'\u2019', u'\u201a': u'\u2019', u'\u201b': u'\u2019', u'\u201c': u'\u201d', u'\u201e': u'\u201d', u'\u201f': u'\u201d', u'\u2039': u'\u203a', u'\u2045': u'\u2046', u'\u207d': u'\u207e', u'\u208d': u'\u208e', u'\u2208': u'\u220b', u'\u2209': u'\u220c', u'\u220a': u'\u220d', u'\u2215': u'\u29f5', u'\u223c': u'\u223d', u'\u2243': u'\u22cd', u'\u2252': u'\u2253', u'\u2254': u'\u2255', u'\u2264': u'\u2265', u'\u2266': u'\u2267', u'\u2268': u'\u2269', u'\u226a': u'\u226b', u'\u226e': u'\u226f', u'\u2270': u'\u2271', u'\u2272': u'\u2273', u'\u2274': u'\u2275', u'\u2276': u'\u2277', u'\u2278': u'\u2279', u'\u227a': u'\u227b', u'\u227c': u'\u227d', u'\u227e': u'\u227f', u'\u2280': u'\u2281', u'\u2282': u'\u2283', u'\u2284': u'\u2285', u'\u2286': u'\u2287', u'\u2288': u'\u2289', u'\u228a': u'\u228b', u'\u228f': u'\u2290', u'\u2291': u'\u2292', u'\u2298': u'\u29b8', u'\u22a2': u'\u22a3', u'\u22a6': u'\u2ade', u'\u22a8': u'\u2ae4', u'\u22a9': u'\u2ae3', u'\u22ab': u'\u2ae5', u'\u22b0': u'\u22b1', u'\u22b2': u'\u22b3', u'\u22b4': u'\u22b5', u'\u22b6': u'\u22b7', u'\u22c9': u'\u22ca', u'\u22cb': u'\u22cc', u'\u22d0': u'\u22d1', u'\u22d6': u'\u22d7', u'\u22d8': u'\u22d9', u'\u22da': u'\u22db', u'\u22dc': u'\u22dd', u'\u22de': u'\u22df', u'\u22e0': u'\u22e1', u'\u22e2': u'\u22e3', u'\u22e4': u'\u22e5', u'\u22e6': u'\u22e7', u'\u22e8': u'\u22e9', u'\u22ea': u'\u22eb', u'\u22ec': u'\u22ed', u'\u22f0': u'\u22f1', u'\u22f2': u'\u22fa', u'\u22f3': u'\u22fb', u'\u22f4': u'\u22fc', u'\u22f6': u'\u22fd', u'\u22f7': u'\u22fe', u'\u2308': u'\u2309', u'\u230a': u'\u230b', u'\u2329': u'\u232a', u'\u23b4': u'\u23b5', u'\u2768': u'\u2769', u'\u276a': u'\u276b', u'\u276c': u'\u276d', u'\u276e': u'\u276f', u'\u2770': u'\u2771', u'\u2772': u'\u2773', u'\u2774': u'\u2775', u'\u27c3': u'\u27c4', u'\u27c5': u'\u27c6', u'\u27d5': u'\u27d6', u'\u27dd': u'\u27de', u'\u27e2': u'\u27e3', 
u'\u27e4': u'\u27e5', u'\u27e6': u'\u27e7', u'\u27e8': u'\u27e9', u'\u27ea': u'\u27eb', u'\u2983': u'\u2984', u'\u2985': u'\u2986', u'\u2987': u'\u2988', u'\u2989': u'\u298a', u'\u298b': u'\u298c', u'\u298d': u'\u298e', u'\u298f': u'\u2990', u'\u2991': u'\u2992', u'\u2993': u'\u2994', u'\u2995': u'\u2996', u'\u2997': u'\u2998', u'\u29c0': u'\u29c1', u'\u29c4': u'\u29c5', u'\u29cf': u'\u29d0', u'\u29d1': u'\u29d2', u'\u29d4': u'\u29d5', u'\u29d8': u'\u29d9', u'\u29da': u'\u29db', u'\u29f8': u'\u29f9', u'\u29fc': u'\u29fd', u'\u2a2b': u'\u2a2c', u'\u2a2d': u'\u2a2e', u'\u2a34': u'\u2a35', u'\u2a3c': u'\u2a3d', u'\u2a64': u'\u2a65', u'\u2a79': u'\u2a7a', u'\u2a7d': u'\u2a7e', u'\u2a7f': u'\u2a80', u'\u2a81': u'\u2a82', u'\u2a83': u'\u2a84', u'\u2a8b': u'\u2a8c', u'\u2a91': u'\u2a92', u'\u2a93': u'\u2a94', u'\u2a95': u'\u2a96', u'\u2a97': u'\u2a98', u'\u2a99': u'\u2a9a', u'\u2a9b': u'\u2a9c', u'\u2aa1': u'\u2aa2', u'\u2aa6': u'\u2aa7', u'\u2aa8': u'\u2aa9', u'\u2aaa': u'\u2aab', u'\u2aac': u'\u2aad', u'\u2aaf': u'\u2ab0', u'\u2ab3': u'\u2ab4', u'\u2abb': u'\u2abc', u'\u2abd': u'\u2abe', u'\u2abf': u'\u2ac0', u'\u2ac1': u'\u2ac2', u'\u2ac3': u'\u2ac4', u'\u2ac5': u'\u2ac6', u'\u2acd': u'\u2ace', u'\u2acf': u'\u2ad0', u'\u2ad1': u'\u2ad2', u'\u2ad3': u'\u2ad4', u'\u2ad5': u'\u2ad6', u'\u2aec': u'\u2aed', u'\u2af7': u'\u2af8', u'\u2af9': u'\u2afa', u'\u2e02': u'\u2e03', u'\u2e04': u'\u2e05', u'\u2e09': u'\u2e0a', u'\u2e0c': u'\u2e0d', u'\u2e1c': u'\u2e1d', u'\u2e20': u'\u2e21', u'\u3008': u'\u3009', u'\u300a': u'\u300b', u'\u300c': u'\u300d', u'\u300e': u'\u300f', u'\u3010': u'\u3011', u'\u3014': u'\u3015', u'\u3016': u'\u3017', u'\u3018': u'\u3019', u'\u301a': u'\u301b', u'\u301d': u'\u301e', u'\ufd3e': u'\ufd3f', u'\ufe17': u'\ufe18', u'\ufe35': u'\ufe36', u'\ufe37': u'\ufe38', u'\ufe39': u'\ufe3a', u'\ufe3b': u'\ufe3c', u'\ufe3d': u'\ufe3e', u'\ufe3f': u'\ufe40', u'\ufe41': u'\ufe42', u'\ufe43': u'\ufe44', u'\ufe47': u'\ufe48', u'\ufe59': u'\ufe5a', u'\ufe5b': u'\ufe5c', u'\ufe5d': u'\ufe5e', u'\uff08': u'\uff09', u'\uff1c': u'\uff1e', u'\uff3b': u'\uff3d', u'\uff5b': u'\uff5d', u'\uff5f': u'\uff60', u'\uff62': u'\uff63', } def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''): if boundary_regex_fragment is None: return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \ suffix + r')\b' else: return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \ r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' 
+ \ boundary_regex_fragment + r')' def brackets_callback(token_class): def callback(lexer, match, context): groups = match.groupdict() opening_chars = groups['delimiter'] n_chars = len(opening_chars) adverbs = groups.get('adverbs') closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0]) text = context.text if closer is None: # it's not a mirrored character, which means we # just need to look for the next occurrence end_pos = text.find(opening_chars, match.start('delimiter') + n_chars) else: # we need to look for the corresponding closing character, # keep nesting in mind closing_chars = closer * n_chars nesting_level = 1 search_pos = match.start('delimiter') while nesting_level > 0: next_open_pos = text.find(opening_chars, search_pos + n_chars) next_close_pos = text.find(closing_chars, search_pos + n_chars) if next_close_pos == -1: next_close_pos = len(text) nesting_level = 0 elif next_open_pos != -1 and next_open_pos < next_close_pos: nesting_level += 1 search_pos = next_open_pos else: # next_close_pos < next_open_pos nesting_level -= 1 search_pos = next_close_pos end_pos = next_close_pos if end_pos < 0: # if we didn't find a closer, just highlight the # rest of the text in this class end_pos = len(text) if adverbs is not None and re.search(r':to\b', adverbs): heredoc_terminator = text[match.start('delimiter') + n_chars:end_pos] end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[end_pos:], re.MULTILINE) if end_heredoc: end_pos += end_heredoc.end() else: end_pos = len(text) yield match.start(), token_class, text[match.start():end_pos + n_chars] context.pos = end_pos + n_chars return callback def opening_brace_callback(lexer, match, context): stack = context.stack yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() # if we encounter an opening brace and we're one level # below a token state, it means we need to increment # the nesting level for braces so we know later when # we should return to the token rules. if len(stack) > 2 and stack[-2] == 'token': context.perl6_token_nesting_level += 1 def closing_brace_callback(lexer, match, context): stack = context.stack yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() # if we encounter a free closing brace and we're one level # below a token state, it means we need to check the nesting # level to see if we need to return to the token state. if len(stack) > 2 and stack[-2] == 'token': context.perl6_token_nesting_level -= 1 if context.perl6_token_nesting_level == 0: stack.pop() def embedded_perl6_callback(lexer, match, context): context.perl6_token_nesting_level = 1 yield match.start(), Text, context.text[match.start():match.end()] context.pos = match.end() context.stack.append('root') # If you're modifying these rules, be careful if you need to process '{' or '}' # characters. We have special logic for processing these characters (due to the fact # that you can nest Perl 6 code in regex blocks), so if you need to process one of # them, make sure you also process the corresponding one! 
tokens = { 'common': [ (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), (r'#[^\n]*$', Comment.Single), (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), (r'^=.*?\n\s*?\n', Comment.Multiline), (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), # deal with a special case in the Perl 6 grammar (role q { ... }) (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword), (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'), Name.Builtin), (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), # copied from PerlLexer (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'::\?\w+', Name.Variable.Global), (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'\$(?:<.*?>)+', Name.Variable), (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])' r'(?P=first_char)*)', brackets_callback(String)), # copied from PerlLexer (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), (r'0b[01]+(_[01]+)*', Number.Bin), (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), (r'\d+(_\d+)*', Number.Integer), (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'm\w+(?=\()', Name), (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])' r'(?P=first_char)*)', brackets_callback(String.Regex)), (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), (r'<[^\s=].*?\S>', String), (_build_word_match(PERL6_OPERATORS), Operator), (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name), (r"'(\\\\|\\[^\\]|[^'\\])*'", String), (r'"(\\\\|\\[^\\]|[^"\\])*"', String), ], 'root': [ include('common'), (r'\{', opening_brace_callback), (r'\}', closing_brace_callback), (r'.+?', Text), ], 'pre-token': [ include('common'), (r'\{', Text, ('#pop', 'token')), (r'.+?', Text), ], 'token-sym-brackets': [ (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')), default(('#pop', 'pre-token')), ], 'token': [ (r'\}', Text, '#pop'), (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)), # make sure that quotes in character classes aren't treated as strings (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex), # make sure that '#' characters in quotes aren't treated as comments (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex), (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex), (r'#.*?$', Comment.Single), (r'\{', embedded_perl6_callback), ('.+?', String.Regex), ], } def analyse_text(text): def strip_pod(lines): in_pod = False stripped_lines = [] for line in lines: if re.match(r'^=(?:end|cut)', line): in_pod = False elif re.match(r'^=\w+', line): in_pod = True elif not in_pod: stripped_lines.append(line) return stripped_lines # XXX handle block comments lines = text.splitlines() lines = strip_pod(lines) text = '\n'.join(lines) 
if shebang_matches(text, r'perl6|rakudo|niecza|pugs'): return True saw_perl_decl = False rating = False # check for my/our/has declarations if re.search(r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE + r"+\s+)?[$@%&(]", text): rating = 0.8 saw_perl_decl = True for line in lines: line = re.sub('#.*', '', line) if re.match(r'^\s*$', line): continue # match v6; use v6; use v6.0; use v6.0.0; if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line): return True # match class, module, role, enum, grammar declarations class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line) if class_decl: if saw_perl_decl or class_decl.group('scope') is not None: return True rating = 0.05 continue break return rating def __init__(self, **options): super(Perl6Lexer, self).__init__(**options) self.encoding = options.get('encoding', 'utf-8')
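
# Usage sketch (illustrative): exercising the analyse_text() heuristic above
# through Pygments' guess_lexer().  The Raku snippet is made up; with a stock
# Pygments install the "use v6;" line should score 1.0 and select this lexer,
# though the exact guess can vary between Pygments versions.
def _demo_guess_raku():
    from pygments.lexers import guess_lexer
    code = 'use v6;\nmy @words = <foo bar baz>;\nsay @words.elems;\n'
    print(guess_lexer(code).__class__.__name__)
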
class CFamilyLexer(RegexLexer): """ For C family source code. This is used as a base class to avoid repetitious definitions. """ #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ # preprocessor directives: without whitespace (r'^#if\s+0', Comment.Preproc, 'if0'), ('^#', Comment.Preproc, 'macro'), # or with whitespace ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this), Comment.Preproc), 'if0'), ('^(' + _ws1 + ')(#)', bygroups(using(this), Comment.Preproc), 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'(L?)(")', bygroups(String.Affix, String), 'string'), (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words( ('asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register', 'restricted', 'return', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'), suffix=r'\b'), Keyword), (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b', Keyword.Type), (words(('inline', '_inline', '__inline', 'naked', 'restrict', 'thread', 'typename'), suffix=r'\b'), Keyword.Reserved), # Vector intrinsics (r'(__m(128i|128d|128|64))\b', Keyword.Reserved), # Microsoft-isms (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall', 'cdecl', 'fastcall', 'int32', 'declspec', 'finally', 'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned', 'raise', 'noop', 'identifier', 'forceinline', 'assume'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), (r'[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'(include)(' + _ws1 + r')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)), (r'[^/\n]+', 
Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ] } stdlib_types = { 'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t' } c99_types = { '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t' } linux_types = { 'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t' } def __init__(self, **options): self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True) self.c99highlighting = get_bool_opt(options, 'c99highlighting', True) self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type elif self.c99highlighting and value in self.c99_types: token = Keyword.Type elif self.platformhighlighting and value in self.linux_types: token = Keyword.Type yield index, token, value
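
# Usage sketch (illustrative): the get_tokens_unprocessed() override above
# promotes plain Name tokens such as 'size_t' to Keyword.Type.  The effect is
# shown through the concrete CLexer that Pygments builds on a CFamilyLexer
# like this one, with the 'stdlibhighlighting' option toggled; exact results
# depend on the installed Pygments version.
def _demo_stdlib_highlighting():
    from pygments.lexers import CLexer
    from pygments.token import Keyword
    code = 'size_t n;\n'
    with_stdlib = list(CLexer().get_tokens(code))
    without_stdlib = list(CLexer(stdlibhighlighting=False).get_tokens(code))
    print(any(tok is Keyword.Type for tok, _ in with_stdlib))     # likely True
    print(any(tok is Keyword.Type for tok, _ in without_stdlib))  # likely False
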
class ValaLexer(RegexLexer): """ For Vala source code with preprocessor directives. .. versionadded:: 1.1 """ name = 'Vala' aliases = ['vala', 'vapi'] filenames = ['*.vala', '*.vapi'] mimetypes = ['text/x-vala'] tokens = { 'whitespace': [ (r'^\s*#if\s+0', Comment.Preproc, 'if0'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ (r'[L@]?"', String, 'string'), (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'(?s)""".*?"""', String), # verbatim strings (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])', bygroups(Punctuation, Name.Decorator, Punctuation)), # TODO: "correctly" parse complex code attributes (r'(\[)(CCode|(?:Integer|Floating)Type)', bygroups(Punctuation, Name.Decorator)), (r'[()\[\],.]', Punctuation), (words( ('as', 'base', 'break', 'case', 'catch', 'construct', 'continue', 'default', 'delete', 'do', 'else', 'enum', 'finally', 'for', 'foreach', 'get', 'if', 'in', 'is', 'lock', 'new', 'out', 'params', 'return', 'set', 'sizeof', 'switch', 'this', 'throw', 'try', 'typeof', 'while', 'yield'), suffix=r'\b'), Keyword), (words(('abstract', 'const', 'delegate', 'dynamic', 'ensures', 'extern', 'inline', 'internal', 'override', 'owned', 'private', 'protected', 'public', 'ref', 'requires', 'signal', 'static', 'throws', 'unowned', 'var', 'virtual', 'volatile', 'weak', 'yields'), suffix=r'\b'), Keyword.Declaration), (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace, Text), 'namespace'), (r'(class|errordomain|interface|struct)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), # void is an actual keyword, others are in glib-2.0.vapi (words( ('void', 'bool', 'char', 'double', 'float', 'int', 'int8', 'int16', 'int32', 'int64', 'long', 'short', 'size_t', 'ssize_t', 'string', 'time_t', 'uchar', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'), suffix=r'\b'), Keyword.Type), (r'(true|false|null)\b', Name.Builtin), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ], 'class': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')], 'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')], }
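
# Usage sketch (illustrative): running a tiny made-up Vala snippet through the
# ValaLexer shipped with Pygments, just to see the token stream the
# 'statements' rules produce.
def _demo_vala_tokens():
    from pygments.lexers import ValaLexer
    code = 'void main () {\n    stdout.printf ("hello\\n");\n}\n'
    for token_type, value in ValaLexer().get_tokens(code):
        print(token_type, repr(value))
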
class CFamilyLexer(RegexLexer): """ For C family source code. This is used as a base class to avoid repetitious definitions. """ #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' # Hexadecimal part in an hexadecimal integer/floating-point literal. # This includes decimal separators matching. _hexpart = r'[0-9a-fA-F](\'?[0-9a-fA-F])*' # Decimal part in an decimal integer/floating-point literal. # This includes decimal separators matching. _decpart = r'\d(\'?\d)*' # Integer literal suffix (e.g. 'ull' or 'll'). _intsuffix = r'(([uU][lL]{0,2})|[lL]{1,2}[uU]?)?' # Identifier regex with C and C++ Universal Character Name (UCN) support. _ident = r'(?:[a-zA-Z_$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*' tokens = { 'whitespace': [ # preprocessor directives: without whitespace (r'^#if\s+0', Comment.Preproc, 'if0'), ('^#', Comment.Preproc, 'macro'), # or with whitespace ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this), Comment.Preproc), 'if0'), ('^(' + _ws1 + ')(#)', bygroups(using(this), Comment.Preproc), 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'([LuU]|u8)?(")', bygroups(String.Affix, String), 'string'), (r"([LuU]|u8)?(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), # Hexadecimal floating-point literals (C11, C++17) (r'0[xX](' + _hexpart + r'\.' + _hexpart + r'|\.' + _hexpart + r'|' + _hexpart + r')[pP][+-]?' + _hexpart + r'[lL]?', Number.Float), (r'(-)?(' + _decpart + r'\.' + _decpart + r'|\.' + _decpart + r'|' + _decpart + r')[eE][+-]?' + _decpart + r'[fFlL]?', Number.Float), (r'(-)?((' + _decpart + r'\.(' + _decpart + r')?|\.' + _decpart + r')[fFlL]?)|(' + _decpart + r'[fFlL])', Number.Float), (r'(-)?0[xX]' + _hexpart + _intsuffix, Number.Hex), (r'(-)?0[bB][01](\'?[01])*' + _intsuffix, Number.Bin), (r'(-)?0(\'?[0-7])+' + _intsuffix, Number.Oct), (r'(-)?' 
+ _decpart + _intsuffix, Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (r'(struct|union)(\s+)', bygroups(Keyword, Text), 'classname'), (words(('asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register', 'restricted', 'return', 'sizeof', 'struct', 'static', 'switch', 'typedef', 'volatile', 'while', 'union', 'thread_local', 'alignas', 'alignof', 'static_assert', '_Pragma'), suffix=r'\b'), Keyword), (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b', Keyword.Type), (words(('inline', '_inline', '__inline', 'naked', 'restrict', 'thread'), suffix=r'\b'), Keyword.Reserved), # Vector intrinsics (r'(__m(128i|128d|128|64))\b', Keyword.Reserved), # Microsoft-isms (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall', 'cdecl', 'fastcall', 'int32', 'declspec', 'finally', 'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned', 'raise', 'noop', 'identifier', 'forceinline', 'assume'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), (r'(' + _ident + r')(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), (_ident, Name) ], 'root': [ include('whitespace'), # functions ( r'((?:' + _ident + r'(?:[&*\s])+))' # return arguments r'(' + _ident + r')' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:' + _ident + r'(?:[&*\s])+))' # return arguments r'(' + _ident + r')' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), (r'\}', Punctuation), (r'[{;]', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'(include)(' + _ws1 + r')("[^"]+")([^\n]*)', bygroups(Comment.Preproc, using(this), Comment.PreprocFile, Comment.Single)), (r'(include)(' + _ws1 + r')(<[^>]+>)([^\n]*)', bygroups(Comment.Preproc, using(this), Comment.PreprocFile, Comment.Single)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ], 'classname': [ (_ident, Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), default('#pop') ] } stdlib_types = { 'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t' } c99_types = { 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 
'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t' } linux_types = { 'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t' } c11_atomic_types = { 'atomic_bool', 'atomic_char', 'atomic_schar', 'atomic_uchar', 'atomic_short', 'atomic_ushort', 'atomic_int', 'atomic_uint', 'atomic_long', 'atomic_ulong', 'atomic_llong', 'atomic_ullong', 'atomic_char16_t', 'atomic_char32_t', 'atomic_wchar_t', 'atomic_int_least8_t', 'atomic_uint_least8_t', 'atomic_int_least16_t', 'atomic_uint_least16_t', 'atomic_int_least32_t', 'atomic_uint_least32_t', 'atomic_int_least64_t', 'atomic_uint_least64_t', 'atomic_int_fast8_t', 'atomic_uint_fast8_t', 'atomic_int_fast16_t', 'atomic_uint_fast16_t', 'atomic_int_fast32_t', 'atomic_uint_fast32_t', 'atomic_int_fast64_t', 'atomic_uint_fast64_t', 'atomic_intptr_t', 'atomic_uintptr_t', 'atomic_size_t', 'atomic_ptrdiff_t', 'atomic_intmax_t', 'atomic_uintmax_t' } def __init__(self, **options): self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True) self.c99highlighting = get_bool_opt(options, 'c99highlighting', True) self.c11highlighting = get_bool_opt(options, 'c11highlighting', True) self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type elif self.c99highlighting and value in self.c99_types: token = Keyword.Type elif self.c11highlighting and value in self.c11_atomic_types: token = Keyword.Type elif self.platformhighlighting and value in self.linux_types: token = Keyword.Type yield index, token, value
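
# Usage sketch (illustrative): the hexadecimal floating-point rule added above
# targets C11/C++17 literals such as 0x1.8p3.  Checking how Pygments' CLexer
# tokenizes one; whether it comes out as Number.Float depends on the installed
# Pygments version shipping this rule.
def _demo_hex_float_literal():
    from pygments.lexers import CLexer
    from pygments.token import Number
    tokens = CLexer().get_tokens('double d = 0x1.8p3;\n')
    print([(tok, val) for tok, val in tokens if tok in Number])
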
class PhpLexer(RegexLexer): """ For `PHP <http://www.php.net/>`_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``<?php`` required). The default is ``False``. `funcnamehighlighting` If given and ``True``, highlight builtin function names (default: ``True``). `disabledmodules` If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted except the special ``'unknown'`` module that includes functions that are known to php but are undocumented. To get a list of allowed modules have a look into the `_php_builtins` module: .. sourcecode:: pycon >>> from pygments.lexers._php_builtins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]', '*.inc'] mimetypes = ['text/x-php'] # Note that a backslash is included in the following two patterns # PHP uses a backslash as a namespace separator _ident_char = r'[\\\w]|[^\x00-\x7f]' _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])' _ident_end = r'(?:' + _ident_char + ')*' _ident_inner = _ident_begin + _ident_end flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other) ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)', bygroups(String, String, String.Delimiter, String, String.Delimiter, Punctuation, Text)), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), # put the empty comment here, it is otherwise seen as # the start of a docstring (r'/\*\*/', Comment.Multiline), (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment.Multiline), (r'(->|::)(\s*)(' + _ident_inner + ')', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/@-]+', Operator), (r'\?', Operator), # don't add to the charclass above! 
(r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)(' + _ident_inner + ')', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|' r'endif|list|endswitch|new|endwhile|not|' r'array|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this|use|namespace|trait|yield|' r'finally)\b', Keyword), (r'(true|false|null)\b', Keyword.Constant), include('magicconstants'), (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable), (r'\$+' + _ident_inner, Name.Variable), (_ident_inner, Name.Other), (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float), (r'\d+e[+-]?[0-9]+', Number.Float), (r'0[0-7]+', Number.Oct), (r'0x[a-f0-9]+', Number.Hex), (r'\d+', Number.Integer), (r'0b[01]+', Number.Bin), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'magicfuncs': [ # source: http://php.net/manual/en/language.oop5.magic.php (words(( '__construct', '__destruct', '__call', '__callStatic', '__get', '__set', '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke', '__set_state', '__clone', '__debugInfo',), suffix=r'\b'), Name.Function.Magic), ], 'magicconstants': [ # source: http://php.net/manual/en/language.constants.predefined.php (words(( '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__', '__TRAIT__', '__METHOD__', '__NAMESPACE__',), suffix=r'\b'), Name.Constant), ], 'classname': [ (_ident_inner, Name.Class, '#pop') ], 'functionname': [ include('magicfuncs'), (_ident_inner, Name.Function, '#pop'), default('#pop') ], 'string': [ (r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape), (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]', String.Double) ], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt( options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt( options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from pygments.lexers._php_builtins import MODULES for key, value in iteritems(MODULES): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield 
index, Name.Builtin, value continue yield index, token, value def analyse_text(text): if shebang_matches(text, r'php'): return True rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 return rv
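
# Usage sketch (illustrative): the 'startinline' option documented above makes
# the lexer treat input as PHP without a leading '<?php'.  A quick comparison
# using Pygments' PhpLexer; the exact token types depend on the installed
# Pygments version.
def _demo_php_startinline():
    from pygments.lexers import PhpLexer
    code = '$greeting = "hello";\n'
    print(next(iter(PhpLexer().get_tokens(code))))                  # likely Token.Other
    print(next(iter(PhpLexer(startinline=True).get_tokens(code))))  # likely a variable token
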
class CythonLexer(RegexLexer): """ For Pyrex and `Cython <http://cython.org>`_ source code. .. versionadded:: 1.1 """ name = 'Cython' aliases = ['cython', 'pyx', 'pyrex'] filenames = ['*.pyx', '*.pxd', '*.pxi'] mimetypes = ['text/x-cython', 'application/x-cython'] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), (r'#.*$', Comment), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'(<)([a-zA-Z0-9.?]+)(>)', bygroups(Punctuation, Keyword.Type, Punctuation)), (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', bygroups(Keyword, Number.Integer, Operator, Name, Operator, Name, Punctuation)), include('keywords'), (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), # (should actually start a block with only cdefs) (r'(cdef)(:)', bygroups(Keyword, Punctuation)), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), include('builtins'), include('backtick'), ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), ('[uU]?"""', String, combined('stringescape', 'tdqs')), ("[uU]?'''", String, combined('stringescape', 'tsqs')), ('[uU]?"', String, combined('stringescape', 'dqs')), ("[uU]?'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(('assert', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil', 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), Keyword), (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), ], 'builtins': [ (words( ('__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', 'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin), (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL' r')\b', Name.Builtin.Pseudo), (words( ('ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError', 
'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Exception), ], 'numbers': [(r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'0\d+', Number.Oct), (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer)], 'backtick': [ ('`.*?`', String.Backtick), ], 'name': [ (r'@\w+', Name.Decorator), ('[a-zA-Z_]\w*', Name), ], 'funcname': [('[a-zA-Z_]\w*', Name.Function, '#pop')], 'cdef': [ (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved), (r'(struct|enum|union|class)\b', Keyword), (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)', bygroups(Name.Function, Text), '#pop'), (r'([a-zA-Z_]\w*)(\s*)(,)', bygroups(Name.Function, Text, Punctuation)), (r'from\b', Keyword, '#pop'), (r'as\b', Keyword), (r':', Punctuation, '#pop'), (r'(?=["\'])', Text, '#pop'), (r'[a-zA-Z_]\w*', Keyword.Type), (r'.', Text), ], 'classname': [('[a-zA-Z_]\w*', Name.Class, '#pop')], 'import': [ (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)), (r'[a-zA-Z_][\w.]*', Name.Namespace), (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)), default('#pop') # all else: go back ], 'fromimport': [ (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'), (r'[a-zA-Z_.][\w.]*', Name.Namespace), # ``cdef foo from "header"``, or ``for foo from 0 < i < 10`` default('#pop'), ], 'stringescape': [(r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), # unhandled string formatting sign (r'%', String) # newlines are an error (use "nl" state) ], 'nl': [(r'\n', String)], 'dqs': [ (r'"', String, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings include('strings') ], 'sqs': [ (r"'", String, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings include('strings') ], 'tdqs': [(r'"""', String, '#pop'), include('strings'), include('nl')], 'tsqs': [(r"'''", String, '#pop'), include('strings'), include('nl')], }
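
# Usage sketch (illustrative): a couple of Cython lines run through the
# CythonLexer shipped with Pygments, mainly to show the 'cdef' state tagging C
# type names as Keyword.Type.  The snippet is made up.
def _demo_cython_cdef():
    from pygments.lexers import CythonLexer
    code = 'cdef int count = 0\ndef py_func():\n    return count\n'
    for token_type, value in CythonLexer().get_tokens(code):
        if value.strip():
            print(token_type, repr(value))
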
class JavascriptLexer(RegexLexer):
    """
    For JavaScript source code.
    """

    name = 'JavaScript'
    aliases = ['js', 'javascript']
    filenames = ['*.js', '*.jsm']
    mimetypes = ['application/javascript', 'application/x-javascript',
                 'text/x-javascript', 'text/javascript']

    flags = re.DOTALL | re.UNICODE | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'<!--', Comment),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'\A#! ?/.*?\n', Comment.Hashbang),  # recognized by node.js
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            (JS_IDENT, Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
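
# Usage sketch (illustrative): the 'slashstartsregex' state above is what lets
# a '/' that follows '=' or an opening bracket start a regex literal, while a
# '/' after an identifier stays a division operator.  Checked with Pygments'
# JavascriptLexer; exact behaviour depends on the installed version.
def _demo_js_regex_vs_division():
    from pygments.lexers import JavascriptLexer
    from pygments.token import String

    def has_regex(src):
        return any(tok in String.Regex
                   for tok, _ in JavascriptLexer().get_tokens(src))

    print(has_regex('var re = /ab+c/g;'))   # likely True
    print(has_regex('total = a / b / c;'))  # likely False
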
class TypeScriptLexer(RegexLexer): """ For `TypeScript <http://typescriptlang.org/>`_ source code. .. versionadded:: 1.6 """ name = 'TypeScript' aliases = ['ts', 'typescript'] filenames = ['*.ts', '*.tsx'] mimetypes = ['text/x-typescript'] flags = re.DOTALL | re.MULTILINE # Higher priority than the TypoScriptLexer, as TypeScript is far more # common these days priority = 0.5 tokens = { 'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline)], 'slashstartsregex': [ include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop') ], 'badregex': [(r'\n', Text, '#pop')], 'tag': [ (r'\s+', Text), (r'[,\|]', Punctuation), (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text), 'attr'), (r'[\w:-]+', Name.Attribute), (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'), ], 'attr': [("{", Punctuation, 'root'), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), default('#pop')], 'root': [ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'(<)([\w:.-]+)', bygroups(Punctuation, Name.Tag), 'tag'), (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation)), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), ("{", Punctuation, 'root'), ("}", Punctuation, '#pop'), (r'[(\[;,]', Punctuation, 'slashstartsregex'), (r'[)\].]', Punctuation), (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' r'throw|try|catch|finally|new|delete|typeof|instanceof|void|of|' r'this)\b', Keyword, 'slashstartsregex'), (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|' r'extends|final|float|goto|implements|import|int|interface|long|native|' r'package|private|protected|public|short|static|super|synchronized|throws|' r'transient|volatile)\b', Keyword.Reserved), (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant), (r'(Array|Boolean|Date|Error|Function|Math|netscape|' r'Number|Object|Packages|RegExp|String|sun|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin), # Match stuff like: module name {...} (r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)', bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex'), # Match variable type keywords (r'\b(string|bool|number)\b', Keyword.Type), # Match stuff like: constructor (r'\b(constructor|declare|interface|as|AS)\b', Keyword.Reserved), # Match stuff like: super(argument, list) (r'(super)(\s*)(\([\w,?.$\s]+\s*\))', bygroups(Keyword.Reserved, Text), 'slashstartsregex'), # Match stuff like: function() {...} (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'), # Match stuff like: (function: return type) (r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)', bygroups(Name.Other, Text, Keyword.Type)), (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'`', String.Backtick, 'interp'), # Match stuff like: Decorators (r'@\w+', Keyword.Declaration), ], # The 'interp*' rules match those in 
JavascriptLexer. Changes made # there should be reflected here as well. 'interp': [ (r'`', String.Backtick, '#pop'), (r'\\.', String.Backtick), (r'\$\{', String.Interpol, 'interp-inside'), (r'\$', String.Backtick), (r'[^`\\$]+', String.Backtick), ], 'interp-inside': [ # TODO: should this include single-line comments and allow nesting strings? (r'\}', String.Interpol, '#pop'), include('root'), ], }
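# Illustrative usage sketch, not part of the original lexer: the 'interp' and
# 'interp-inside' states above tokenize template literals, emitting
# String.Backtick for literal text and String.Interpol for the ${ } markers.
# The helper name and the sample line are invented for demonstration only.
def _demo_typescript_template_literal():
    from pygments.token import String
    lexer = TypeScriptLexer()
    tokens = list(lexer.get_tokens('const s = `hello ${name}`;'))
    assert any(tok is String.Interpol for tok, _ in tokens)
    assert any(tok is String.Backtick for tok, _ in tokens)
    return tokens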
class AdaLexer(RegexLexer): """ For Ada source code. .. versionadded:: 1.3 """ name = 'Ada' aliases = ['ada', 'ada95', 'ada2005'] filenames = ['*.adb', '*.ads', '*.ada'] mimetypes = ['text/x-ada'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'[^\S\n]+', Text), (r'--.*?\n', Comment.Single), (r'[^\S\n]+', Text), (r'function|procedure|entry', Keyword.Declaration, 'subprogram'), (r'(subtype|type)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'), (r'task|protected', Keyword.Declaration), (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)), (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, Comment.Preproc)), (r'(true|false|null)\b', Keyword.Constant), (words( ('Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type', 'Short_Float', 'Short_Integer', 'Short_Short_Float', 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'), suffix=r'\b'), Keyword.Type), (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word), (r'generic|private', Keyword.Declaration), (r'package', Keyword.Declaration, 'package'), (r'array\b', Keyword.Reserved, 'array_def'), (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'(\w+)(\s*)(:)(\s*)(constant)', bygroups(Name.Constant, Text, Punctuation, Text, Keyword.Reserved)), (r'<<\w+>>', Name.Label), (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)', bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)), (words( ('abort', 'abs', 'abstract', 'accept', 'access', 'aliased', 'all', 'array', 'at', 'begin', 'body', 'case', 'constant', 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding', 'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue', 'return', 'reverse', 'select', 'separate', 'subtype', 'synchronized', 'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), (r'"[^"]*"', String), include('attribute'), include('numbers'), (r"'[^']'", String.Character), (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))), (r"(<>|=>|:=|[()|:;,.'])", Punctuation), (r'[*<>+=/&-]', Operator), (r'\n+', Text), ], 'numbers': [ (r'[0-9_]+#[0-9a-f]+#', Number.Hex), (r'[0-9_]+\.[0-9_]*', Number.Float), (r'[0-9_]+', Number.Integer), ], 'attribute': [ (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)), ], 'subprogram': [ (r'\(', Punctuation, ('#pop', 'formal_part')), (r';', Punctuation, '#pop'), (r'is\b', Keyword.Reserved, '#pop'), (r'"[^"]+"|\w+', Name.Function), include('root'), ], 'end': [ ('(if|case|record|loop|select)', Keyword.Reserved), ('"[^"]+"|[\w.]+', Name.Function), ('\s+', Text), (';', Punctuation, '#pop'), ], 'type_def': [ (r';', Punctuation, '#pop'), (r'\(', Punctuation, 'formal_part'), (r'with|and|use', Keyword.Reserved), (r'array\b', Keyword.Reserved, ('#pop', 'array_def')), (r'record\b', Keyword.Reserved, ('record_def')), (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation), '#pop'), include('root'), ], 'array_def': [ (r';', Punctuation, '#pop'), (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text, Keyword.Reserved)), include('root'), ], 'record_def': [ (r'end record', 
Keyword.Reserved, '#pop'), include('root'), ], 'import': [ (r'[\w.]+', Name.Namespace, '#pop'), default('#pop'), ], 'formal_part': [ (r'\)', Punctuation, '#pop'), (r'\w+', Name.Variable), (r',|:[^=]', Punctuation), (r'(in|not|null|out|access)\b', Keyword.Reserved), include('root'), ], 'package': [ ('body', Keyword.Declaration), ('is\s+new|renames', Keyword.Reserved), ('is', Keyword.Reserved, '#pop'), (';', Punctuation, '#pop'), ('\(', Punctuation, 'package_instantiation'), ('([\w.]+)', Name.Class), include('root'), ], 'package_instantiation': [ (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text, Punctuation)), (r'[\w.\'"]', Text), (r'\)', Punctuation, '#pop'), include('root'), ], }
class CoffeeScriptLexer(RegexLexer): """ For `CoffeeScript`_ source code. .. _CoffeeScript: http://coffeescript.org .. versionadded:: 1.3 """ name = 'CoffeeScript' aliases = ['coffee-script', 'coffeescript', 'coffee'] filenames = ['*.coffee'] mimetypes = ['text/coffeescript'] flags = re.DOTALL tokens = { 'commentsandwhitespace': [ (r'\s+', Text), (r'###[^#].*?###', Comment.Multiline), (r'#(?!##[^#]).*?\n', Comment.Single), ], 'multilineregex': [ (r'[^/#]+', String.Regex), (r'///([gim]+\b|\B)', String.Regex, '#pop'), (r'#\{', String.Interpol, 'interpoling_string'), (r'[/#]', String.Regex), ], 'slashstartsregex': [ include('commentsandwhitespace'), (r'///', String.Regex, ('#pop', 'multilineregex')), (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), default('#pop'), ], 'root': [ # this next expr leads to infinite loops root -> slashstartsregex # (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|' r'\|\||\\(?=\n)|' r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&|^/])=?', Operator, 'slashstartsregex'), (r'(?:\([^()]*\))?\s*[=-]>', Name.Function), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(?<![.$])(for|own|in|of|while|until|' r'loop|break|return|continue|' r'switch|when|then|if|unless|else|' r'throw|try|catch|finally|new|delete|typeof|instanceof|super|' r'extends|this|class|by)\b', Keyword, 'slashstartsregex'), (r'(?<![.$])(true|false|yes|no|on|off|null|' r'NaN|Infinity|undefined)\b', Keyword.Constant), (r'(Array|Boolean|Date|Error|Function|Math|netscape|' r'Number|Object|Packages|RegExp|String|sun|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' r'eval|isFinite|isNaN|parseFloat|parseInt|document|window)\b', Name.Builtin), (r'[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable, 'slashstartsregex'), (r'@[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable.Instance, 'slashstartsregex'), (r'@', Name.Other, 'slashstartsregex'), (r'@?[$a-zA-Z_][\w$]*', Name.Other, 'slashstartsregex'), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), ('"""', String, 'tdqs'), ("'''", String, 'tsqs'), ('"', String, 'dqs'), ("'", String, 'sqs'), ], 'strings': [ (r'[^#\\\'"]+', String), # note that all coffee script strings are multi-line. # hashmarks, quotes and backslashes must be parsed one at a time ], 'interpoling_string': [(r'\}', String.Interpol, "#pop"), include('root')], 'dqs': [ (r'"', String, '#pop'), (r'\\.|\'', String), # double-quoted string don't need ' escapes (r'#\{', String.Interpol, "interpoling_string"), (r'#', String), include('strings') ], 'sqs': [ (r"'", String, '#pop'), (r'#|\\.|"', String), # single quoted strings don't need " escapses include('strings') ], 'tdqs': [ (r'"""', String, '#pop'), (r'\\.|\'|"', String), # no need to escape quotes in triple-string (r'#\{', String.Interpol, "interpoling_string"), (r'#', String), include('strings'), ], 'tsqs': [ (r"'''", String, '#pop'), (r'#|\\.|\'|"', String), # no need to escape quotes in triple-strings include('strings') ], }
class ObjectiveJLexer(RegexLexer): """ For Objective-J source code with preprocessor directives. .. versionadded:: 1.3 """ name = 'Objective-J' aliases = ['objective-j', 'objectivej', 'obj-j', 'objj'] filenames = ['*.j'] mimetypes = ['text/x-objective-j'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)*' flags = re.DOTALL | re.MULTILINE tokens = { 'root': [ include('whitespace'), # function definition (r'^(' + _ws + r'[+-]' + _ws + r')([(a-zA-Z_].*?[^(])(' + _ws + r'\{)', bygroups(using(this), using(this, state='function_signature'), using(this))), # class definition (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(@class|@protocol)(\s*)', bygroups(Keyword, Text), 'forward_classname'), (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)), include('statements'), ('[{()}]', Punctuation), (';', Punctuation), ], 'whitespace': [ (r'(@import)(\s+)("(?:\\\\|\\"|[^"])*")', bygroups(Comment.Preproc, Text, String.Double)), (r'(@import)(\s+)(<(?:\\\\|\\>|[^>])*>)', bygroups(Comment.Preproc, Text, String.Double)), (r'(#(?:include|import))(\s+)("(?:\\\\|\\"|[^"])*")', bygroups(Comment.Preproc, Text, String.Double)), (r'(#(?:include|import))(\s+)(<(?:\\\\|\\>|[^>])*>)', bygroups(Comment.Preproc, Text, String.Double)), (r'#if\s+0', Comment.Preproc, 'if0'), (r'#', Comment.Preproc, 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), (r'<!--', Comment), ], 'slashstartsregex': [ include('whitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop'), ], 'badregex': [ (r'\n', Text, '#pop'), ], 'statements': [ (r'(L|@)?"', String, 'string'), (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(for|in|while|do|break|return|continue|switch|case|default|if|' r'else|throw|try|catch|finally|new|delete|typeof|instanceof|void|' r'prototype|__proto__)\b', Keyword, 'slashstartsregex'), (r'(var|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(@selector|@private|@protected|@public|@encode|' r'@synchronized|@try|@throw|@catch|@finally|@end|@property|' r'@synthesize|@dynamic|@for|@accessors|new)\b', Keyword), (r'(int|long|float|short|double|char|unsigned|signed|void|' r'id|BOOL|bool|boolean|IBOutlet|IBAction|SEL|@outlet|@action)\b', Keyword.Type), (r'(self|super)\b', Name.Builtin), (r'(TRUE|YES|FALSE|NO|Nil|nil|NULL)\b', Keyword.Constant), (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant), (r'(ABS|ASIN|ACOS|ATAN|ATAN2|SIN|COS|TAN|EXP|POW|CEIL|FLOOR|ROUND|' r'MIN|MAX|RAND|SQRT|E|LN2|LN10|LOG2E|LOG10E|PI|PI2|PI_2|SQRT1_2|' r'SQRT2)\b', Keyword.Constant), (r'(Array|Boolean|Date|Error|Function|Math|netscape|' r'Number|Object|Packages|RegExp|String|sun|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', 
Name.Builtin), (r'([$a-zA-Z_]\w*)(' + _ws + r')(?=\()', bygroups(Name.Function, using(this))), (r'[$a-zA-Z_]\w*', Name), ], 'classname': [ # interface definition that inherits (r'([a-zA-Z_]\w*)(' + _ws + r':' + _ws + r')([a-zA-Z_]\w*)?', bygroups(Name.Class, using(this), Name.Class), '#pop'), # interface definition for a category (r'([a-zA-Z_]\w*)(' + _ws + r'\()([a-zA-Z_]\w*)(\))', bygroups(Name.Class, using(this), Name.Label, Text), '#pop'), # simple interface / implementation (r'([a-zA-Z_]\w*)', Name.Class, '#pop'), ], 'forward_classname': [ (r'([a-zA-Z_]\w*)(\s*,\s*)', bygroups(Name.Class, Text), '#push'), (r'([a-zA-Z_]\w*)(\s*;?)', bygroups(Name.Class, Text), '#pop'), ], 'function_signature': [ include('whitespace'), # start of a selector w/ parameters ( r'(\(' + _ws + r')' # open paren r'([a-zA-Z_]\w+)' # return type r'(' + _ws + r'\)' + _ws + r')' # close paren r'([$a-zA-Z_]\w+' + _ws + r':)', # function name bygroups(using(this), Keyword.Type, using(this), Name.Function), 'function_parameters'), # no-param function ( r'(\(' + _ws + r')' # open paren r'([a-zA-Z_]\w+)' # return type r'(' + _ws + r'\)' + _ws + r')' # close paren r'([$a-zA-Z_]\w+)', # function name bygroups(using(this), Keyword.Type, using(this), Name.Function), "#pop"), # no return type given, start of a selector w/ parameters ( r'([$a-zA-Z_]\w+' + _ws + r':)', # function name bygroups(Name.Function), 'function_parameters'), # no return type given, no-param function ( r'([$a-zA-Z_]\w+)', # function name bygroups(Name.Function), "#pop"), default('#pop'), ], 'function_parameters': [ include('whitespace'), # parameters ( r'(\(' + _ws + ')' # open paren r'([^)]+)' # type r'(' + _ws + r'\)' + _ws + r')' # close paren r'([$a-zA-Z_]\w+)', # param name bygroups(using(this), Keyword.Type, using(this), Text)), # one piece of a selector name ( r'([$a-zA-Z_]\w+' + _ws + r':)', # function name Name.Function), # smallest possible selector piece (r'(:)', Name.Function), # var args (r'(,' + _ws + r'\.\.\.)', using(this)), # param name (r'([$a-zA-Z_]\w+)', Text), ], 'expression': [ (r'([$a-zA-Z_]\w*)(\()', bygroups(Name.Function, Punctuation)), (r'(\))', Punctuation, "#pop"), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ] } def analyse_text(text): if re.search('^\s*@import\s+[<"]', text, re.MULTILINE): # special directive found in most Objective-J files return True return False
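# Illustrative sketch, not part of the original lexer: analyse_text above
# treats a leading '@import <...>' or '@import "..."' directive as a strong
# signal that a snippet is Objective-J even without a filename. The sample
# strings are invented for demonstration only.
def _demo_objectivej_analyse_text():
    assert ObjectiveJLexer.analyse_text('@import <Foundation/Foundation.j>\n')
    assert not ObjectiveJLexer.analyse_text('console.log("hi");\n')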
class LassoLexer(RegexLexer): """ For `Lasso <http://www.lassosoft.com/>`_ source code, covering both Lasso 9 syntax and LassoScript for Lasso 8.6 and earlier. For Lasso embedded in HTML, use the `LassoHtmlLexer`. Additional options accepted: `builtinshighlighting` If given and ``True``, highlight builtin types, traits, methods, and members (default: ``True``). `requiredelimiters` If given and ``True``, only highlight code between delimiters as Lasso (default: ``False``). .. versionadded:: 1.6 """ name = 'Lasso' aliases = ['lasso', 'lassoscript'] filenames = ['*.lasso', '*.lasso[89]'] alias_filenames = ['*.incl', '*.inc', '*.las'] mimetypes = ['text/x-lasso'] flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'^#!.+lasso9\b', Comment.Preproc, 'lasso'), (r'\[no_square_brackets\]', Comment.Preproc, 'nosquarebrackets'), (r'\[noprocess\]', Comment.Preproc, ('delimiters', 'noprocess')), (r'\[', Comment.Preproc, ('delimiters', 'squarebrackets')), (r'<\?(LassoScript|lasso|=)', Comment.Preproc, ('delimiters', 'anglebrackets')), (r'<(!--.*?-->)?', Other, 'delimiters'), (r'\s+', Other), default(('delimiters', 'lassofile')), ], 'delimiters': [ (r'\[no_square_brackets\]', Comment.Preproc, 'nosquarebrackets'), (r'\[noprocess\]', Comment.Preproc, 'noprocess'), (r'\[', Comment.Preproc, 'squarebrackets'), (r'<\?(LassoScript|lasso|=)', Comment.Preproc, 'anglebrackets'), (r'<(!--.*?-->)?', Other), (r'[^[<]+', Other), ], 'nosquarebrackets': [ (r'<\?(LassoScript|lasso|=)', Comment.Preproc, 'anglebrackets'), (r'<', Other), (r'[^<]+', Other), ], 'noprocess': [ (r'\[/noprocess\]', Comment.Preproc, '#pop'), (r'\[', Other), (r'[^[]', Other), ], 'squarebrackets': [ (r'\]', Comment.Preproc, '#pop'), include('lasso'), ], 'anglebrackets': [ (r'\?>', Comment.Preproc, '#pop'), include('lasso'), ], 'lassofile': [ (r'\]|\?>', Comment.Preproc, '#pop'), include('lasso'), ], 'whitespacecomments': [ (r'\s+', Text), (r'//.*?\n', Comment.Single), (r'/\*\*!.*?\*/', String.Doc), (r'/\*.*?\*/', Comment.Multiline), ], 'lasso': [ # whitespace/comments include('whitespacecomments'), # literals (r'\d*\.\d+(e[+-]?\d+)?', Number.Float), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'([+-]?)(infinity|NaN)\b', bygroups(Operator, Number)), (r"'", String.Single, 'singlestring'), (r'"', String.Double, 'doublestring'), (r'`[^`]*`', String.Backtick), # names (r'\$[a-z_][\w.]*', Name.Variable), (r'#([a-z_][\w.]*|\d+)', Name.Variable.Instance), (r"(\.)('[a-z_][\w.]*')", bygroups(Name.Builtin.Pseudo, Name.Variable.Class)), (r"(self)(\s*->\s*)('[a-z_][\w.]*')", bygroups(Name.Builtin.Pseudo, Operator, Name.Variable.Class)), (r'(\.\.?)([a-z_][\w.]*(=(?!=))?)', bygroups(Name.Builtin.Pseudo, Name.Other.Member)), (r'(->\\?\s*|&\s*)([a-z_][\w.]*(=(?!=))?)', bygroups(Operator, Name.Other.Member)), (r'(self|inherited)\b', Name.Builtin.Pseudo), (r'-[a-z_][\w.]*', Name.Attribute), (r'::\s*[a-z_][\w.]*', Name.Label), (r'(error_(code|msg)_\w+|Error_AddError|Error_ColumnRestriction|' r'Error_DatabaseConnectionUnavailable|Error_DatabaseTimeout|' r'Error_DeleteError|Error_FieldRestriction|Error_FileNotFound|' r'Error_InvalidDatabase|Error_InvalidPassword|' r'Error_InvalidUsername|Error_ModuleNotFound|' r'Error_NoError|Error_NoPermission|Error_OutOfMemory|' r'Error_ReqColumnMissing|Error_ReqFieldMissing|' r'Error_RequiredColumnMissing|Error_RequiredFieldMissing|' r'Error_UpdateError)\b', Name.Exception), # definitions (r'(define)(\s+)([a-z_][\w.]*)(\s*=>\s*)(type|trait|thread)\b', bygroups(Keyword.Declaration, Text, Name.Class, 
Operator, Keyword)), (r'(define)(\s+)([a-z_][\w.]*)(\s*->\s*)([a-z_][\w.]*=?|[-+*/%])', bygroups(Keyword.Declaration, Text, Name.Class, Operator, Name.Function), 'signature'), (r'(define)(\s+)([a-z_][\w.]*)', bygroups(Keyword.Declaration, Text, Name.Function), 'signature'), (r'(public|protected|private|provide)(\s+)(([a-z_][\w.]*=?|[-+*/%])' r'(?=\s*\())', bygroups(Keyword, Text, Name.Function), 'signature'), (r'(public|protected|private|provide)(\s+)([a-z_][\w.]*)', bygroups(Keyword, Text, Name.Function)), # keywords (r'(true|false|none|minimal|full|all|void)\b', Keyword.Constant), (r'(local|var|variable|global|data(?=\s))\b', Keyword.Declaration), (r'(array|date|decimal|duration|integer|map|pair|string|tag|xml|' r'null|bytes|list|queue|set|stack|staticarray|tie)\b', Keyword.Type), (r'([a-z_][\w.]*)(\s+)(in)\b', bygroups(Name, Text, Keyword)), (r'(let|into)(\s+)([a-z_][\w.]*)', bygroups(Keyword, Text, Name)), (r'require\b', Keyword, 'requiresection'), (r'(/?)(Namespace_Using)\b', bygroups(Punctuation, Keyword.Namespace)), (r'(/?)(Cache|Database_Names|Database_SchemaNames|' r'Database_TableNames|Define_Tag|Define_Type|Email_Batch|' r'Encode_Set|HTML_Comment|Handle|Handle_Error|Header|If|Inline|' r'Iterate|LJAX_Target|Link|Link_CurrentAction|Link_CurrentGroup|' r'Link_CurrentRecord|Link_Detail|Link_FirstGroup|' r'Link_FirstRecord|Link_LastGroup|Link_LastRecord|Link_NextGroup|' r'Link_NextRecord|Link_PrevGroup|Link_PrevRecord|Log|Loop|' r'NoProcess|Output_None|Portal|Private|Protect|Records|Referer|' r'Referrer|Repeating|ResultSet|Rows|Search_Args|Search_Arguments|' r'Select|Sort_Args|Sort_Arguments|Thread_Atomic|Value_List|While|' r'Abort|Case|Else|If_Empty|If_False|If_Null|If_True|Loop_Abort|' r'Loop_Continue|Loop_Count|Params|Params_Up|Return|Return_Value|' r'Run_Children|SOAP_DefineTag|SOAP_LastRequest|SOAP_LastResponse|' r'Tag_Name|ascending|average|by|define|descending|do|equals|' r'frozen|group|handle_failure|import|in|into|join|let|match|max|' r'min|on|order|parent|protected|provide|public|require|returnhome|' r'skip|split_thread|sum|take|thread|to|trait|type|where|with|' r'yield|yieldhome)\b', bygroups(Punctuation, Keyword)), # other (r',', Punctuation, 'commamember'), (r'(and|or|not)\b', Operator.Word), (r'([a-z_][\w.]*)(\s*::\s*[a-z_][\w.]*)?(\s*=(?!=))', bygroups(Name, Name.Label, Operator)), (r'(/?)([\w.]+)', bygroups(Punctuation, Name.Other)), (r'(=)(n?bw|n?ew|n?cn|lte?|gte?|n?eq|n?rx|ft)\b', bygroups(Operator, Operator.Word)), (r':=|[-+*/%=<>&|!?\\]+', Operator), (r'[{}():;,@^]', Punctuation), ], 'singlestring': [ (r"'", String.Single, '#pop'), (r"[^'\\]+", String.Single), include('escape'), (r"\\", String.Single), ], 'doublestring': [ (r'"', String.Double, '#pop'), (r'[^"\\]+', String.Double), include('escape'), (r'\\', String.Double), ], 'escape': [ (r'\\(U[\da-f]{8}|u[\da-f]{4}|x[\da-f]{1,2}|[0-7]{1,3}|:[^:]+:|' r'[abefnrtv?"\'\\]|$)', String.Escape), ], 'signature': [ (r'=>', Operator, '#pop'), (r'\)', Punctuation, '#pop'), (r'[(,]', Punctuation, 'parameter'), include('lasso'), ], 'parameter': [ (r'\)', Punctuation, '#pop'), (r'-?[a-z_][\w.]*', Name.Attribute, '#pop'), (r'\.\.\.', Name.Builtin.Pseudo), include('lasso'), ], 'requiresection': [ (r'(([a-z_][\w.]*=?|[-+*/%])(?=\s*\())', Name, 'requiresignature'), (r'(([a-z_][\w.]*=?|[-+*/%])(?=(\s*::\s*[\w.]+)?\s*,))', Name), (r'[a-z_][\w.]*=?|[-+*/%]', Name, '#pop'), (r'::\s*[a-z_][\w.]*', Name.Label), (r',', Punctuation), include('whitespacecomments'), ], 'requiresignature': [ (r'(\)(?=(\s*::\s*[\w.]+)?\s*,))', Punctuation, 
'#pop'), (r'\)', Punctuation, '#pop:2'), (r'-?[a-z_][\w.]*', Name.Attribute), (r'::\s*[a-z_][\w.]*', Name.Label), (r'\.\.\.', Name.Builtin.Pseudo), (r'[(,]', Punctuation), include('whitespacecomments'), ], 'commamember': [ (r'(([a-z_][\w.]*=?|[-+*/%])' r'(?=\s*(\(([^()]*\([^()]*\))*[^)]*\)\s*)?(::[\w.\s]+)?=>))', Name.Function, 'signature'), include('whitespacecomments'), default('#pop'), ], } def __init__(self, **options): self.builtinshighlighting = get_bool_opt(options, 'builtinshighlighting', True) self.requiredelimiters = get_bool_opt(options, 'requiredelimiters', False) self._builtins = set() self._members = set() if self.builtinshighlighting: from pygments.lexers._lasso_builtins import BUILTINS, MEMBERS for key, value in iteritems(BUILTINS): self._builtins.update(value) for key, value in iteritems(MEMBERS): self._members.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.requiredelimiters: stack.append('delimiters') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if (token is Name.Other and value.lower() in self._builtins or token is Name.Other.Member and value.lower().rstrip('=') in self._members): yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): rv = 0.0 if 'bin/lasso9' in text: rv += 0.8 if re.search(r'<\?lasso', text, re.I): rv += 0.4 if re.search(r'local\(', text, re.I): rv += 0.4 return rv
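# Illustrative sketch, not part of the original lexer: it exercises the two
# documented options. With `requiredelimiters=True` only code inside [...] or
# <?lasso ... ?> delimiters is lexed as Lasso and the surrounding text is
# emitted as Other; with `builtinshighlighting=False` the Name.Other ->
# Name.Builtin promotion in get_tokens_unprocessed above is skipped. The
# helper name and the sample input are invented for demonstration only.
def _demo_lasso_options():
    from pygments.token import Other
    delimited = LassoLexer(requiredelimiters=True)
    tokens = list(delimited.get_tokens('outside [local(x = 1)] outside'))
    assert any(tok is Other for tok, _ in tokens)  # the undelimited text
    no_builtins = LassoLexer(builtinshighlighting=False)
    return tokens, list(no_builtins.get_tokens('local(x = 1)\n'))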
class NedLexer(RegexLexer): name = 'ned' filenames = ['*.ned'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'(L?)(")', bygroups(String.Affix, String), 'string'), (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(true|false)\b', Name.Builtin), (r'(<-->|-->|<--|\.\.)', Keyword), (r'(bool|double|int|xml)\b', Keyword.Type), (r'(inout|input|output)\b', Keyword.Type), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words(("channel", "channelinterface", "simple", "module", "network", "moduleinterface"), suffix=r'\b'), Keyword), (words( ("parameters", "gates", "types", "submodules", "connections"), suffix=r'\b'), Keyword), (words(("volatile", "allowunconnected", "extends", "for", "if", "import", "like", "package", "property"), suffix=r'\b'), Keyword), (words(("sizeof", "const", "default", "ask", "this", "index", "typename", "xmldoc"), suffix=r'\b'), Keyword), (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta", "binomial", "cauchy", "ceil", "chi_square", "cos", "erlang_k", "exp", "exponential", "fabs", "floor", "fmod", "gamma_d", "genk_exponential", "genk_intuniform", "genk_normal", "genk_truncnormal", "genk_uniform", "geometric", "hypergeometric", "hypot", "intuniform", "log", "log10", "lognormal", "max", "min", "negbinomial", "normal", "pareto_shifted", "poisson", "pow", "simTime", "sin", "sqrt", "student_t", "tan", "triang", "truncnormal", "uniform", "weibull", "xml", "xmldoc"), suffix=r'\b'), Name.Builtin), ('@[a-zA-Z_]\w*', Name.Builtin), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ] }
class ReachLexer(RegexLexer): """ For Reach source code. """ name = 'Reach' aliases = ['rsh', 'reach'] filenames = ['*.rsh'] # TODO? mimetypes = [ 'application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript' ] flags = re.DOTALL | re.UNICODE | re.MULTILINE tokens = { 'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline)], 'slashstartsregex': [ include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gimuy]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop') ], 'badregex': [(r'\n', Text, '#pop')], 'root': [ (r'\A#! ?/.*?\n', Comment.Hashbang), # recognized by node.js (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'(\.\d+|[0-9]+\.[0-9]*)([eE][-+]?[0-9]+)?', Number.Float), (r'0[bB][01]+', Number.Bin), (r'0[oO][0-7]+', Number.Oct), (r'0[xX][0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'\.\.\.', Punctuation), (r'=>', Operator), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), ( r'(for|in|while|do|break|return|continue|match|switch|case|default|if|else|' r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|' # Reach ones r'interact|commit|exit|only|each|race|fork|paySpec|parallelReduce|when|timeout|timeRemaining|throwTimeout|publish|pay|declassify|transfer|' r'invariant|assert|require|assume|possible|unknowable|forall|' r'this|of)\b', Keyword, 'slashstartsregex'), ( r'(var|let|with|function|' # reach ones r'export|const|import|from|as' r')\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|byte|char|class|debugger|double|enum|' r'extends|final|float|goto|implements|int|interface|long|native|' r'package|private|protected|public|short|static|super|synchronized|throws|' r'transient|volatile)\b', Keyword.Reserved), (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant), ( r'(Array|Boolean|Date|Error|Function|Foldable|Math|netscape|' r'Number|Object|Packages|RegExp|String|Promise|Proxy|sun|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|isSafeInteger|parseFloat|parseInt|' # The reach ones r'UInt|Int|FixedPoint|Interval|IntervalType|Reach|App|Fun|Null|Bool|Bytes|Address|Token|Tuple|Struct|Participant|ParticipantClass|View|Data|Digest|Map|Set|Refine|Anybody|deployMode|verifyArithmetic|verifyPerConnector|connectors|ETH|ALGO|' r'deploy|balance|digest|implies|ensure|hasRandom|makeCommitment|checkCommitment|closeTo|lastConsensusTime|remote|' r'and|or|add|sub|mul|div|mod|lt|le|gt|ge|lsh|rsh|band|bior|bxor|eq|neq|' r'polyEq|polyNeq|typeEq|intEq|ite|typeOf|isType|is|' r'array|makeEnum|' r'document|this|window)\b', Name.Builtin), (JS_IDENT, Name.Other), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'`', String.Backtick, 'interp'), ], 'interp': [ (r'`', String.Backtick, '#pop'), (r'\\\\', String.Backtick), (r'\\`', String.Backtick), (r'\$\{', String.Interpol, 'interp-inside'), (r'\$', String.Backtick), (r'[^`\\$]+', String.Backtick), ], 'interp-inside': [ # TODO: should this include single-line comments and allow nesting strings? (r'\}', String.Interpol, '#pop'), include('root'), ], # (\\\\|\\`|[^`])*`', String.Backtick), }
class PythonLexer(RegexLexer): """ For `Python <http://www.python.org>`_ source code. """ name = 'Python' aliases = ['python', 'py', 'sage'] filenames = [ '*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage' ] mimetypes = ['text/x-python', 'application/x-python'] def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%', ttype), # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), include('keywords'), (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('builtins'), include('magicfuncs'), include('magicvars'), include('backtick'), ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(('assert', 'break', 'continue', 'del', 'elif', 'else', 'except', 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with'), suffix=r'\b'), Keyword), ], 'builtins': [ (words( ('__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin), (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls' r')\b', Name.Builtin.Pseudo), (words( ('ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception', 
'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning', 'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Exception), ], 'magicfuncs': [ (words( ('__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__', '__complex__', '__contains__', '__del__', '__delattr__', '__delete__', '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__', '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__', '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__', '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__', '__ilshift__', '__imod__', '__imul__', '__index__', '__init__', '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__', '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__', '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__', '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__', '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__', '__unicode__', '__xor__'), suffix=r'\b'), Name.Function.Magic), ], 'magicvars': [ (words( ('__bases__', '__class__', '__closure__', '__code__', '__defaults__', '__dict__', '__doc__', '__file__', '__func__', '__globals__', '__metaclass__', '__module__', '__mro__', '__name__', '__self__', '__slots__', '__weakref__'), suffix=r'\b'), Name.Variable.Magic), ], 'numbers': [(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float), (r'\d+[eE][+-]?[0-9]+j?', Number.Float), (r'0[0-7]+j?', Number.Oct), (r'0[bB][01]+', Number.Bin), (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+j?', Number.Integer)], 'backtick': [ ('`.*?`', String.Backtick), ], 'name': [ (r'@[\w.]+', Name.Decorator), ('[a-zA-Z_]\w*', Name), ], 'funcname': [ include('magicfuncs'), ('[a-zA-Z_]\w*', Name.Function, '#pop'), default('#pop'), ], 'classname': [('[a-zA-Z_]\w*', Name.Class, '#pop')], 'import': [ (r'(?:[ \t]|\\\n)+', Text), (r'as\b', Keyword.Namespace), (r',', Operator), (r'[a-zA-Z_][\w.]*', Name.Namespace), default('#pop') # all else: go back ], 'fromimport': [ (r'(?:[ \t]|\\\n)+', Text), (r'import\b', Keyword.Namespace, '#pop'), # if None occurs here, it's "raise x from None", since None can # never be a module name (r'None\b', Name.Builtin.Pseudo, '#pop'), # sadly, in "raise x from y" y will be highlighted as namespace too (r'[a-zA-Z_.][\w.]*', Name.Namespace), # anything else here also means "raise x from y" and is therefore # not an error default('#pop'), ], 'stringescape': [(r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' 
r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)], 'strings-single': innerstring_rules(String.Single), 'strings-double': innerstring_rules(String.Double), 'dqs': [ (r'"', String.Double, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings include('strings-double') ], 'sqs': [ (r"'", String.Single, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings include('strings-single') ], 'tdqs': [(r'"""', String.Double, '#pop'), include('strings-double'), (r'\n', String.Double)], 'tsqs': [(r"'''", String.Single, '#pop'), include('strings-single'), (r'\n', String.Single)], } def analyse_text(text): return shebang_matches(text, r'pythonw?(2(\.\d)?)?') or \ 'import ' in text[:1000]
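# Illustrative sketch, not part of the original lexer: analyse_text above
# scores a python shebang (and an early 'import ') so that guess_lexer can
# pick this lexer when no filename is available. The sample source is
# invented; which lexer guess_lexer ultimately returns depends on the scores
# of all installed lexers.
def _demo_python_guessing():
    from pygments.lexers import guess_lexer
    src = '#!/usr/bin/env python\nimport os\nprint(os.getcwd())\n'
    assert PythonLexer.analyse_text(src) > 0
    return guess_lexer(src)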
class NotmuchLexer(RegexLexer): """ For `Notmuch <https://notmuchmail.org/>`_ email text format. .. versionadded:: 2.5 Additional options accepted: `body_lexer` If given, highlight the contents of the message body with the specified lexer, else guess it according to the body content (default: ``None``). """ name = 'Notmuch' aliases = ['notmuch'] def _highlight_code(self, match): code = match.group(1) try: if self.body_lexer: lexer = get_lexer_by_name(self.body_lexer) else: lexer = guess_lexer(code.strip()) except ClassNotFound: lexer = get_lexer_by_name('text') yield from lexer.get_tokens_unprocessed(code) tokens = { 'root': [ (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')), ], 'message-attr': [ (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)), (r'(\s*(?:depth|match|excluded):\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)), (r'(\s*filename:\s*)(.+\n)', bygroups(Name.Attribute, String)), default('#pop'), ], 'message': [ (r'\fmessage\}\n', Keyword, '#pop'), (r'\fheader\{\n', Keyword, 'header'), (r'\fbody\{\n', Keyword, 'body'), ], 'header': [ (r'\fheader\}\n', Keyword, '#pop'), (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)', bygroups(Name.Attribute, String)), (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)', bygroups(Generic.Strong, Literal, Name.Tag)), ], 'body': [ (r'\fpart\{\n', Keyword, 'part'), (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')), (r'\fbody\}\n', Keyword, '#pop'), ], 'part-attr': [ (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)), (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)', bygroups(Punctuation, Name.Attribute, String)), (r'(,\s*)(Content-type:\s*)(.+\n)', bygroups(Punctuation, Name.Attribute, String)), default('#pop'), ], 'part': [ (r'\f(?:part|attachment)\}\n', Keyword, '#pop'), (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')), (r'^Non-text part: .*\n', Comment), (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code), ], } def analyse_text(text): return 1.0 if text.startswith('\fmessage{') else 0.0 def __init__(self, **options): self.body_lexer = options.get('body_lexer', None) RegexLexer.__init__(self, **options)
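# Illustrative sketch, not part of the original lexer: the `body_lexer` option
# above forces a particular lexer for message bodies instead of guessing one
# from the body text. The sample dump below is invented and only needs to
# satisfy the token rules above, not the exact notmuch output format.
def _demo_notmuch_body_lexer():
    sample = (
        '\fmessage{ id:1@example.com depth:0 match:1 excluded:0 filename:/tmp/mail\n'
        '\fheader{\n'
        'Subject: hello\n'
        '\fheader}\n'
        '\fbody{\n'
        '\fpart{ ID: 1, Content-type: text/plain\n'
        'print("hi")\n'
        '\fpart}\n'
        '\fbody}\n'
        '\fmessage}\n'
    )
    lexer = NotmuchLexer(body_lexer='python')
    return list(lexer.get_tokens(sample))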
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high-level language aimed at the creation of
    C extensions for PHP.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }

class LlvmMirLexer(RegexLexer): """ Lexer for the overall LLVM MIR document format. MIR is a human readable serialization format that's used to represent LLVM's machine specific intermediate representation. It allows LLVM's developers to see the state of the compilation process at various points, as well as test individual pieces of the compiler. For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html. .. versionadded:: 2.6 """ name = 'LLVM-MIR' aliases = ['llvm-mir'] filenames = ['*.mir'] tokens = { 'root': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents starting with | are LLVM-IR (r'--- \|$', Keyword, 'llvm_ir'), # Other documents are MIR (r'---', Keyword, 'llvm_mir'), # Consume everything else in one token for efficiency (r'[^-#]+|.', Text), ], 'llvm_ir': [ # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Delegate to the LlvmLexer (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))), ], 'llvm_mir': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Handle the simple attributes (r'name:', Keyword, 'name'), (words(('alignment', ), suffix=':'), Keyword, 'number'), (words(('legalized', 'regBankSelected', 'tracksRegLiveness', 'selected', 'exposesReturnsTwice'), suffix=':'), Keyword, 'boolean'), # Handle the attributes don't highlight inside (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo', 'machineFunctionInfo'), suffix=':'), Keyword), # Delegate the body block to the LlvmMirBodyLexer (r'body: *\|', Keyword, 'llvm_mir_body'), # Consume everything else (r'.+', Text), (r'\n', Text), ], 'name': [ (r'[^\n]+', Name), default('#pop'), ], 'boolean': [ (r' *(true|false)', Name.Builtin), default('#pop'), ], 'number': [ (r' *[0-9]+', Number), default('#pop'), ], 'llvm_mir_body': [ # Documents end with '...' or '---'. # We have to pop llvm_mir_body and llvm_mir (r'(\.\.\.|(?=---))', Keyword, '#pop:2'), # Delegate the body block to the LlvmMirBodyLexer (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))), # The '...' is optional. If we didn't already find it then it isn't # there. There might be a '---' instead though. (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))), ], }
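# Illustrative sketch, not part of the original lexer: it walks the document
# structure handled above -- a '--- |' document is delegated to LlvmLexer, a
# plain '---' document is lexed as MIR, and a 'body: |' block is handed to
# LlvmMirBodyLexer. The MIR snippet is a minimal invented example.
def _demo_llvm_mir_structure():
    sample = (
        '--- |\n'
        '  define i32 @foo() {\n'
        '    ret i32 0\n'
        '  }\n'
        '...\n'
        '---\n'
        'name: foo\n'
        'alignment: 4\n'
        'tracksRegLiveness: true\n'
        'body: |\n'
        '  bb.0:\n'
        '    RET 0\n'
        '...\n'
    )
    return list(LlvmMirLexer().get_tokens(sample))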
class GDScriptLexer(RegexLexer): """ For `Godot source code <https://www.godotengine.org>`_ source code. """ name = 'GDScript' aliases = ['gdscript', 'gd'] filenames = ['*.gd'] mimetypes = ['text/x-gdscript', 'application/x-gdscript'] def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%', ttype), # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|]', Operator), include('keywords'), (r'(func)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), include('builtins'), ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(( 'do', 'var', 'const', 'extends', 'is', 'export', 'onready', 'tool', 'static', 'setget', 'signal', 'breakpoint', 'switch', 'case', 'assert', 'break', 'continue', 'elif', 'else', 'for', 'if', 'pass', 'return', 'while', 'match'), suffix=r'\b'), Keyword), ], 'builtins': [ (words(( 'Color8', 'abs', 'acos', 'asin', 'assert', 'atan', 'atan2', 'bytes2var', 'ceil', 'clamp', 'convert', 'cos', 'cosh', 'db2linear', 'decimals', 'dectime', 'deg2rad', 'dict2inst', 'ease', 'exp', 'floor', 'fmod', 'fposmod', 'funcref', 'hash', 'inst2dict', 'instance_from_id', 'is_inf', 'is_nan', 'lerp', 'linear2db', 'load', 'log', 'max', 'min', 'nearest_po2', 'pow', 'preload', 'print', 'print_stack', 'printerr', 'printraw', 'prints', 'printt', 'rad2deg', 'rand_range', 'rand_seed', 'randf', 'randi', 'randomize', 'range', 'round', 'seed', 'sign', 'sin', 'sinh', 'sqrt', 'stepify', 'str', 'str2var', 'tan', 'tan', 'tanh', 'type_exist', 'typeof', 'var2bytes', 'var2str', 'weakref', 'yield'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin), (r'(?<!\.)(self|false|true' r')\b', Name.Builtin.Pseudo), (words(( 'null', 'bool', 'int', 'float', 'String', 'Vector2', 'Vector3', 'Matrix32', 'Array', 'ByteArray', 'IntArray', 'FloatArray', 'StringArray', 'Vector2Array', 'Vector3Array', 'ColorArray', 'Plane', 'Quat', 'AABB', 'Matrix3', 'Transform', 'Color', 'Image', 'NodePath', 'RID', 'Object', 'InputEvent', 'Rect2' ), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin.Type), ], 'numbers': [ (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float), (r'\d+[eE][+-]?[0-9]+j?', 
Number.Float), (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+j?', Number.Integer) ], 'name': [ ('[a-zA-Z_]\w*', Name), ], 'funcname': [ ('[a-zA-Z_]\w*', Name.Function, '#pop'), default('#pop'), ], 'classname': [ ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings-single': innerstring_rules(String.Single), 'strings-double': innerstring_rules(String.Double), 'dqs': [ (r'"', String.Double, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings include('strings-double') ], 'sqs': [ (r"'", String.Single, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings include('strings-single') ], 'tdqs': [ (r'"""', String.Double, '#pop'), include('strings-double'), (r'\n', String.Double) ], 'tsqs': [ (r"'''", String.Single, '#pop'), include('strings-single'), (r'\n', String.Single) ], }
class ThriftLexer(RegexLexer): """ For `Thrift <https://thrift.apache.org/>`__ interface definitions. .. versionadded:: 2.1 """ name = 'Thrift' aliases = ['thrift'] filenames = ['*.thrift'] mimetypes = ['application/x-thrift'] tokens = { 'root': [ include('whitespace'), include('comments'), (r'"', String.Double, combined('stringescape', 'dqs')), (r'\'', String.Single, combined('stringescape', 'sqs')), (r'(namespace)(\s+)', bygroups(Keyword.Namespace, Text.Whitespace), 'namespace'), (r'(enum|union|struct|service|exception)(\s+)', bygroups(Keyword.Declaration, Text.Whitespace), 'class'), ( r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)' # return arguments r'((?:[^\W\d]|\$)[\w$]*)' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), include('keywords'), include('numbers'), (r'[&=]', Operator), (r'[:;,{}()<>\[\]]', Punctuation), (r'[a-zA-Z_](\.\w|\w)*', Name), ], 'whitespace': [ (r'\n', Text.Whitespace), (r'\s+', Text.Whitespace), ], 'comments': [ (r'#.*$', Comment), (r'//.*?\n', Comment), (r'/\*[\w\W]*?\*/', Comment.Multiline), ], 'stringescape': [ (r'\\([\\nrt"\'])', String.Escape), ], 'dqs': [ (r'"', String.Double, '#pop'), (r'[^\\"\n]+', String.Double), ], 'sqs': [ (r"'", String.Single, '#pop'), (r'[^\\\'\n]+', String.Single), ], 'namespace': [ (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), default('#pop'), ], 'class': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop'), default('#pop'), ], 'keywords': [ (r'(async|oneway|extends|throws|required|optional)\b', Keyword), (r'(true|false)\b', Keyword.Constant), (r'(const|typedef)\b', Keyword.Declaration), (words(('cpp_namespace', 'cpp_include', 'cpp_type', 'java_package', 'cocoa_prefix', 'csharp_namespace', 'delphi_namespace', 'php_namespace', 'py_module', 'perl_package', 'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix', 'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace', 'xsd_attrs', 'include'), suffix=r'\b'), Keyword.Namespace), (words( ('void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double', 'string', 'binary', 'map', 'list', 'set', 'slist', 'senum'), suffix=r'\b'), Keyword.Type), (words( ('BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__', '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__', 'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin', 'break', 'case', 'catch', 'class', 'clone', 'continue', 'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic', 'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare', 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile', 'ensure', 'except', 'exec', 'finally', 'float', 'for', 'foreach', 'function', 'global', 'goto', 'if', 'implements', 'import', 'in', 'inline', 'instanceof', 'interface', 'is', 'lambda', 'module', 'native', 'new', 'next', 'nil', 'not', 'or', 'pass', 'public', 'print', 'private', 'protected', 'raise', 'redo', 'rescue', 'retry', 'register', 'return', 'self', 'sizeof', 'static', 'super', 'switch', 'synchronized', 'then', 'this', 'throw', 'transient', 'try', 'undef', 'unless', 'unsigned', 'until', 'use', 'var', 'virtual', 'volatile', 'when', 'while', 'with', 'xor', 'yield'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), ], 'numbers': [ (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float), (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex), (r'[+-]?[0-9]+', Number.Integer), ], }
class CppLexer(CFamilyLexer): """ For C++ source code with preprocessor directives. Additional options accepted: `stdlibhighlighting` Highlight common types found in the C/C++ standard library (e.g. `size_t`). (default: ``True``). `c99highlighting` Highlight common types found in the C99 standard library (e.g. `int8_t`). Actually, this includes all fixed-width integer types. (default: ``True``). `c11highlighting` Highlight atomic types found in the C11 standard library (e.g. `atomic_bool`). (default: ``True``). `platformhighlighting` Highlight common types found in the platform SDK headers (e.g. `clockid_t` on Linux). (default: ``True``). """ name = 'C++' aliases = ['cpp', 'c++'] filenames = [ '*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP' ] mimetypes = ['text/x-c++hdr', 'text/x-c++src'] priority = 0.1 tokens = { 'statements': [ (r'(class|concept|typename)(\s+)', bygroups(Keyword, Text), 'classname'), (words( ('catch', 'const_cast', 'delete', 'dynamic_cast', 'explicit', 'export', 'friend', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', 'reinterpret_cast', 'class', 'restrict', 'static_cast', 'template', 'this', 'throw', 'throws', 'try', 'typeid', 'using', 'virtual', 'constexpr', 'nullptr', 'concept', 'decltype', 'noexcept', 'override', 'final', 'constinit', 'consteval', 'co_await', 'co_return', 'co_yield', 'requires', 'import', 'module', 'typename'), suffix=r'\b'), Keyword), (r'char(16_t|32_t|8_t)\b', Keyword.Type), (r'(enum)(\s+)', bygroups(Keyword, Text), 'enumname'), # C++11 raw strings (r'((?:[LuU]|u8)?R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")', bygroups(String.Affix, String, String.Delimiter, String.Delimiter, String, String.Delimiter, String)), inherit, ], 'root': [ inherit, # C++ Microsoft-isms (words(('virtual_inheritance', 'uuidof', 'super', 'single_inheritance', 'multiple_inheritance', 'interface', 'event'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), # Offload C++ extensions, http://offload.codeplay.com/ (r'__(offload|blockingoffload|outer)\b', Keyword.Pseudo), ], 'enumname': [ include('whitespace'), # 'enum class' and 'enum struct' C++11 support (words(('class', 'struct'), suffix=r'\b'), Keyword), (CFamilyLexer._ident, Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), default('#pop') ] } def analyse_text(text): if re.search('#include <[a-z_]+>', text): return 0.2 if re.search('using namespace ', text): return 0.4
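# Illustrative sketch, not part of the original lexer: it shows the effect of
# the `stdlibhighlighting` option documented above, which the inherited
# CFamilyLexer machinery uses to decide whether common standard-library names
# such as `size_t` are emitted as Keyword.Type. The helper name and the sample
# line are invented; with a stock setup this is expected to return (True, False).
def _demo_cpp_stdlib_option():
    from pygments.token import Keyword
    src = 'size_t n = 0;\n'

    def has_size_t_type(lexer):
        return any(tok is Keyword.Type and val == 'size_t'
                   for tok, val in lexer.get_tokens(src))

    return (has_size_t_type(CppLexer()),
            has_size_t_type(CppLexer(stdlibhighlighting=False)))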
class FactorLexer(RegexLexer): """ Lexer for the `Factor <http://factorcode.org>`_ language. .. versionadded:: 1.4 """ name = 'Factor' aliases = ['factor'] filenames = ['*.factor'] mimetypes = ['text/x-factor'] flags = re.MULTILINE | re.UNICODE builtin_kernel = words( ('-rot', '2bi', '2bi@', '2bi*', '2curry', '2dip', '2drop', '2dup', '2keep', '2nip', '2over', '2tri', '2tri@', '2tri*', '3bi', '3curry', '3dip', '3drop', '3dup', '3keep', '3tri', '4dip', '4drop', '4dup', '4keep', '<wrapper>', '=', '>boolean', 'clone', '?', '?execute', '?if', 'and', 'assert', 'assert=', 'assert?', 'bi', 'bi-curry', 'bi-curry@', 'bi-curry*', 'bi@', 'bi*', 'boa', 'boolean', 'boolean?', 'both?', 'build', 'call', 'callstack', 'callstack>array', 'callstack?', 'clear', '(clone)', 'compose', 'compose?', 'curry', 'curry?', 'datastack', 'die', 'dip', 'do', 'drop', 'dup', 'dupd', 'either?', 'eq?', 'equal?', 'execute', 'hashcode', 'hashcode*', 'identity-hashcode', 'identity-tuple', 'identity-tuple?', 'if', 'if*', 'keep', 'loop', 'most', 'new', 'nip', 'not', 'null', 'object', 'or', 'over', 'pick', 'prepose', 'retainstack', 'rot', 'same?', 'swap', 'swapd', 'throw', 'tri', 'tri-curry', 'tri-curry@', 'tri-curry*', 'tri@', 'tri*', 'tuple', 'tuple?', 'unless', 'unless*', 'until', 'when', 'when*', 'while', 'with', 'wrapper', 'wrapper?', 'xor'), suffix=r'\s') builtin_assocs = words( ('2cache', '<enum>', '>alist', '?at', '?of', 'assoc', 'assoc-all?', 'assoc-any?', 'assoc-clone-like', 'assoc-combine', 'assoc-diff', 'assoc-diff!', 'assoc-differ', 'assoc-each', 'assoc-empty?', 'assoc-filter', 'assoc-filter!', 'assoc-filter-as', 'assoc-find', 'assoc-hashcode', 'assoc-intersect', 'assoc-like', 'assoc-map', 'assoc-map-as', 'assoc-partition', 'assoc-refine', 'assoc-size', 'assoc-stack', 'assoc-subset?', 'assoc-union', 'assoc-union!', 'assoc=', 'assoc>map', 'assoc?', 'at', 'at+', 'at*', 'cache', 'change-at', 'clear-assoc', 'delete-at', 'delete-at*', 'enum', 'enum?', 'extract-keys', 'inc-at', 'key?', 'keys', 'map>assoc', 'maybe-set-at', 'new-assoc', 'of', 'push-at', 'rename-at', 'set-at', 'sift-keys', 'sift-values', 'substitute', 'unzip', 'value-at', 'value-at*', 'value?', 'values', 'zip'), suffix=r'\s') builtin_combinators = words( ('2cleave', '2cleave>quot', '3cleave', '3cleave>quot', '4cleave', '4cleave>quot', 'alist>quot', 'call-effect', 'case', 'case-find', 'case>quot', 'cleave', 'cleave>quot', 'cond', 'cond>quot', 'deep-spread>quot', 'execute-effect', 'linear-case-quot', 'no-case', 'no-case?', 'no-cond', 'no-cond?', 'recursive-hashcode', 'shallow-spread>quot', 'spread', 'to-fixed-point', 'wrong-values', 'wrong-values?'), suffix=r'\s') builtin_math = words( ('-', '/', '/f', '/i', '/mod', '2/', '2^', '<', '<=', '<fp-nan>', '>', '>=', '>bignum', '>fixnum', '>float', '>integer', '(all-integers?)', '(each-integer)', '(find-integer)', '*', '+', '?1+', 'abs', 'align', 'all-integers?', 'bignum', 'bignum?', 'bit?', 'bitand', 'bitnot', 'bitor', 'bits>double', 'bits>float', 'bitxor', 'complex', 'complex?', 'denominator', 'double>bits', 'each-integer', 'even?', 'find-integer', 'find-last-integer', 'fixnum', 'fixnum?', 'float', 'float>bits', 'float?', 'fp-bitwise=', 'fp-infinity?', 'fp-nan-payload', 'fp-nan?', 'fp-qnan?', 'fp-sign', 'fp-snan?', 'fp-special?', 'if-zero', 'imaginary-part', 'integer', 'integer>fixnum', 'integer>fixnum-strict', 'integer?', 'log2', 'log2-expects-positive', 'log2-expects-positive?', 'mod', 'neg', 'neg?', 'next-float', 'next-power-of-2', 'number', 'number=', 'number?', 'numerator', 'odd?', 'out-of-fixnum-range', 
'out-of-fixnum-range?', 'power-of-2?', 'prev-float', 'ratio', 'ratio?', 'rational', 'rational?', 'real', 'real-part', 'real?', 'recip', 'rem', 'sgn', 'shift', 'sq', 'times', 'u<', 'u<=', 'u>', 'u>=', 'unless-zero', 'unordered?', 'when-zero', 'zero?'), suffix=r'\s') builtin_sequences = words( ('1sequence', '2all?', '2each', '2map', '2map-as', '2map-reduce', '2reduce', '2selector', '2sequence', '3append', '3append-as', '3each', '3map', '3map-as', '3sequence', '4sequence', '<repetition>', '<reversed>', '<slice>', '?first', '?last', '?nth', '?second', '?set-nth', 'accumulate', 'accumulate!', 'accumulate-as', 'all?', 'any?', 'append', 'append!', 'append-as', 'assert-sequence', 'assert-sequence=', 'assert-sequence?', 'binary-reduce', 'bounds-check', 'bounds-check?', 'bounds-error', 'bounds-error?', 'but-last', 'but-last-slice', 'cartesian-each', 'cartesian-map', 'cartesian-product', 'change-nth', 'check-slice', 'check-slice-error', 'clone-like', 'collapse-slice', 'collector', 'collector-for', 'concat', 'concat-as', 'copy', 'count', 'cut', 'cut-slice', 'cut*', 'delete-all', 'delete-slice', 'drop-prefix', 'each', 'each-from', 'each-index', 'empty?', 'exchange', 'filter', 'filter!', 'filter-as', 'find', 'find-from', 'find-index', 'find-index-from', 'find-last', 'find-last-from', 'first', 'first2', 'first3', 'first4', 'flip', 'follow', 'fourth', 'glue', 'halves', 'harvest', 'head', 'head-slice', 'head-slice*', 'head*', 'head?', 'if-empty', 'immutable', 'immutable-sequence', 'immutable-sequence?', 'immutable?', 'index', 'index-from', 'indices', 'infimum', 'infimum-by', 'insert-nth', 'interleave', 'iota', 'iota-tuple', 'iota-tuple?', 'join', 'join-as', 'last', 'last-index', 'last-index-from', 'length', 'lengthen', 'like', 'longer', 'longer?', 'longest', 'map', 'map!', 'map-as', 'map-find', 'map-find-last', 'map-index', 'map-integers', 'map-reduce', 'map-sum', 'max-length', 'member-eq?', 'member?', 'midpoint@', 'min-length', 'mismatch', 'move', 'new-like', 'new-resizable', 'new-sequence', 'non-negative-integer-expected', 'non-negative-integer-expected?', 'nth', 'nths', 'pad-head', 'pad-tail', 'padding', 'partition', 'pop', 'pop*', 'prefix', 'prepend', 'prepend-as', 'produce', 'produce-as', 'product', 'push', 'push-all', 'push-either', 'push-if', 'reduce', 'reduce-index', 'remove', 'remove!', 'remove-eq', 'remove-eq!', 'remove-nth', 'remove-nth!', 'repetition', 'repetition?', 'replace-slice', 'replicate', 'replicate-as', 'rest', 'rest-slice', 'reverse', 'reverse!', 'reversed', 'reversed?', 'second', 'selector', 'selector-for', 'sequence', 'sequence-hashcode', 'sequence=', 'sequence?', 'set-first', 'set-fourth', 'set-last', 'set-length', 'set-nth', 'set-second', 'set-third', 'short', 'shorten', 'shorter', 'shorter?', 'shortest', 'sift', 'slice', 'slice-error', 'slice-error?', 'slice?', 'snip', 'snip-slice', 'start', 'start*', 'subseq', 'subseq?', 'suffix', 'suffix!', 'sum', 'sum-lengths', 'supremum', 'supremum-by', 'surround', 'tail', 'tail-slice', 'tail-slice*', 'tail*', 'tail?', 'third', 'trim', 'trim-head', 'trim-head-slice', 'trim-slice', 'trim-tail', 'trim-tail-slice', 'unclip', 'unclip-last', 'unclip-last-slice', 'unclip-slice', 'unless-empty', 'virtual-exemplar', 'virtual-sequence', 'virtual-sequence?', 'virtual@', 'when-empty'), suffix=r'\s') builtin_namespaces = words( ('+@', 'change', 'change-global', 'counter', 'dec', 'get', 'get-global', 'global', 'inc', 'init-namespaces', 'initialize', 'is-global', 'make-assoc', 'namespace', 'namestack', 'off', 'on', 'set', 'set-global', 'set-namestack', 
'toggle', 'with-global', 'with-scope', 'with-variable', 'with-variables'), suffix=r'\s') builtin_arrays = words( ('1array', '2array', '3array', '4array', '<array>', '>array', 'array', 'array?', 'pair', 'pair?', 'resize-array'), suffix=r'\s') builtin_io = words( ('(each-stream-block-slice)', '(each-stream-block)', '(stream-contents-by-block)', '(stream-contents-by-element)', '(stream-contents-by-length-or-block)', '(stream-contents-by-length)', '+byte+', '+character+', 'bad-seek-type', 'bad-seek-type?', 'bl', 'contents', 'each-block', 'each-block-size', 'each-block-slice', 'each-line', 'each-morsel', 'each-stream-block', 'each-stream-block-slice', 'each-stream-line', 'error-stream', 'flush', 'input-stream', 'input-stream?', 'invalid-read-buffer', 'invalid-read-buffer?', 'lines', 'nl', 'output-stream', 'output-stream?', 'print', 'read', 'read-into', 'read-partial', 'read-partial-into', 'read-until', 'read1', 'readln', 'seek-absolute', 'seek-absolute?', 'seek-end', 'seek-end?', 'seek-input', 'seek-output', 'seek-relative', 'seek-relative?', 'stream-bl', 'stream-contents', 'stream-contents*', 'stream-copy', 'stream-copy*', 'stream-element-type', 'stream-flush', 'stream-length', 'stream-lines', 'stream-nl', 'stream-print', 'stream-read', 'stream-read-into', 'stream-read-partial', 'stream-read-partial-into', 'stream-read-partial-unsafe', 'stream-read-unsafe', 'stream-read-until', 'stream-read1', 'stream-readln', 'stream-seek', 'stream-seekable?', 'stream-tell', 'stream-write', 'stream-write1', 'tell-input', 'tell-output', 'with-error-stream', 'with-error-stream*', 'with-error>output', 'with-input-output+error-streams', 'with-input-output+error-streams*', 'with-input-stream', 'with-input-stream*', 'with-output-stream', 'with-output-stream*', 'with-output>error', 'with-output+error-stream', 'with-output+error-stream*', 'with-streams', 'with-streams*', 'write', 'write1'), suffix=r'\s') builtin_strings = words(('1string', '<string>', '>string', 'resize-string', 'string', 'string?'), suffix=r'\s') builtin_vectors = words( ('1vector', '<vector>', '>vector', '?push', 'vector', 'vector?'), suffix=r'\s') builtin_continuations = words( ('<condition>', '<continuation>', '<restart>', 'attempt-all', 'attempt-all-error', 'attempt-all-error?', 'callback-error-hook', 'callcc0', 'callcc1', 'cleanup', 'compute-restarts', 'condition', 'condition?', 'continuation', 'continuation?', 'continue', 'continue-restart', 'continue-with', 'current-continuation', 'error', 'error-continuation', 'error-in-thread', 'error-thread', 'ifcc', 'ignore-errors', 'in-callback?', 'original-error', 'recover', 'restart', 'restart?', 'restarts', 'rethrow', 'rethrow-restarts', 'return', 'return-continuation', 'thread-error-hook', 'throw-continue', 'throw-restarts', 'with-datastack', 'with-return'), suffix=r'\s') tokens = { 'root': [ # factor allows a file to start with a shebang (r'#!.*$', Comment.Preproc), default('base'), ], 'base': [ (r'\s+', Text), # defining words (r'((?:MACRO|MEMO|TYPED)?:[:]?)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)), (r'(M:[:]?)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class, Text, Name.Function)), (r'(C:)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function, Text, Name.Class)), (r'(GENERIC:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)), (r'(HOOK:|GENERIC#)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function, Text, Name.Function)), (r'\(\s', Name.Function, 'stackeffect'), (r';\s', Keyword), # imports and namespaces (r'(USING:)(\s+)', bygroups(Keyword.Namespace, Text), 
'vocabs'), (r'(USE:|UNUSE:|IN:|QUALIFIED:)(\s+)(\S+)', bygroups(Keyword.Namespace, Text, Name.Namespace)), (r'(QUALIFIED-WITH:)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword.Namespace, Text, Name.Namespace, Text, Name.Namespace)), (r'(FROM:|EXCLUDE:)(\s+)(\S+)(\s+=>\s)', bygroups(Keyword.Namespace, Text, Name.Namespace, Text), 'words'), (r'(RENAME:)(\s+)(\S+)(\s+)(\S+)(\s+=>\s+)(\S+)', bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Namespace, Text, Name.Function)), (r'(ALIAS:|TYPEDEF:)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Function)), (r'(DEFER:|FORGET:|POSTPONE:)(\s+)(\S+)', bygroups(Keyword.Namespace, Text, Name.Function)), # tuples and classes (r'(TUPLE:|ERROR:)(\s+)(\S+)(\s+<\s+)(\S+)', bygroups(Keyword, Text, Name.Class, Text, Name.Class), 'slots'), (r'(TUPLE:|ERROR:|BUILTIN:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class), 'slots'), (r'(MIXIN:|UNION:|INTERSECTION:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)), (r'(PREDICATE:)(\s+)(\S+)(\s+<\s+)(\S+)', bygroups(Keyword, Text, Name.Class, Text, Name.Class)), (r'(C:)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function, Text, Name.Class)), (r'(INSTANCE:)(\s+)(\S+)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class, Text, Name.Class)), (r'(SLOT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)), (r'(SINGLETON:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)), (r'SINGLETONS:', Keyword, 'classes'), # other syntax (r'(CONSTANT:|SYMBOL:|MAIN:|HELP:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)), (r'SYMBOLS:\s', Keyword, 'words'), (r'SYNTAX:\s', Keyword), (r'ALIEN:\s', Keyword), (r'(STRUCT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)), (r'(FUNCTION:)(\s+\S+\s+)(\S+)(\s+\(\s+[^)]+\)\s)', bygroups(Keyword.Namespace, Text, Name.Function, Text)), (r'(FUNCTION-ALIAS:)(\s+)(\S+)(\s+\S+\s+)(\S+)(\s+\(\s+[^)]+\)\s)', bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Function, Text)), # vocab.private (r'(?:<PRIVATE|PRIVATE>)\s', Keyword.Namespace), # strings (r'"""\s+(?:.|\n)*?\s+"""', String), (r'"(?:\\\\|\\"|[^"])*"', String), (r'\S+"\s+(?:\\\\|\\"|[^"])*"', String), (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char), # comments (r'!\s+.*$', Comment), (r'#!\s+.*$', Comment), (r'/\*\s+(?:.|\n)*?\s\*/\s', Comment), # boolean constants (r'[tf]\s', Name.Constant), # symbols and literals (r'[\\$]\s+\S+', Name.Constant), (r'M\\\s+\S+\s+\S+', Name.Constant), # numbers (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s', Number), (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number), (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), (r'0b[01]+\s', Number.Bin), (r'0o[0-7]+\s', Number.Oct), (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), # keywords (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s', Keyword), # builtins (builtin_kernel, Name.Builtin), (builtin_assocs, Name.Builtin), (builtin_combinators, Name.Builtin), (builtin_math, Name.Builtin), (builtin_sequences, Name.Builtin), (builtin_namespaces, Name.Builtin), (builtin_arrays, Name.Builtin), (builtin_io, Name.Builtin), (builtin_strings, Name.Builtin), (builtin_vectors, Name.Builtin), (builtin_continuations, Name.Builtin), # everything else is text (r'\S+', Text), ], 'stackeffect': [ (r'\s+', Text), (r'\(\s+', Name.Function, 'stackeffect'), (r'\)\s', Name.Function, '#pop'), (r'--\s', Name.Function), 
(r'\S+', Name.Variable), ], 'slots': [ (r'\s+', Text), (r';\s', Keyword, '#pop'), (r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)', bygroups(Text, Name.Variable, Text)), (r'\S+', Name.Variable), ], 'vocabs': [ (r'\s+', Text), (r';\s', Keyword, '#pop'), (r'\S+', Name.Namespace), ], 'classes': [ (r'\s+', Text), (r';\s', Keyword, '#pop'), (r'\S+', Name.Class), ], 'words': [ (r'\s+', Text), (r';\s', Keyword, '#pop'), (r'\S+', Name.Function), ], }
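# Editor's note: illustrative, hypothetical sketch, not part of the lexer.
# Factor words are whitespace-delimited, which is why every builtin class above
# is built with ``suffix=r'\s'``; in the invented sample below the defining-word
# rule tags 'twice' as Name.Function, and '*' is only recognized as a builtin
# because it is followed by a space.
def _demo_factor_lexer():
    sample = ': twice ( x -- y ) 2 * ;\n'
    for ttype, value in FactorLexer().get_tokens(sample):
        print(ttype, repr(value))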
class KokaLexer(RegexLexer): """ Lexer for the `Koka <http://koka.codeplex.com>`_ language. .. versionadded:: 1.6 """ name = 'Koka' aliases = ['koka'] filenames = ['*.kk', '*.kki'] mimetypes = ['text/x-koka'] keywords = [ 'infix', 'infixr', 'infixl', 'type', 'cotype', 'rectype', 'alias', 'struct', 'con', 'fun', 'function', 'val', 'var', 'external', 'if', 'then', 'else', 'elif', 'return', 'match', 'private', 'public', 'private', 'module', 'import', 'as', 'include', 'inline', 'rec', 'try', 'yield', 'enum', 'interface', 'instance', ] # keywords that are followed by a type typeStartKeywords = [ 'type', 'cotype', 'rectype', 'alias', 'struct', 'enum', ] # keywords valid in a type typekeywords = [ 'forall', 'exists', 'some', 'with', ] # builtin names and special names builtin = [ 'for', 'while', 'repeat', 'foreach', 'foreach-indexed', 'error', 'catch', 'finally', 'cs', 'js', 'file', 'ref', 'assigned', ] # symbols that can be in an operator symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+' # symbol boundary: an operator keyword should not be followed by any of these sboundary = '(?!' + symbols + ')' # name boundary: a keyword should not be followed by any of these boundary = r'(?![\w/])' # koka token abstractions tokenType = Name.Attribute tokenTypeDef = Name.Class tokenConstructor = Generic.Emph # main lexer tokens = { 'root': [ include('whitespace'), # go into type mode (r'::?' + sboundary, tokenType, 'type'), (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), 'alias-type'), (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), 'struct-type'), ((r'(%s)' % '|'.join(typeStartKeywords)) + r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), 'type'), # special sequences of tokens (we use ?: for non-capturing group as # required by 'bygroups') (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)', bygroups(Keyword, Text, Keyword, Name.Namespace)), (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)' r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)' r'((?:[a-z]\w*/)*[a-z]\w*))?', bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text, Keyword, Name.Namespace)), (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))' r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))', bygroups(Keyword, Text, Name.Function)), (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?' r'([a-z]\w*|\((?:' + symbols + r'|/)\))', bygroups(Keyword, Text, Keyword, Name.Function)), # keywords (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type), (r'(%s)' % '|'.join(keywords) + boundary, Keyword), (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo), (r'::?|:=|\->|[=.]' + sboundary, Keyword), # names (r'((?:[a-z]\w*/)*)([A-Z]\w*)', bygroups(Name.Namespace, tokenConstructor)), (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)), (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))', bygroups(Name.Namespace, Name)), (r'_\w*', Name.Variable), # literal string (r'@"', String.Double, 'litstring'), # operators (symbols + "|/(?![*/])", Operator), (r'`', Operator), (r'[{}()\[\];,]', Punctuation), # literals. 
No check for literal characters with len > 1 (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float), (r'0[xX][0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r"'", String.Char, 'char'), (r'"', String.Double, 'string'), ], # type started by alias 'alias-type': [(r'=', Keyword), include('type')], # type started by struct 'struct-type': [(r'(?=\((?!,*\)))', Punctuation, '#pop'), include('type')], # type started by colon 'type': [(r'[(\[<]', tokenType, 'type-nested'), include('type-content')], # type nested in brackets: can contain parameters, comma etc. 'type-nested': [ (r'[)\]>]', tokenType, '#pop'), (r'[(\[<]', tokenType, 'type-nested'), (r',', tokenType), (r'([a-z]\w*)(\s*)(:)(?!:)', bygroups(Name, Text, tokenType)), # parameter name include('type-content') ], # shared contents of a type 'type-content': [ include('whitespace'), # keywords (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword), (r'(?=((%s)' % '|'.join(keywords) + boundary + '))', Keyword, '#pop'), # need to match because names overlap... # kinds (r'[EPHVX]' + boundary, tokenType), # type names (r'[a-z][0-9]*(?![\w/])', tokenType), (r'_\w*', tokenType.Variable), # Generic.Emph (r'((?:[a-z]\w*/)*)([A-Z]\w*)', bygroups(Name.Namespace, tokenType)), (r'((?:[a-z]\w*/)*)([a-z]\w+)', bygroups(Name.Namespace, tokenType)), # type keyword operators (r'::|->|[.:|]', tokenType), # catchall default('#pop') ], # comments and literals 'whitespace': [(r'\n\s*#.*$', Comment.Preproc), (r'\s+', Text), (r'/\*', Comment.Multiline, 'comment'), (r'//.*$', Comment.Single)], 'comment': [ (r'[^/*]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline), ], 'litstring': [ (r'[^"]+', String.Double), (r'""', String.Escape), (r'"', String.Double, '#pop'), ], 'string': [ (r'[^\\"\n]+', String.Double), include('escape-sequence'), (r'["\n]', String.Double, '#pop'), ], 'char': [ (r'[^\\\'\n]+', String.Char), include('escape-sequence'), (r'[\'\n]', String.Char, '#pop'), ], 'escape-sequence': [ (r'\\[nrt\\"\']', String.Escape), (r'\\x[0-9a-fA-F]{2}', String.Escape), (r'\\u[0-9a-fA-F]{4}', String.Escape), # Yes, \U literals are 6 hex digits. (r'\\U[0-9a-fA-F]{6}', String.Escape) ] }
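# Editor's note: illustrative, hypothetical sketch, not part of the lexer.
# The ':' rule in 'root' switches into the 'type' state, so in the invented
# sample below the annotations after the colons are colored with ``tokenType``
# (Name.Attribute) rather than as plain names.
def _demo_koka_lexer():
    sample = 'fun inc( x : int ) : int {\n  x + 1\n}\n'
    for ttype, value in KokaLexer().get_tokens(sample):
        print(ttype, repr(value))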
class BatchLexer(RegexLexer): """ Lexer for the DOS/Windows Batch file format. .. versionadded:: 0.7 """ name = 'Batchfile' aliases = ['bat', 'batch', 'dosbatch', 'winbatch'] filenames = ['*.bat', '*.cmd'] mimetypes = ['application/x-dos-batch'] flags = re.MULTILINE | re.IGNORECASE _nl = r'\n\x1a' _punct = r'&<>|' _ws = r'\t\v\f\r ,;=\xa0' _nlws = r'\s\x1a\xa0,;=' _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws) _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' % (_nl, _ws, _nl, _punct)) _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl) _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws _label = r'(?:(?:[^%s%s+:^]|\^[%s]?[\w\W])*)' % (_nlws, _punct, _nl) _label_compound = r'(?:(?:[^%s%s+:^)]|\^[%s]?[^)])*)' % (_nlws, _punct, _nl) _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator _opword = r'(?:equ|geq|gtr|leq|lss|neq)' _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl) _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|' r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|' r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|' r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:' r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' % (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl)) _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s])+)' % (_nl, _nlws, _punct) _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s)])+)' % (_nl, _nlws, _punct) _token = r'(?:[%s]+|%s)' % (_punct, _core_token) _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound) _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token)) def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % 
suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % suffix) ] def _make_follow_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _space=_space, _start_label=_start_label, _token=_token, _token_compound=_token_compound, _ws=_ws): suffix = '/compound' if compound else '' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'%s([%s]*)(%s)(.*)' % (_start_label, _ws, _label_compound if compound else _label), bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)), include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'), (r'\|\|?|&&?', Punctuation, '#pop'), include('text') ] return state def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable, _ws=_ws, _nlws=_nlws): op = r'=+\-*/!~' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [(r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'[(),]+', Punctuation), (r'([%s]|%%|\^\^)+' % op, Operator), (r'(%s|%s|(\^[%s]?)?[^()%s%%\^"%s%s]|\^[%s]?%s)+' % (_string, _variable, _nl, op, _nlws, _punct, _nlws, r'[^)]' if compound else r'[\w\W]'), using(this, state='variable')), (r'(?=[\x00|&])', Text, '#pop'), include('follow')] return state def _make_call_state(compound, _label=_label, _label_compound=_label_compound): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(:?)(%s)' % (_label_compound if compound else _label), bygroups(Punctuation, Name.Label), '#pop')) return state def _make_label_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append( (r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' % (_label_compound if compound else _label, _string, _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl, _punct, r')' if compound else ''), bygroups(Name.Label, Comment.Single), 
'#pop')) return state def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _nlws=_nlws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s])\d)?)(>>?&|<&)([%s]*)(\d)' % (_nlws, _nlws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nlws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ] tokens = { 'root': _make_begin_state(False), 'follow': _make_follow_state(False), 'arithmetic': _make_arithmetic_state(False), 'call': _make_call_state(False), 'label': _make_label_state(False), 'redirect': _make_redirect_state(False), 'root/compound': _make_begin_state(True), 'follow/compound': _make_follow_state(True), 'arithmetic/compound': _make_arithmetic_state(True), 'call/compound': _make_call_state(True), 'label/compound': _make_label_state(True), 'redirect/compound': _make_redirect_state(True), 'variable-or-escape': [(_variable, Name.Variable), (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)], 'string': [(r'"', String.Double, '#pop'), (_variable, Name.Variable), (r'\^!|%%', String.Escape), (r'[^"%%^%s]+|[%%^]' % _nl, String.Double), default('#pop')], 'sqstring': [include('variable-or-escape'), (r'[^%]+|%', String.Single)], 'bqstring': [include('variable-or-escape'), (r'[^%]+|%', String.Backtick)], 'text': [(r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s%s\d)]+|.' % (_nlws, _punct), Text)], 'variable': [(r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s]+|.' % _nl, Name.Variable)], 'for': [(r'(%s)(in)(%s)(\()' % (_space, _space), bygroups(using(this, state='text'), Keyword, using(this, state='text'), Punctuation), '#pop'), include('follow')], 'for2': [(r'\)', Punctuation), (r'(%s)(do%s)' % (_space, _token_terminator), bygroups(using(this, state='text'), Keyword), '#pop'), (r'[%s]+' % _nl, Text), include('follow')], 'for/f': [(r'(")((?:%s|[^"])*?")([%s]*)(\))' % (_variable, _nlws), bygroups(String.Double, using(this, state='string'), Text, Punctuation)), (r'"', String.Double, ('#pop', 'for2', 'string')), (r"('(?:%%%%|%s|[\w\W])*?')([%s]*)(\))" % (_variable, _nlws), bygroups(using(this, state='sqstring'), Text, Punctuation)), (r'(`(?:%%%%|%s|[\w\W])*?`)([%s]*)(\))' % (_variable, _nlws), bygroups(using(this, state='bqstring'), Text, Punctuation)), include('for2')], 'for/l': [(r'-?\d+', Number.Integer), include('for2')], 'if': [ (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' % (_token_terminator, _space), bygroups(Keyword, using(this, state='text'), Number.Integer), '#pop'), (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text'), using(this, state='variable')), '#pop'), (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text')), '#pop'), (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number), bygroups(using(this, state='arithmetic'), Operator.Word, using(this, state='arithmetic')), '#pop'), (_stoken, using(this, state='text'), ('#pop', 'if2')), ], 'if2': [(r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken), bygroups(using(this, state='text'), Operator, using(this, state='text')), '#pop'), (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken), bygroups(using(this, state='text'), Operator.Word, using(this, 
state='text')), '#pop')], '(?': [(_space, using(this, state='text')), (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')), default('#pop')], 'else?': [(_space, using(this, state='text')), (r'else%s' % _token_terminator, Keyword, '#pop'), default('#pop')] }
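# Editor's note: illustrative, hypothetical sketch, not part of the lexer.
# Every state above is generated twice by the _make_*_state helpers -- once for
# top-level code and once with the '/compound' suffix -- so that an unescaped
# ')' terminates a parenthesised block instead of being swallowed as text.
def _demo_batch_lexer():
    sample = '@echo off\nif exist out.txt (echo found) else (echo missing)\n'
    for ttype, value in BatchLexer().get_tokens(sample):
        print(ttype, repr(value))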
class GeneratedObjectiveCVariant(baselexer): """ Implements Objective-C syntax on top of an existing C family lexer. """ tokens = { 'statements': [ (r'@"', String, 'string'), (r'@(YES|NO)', Number), (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'@0[0-7]+[Ll]?', Number.Oct), (r'@\d+[Ll]?', Number.Integer), (r'@\(', Literal, 'literal_number'), (r'@\[', Literal, 'literal_array'), (r'@\{', Literal, 'literal_dictionary'), (words(( '@selector', '@private', '@protected', '@public', '@encode', '@synchronized', '@try', '@throw', '@catch', '@finally', '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer', '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong', 'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic', 'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in', 'out', 'inout', 'release', 'class', '@dynamic', '@optional', '@required', '@autoreleasepool'), suffix=r'\b'), Keyword), (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL', 'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'), Keyword.Type), (r'@(true|false|YES|NO)\n', Name.Builtin), (r'(YES|NO|nil|self|super)\b', Name.Builtin), # Carbon types (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type), # Carbon built-ins (r'(TRUE|FALSE)\b', Name.Builtin), (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_classname')), (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_forward_classname')), # @ can also prefix other expressions like @{...} or @(...) (r'@', Punctuation), inherit, ], 'oc_classname': [ # interface definition that inherits (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)', bygroups(Name.Class, Text, Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)', bygroups(Name.Class, Text, Name.Label, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation (r'([a-zA-Z$_][\w$]*)(\s*)(\{)', bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'oc_forward_classname': [ (r'([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text), 'oc_forward_classname'), (r'([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class, Text), '#pop') ], 'oc_ivars': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'root': [ # methods (r'^([-+])(\s*)' # method marker r'(\(.*?\))?(\s*)' # return type r'([a-zA-Z$_][\w$]*:?)', # begin of method name bygroups(Punctuation, Text, using(this), Text, Name.Function), 'method'), inherit, ], 'method': [ include('whitespace'), # TODO unsure if ellipses are allowed elsewhere, see # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)', bygroups(using(this), Text, Name.Variable)), (r'[a-zA-Z$_][\w$]*:', Name.Function), (';', Punctuation, '#pop'), (r'\{', Punctuation, 'function'), default('#pop'), ], 'literal_number': [ (r'\(', Punctuation, 'literal_number_inner'), (r'\)', Literal, '#pop'), include('statement'), ], 
'literal_number_inner': [ (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop'), include('statement'), ], 'literal_array': [ (r'\[', Punctuation, 'literal_array_inner'), (r'\]', Literal, '#pop'), include('statement'), ], 'literal_array_inner': [ (r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), include('statement'), ], 'literal_dictionary': [ (r'\}', Literal, '#pop'), include('statement'), ], } def analyse_text(text): if _oc_keywords.search(text): return 1.0 elif '@"' in text: # strings return 0.8 elif re.search('@[0-9]+', text): return 0.7 elif _oc_message.search(text): return 0.8 return 0 def get_tokens_unprocessed(self, text): from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \ COCOA_PROTOCOLS, COCOA_PRIMITIVES for index, token, value in \ baselexer.get_tokens_unprocessed(self, text): if token is Name or token is Name.Class: if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \ or value in COCOA_PRIMITIVES: token = Name.Builtin.Pseudo yield index, token, value
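# Editor's note: illustrative, hypothetical sketch, not part of Pygments. In
# the real module the class body above is produced by a factory that is
# parameterized over a C-family base lexer ('baselexer'); the registered
# Objective-C lexer can simply be looked up by alias. 'NSObject' is re-tagged
# as Name.Builtin.Pseudo by the get_tokens_unprocessed() override, assuming it
# appears in the bundled COCOA_INTERFACES list.
def _demo_objc_lexer():
    from pygments.lexers import get_lexer_by_name

    lexer = get_lexer_by_name('objective-c')
    sample = '@interface Greeter : NSObject\n- (void)sayHello;\n@end\n'
    for ttype, value in lexer.get_tokens(sample):
        print(ttype, repr(value))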
class CoqLexer(RegexLexer): """ For the `Coq <http://coq.inria.fr/>`_ theorem prover. .. versionadded:: 1.5 """ name = 'Coq' aliases = ['coq'] filenames = ['*.v'] mimetypes = ['text/x-coq'] keywords1 = ( # Vernacular commands 'Section', 'Module', 'End', 'Require', 'Import', 'Export', 'Variable', 'Variables', 'Parameter', 'Parameters', 'Axiom', 'Hypothesis', 'Hypotheses', 'Notation', 'Local', 'Tactic', 'Reserved', 'Scope', 'Open', 'Close', 'Bind', 'Delimit', 'Definition', 'Let', 'Ltac', 'Fixpoint', 'CoFixpoint', 'Morphism', 'Relation', 'Implicit', 'Arguments', 'Set', 'Unset', 'Contextual', 'Strict', 'Prenex', 'Implicits', 'Inductive', 'CoInductive', 'Record', 'Structure', 'Canonical', 'Coercion', 'Theorem', 'Lemma', 'Corollary', 'Proposition', 'Fact', 'Remark', 'Example', 'Proof', 'Goal', 'Save', 'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite', 'View', 'Search', 'Show', 'Print', 'Printing', 'All', 'Graph', 'Projections', 'inside', 'outside', 'Check', ) keywords2 = ( # Gallina 'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct', 'match', 'end', 'in', 'return', 'let', 'if', 'is', 'then', 'else', 'for', 'of', 'nosimpl', 'with', 'as', ) keywords3 = ( # Sorts 'Type', 'Prop', ) keywords4 = ( # Tactics 'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf', 'intro', 'intros', 'generalize', 'rename', 'pattern', 'after', 'destruct', 'induction', 'using', 'refine', 'inversion', 'injection', 'rewrite', 'congr', 'unlock', 'compute', 'ring', 'field', 'replace', 'fold', 'unfold', 'change', 'cutrewrite', 'simpl', 'have', 'suff', 'wlog', 'suffices', 'without', 'loss', 'nat_norm', 'assert', 'cut', 'trivial', 'revert', 'bool_congr', 'nat_congr', 'symmetry', 'transitivity', 'auto', 'split', 'left', 'right', 'autorewrite', 'tauto', ) keywords5 = ( # Terminators 'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega', 'assumption', 'solve', 'contradiction', 'discriminate', ) keywords6 = ( # Control 'do', 'last', 'first', 'try', 'idtac', 'repeat', ) # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', # 'downto', 'else', 'end', 'exception', 'external', 'false', # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include', # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method', # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private', # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try', # 'type', 'val', 'virtual', 'when', 'while', 'with' keyopts = ( '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-', r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<', '<-', '<->', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>', r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~', '=>', r'/\\', r'\\/', u'Π', u'λ', ) operators = r'[!$%&*+\./:<=>?@^|~-]' word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or') prefix_syms = r'[!?~]' infix_syms = r'[=<>@^|&+\*/$%-]' primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array') tokens = { 'root': [ (r'\s+', Text), (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), (r'\(\*', Comment, 'comment'), (words(keywords1, prefix=r'\b', suffix=r'\b'), Keyword.Namespace), (words(keywords2, prefix=r'\b', suffix=r'\b'), Keyword), (words(keywords3, prefix=r'\b', suffix=r'\b'), Keyword.Type), (words(keywords4, prefix=r'\b', suffix=r'\b'), Keyword), (words(keywords5, prefix=r'\b', suffix=r'\b'), Keyword.Pseudo), (words(keywords6, prefix=r'\b', suffix=r'\b'), Keyword.Reserved), (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'), (r'\b([A-Z][\w\']*)', 
Name.Class), (r'(%s)' % '|'.join(keyopts[::-1]), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), (r"[^\W\d][\w']*", Name), (r'\d[\d_]*', Number.Integer), (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), (r'0[oO][0-7][0-7_]*', Number.Oct), (r'0[bB][01][01_]*', Number.Bin), (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", String.Char), (r"'.'", String.Char), (r"'", Keyword), # a stray quote is another syntax element (r'"', String.Double, 'string'), (r'[~?][a-z][\w\']*:', Name.Variable), ], 'comment': [ (r'[^(*)]+', Comment), (r'\(\*', Comment, '#push'), (r'\*\)', Comment, '#pop'), (r'[(*)]', Comment), ], 'string': [ (r'[^"]+', String.Double), (r'""', String.Double), (r'"', String.Double, '#pop'), ], 'dotted': [(r'\s+', Text), (r'\.', Punctuation), (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), (r'[A-Z][\w\']*', Name.Class, '#pop'), (r'[a-z][a-z0-9_\']*', Name, '#pop'), default('#pop')], } def analyse_text(text): if text.startswith('(*'): return True
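# Editor's note: illustrative, hypothetical sketch, not part of the lexer.
# Because the 'comment' state pushes itself on '(*' and pops on '*)', nested
# comments are balanced correctly: the inner '*)' in the invented sample below
# does not terminate the outer comment.
def _demo_coq_lexer():
    sample = '(* outer (* nested *) still a comment *)\nLemma one_pos : 0 < 1.\n'
    for ttype, value in CoqLexer().get_tokens(sample):
        print(ttype, repr(value))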
class RubyLexer(ExtendedRegexLexer): """ For `Ruby <http://www.ruby-lang.org>`_ source code. """ name = 'Ruby' aliases = ['rb', 'ruby', 'duby'] filenames = [ '*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby', 'Gemfile' ] mimetypes = ['text/x-ruby', 'application/x-ruby'] flags = re.DOTALL | re.MULTILINE def heredoc_callback(self, match, ctx): # okay, this is the hardest part of parsing Ruby... # match: 1 = <<[-~]?, 2 = quote? 3 = name 4 = quote? 5 = rest of line start = match.start(1) yield start, Operator, match.group(1) # <<[-~]? yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` yield match.start(3), String.Delimiter, match.group(3) # heredoc name yield match.start(4), String.Heredoc, match.group(4) # quote again heredocstack = ctx.__dict__.setdefault('heredocstack', []) outermost = not bool(heredocstack) heredocstack.append((match.group(1) in ('<<-', '<<~'), match.group(3))) ctx.pos = match.start(5) ctx.end = match.end(5) # this may find other heredocs for i, t, v in self.get_tokens_unprocessed(context=ctx): yield i, t, v ctx.pos = match.end() if outermost: # this is the outer heredoc again, now we can process them all for tolerant, hdname in heredocstack: lines = [] for match in line_re.finditer(ctx.text, ctx.pos): if tolerant: check = match.group().strip() else: check = match.group().rstrip() if check == hdname: for amatch in lines: yield amatch.start(), String.Heredoc, amatch.group( ) yield match.start(), String.Delimiter, match.group() ctx.pos = match.end() break else: lines.append(match) else: # end of heredoc not found -- error! for amatch in lines: yield amatch.start(), Error, amatch.group() ctx.end = len(ctx.text) del heredocstack[:] def gen_rubystrings_rules(): def intp_regex_callback(self, match, ctx): yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ['interpolated-regex']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Regex, match.group( 4) # end[mixounse]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): yield match.start(1), String.Other, match.group(1) nctx = LexerContext(match.group(3), 0, ['interpolated-string']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states['strings'] = [ # easy ones (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol), (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), ] # double-quoted string and symbol for name, ttype, end in ('string', String.Double, '"'), \ ('sym', String.Symbol, '"'), \ ('backtick', String.Backtick, '`'): states['simple-' + name] = [ include('string-intp-escaped'), (r'[^\\%s#]+' % end, ttype), (r'[\\#]', ttype), (end, ttype, '#pop'), ] # braced quoted strings for lbrace, rbrace, bracecc, name in \ ('\\{', '\\}', '{}', 'cb'), \ ('\\[', '\\]', '\\[\\]', 'sb'), \ ('\\(', '\\)', '()', 'pa'), \ ('<', '>', '<>', 'ab'): states[name + '-intp-string'] = [ (r'\\[\\' + bracecc + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), include('string-intp-escaped'), (r'[\\#' + bracecc + ']', String.Other), 
(r'[^\\#' + bracecc + ']+', String.Other), ] states['strings'].append( (r'%[QWx]?' + lbrace, String.Other, name + '-intp-string')) states[name + '-string'] = [ (r'\\[\\' + bracecc + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), (r'[\\#' + bracecc + ']', String.Other), (r'[^\\#' + bracecc + ']+', String.Other), ] states['strings'].append( (r'%[qsw]' + lbrace, String.Other, name + '-string')) states[name + '-regex'] = [ (r'\\[\\' + bracecc + ']', String.Regex), (lbrace, String.Regex, '#push'), (rbrace + '[mixounse]*', String.Regex, '#pop'), include('string-intp'), (r'[\\#' + bracecc + ']', String.Regex), (r'[^\\#' + bracecc + ']+', String.Regex), ] states['strings'].append( (r'%r' + lbrace, String.Regex, name + '-regex')) # these must come after %<brace>! states['strings'] += [ # %r regex (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)', intp_regex_callback), # regular fancy strings with qsw (r'%[qsw]([\W_])((?:\\\1|(?!\1).)*)\1', String.Other), (r'(%[QWx]([\W_]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # and because of fixed width lookbehinds the whole thing a # second time for line startings... (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # all regular fancy strings without qsw (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), ] return states tokens = { 'root': [ (r'\A#!.+?$', Comment.Hashbang), (r'#.*?$', Comment.Single), (r'=begin\s.*?\n=end.*?$', Comment.Multiline), # keywords (words( ('BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'for', 'if', 'in', 'next', 'redo', 'rescue', 'raise', 'retry', 'return', 'super', 'then', 'undef', 'unless', 'until', 'when', 'while', 'yield'), suffix=r'\b'), Keyword), # start of function, class and module names (r'(module)(\s+)([a-zA-Z_]\w*' r'(?:::[a-zA-Z_]\w*)*)', bygroups(Keyword, Text, Name.Namespace)), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # special methods (words(('initialize', 'new', 'loop', 'include', 'extend', 'raise', 'attr_reader', 'attr_writer', 'attr_accessor', 'attr', 'catch', 'throw', 'private', 'module_function', 'public', 'protected', 'true', 'false', 'nil'), suffix=r'\b'), Keyword.Pseudo), (r'(not|and|or)\b', Operator.Word), (words(('autoload', 'block_given', 'const_defined', 'eql', 'equal', 'frozen', 'include', 'instance_of', 'is_a', 'iterator', 'kind_of', 'method_defined', 'nil', 'private_method_defined', 'protected_method_defined', 'public_method_defined', 'respond_to', 'tainted'), suffix=r'\?'), Name.Builtin), (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin), (words( ('Array', 'Float', 'Integer', 'String', '__id__', '__send__', 'abort', 'ancestors', 'at_exit', 'autoload', 'binding', 'callcc', 'caller', 'catch', 'chomp', 'chop', 'class_eval', 'class_variables', 'clone', 'const_defined?', 'const_get', 'const_missing', 'const_set', 'constants', 'display', 'dup', 'eval', 'exec', 'exit', 'extend', 'fail', 'fork', 'format', 'freeze', 'getc', 'gets', 'global_variables', 'gsub', 'hash', 'id', 'included_modules', 'inspect', 'instance_eval', 'instance_method', 'instance_methods', 'instance_variable_get', 'instance_variable_set', 'instance_variables', 'lambda', 'load', 
'local_variables', 'loop', 'method', 'method_missing', 'methods', 'module_eval', 'name', 'object_id', 'open', 'p', 'print', 'printf', 'private_class_method', 'private_instance_methods', 'private_methods', 'proc', 'protected_instance_methods', 'protected_methods', 'public_class_method', 'public_instance_methods', 'public_methods', 'putc', 'puts', 'raise', 'rand', 'readline', 'readlines', 'require', 'scan', 'select', 'self', 'send', 'set_trace_func', 'singleton_methods', 'sleep', 'split', 'sprintf', 'srand', 'sub', 'syscall', 'system', 'taint', 'test', 'throw', 'to_a', 'to_s', 'trace_var', 'trap', 'untaint', 'untrace_var', 'warn'), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin), (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo), # normal heredocs (r'(?<!\w)(<<[-~]?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)', heredoc_callback), # empty string heredocs (r'(<<[-~]?)("|\')()(\2)(.*?\n)', heredoc_callback), (r'__END__', Comment.Preproc, 'end-part'), # multiline regex (after keywords or files) (r'(?:^|(?<=[=<>~!:])|' r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' r'(?<=(?:\s|;)and\s)|' r'(?<=\.index\s)|' r'(?<=\.scan\s)|' r'(?<=\.sub\s)|' r'(?<=\.sub!\s)|' r'(?<=\.gsub\s)|' r'(?<=\.gsub!\s)|' r'(?<=\.match\s)|' r'(?<=(?:\s|;)if\s)|' r'(?<=(?:\s|;)elsif\s)|' r'(?<=^when\s)|' r'(?<=^index\s)|' r'(?<=^scan\s)|' r'(?<=^sub\s)|' r'(?<=^gsub\s)|' r'(?<=^sub!\s)|' r'(?<=^gsub!\s)|' r'(?<=^match\s)|' r'(?<=^if\s)|' r'(?<=^elsif\s)' r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'), # multiline regex (in method calls or subscripts) (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'), # multiline regex (this time the funny no whitespace rule) (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex), 'multiline-regex'), # lex numbers and ignore following regular expressions which # are division operators in fact (grrrr. i hate that. any # better ideas?) # since pygments 0.7 we also eat a "?" operator after numbers # so that the char operator does not work. Chars are not allowed # there so that you can use the ternary operator. # stupid example: # x>=0?n[x]:"" (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?', bygroups(Number.Oct, Text, Operator)), (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?', bygroups(Number.Hex, Text, Operator)), (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?', bygroups(Number.Bin, Text, Operator)), (r'([\d]+(?:_\d+)*)(\s*)([/?])?', bygroups(Number.Integer, Text, Operator)), # Names (r'@@[a-zA-Z_]\w*', Name.Variable.Class), (r'@[a-zA-Z_]\w*', Name.Variable.Instance), (r'\$\w+', Name.Variable.Global), (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global), (r'\$-[0adFiIlpvw]', Name.Variable.Global), (r'::', Operator), include('strings'), # chars ( r'\?(\\[MC]-)*' # modifiers r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)' r'(?!\w)', String.Char), (r'[A-Z]\w+', Name.Constant), # this is needed because ruby attributes can look # like keywords (class) or like this: ` ?!? (words(RUBY_OPERATORS, prefix=r'(\.|::)'), bygroups(Operator, Name.Operator)), (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])', bygroups(Operator, Name)), (r'[a-zA-Z_]\w*[!?]?', Name), (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|' r'!~|&&?|\|\||\.{1,3})', Operator), (r'[-+/*%=<>&!^|~]=?', Operator), (r'[(){};,/?:\\]', Punctuation), (r'\s+', Text) ], 'funcname': [(r'\(', Punctuation, 'defexpr'), (r'(?:([a-zA-Z_]\w*)(\.))?' 
r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), default('#pop')], 'classname': [(r'\(', Punctuation, 'defexpr'), (r'<<', Operator, '#pop'), (r'[A-Z_]\w*', Name.Class, '#pop'), default('#pop')], 'defexpr': [(r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'), (r'\(', Operator, '#push'), include('root')], 'in-intp': [ (r'\{', String.Interpol, '#push'), (r'\}', String.Interpol, '#pop'), include('root'), ], 'string-intp': [(r'#\{', String.Interpol, 'in-intp'), (r'#@@?[a-zA-Z_]\w*', String.Interpol), (r'#\$[a-zA-Z_]\w*', String.Interpol)], 'string-intp-escaped': [ include('string-intp'), (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape) ], 'interpolated-regex': [ include('string-intp'), (r'[\\#]', String.Regex), (r'[^\\#]+', String.Regex), ], 'interpolated-string': [ include('string-intp'), (r'[\\#]', String.Other), (r'[^\\#]+', String.Other), ], 'multiline-regex': [ include('string-intp'), (r'\\\\', String.Regex), (r'\\/', String.Regex), (r'[\\#]', String.Regex), (r'[^\\/#]+', String.Regex), (r'/[mixounse]*', String.Regex, '#pop'), ], 'end-part': [(r'.+', Comment.Preproc, '#pop')] } tokens.update(gen_rubystrings_rules()) def analyse_text(text): return shebang_matches(text, r'ruby(1\.\d)?')
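# Editor's note: illustrative, hypothetical sketch, not part of the lexer.
# heredoc_callback() defers the heredoc body until the terminator line is
# found; for the '<<~' (and '<<-') forms it strips the candidate line before
# comparing, which is why the indented 'EOS' in the invented sample below still
# closes the heredoc.
def _demo_ruby_heredoc():
    sample = 'greeting = <<~EOS\n  Hello, world!\n  EOS\nputs greeting\n'
    for ttype, value in RubyLexer().get_tokens(sample):
        print(ttype, repr(value))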
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    .. versionadded:: 0.10
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    def innerstring_rules(ttype):
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             '((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             '(\![sra])?'                       # conversion
             '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             '\}', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (words((
            'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
            'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
            'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
            'yield from', 'as', 'with'), suffix=r'\b'),
         Keyword),
        (words((
            'True', 'False', 'None'), suffix=r'\b'),
         Keyword.Constant),
    ]
    tokens['builtins'] = [
        (words((
            '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray',
            'bytes', 'chr', 'classmethod', 'cmp', 'compile', 'complex',
            'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter',
            'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr',
            'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass',
            'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview',
            'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print',
            'property', 'range', 'repr', 'reversed', 'round', 'set',
            'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum',
            'super', 'tuple', 'type', 'vars', 'zip'),
            prefix=r'(?<!\.)', suffix=r'\b'),
         Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
        (words((
            'ArithmeticError', 'AssertionError', 'AttributeError',
            'BaseException', 'BufferError', 'BytesWarning',
            'DeprecationWarning', 'EOFError', 'EnvironmentError',
            'Exception', 'FloatingPointError', 'FutureWarning',
            'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning',
            'IndentationError', 'IndexError', 'KeyError',
            'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
            'NotImplementedError', 'OSError', 'OverflowError',
            'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
            'RuntimeError', 'RuntimeWarning', 'StopIteration', 'SyntaxError',
            'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
            'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
            'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
            'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
            'Warning', 'WindowsError', 'ZeroDivisionError',
            # new builtin exceptions from PEP 3151
            'BlockingIOError', 'ChildProcessError', 'ConnectionError',
            'BrokenPipeError', 'ConnectionAbortedError',
            'ConnectionRefusedError', 'ConnectionResetError',
            'FileExistsError', 'FileNotFoundError', 'InterruptedError',
            'IsADirectoryError', 'NotADirectoryError', 'PermissionError',
            'ProcessLookupError', 'TimeoutError'),
            prefix=r'(?<!\.)', suffix=r'\b'),
         Name.Exception),
    ]
    tokens['magicfuncs'] = [
        (words((
            '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
            '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
            '__call__', '__complex__', '__contains__', '__del__',
            '__delattr__', '__delete__', '__delitem__', '__dir__',
            '__divmod__', '__enter__', '__eq__', '__exit__', '__float__',
            '__floordiv__', '__format__', '__ge__', '__get__', '__getattr__',
            '__getattribute__', '__getitem__', '__gt__', '__hash__',
            '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__',
            '__imatmul__', '__imod__', '__import__', '__imul__', '__index__',
            '__init__', '__instancecheck__', '__int__', '__invert__',
            '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__',
            '__itruediv__', '__ixor__', '__le__', '__len__',
            '__length_hint__', '__lshift__', '__lt__', '__matmul__',
            '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
            '__new__', '__next__', '__or__', '__pos__', '__pow__',
            '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
            '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
            '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
            '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
            '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
            '__sub__', '__subclasscheck__', '__truediv__', '__xor__'),
            suffix=r'\b'),
         Name.Function.Magic),
    ]
    tokens['magicvars'] = [
        (words((
            '__annotations__', '__bases__', '__class__', '__closure__',
            '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
            '__func__', '__globals__', '__kwdefaults__', '__module__',
            '__mro__', '__name__', '__objclass__', '__qualname__',
            '__self__', '__slots__', '__weakref__'),
            suffix=r'\b'),
         Name.Variable.Magic),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@\w+', Name.Decorator),
        (r'@', Operator),  # new matrix multiplication operator
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        default('#pop')  # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        default('#pop'),
    ]
    tokens['strings-single'] = innerstring_rules(String.Single)
    tokens['strings-double'] = innerstring_rules(String.Double)

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?3(\.\d)?')
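
# Illustrative sketch (not part of the original module): the copy-and-override
# pattern above (``tokens = PythonLexer.tokens.copy()``) reuses PythonLexer's
# state machine and only replaces individual states.  ``get_tokens`` is the
# standard Lexer API; the sample source string is made up for the demo and
# exercises the %-style interpolation handled by innerstring_rules().
def _demo_python3_tokens():
    from pygments.lexers import Python3Lexer

    source = 'print("total = %d" % (1 + 2))\n'
    # Yields (TokenType, text) pairs; the "%d" inside the string literal
    # comes back as String.Interpol rather than plain String.Double.
    return list(Python3Lexer().get_tokens(source))
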
    'string-single': [
        (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Single),
        (r'#\{', String.Interpol, 'interpolation'),
        (r"'", String.Single, '#pop'),
    ],
    'string-url': [
        (r'(\\#|#(?=[^\n{])|[^\n#)])+', String.Other),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'\)', String.Other, '#pop'),
    ],
    'pseudo-class': [
        (r'[\w-]+', Name.Decorator),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],
    'class': [
        (r'[\w-]+', Name.Class),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],
    'id': [
        (r'[\w-]+', Name.Namespace),
        (r'#\{', String.Interpol, 'interpolation'),
        default('#pop'),
    ],
    'for': [
"string-double": [ (r'(\\.|#(?=[^\n{])|[^\n"#])+', String.Double), (r"#\{", String.Interpol, "interpolation"), (r'"', String.Double, "#pop"), ], "string-single": [ (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Double), (r"#\{", String.Interpol, "interpolation"), (r"'", String.Double, "#pop"), ], "string-url": [ (r"(\\#|#(?=[^\n{])|[^\n#)])+", String.Other), (r"#\{", String.Interpol, "interpolation"), (r"\)", String.Other, "#pop"), ], "pseudo-class": [(r"[\w-]+", Name.Decorator), (r"#\{", String.Interpol, "interpolation"), default("#pop")], "class": [(r"[\w-]+", Name.Class), (r"#\{", String.Interpol, "interpolation"), default("#pop")], "id": [(r"[\w-]+", Name.Namespace), (r"#\{", String.Interpol, "interpolation"), default("#pop")], "for": [(r"(from|to|through)", Operator.Word), include("value")], } def _indentation(lexer, match, ctx): indentation = match.group(0) yield match.start(), Text, indentation ctx.last_indentation = indentation ctx.pos = match.end() if ( hasattr(ctx, "block_state") and ctx.block_state