class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.

    The ``root`` state tries, in order: whitespace and ``;`` comments,
    labels, keywords and type names, sigil-prefixed names (``%``, ``@``,
    ``#``, ``!``), string literals, numbers and finally punctuation.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Double-quoted string that contains no embedded double quote.
    string = r'"[^"]*?"'
    # Either a bare identifier or a quoted string used as an identifier.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (identifier + r'\s*:', Name.Label),

            include('keyword'),

            # Named values first, then their purely numeric forms.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # Plain string, optionally prefixed with ``c``.
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            # A lone ``x`` is treated as punctuation as well.
            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            # Comments start with ``;`` and run up to and including the
            # newline.
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'acq_rel', 'acquire', 'add', 'addrspace', 'addrspacecast',
                'afn', 'alias', 'aliasee', 'align', 'alignLog2', 'alignstack',
                'alloca', 'allocsize', 'allOnes', 'alwaysinline', 'amdgpu_cs',
                'amdgpu_es', 'amdgpu_gs', 'amdgpu_hs', 'amdgpu_kernel',
                'amdgpu_ls', 'amdgpu_ps', 'amdgpu_vs', 'and', 'any',
                'anyregcc', 'appending', 'arcp', 'argmemonly', 'args',
                'arm_aapcs_vfpcc', 'arm_aapcscc', 'arm_apcscc', 'ashr', 'asm',
                'atomic', 'atomicrmw', 'attributes', 'available_externally',
                'avr_intrcc', 'avr_signalcc', 'bit', 'bitcast', 'bitMask',
                'blockaddress', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byte', 'byteArray', 'byval', 'c', 'call', 'callee', 'caller',
                'calls', 'catch', 'catchpad', 'catchret', 'catchswitch', 'cc',
                'ccc', 'cleanup', 'cleanuppad', 'cleanupret', 'cmpxchg',
                'cold', 'coldcc', 'comdat', 'common', 'constant', 'contract',
                'convergent', 'critical', 'cxx_fast_tlscc', 'datalayout',
                'declare', 'default', 'define', 'deplibs', 'dereferenceable',
                'dereferenceable_or_null', 'distinct', 'dllexport',
                'dllimport', 'dso_local', 'dso_preemptable', 'dsoLocal', 'eq',
                'exact', 'exactmatch', 'extern_weak', 'external',
                'externally_initialized', 'extractelement', 'extractvalue',
                'fadd', 'false', 'fast', 'fastcc', 'fcmp', 'fdiv', 'fence',
                'filter', 'flags', 'fmul', 'fpext', 'fptosi', 'fptoui',
                'fptrunc', 'freeze', 'frem', 'from', 'fsub', 'funcFlags',
                'function', 'gc', 'getelementptr', 'ghccc', 'global', 'guid',
                'gv', 'hash', 'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness',
                'icmp', 'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'musttail',
                'naked', 'name', 'nand', 'ne', 'nest', 'ninf', 'nnan',
                'noalias', 'nobuiltin', 'nocapture', 'nocf_check',
                'noduplicate', 'noduplicates', 'noimplicitfloat', 'noinline',
                'none', 'nonlazybind', 'nonnull', 'norecurse', 'noRecurse',
                'noredzone', 'noreturn', 'notail', 'notEligibleToImport',
                'nounwind', 'nsw', 'nsz', 'null', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'path', 'personality',
                'phi', 'poison', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_thread',
                'sdiv', 'section', 'select', 'seq_cst', 'sext', 'sge', 'sgt',
                'shadowcallstack', 'shl', 'shufflevector', 'sideeffect',
                'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'spir_func', 'spir_kernel', 'srem', 'sret',
                'ssp', 'sspreq', 'sspstrong', 'store', 'strictfp', 'sub',
                'summaries', 'summary', 'swiftcc', 'swifterror', 'swiftself',
                'switch', 'syncscope', 'tail', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeIdInfo', 'typeTestAssumeConstVCalls',
                'typeTestAssumeVCalls', 'typeTestRes', 'typeTests', 'udiv',
                'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin',
                'undef', 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown',
                'unnamed_addr', 'uno', 'unordered', 'unreachable', 'unsat',
                'unwind', 'urem', 'uselistorder', 'uselistorder_bb', 'uwtable',
                'va_arg', 'variable', 'vFuncId', 'virtualConstProp', 'void',
                'volatile', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            # NOTE(review): 'void', 'metadata' and 'token' also appear in the
            # keyword list above, which is tried first.
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'token')),
             Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent
    LLVM's machine specific intermediate representation. It allows LLVM's
    developers to see the state of the compilation process at various points,
    as well as test individual pieces of the compiler.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        # Top level: look for document separators and dispatch on them.
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'. A following '---' is only
            # looked at, not consumed, so 'root' can match it again.
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes; each one pushes a small state
            # that highlights the value and then pops.
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Attributes whose contents are not highlighted any further
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        # Value of a 'name:' attribute: the rest of the line.
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        # Value of a boolean attribute.
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        # Value of a numeric attribute.
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
class SystemVerilogLexer(RegexLexer):
    """
    Extends verilog lexer to recognise all SystemVerilog keywords from IEEE
    1800-2009 standard.

    Rules in a state are tried top to bottom and the first match wins, so
    specific rules (class declarations, built-in types, known compiler
    directives) are listed before the general rules that would otherwise
    swallow the same text.

    .. versionadded:: 1.5
    """
    name = 'systemverilog'
    aliases = ['systemverilog', 'sv']
    filenames = ['*.sv', '*.svh']
    mimetypes = ['text/x-systemverilog']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            (r'^\s*`define', Comment.Preproc, 'macro'),
            (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)),
            (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text),
             'import'),

            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'[{}#@]', Punctuation),
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            # Based literals: an optional size followed by 'h / 'b / 'd / 'o
            # and the digits. The size must be optional rather than an
            # alternative, otherwise every plain decimal number would be
            # claimed by the first of these rules.
            (r'(?:[0-9]+)?\'h[0-9a-fA-F]+', Number.Hex),
            (r'(?:[0-9]+)?\'b[01]+', Number.Bin),
            (r'(?:[0-9]+)?\'d[0-9]+', Number.Integer),
            (r'(?:[0-9]+)?\'o[0-7]+', Number.Oct),
            (r'\'[01xz]', Number),
            (r'\d+[Ll]?', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.;\']', Punctuation),

            # Known compiler directives. Must precede the generic
            # backtick-macro rule below, which matches the same text.
            (words(('`__FILE__', '`__LINE__', '`begin_keywords',
                    '`celldefine', '`default_nettype', '`define', '`else',
                    '`elsif', '`end_keywords', '`endcelldefine', '`endif',
                    '`ifdef', '`ifndef', '`include', '`line',
                    '`nounconnected_drive', '`pragma', '`resetall',
                    '`timescale', '`unconnected_drive', '`undef',
                    '`undefineall'),
                   suffix=r'\b'),
             Comment.Preproc),
            (r'`[a-zA-Z_]\w*', Name.Constant),

            # Class declarations and built-in types. Must precede the
            # general keyword list, which contains the same words.
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (words(('byte', 'shortint', 'int', 'longint', 'integer', 'time',
                    'bit', 'logic', 'reg', 'supply0', 'supply1', 'tri',
                    'triand', 'trior', 'tri0', 'tri1', 'trireg', 'uwire',
                    'wire', 'wand', 'wor', 'shortreal', 'real', 'realtime'),
                   suffix=r'\b'),
             Keyword.Type),

            (words((
                'accept_on', 'alias', 'always', 'always_comb', 'always_ff',
                'always_latch', 'and', 'assert', 'assign', 'assume',
                'automatic', 'before', 'begin', 'bind', 'bins', 'binsof',
                'bit', 'break', 'buf', 'bufif0', 'bufif1', 'byte', 'case',
                'casex', 'casez', 'cell', 'chandle', 'checker', 'class',
                'clocking', 'cmos', 'config', 'const', 'constraint',
                'context', 'continue', 'cover', 'covergroup', 'coverpoint',
                'cross', 'deassign', 'default', 'defparam', 'design',
                'disable', 'dist', 'do', 'edge', 'else', 'end', 'endcase',
                'endchecker', 'endclass', 'endclocking', 'endconfig',
                'endfunction', 'endgenerate', 'endgroup', 'endinterface',
                'endmodule', 'endpackage', 'endprimitive',
                'endprogram', 'endproperty', 'endsequence', 'endspecify',
                'endtable', 'endtask', 'enum', 'event', 'eventually',
                'expect', 'export', 'extends', 'extern', 'final',
                'first_match', 'for', 'force', 'foreach', 'forever', 'fork',
                'forkjoin', 'function', 'generate', 'genvar', 'global',
                'highz0', 'highz1', 'if', 'iff', 'ifnone', 'ignore_bins',
                'illegal_bins', 'implies', 'import', 'incdir', 'include',
                'initial', 'inout', 'input', 'inside', 'instance', 'int',
                'integer', 'interface', 'intersect', 'join', 'join_any',
                'join_none', 'large', 'let', 'liblist', 'library', 'local',
                'localparam', 'logic', 'longint', 'macromodule', 'matches',
                'medium', 'modport', 'module', 'nand', 'negedge', 'new',
                'nexttime', 'nmos', 'nor', 'noshowcancelled', 'not',
                'notif0', 'notif1', 'null', 'or', 'output', 'package',
                'packed', 'parameter', 'pmos', 'posedge', 'primitive',
                'priority', 'program', 'property', 'protected', 'pull0',
                'pull1', 'pulldown', 'pullup', 'pulsestyle_ondetect',
                'pulsestyle_onevent', 'pure', 'rand', 'randc', 'randcase',
                'randsequence', 'rcmos', 'real', 'realtime', 'ref', 'reg',
                'reject_on', 'release', 'repeat', 'restrict', 'return',
                'rnmos', 'rpmos', 'rtran', 'rtranif0', 'rtranif1',
                's_always', 's_eventually', 's_nexttime', 's_until',
                's_until_with', 'scalared', 'sequence', 'shortint',
                'shortreal', 'showcancelled', 'signed', 'small', 'solve',
                'specify', 'specparam', 'static', 'string', 'strong',
                'strong0', 'strong1', 'struct', 'super', 'supply0',
                'supply1', 'sync_accept_on', 'sync_reject_on', 'table',
                'tagged', 'task', 'this', 'throughout', 'time',
                'timeprecision', 'timeunit', 'tran', 'tranif0', 'tranif1',
                'tri', 'tri0', 'tri1', 'triand', 'trior', 'trireg', 'type',
                'typedef', 'union', 'unique', 'unique0', 'unsigned', 'until',
                'until_with', 'untyped', 'use', 'uwire', 'var', 'vectored',
                'virtual', 'void', 'wait', 'wait_order', 'wand', 'weak',
                'weak0', 'weak1', 'while', 'wildcard', 'wire', 'with',
                'within', 'wor', 'xnor', 'xor'),
                suffix=r'\b'),
             Keyword),

            (words((
                '$display', '$displayb', '$displayh', '$displayo',
                '$dumpall', '$dumpfile', '$dumpflush', '$dumplimit',
                '$dumpoff', '$dumpon', '$dumpports', '$dumpportsall',
                '$dumpportsflush', '$dumpportslimit', '$dumpportsoff',
                '$dumpportson', '$dumpvars', '$fclose', '$fdisplay',
                '$fdisplayb', '$fdisplayh', '$fdisplayo', '$feof', '$ferror',
                '$fflush', '$fgetc', '$fgets', '$finish', '$fmonitor',
                '$fmonitorb', '$fmonitorh', '$fmonitoro', '$fopen', '$fread',
                '$fscanf', '$fseek', '$fstrobe', '$fstrobeb', '$fstrobeh',
                '$fstrobeo', '$ftell', '$fwrite', '$fwriteb', '$fwriteh',
                '$fwriteo', '$monitor', '$monitorb', '$monitorh',
                '$monitoro', '$monitoroff', '$monitoron', '$plusargs',
                '$random', '$readmemb', '$readmemh', '$rewind', '$sformat',
                '$sformatf', '$sscanf', '$strobe', '$strobeb', '$strobeh',
                '$strobeo', '$swrite', '$swriteb', '$swriteh', '$swriteo',
                '$test', '$ungetc', '$value$plusargs', '$write', '$writeb',
                '$writeh', '$writememb', '$writememh', '$writeo'),
                suffix=r'\b'),
             Name.Builtin),

            (r'[a-zA-Z_]\w*:(?!:)', Name.Label),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Entered right after ``class``: the next identifier is the name.
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        # Body of a `define: everything up to the end of the line, where a
        # backslash immediately before the newline continues the macro.
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'import': [
            (r'[\w:]+\*?', Name.Namespace, '#pop')
        ]
    }

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for *text*.

        Plain ``Name`` tokens whose text is all upper case are re-tagged as
        ``Name.Constant``; every other token is passed through unchanged.
        """
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            # Convention: mark all upper case names as constants
            if token is Name and value.isupper():
                token = Name.Constant
            yield index, token, value
class VhdlLexer(RegexLexer):
    """
    For VHDL source code.

    Matching is case-insensitive (``re.IGNORECASE``), so the lower-case
    character classes and word lists below also cover upper-case input.

    .. versionadded:: 1.5
    """
    name = 'vhdl'
    aliases = ['vhdl']
    filenames = ['*.vhdl', '*.vhd']
    mimetypes = ['text/x-vhdl']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'--.*?$', Comment.Single),
            (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r"'[a-z_]\w*", Name.Attribute),
            (r'[()\[\],.;\']', Punctuation),
            # The body must not contain a double quote, otherwise two
            # strings on one line ("a" & "b") are swallowed as a single
            # token together with everything between them.
            (r'"[^\n\\"]*"', String),

            (r'(library)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
            (r'(use)(\s+)([a-z_][\w.]*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(entity|component)(\s+)([a-z_]\w*)',
             bygroups(Keyword, Text, Name.Class)),
            (r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)'
             r'(of)(\s+)([a-z_]\w*)(\s+)(is)',
             bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
                      Name.Class, Text, Keyword)),

            (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),

            include('types'),
            include('keywords'),
            include('numbers'),

            (r'[a-z_]\w*', Name),
        ],
        # After ``end``: keywords stay keywords, any other identifier is
        # the name of the block being closed; ``;`` leaves the state.
        'endblock': [
            include('keywords'),
            (r'[a-z_]\w*', Name.Class),
            (r'(\s+)', Text),
            (r';', Punctuation, '#pop'),
        ],
        'types': [
            (words((
                'boolean', 'bit', 'character', 'severity_level', 'integer',
                'time', 'delay_length', 'natural', 'positive', 'string',
                'bit_vector', 'file_open_kind', 'file_open_status',
                'std_ulogic', 'std_ulogic_vector', 'std_logic',
                'std_logic_vector'),
                suffix=r'\b'),
             Keyword.Type),
        ],
        'keywords': [
            (words((
                'abs', 'access', 'after', 'alias', 'all', 'and',
                'architecture', 'array', 'assert', 'attribute', 'begin',
                'block', 'body', 'buffer', 'bus', 'case', 'component',
                'configuration', 'constant', 'disconnect', 'downto', 'else',
                'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function',
                'generate', 'generic', 'group', 'guarded', 'if', 'impure',
                'in', 'inertial', 'inout', 'is', 'label', 'library',
                'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new',
                'next', 'nor', 'not',
                'null', 'of', 'on', 'open', 'or', 'others', 'out', 'package',
                'port', 'postponed', 'procedure', 'process', 'pure', 'range',
                'record', 'register', 'reject', 'return', 'rol', 'ror',
                'select', 'severity', 'signal', 'shared', 'sla', 'sli',
                'sra', 'srl', 'subtype', 'then', 'to', 'transport', 'type',
                'units', 'until', 'use', 'variable', 'wait', 'when', 'while',
                'with', 'xnor', 'xor'),
                suffix=r'\b'),
             Keyword),
        ],
        'numbers': [
            (r'\d{1,2}#[0-9a-f_]+#?', Number.Integer),
            # The float rule has to come before the plain integer rule:
            # both start with digits, and the first matching rule wins.
            (r'(\d+\.\d*|\.\d+|\d+)E[+-]?\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'X"[0-9a-f_]+"', Number.Hex),
            (r'O"[0-7_]+"', Number.Oct),
            (r'B"[01_]+"', Number.Bin),
        ],
    }
class AmplLexer(RegexLexer):
    """
    For `AMPL <http://ampl.com/>`_ source code.

    All rules live in the single ``root`` state and are tried in the order
    listed; the final rule is a catch-all for remaining words and single
    dots.

    .. versionadded:: 2.2
    """
    name = 'Ampl'
    aliases = ['ampl']
    filenames = ['*.run']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text.Whitespace),
            (r'#.*?\n', Comment.Single),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (words((
                'call', 'cd', 'close', 'commands', 'data', 'delete',
                'display', 'drop', 'end', 'environ', 'exit', 'expand',
                'include', 'load', 'model', 'objective', 'option', 'problem',
                'purge', 'quit', 'redeclare', 'reload', 'remove', 'reset',
                'restore', 'shell', 'show', 'solexpand', 'solution', 'solve',
                'update', 'unload', 'xref', 'coeff', 'coef', 'cover', 'obj',
                'interval', 'default', 'from', 'to', 'to_come', 'net_in',
                'net_out', 'dimen', 'dimension', 'check', 'complements',
                'write', 'function', 'pipe', 'format', 'if', 'then', 'else',
                'in', 'while', 'repeat', 'for'), suffix=r'\b'),
             Keyword.Reserved),
            # The trailing \b keeps these from matching the first letters
            # of a longer word (e.g. 'IN' at the start of 'INDEX').
            (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|'
             r'OUT|LOCAL)\b',
             Keyword.Type),
            (r'\".*?\"', String.Double),
            (r'\'.*?\'', String.Single),
            (r'[()\[\]{},;:]+', Punctuation),
            # name.suffix, e.g. a variable followed by one of the built-in
            # suffixes
            (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|'
             r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|'
             r'ub|urc|uslack|val)',
             bygroups(Name.Variable, Punctuation, Keyword.Reserved)),
            (r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|'
             r'subj to|node|table|suffix|read table|write table)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Name.Variable)),
            (r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)',
             bygroups(Keyword.Declaration, Text, Punctuation, Text,
                      Name.Variable, Text, Punctuation, Text, Name.Variable)),
            (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)',
             bygroups(Keyword.Declaration, Text, using(this), Text,
                      Name.Variable)),
            (words((
                'abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan',
                'atan2', 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor',
                'log', 'log10', 'max', 'min', 'precision', 'round', 'sin',
                'sinh', 'sqrt', 'tan', 'tanh', 'time', 'trunc', 'Beta',
                'Cauchy', 'Exponential',
                'Gamma', 'Irand224', 'Normal', 'Normal01', 'Poisson',
                'Uniform', 'Uniform01', 'num', 'num0', 'ichar', 'char',
                'length', 'substr', 'sprintf', 'match', 'sub', 'gsub',
                'print', 'printf', 'next', 'nextw', 'prev', 'prevw', 'first',
                'last', 'ord', 'ord0', 'card', 'arity', 'indexarity'),
                prefix=r'\b', suffix=r'\b'),
             Name.Builtin),
            (r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)',
             Operator),
            (words((
                'or', 'exists', 'forall', 'and', 'in', 'not', 'within',
                'union', 'diff', 'difference', 'symdiff', 'inter',
                'intersect', 'intersection', 'cross', 'setof', 'by', 'less',
                'sum', 'prod', 'product', 'div', 'mod'),
                suffix=r'\b'),
             # Operator.Name but not enough emphasized with that
             Keyword.Reserved),
            # The lookaheads keep a '..' range operator from being read as
            # part of a number. The dot inside the second lookahead has to
            # be escaped: an unescaped '.' means "any character", which
            # made the leading-dot form ('.5') impossible to match.
            (r'(\d+\.(?!\.)\d*|\.(?!\.)\d+)([eE][+-]?\d+)?', Number.Float),
            (r'\d+([eE][+-]?\d+)?', Number.Integer),
            (r'[+-]?Infinity', Number.Integer),
            (r'(\w+|(\.(?!\.)))', Text)
        ]
    }
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid
    repetitious definitions.

    Options read in ``__init__``:

    `stdlibhighlighting`
        Re-tag the names listed in ``stdlib_types`` as ``Keyword.Type``
        (default: ``True``).
    `c99highlighting`
        Re-tag the names listed in ``c99_types`` as ``Keyword.Type``
        (default: ``True``).
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)*'

    # All patterns are raw strings: sequences such as ``\s`` and ``\w`` are
    # not valid escapes in ordinary string literals, and current Python
    # versions warn about them.
    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            (r'^#', Comment.Preproc, 'macro'),
            # or with whitespace
            (r'^(' + _ws1 + r')(#if\s+0)',
             bygroups(using(this), Comment.Preproc), 'if0'),
            (r'^(' + _ws1 + r')(#)',
             bygroups(using(this), Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(('auto', 'break', 'case', 'const', 'continue', 'default',
                    'do', 'else', 'enum', 'extern', 'for', 'goto', 'if',
                    'register', 'restricted', 'return', 'sizeof', 'static',
                    'struct', 'switch', 'typedef', 'union', 'volatile',
                    'while'),
                   suffix=r'\b'),
             Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void|'
             r'[a-z_][a-z0-9_]*_t)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'),
             Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__', suffix=r'\b'),
             Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'(' + _ws + r')?({)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'(' + _ws + r')?(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (r'[{}]', Punctuation),
            (r';', Punctuation, '#pop'),
        ],
        # Function body: nested braces push and pop this state so the
        # lexer leaves it only at the matching closing brace.
        'function': [
            include('whitespace'),
            include('statements'),
            (r';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        # Inside ``#if 0``: nested ``#if`` lines push, ``#else`` / ``#elif``
        # / ``#endif`` lines pop, everything else is a comment.
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    stdlib_types = [
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    ]
    c99_types = [
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t'
    ]

    def __init__(self, **options):
        """Read the two highlighting options, then set up the base lexer."""
        self.stdlibhighlighting = get_bool_opt(options,
                                               'stdlibhighlighting', True)
        self.c99highlighting = get_bool_opt(options,
                                            'c99highlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for *text*.

        Plain ``Name`` tokens found in ``stdlib_types`` or ``c99_types``
        are re-tagged as ``Keyword.Type`` when the matching option is
        enabled; all other tokens pass through unchanged.
        """
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
            yield index, token, value
def _make_begin_state(compound, _core_token=_core_token,
                      _core_token_compound=_core_token_compound,
                      _keyword_terminator=_keyword_terminator,
                      _nl=_nl, _punct=_punct, _string=_string,
                      _space=_space, _start_label=_start_label,
                      _stoken=_stoken, _token_terminator=_token_terminator,
                      _variable=_variable, _ws=_ws):
    """
    Build and return the list of lexer rules tried at the start of a
    command.

    When *compound* is true the rules are meant for use inside a
    parenthesised block: a closing ``)`` pops the state, ``)`` also ends
    keywords and tokens, and every target state name gets a ``/compound``
    suffix. The remaining parameters are regex fragments bound as defaults
    at definition time.
    """
    # ')' is excluded from several character classes only in a block.
    closing_paren = ')' if compound else ''
    remainder = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                             closing_paren)
    line_tail = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
    line_tail_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
    set_gap = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl

    if compound:
        state_suffix = '/compound'
        # Inside a block, a following ')' terminates keywords and tokens.
        _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
        _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
        core_token = _core_token_compound
        rem_tail = line_tail_compound
        closing_rule = (r'\)', Punctuation, '#pop')
    else:
        state_suffix = ''
        core_token = _core_token
        rem_tail = line_tail
        # Outside a block a ')' and the rest of its line are a comment.
        closing_rule = (
            r'\)((?=\()|%s)%s' % (_token_terminator, line_tail),
            Comment.Single)

    follow_state = 'follow%s' % state_suffix

    rules = [
        closing_rule,
        (r'(?=%s)' % _start_label, Text, follow_state),
        (_space, using(this, state='text')),
        include('redirect%s' % state_suffix),
        (r'[%s]+' % _nl, Text),
        (r'\(', Punctuation, 'root/compound'),
        (r'@+', Punctuation),
        # for / if / rem followed by '/?'
        (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
         r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
         (_nl, _token_terminator, _space, core_token, _nl, _nl),
         bygroups(Keyword, using(this, state='text')),
         follow_state),
        # goto with '/?' somewhere in its arguments
        (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
         (_keyword_terminator, remainder, _nl, _nl, remainder),
         bygroups(Keyword, using(this, state='text')),
         follow_state),
        (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                'title', 'type', 'ver', 'verify', 'vol'),
               suffix=_keyword_terminator),
         Keyword, follow_state),
        (r'(call)(%s?)(:)' % _space,
         bygroups(Keyword, using(this, state='text'), Punctuation),
         'call%s' % state_suffix),
        (r'call%s' % _keyword_terminator, Keyword),
        (r'(for%s(?!\^))(%s)(/f%s)' %
         (_token_terminator, _space, _token_terminator),
         bygroups(Keyword, using(this, state='text'), Keyword),
         ('for/f', 'for')),
        (r'(for%s(?!\^))(%s)(/l%s)' %
         (_token_terminator, _space, _token_terminator),
         bygroups(Keyword, using(this, state='text'), Keyword),
         ('for/l', 'for')),
        (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
        (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
         bygroups(Keyword, using(this, state='text'), Punctuation),
         'label%s' % state_suffix),
        (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
         (_token_terminator, _space, _token_terminator, _space,
          _token_terminator, _space),
         bygroups(Keyword, using(this, state='text'), Keyword,
                  using(this, state='text'), Keyword,
                  using(this, state='text')),
         ('(?', 'if')),
        (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
         (_token_terminator, _space, _stoken, _keyword_terminator,
          rem_tail),
         Comment.Single, follow_state),
        (r'(set%s)%s(/a)' % (_keyword_terminator, set_gap),
         bygroups(Keyword, using(this, state='text'), Keyword),
         'arithmetic%s' % state_suffix),
        (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
         r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
         (_keyword_terminator, set_gap, set_gap, _nl, _nl, _punct,
          closing_paren, _nl, _nl),
         bygroups(Keyword, using(this, state='text'), Keyword,
                  using(this, state='text'), using(this, state='variable'),
                  Punctuation),
         follow_state),
        # Anything else: move on without consuming input.
        default(follow_state),
    ]
    return rules
(words(( 'azimuth', 'background-attachment', 'background-color', 'background-image', 'background-position', 'background-repeat', 'background', 'border-bottom-color', 'border-bottom-style', 'border-bottom-width', 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color', 'border-right-style', 'border-right-width', 'border-top-color', 'border-top-style', 'border-top-width', 'border-bottom', 'border-collapse', 'border-left', 'border-width', 'border-color', 'border-spacing', 'border-style', 'border-top', 'border', 'caption-side', 'clear', 'clip', 'color', 'content', 'counter-increment', 'counter-reset', 'cue-after', 'cue-before', 'cue', 'cursor', 'direction', 'display', 'elevation', 'empty-cells', 'float', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'font', 'height', 'letter-spacing', 'line-height', 'list-style-type', 'list-style-image', 'list-style-position', 'list-style', 'margin-bottom', 'margin-left', 'margin-right', 'margin-top', 'margin', 'marker-offset', 'marks', 'max-height', 'max-width', 'min-height', 'min-width', 'opacity', 'orphans', 'outline', 'outline-color', 'outline-style', 'outline-width', 'overflow', 'padding-bottom', 'padding-left', 'padding-right', 'padding-top', 'padding', 'page', 'page-break-after', 'page-break-before', 'page-break-inside', 'pause-after', 'pause-before', 'pause', 'pitch', 'pitch-range', 'play-during', 'position', 'quotes', 'richness', 'right', 'size', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate', 'stress', 'table-layout', 'text-align', 'text-decoration', 'text-indent', 'text-shadow', 'text-transform', 'top', 'unicode-bidi', 'vertical-align', 'visibility', 'voice-family', 'volume', 'white-space', 'widows', 'width', 'word-spacing', 'z-index', 'bottom', 'left', 'above', 'absolute', 'always', 'armenian', 'aural', 'auto', 'avoid', 'baseline', 'behind', 'below', 'bidi-override', 'blink', 'block', 
'bold', 'bolder', 'both', 'capitalize', 'center-left', 'center-right', 'center', 'circle', 'cjk-ideographic', 'close-quote', 'collapse', 'condensed', 'continuous', 'crop', 'crosshair', 'cross', 'cursive', 'dashed', 'decimal-leading-zero', 'decimal', 'default', 'digits', 'disc', 'dotted', 'double', 'e-resize', 'embed', 'extra-condensed', 'extra-expanded', 'expanded', 'fantasy', 'far-left', 'far-right', 'faster', 'fast', 'fixed', 'georgian', 'groove', 'hebrew', 'help', 'hidden', 'hide', 'higher', 'high', 'hiragana-iroha', 'hiragana', 'icon', 'inherit', 'inline-table', 'inline', 'inset', 'inside', 'invert', 'italic', 'justify', 'katakana-iroha', 'katakana', 'landscape', 'larger', 'large', 'left-side', 'leftwards', 'level', 'lighter', 'line-through', 'list-item', 'loud', 'lower-alpha', 'lower-greek', 'lower-roman', 'lowercase', 'ltr', 'lower', 'low', 'medium', 'message-box', 'middle', 'mix', 'monospace', 'n-resize', 'narrower', 'ne-resize', 'no-close-quote', 'no-open-quote', 'no-repeat', 'none', 'normal', 'nowrap', 'nw-resize', 'oblique', 'once', 'open-quote', 'outset', 'outside', 'overline', 'pointer', 'portrait', 'px', 'relative', 'repeat-x', 'repeat-y', 'repeat', 'rgb', 'ridge', 'right-side', 'rightwards', 's-resize', 'sans-serif', 'scroll', 'se-resize', 'semi-condensed', 'semi-expanded', 'separate', 'serif', 'show', 'silent', 'slow', 'slower', 'small-caps', 'small-caption', 'smaller', 'soft', 'solid', 'spell-out', 'square', 'static', 'status-bar', 'super', 'sw-resize', 'table-caption', 'table-cell', 'table-column', 'table-column-group', 'table-footer-group', 'table-header-group', 'table-row', 'table-row-group', 'text', 'text-bottom', 'text-top', 'thick', 'thin', 'transparent', 'ultra-condensed', 'ultra-expanded', 'underline', 'upper-alpha', 'upper-latin', 'upper-roman', 'uppercase', 'url', 'visible', 'w-resize', 'wait', 'wider', 'x-fast', 'x-high', 'x-large', 'x-loud', 'x-low', 'x-small', 'x-soft', 'xx-large', 'xx-small', 'yes'), suffix=r'\b'),
def gen_rubystrings_rules():
    """Assemble the lexer states that tokenize Ruby string-like literals.

    Returns a dict mapping state names to rule lists:

    * ``'strings'`` -- the entry rules (symbols, quotes, ``%``-literals);
    * ``'simple-string'``, ``'simple-sym'``, ``'simple-backtick'`` -- bodies
      of quote-delimited literals;
    * for each bracket family ``cb``/``sb``/``pa``/``ab`` the states
      ``<family>-intp-string``, ``<family>-string`` and ``<family>-regex``.
    """

    def intp_regex_callback(self, match, ctx):
        # Groups: 1 = opening "%r" + delimiter, 3 = body,
        # 4 = closing delimiter followed by the [mixounse]* flags.
        body_start = match.start(3)
        yield match.start(1), String.Regex, match.group(1)
        # The body is lexed on its own, starting in 'interpolated-regex';
        # offsets are shifted back to positions in the enclosing text.
        inner = LexerContext(match.group(3), 0, ['interpolated-regex'])
        for offset, token, value in self.get_tokens_unprocessed(context=inner):
            yield body_start + offset, token, value
        yield match.start(4), String.Regex, match.group(4)
        ctx.pos = match.end()

    def intp_string_callback(self, match, ctx):
        # Same scheme as above, for "%"-strings: the body (group 3) is
        # lexed on its own, starting in 'interpolated-string'.
        body_start = match.start(3)
        yield match.start(1), String.Other, match.group(1)
        inner = LexerContext(match.group(3), 0, ['interpolated-string'])
        for offset, token, value in self.get_tokens_unprocessed(context=inner):
            yield body_start + offset, token, value
        yield match.start(4), String.Other, match.group(4)
        ctx.pos = match.end()

    # The "easy" literals; the %-literal rules are appended further down.
    entry_rules = [
        (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
        (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
        (r":'(\\\\|\\'|[^'])*'", String.Symbol),
        (r"'(\\\\|\\'|[^'])*'", String.Single),
        (r':"', String.Symbol, 'simple-sym'),
        # "name:" hash keys, since Ruby 1.9
        (r'([a-zA-Z_]\w*)(:)(?!:)',
         bygroups(String.Symbol, Punctuation)),
        (r'"', String.Double, 'simple-string'),
        (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
    ]
    rules = {'strings': entry_rules}

    # Double-quoted string, double-quoted symbol and backtick bodies.
    quoted_kinds = (
        ('string', String.Double, '"'),
        ('sym', String.Symbol, '"'),
        ('backtick', String.Backtick, '`'),
    )
    for kind, token, closer in quoted_kinds:
        rules['simple-' + kind] = [
            include('string-intp-escaped'),
            (r'[^\\' + closer + '#]+', token),
            (r'[\\#]', token),
            (closer, token, '#pop'),
        ]

    # Bracket-delimited %-literals: (opening regex, closing regex,
    # character-class body, state-name prefix).
    bracket_families = (
        ('\\{', '\\}', '{}', 'cb'),
        ('\\[', '\\]', '\\[\\]', 'sb'),
        ('\\(', '\\)', '()', 'pa'),
        ('<', '>', '<>', 'ab'),
    )
    for opener, closer, cc, family in bracket_families:
        escaped_delim = r'\\[\\' + cc + ']'
        lone_special = r'[\\#' + cc + ']'
        plain_run = r'[^\\#' + cc + ']+'

        intp_state = family + '-intp-string'
        raw_state = family + '-string'
        regex_state = family + '-regex'

        rules[intp_state] = [
            (escaped_delim, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            include('string-intp-escaped'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        rules[raw_state] = [
            (escaped_delim, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        rules[regex_state] = [
            (escaped_delim, String.Regex),
            (opener, String.Regex, '#push'),
            (closer + '[mixounse]*', String.Regex, '#pop'),
            include('string-intp'),
            (lone_special, String.Regex),
            (plain_run, String.Regex),
        ]
        # Entry rules for this family, in the order intp / raw / regex.
        entry_rules.extend([
            (r'%[QWx]?' + opener, String.Other, intp_state),
            (r'%[qsw]' + opener, String.Other, raw_state),
            (r'%r' + opener, String.Regex, regex_state),
        ])

    # these must come after %<brace>!
    entry_rules.extend([
        # %r regex with an arbitrary delimiter
        (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
         intp_regex_callback),
        # regular fancy strings with qsw
        (r'%[qsw]([\W_])((?:\\\1|(?!\1).)*)\1', String.Other),
        (r'(%[QWx]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
        # special forms of fancy strings after operators or
        # in method calls with braces
        (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # lookbehinds are fixed-width, so the same form is repeated
        # for occurrences at the start of a line
        (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # all regular fancy strings without qsw
        (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
    ])

    return rules
class MznDefLexer(MznLexer):
    """Extension of ``MznLexer`` registered under the name 'MiniZincDef'.

    The 'root' and 'main__1' .. 'main__4' states each put a handful of extra
    rules (angle-bracketed names, ``::=``, a leading ``|``, a few quoted
    literals and a bracket-opened 'regex' state) in front of whatever the
    base class defines for the same state (``inherit``).
    NOTE(review): the rule set looks aimed at grammar-definition text;
    confirm against the intended input.
    """
    name = 'MiniZincDef'
    aliases = ['minizincdef']

    def _definition_rules(ellipsis_token):
        # Returns a new list on every call so that no two states share a
        # list object, just as with five separate list literals.
        return [
            (r'<[0-9a-zA-Z- ]+>', Comment.Special),
            (r'"/\\"', String),
            (r'"""', String),
            (r'"\\\("', String),
            # the quoted "..." literal is the only rule whose token type
            # differs between 'root' and the 'main__*' states
            (r'"\\.\\.\\."', ellipsis_token),
            (r'\[[^ ]', String.Regex, 'regex'),
            (r'::=', Comment.Special),
            (words(('|',), prefix=r'^\s*'), Comment.Special),
            inherit,
        ]

    tokens = {
        'root': _definition_rules(Comment.Special),
        'main__1': _definition_rules(String),
        'main__2': _definition_rules(String),
        'main__3': _definition_rules(String),
        'main__4': _definition_rules(String),
        # entered on "[" followed by a non-space; left at the next "]"
        'regex': [
            (r'\]', String.Regex, '#pop'),
            (r'.', String.Regex),
        ],
    }

    # The helper is only needed while the class body runs.
    del _definition_rules
class FancyLexer(RegexLexer):
    """
    Pygments Lexer For `Fancy <http://www.fancy-lang.org/>`_.

    Fancy is a self-hosted, pure object-oriented, dynamic,
    class-based, concurrent general-purpose programming language
    running on Rubinius, the Ruby VM.

    .. versionadded:: 1.5
    """
    name = 'Fancy'
    filenames = ['*.fy', '*.fancypack']
    aliases = ['fancy', 'fy']
    mimetypes = ['text/x-fancysrc']

    tokens = {
        # copied from PerlLexer: second half of a substitution / match
        # expression; every alternative pops back to the calling state
        'balanced-regex': [
            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\\}|[^}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\\)|[^)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\\$|[^$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\s+', Text),

            # balanced delimiters (copied from PerlLexer):
            (r's\{(\\\\|\\\}|[^}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
            (r's\((\\\\|\\\)|[^)])*\)\s*', String.Regex, 'balanced-regex'),
            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),

            # Comments
            (r'#(.*?)\n', Comment.Single),
            # Symbols
            (r'\'([^\'\s\[\](){}]+|\[\])', String.Symbol),
            # Multi-line DoubleQuotedString
            (r'"""(\\\\|\\"|[^"])*"""', String),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # keywords
            (r'(def|class|try|catch|finally|retry|return|return_local|match|'
             r'case|->|=>)\b', Keyword),
            # constants
            (r'(self|super|nil|false|true)\b', Name.Constant),
            (r'[(){};,/?|:\\]', Punctuation),
            # names
            (words((
                'Object', 'Array', 'Hash', 'Directory', 'File', 'Class',
                'String', 'Number', 'Enumerable', 'FancyEnumerable', 'Block',
                'TrueClass', 'NilClass', 'FalseClass', 'Tuple', 'Symbol',
                'Stack', 'Set', 'FancySpec', 'Method', 'Package', 'Range'),
                suffix=r'\b'),
             Name.Builtin),
            # functions
            (r'[a-zA-Z](\w|[-+?!=*/^><%])*:', Name.Function),
            # operators, must be below functions
            (r'[-+*/~,<>=&!?%^\[\].$]+', Operator),
            # The four patterns below are raw strings now; the previous
            # plain literals relied on the invalid escape sequence "\w".
            (r'[A-Z]\w*', Name.Constant),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@@?', Operator),
            (r'[a-zA-Z_]\w*', Name),
            # numbers - / checks are necessary to avoid mismarking regexes,
            # see comment in RubyLexer
            # Floats are tried first: the integer rule below matches any run
            # of digits, so a float rule placed after it can never fire.
            # It swallows a following "/" or "?" like the integer rules do.
            (r'(\d+\.\d+(?:[eE][+-]?[0-9]+)?|\d+[eE][+-]?[0-9]+)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0[bB][01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            # matches every remaining digit run, so no bare r'\d+' fallback
            # is needed after it
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
        ]
    }
class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby', 'duby']
    filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec',
                 '*.rbx', '*.duby']
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """Tokenize a heredoc opener, the rest of its line, and its body.

        ``match`` groups: 1 = ``<<`` or ``<<-``, 2 = optional opening quote,
        3 = heredoc name, 4 = closing quote, 5 = rest of the line.

        Openers are collected on ``ctx.heredocstack`` while the rest of the
        line is lexed (which may re-enter this callback for further
        heredocs); the outermost invocation then consumes the body lines of
        all collected heredocs in order.
        """
        # okay, this is the hardest part of parsing Ruby...

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)   # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)    # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)   # quote again

        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        # an empty stack means no enclosing heredoc is being processed
        outermost = not bool(heredocstack)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # restrict the context to the remainder of the opener line
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                # loop variable named apart from the ``match`` parameter
                for line_match in line_re.finditer(ctx.text, ctx.pos):
                    # "<<-" heredocs allow an indented terminator
                    if tolerant:
                        check = line_match.group().strip()
                    else:
                        check = line_match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield (line_match.start(), Name.Constant,
                               line_match.group())
                        ctx.pos = line_match.end()
                        break
                    else:
                        lines.append(line_match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_rubystrings_rules():
        """Build the string-literal states merged into ``tokens`` below.

        Returns a dict with the 'strings' entry state, the 'simple-*' states
        and, per bracket family, '-intp-string', '-string' and '-regex'.
        """

        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            # lex the body (group 3) separately, then shift the offsets
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            # lex the body (group 3) separately, then shift the offsets
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'([a-zA-Z_]\w*)(:)(?!:)',
             bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-' + name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name + '-intp-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name + '-intp-string'))
            states[name + '-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name + '-string'))
            states[name + '-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name + '-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([\W_])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'\A#!.+?$', Comment.Hashbang),
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end.*?$', Comment.Multiline),
            # keywords
            (words((
                'BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'defined?',
                'do', 'else', 'elsif', 'end', 'ensure', 'for', 'if', 'in',
                'next', 'redo', 'rescue', 'raise', 'retry', 'return', 'super',
                'then', 'undef', 'unless', 'until', 'when', 'while', 'yield'),
                suffix=r'\b'),
             Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_]\w*'
             r'(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            # "def" directly followed by an operator method name.  The "-"
            # is escaped so the class lists "+", "-" and "/" individually;
            # unescaped, "+-/" is a range that also admits "," and ".".
            (r'def(?=[*%&^`~+\-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (words((
                'initialize', 'new', 'loop', 'include', 'extend', 'raise',
                'attr_reader', 'attr_writer', 'attr_accessor', 'attr',
                'catch', 'throw', 'private', 'module_function', 'public',
                'protected', 'true', 'false', 'nil'),
                suffix=r'\b'),
             Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            # predicate builtins (the "?" is part of the match)
            (words((
                'autoload', 'block_given', 'const_defined', 'eql', 'equal',
                'frozen', 'include', 'instance_of', 'is_a', 'iterator',
                'kind_of', 'method_defined', 'nil', 'private_method_defined',
                'protected_method_defined', 'public_method_defined',
                'respond_to', 'tainted'),
                suffix=r'\?'),
             Name.Builtin),
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            # builtins, unless written as a method call on a receiver
            (words((
                'Array', 'Float', 'Integer', 'String', '__id__', '__send__',
                'abort', 'ancestors', 'at_exit', 'autoload', 'binding',
                'callcc', 'caller', 'catch', 'chomp', 'chop', 'class_eval',
                'class_variables', 'clone', 'const_defined?', 'const_get',
                'const_missing', 'const_set', 'constants', 'display', 'dup',
                'eval', 'exec', 'exit', 'extend', 'fail', 'fork', 'format',
                'freeze', 'getc', 'gets', 'global_variables', 'gsub', 'hash',
                'id', 'included_modules', 'inspect', 'instance_eval',
                'instance_method', 'instance_methods',
                'instance_variable_get', 'instance_variable_set',
                'instance_variables', 'lambda', 'load', 'local_variables',
                'loop', 'method', 'method_missing', 'methods', 'module_eval',
                'name', 'object_id', 'open', 'p', 'print', 'printf',
                'private_class_method', 'private_instance_methods',
                'private_methods', 'proc', 'protected_instance_methods',
                'protected_methods', 'public_class_method',
                'public_instance_methods', 'public_methods', 'putc', 'puts',
                'raise', 'rand', 'readline', 'readlines', 'require', 'scan',
                'select', 'self', 'send', 'set_trace_func',
                'singleton_methods', 'sleep', 'split', 'sprintf', 'srand',
                'sub', 'syscall', 'system', 'taint', 'test', 'throw', 'to_a',
                'to_s', 'trace_var', 'trap', 'untaint', 'untrace_var',
                'warn'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z]\w+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(RUBY_OPERATORS, prefix=r'(\.|::)'),
             bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_]\w*[!?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        # after "def": an optional "Receiver." followed by the method name
        'funcname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'(?:([a-zA-Z_]\w*)(\.))?'
             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            default('#pop')
        ],
        # after "class": a class name or "<<"
        'classname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'<<', Operator, '#pop'),
            (r'[A-Z_]\w*', Name.Class, '#pop'),
            default('#pop')
        ],
        # parenthesized expression following "def" / "class"
        'defexpr': [
            (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
            (r'\(', Operator, '#push'),
            include('root')
        ],
        # inside "#{ ... }"; nested braces push/pop this same state
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
            (r'#@@?[a-zA-Z_]\w*', String.Interpol),
            (r'#\$[a-zA-Z_]\w*', String.Interpol)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
             String.Escape)
        ],
        # start states for the sub-contexts created by the callbacks in
        # gen_rubystrings_rules
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        # text following "__END__"
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
    # add the generated string-literal states ('strings', 'simple-*', ...)
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        # Returns the result of shebang_matches for a "ruby" interpreter
        # name with an optional "1.<digits>" suffix.
        return shebang_matches(text, r'ruby(1\.\d)?')
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    .. versionadded:: 0.10
    """

    # command names highlighted as Keyword
    keyword_cmds_re = words((
        'after', 'apply', 'array', 'break', 'catch', 'continue', 'elseif',
        'else', 'error', 'eval', 'expr', 'for', 'foreach', 'global', 'if',
        'namespace', 'proc', 'rename', 'return', 'set', 'switch', 'then',
        'trace', 'unset', 'update', 'uplevel', 'upvar', 'variable', 'vwait',
        'while'), prefix=r'\b', suffix=r'\b')

    # command names highlighted as Name.Builtin
    builtin_cmds_re = words((
        'append', 'bgerror', 'binary', 'cd', 'chan', 'clock', 'close',
        'concat', 'dde', 'dict', 'encoding', 'eof', 'exec', 'exit', 'fblocked',
        'fconfigure', 'fcopy', 'file', 'fileevent', 'flush', 'format', 'gets',
        'glob', 'history', 'http', 'incr', 'info', 'interp', 'join', 'lappend',
        'lassign', 'lindex', 'linsert', 'list', 'llength', 'load', 'loadTk',
        'lrange', 'lrepeat', 'lreplace', 'lreverse', 'lsearch', 'lset',
        'lsort', 'mathfunc', 'mathop', 'memory', 'msgcat', 'open', 'package',
        'pid', 'pkg::create', 'pkg_mkIndex', 'platform', 'platform::shell',
        'puts', 'pwd', 're_syntax', 'read', 'refchan', 'regexp', 'registry',
        'regsub', 'scan', 'seek', 'socket', 'source', 'split', 'string',
        'subst', 'tell', 'time', 'tm', 'unknown', 'unload'),
        prefix=r'\b', suffix=r'\b')

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl', '*.rvt']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        # Called while the class body executes (hence no ``self``).
        # Returns the rules recognising the first word of a command; each
        # word rule enters the state 'params' + context, and "#" enters
        # the 'comment' state.
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w.-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
            (r'\}', Keyword),  # HACK: somehow we miscounted our braces
        ],
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        # grouping openers, string opener and operators
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w.:-]+)', Name.Variable),
            (r'([\w.:-]+)', Text),
        ],
        # arguments of a command; ";" or a newline ends the command
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            (r'(else|elseif|then)\b', Keyword),
            include('basic'),
            include('data'),
        ],
        # Inside a group the closing delimiter pops two states: the params
        # state itself and the state that pushed it.
        'params-in-brace': [
            (r'\}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|\\\n|[^\]\\])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'\}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        'comment': [
            # Rest of the comment line, ending the comment unless the line
            # ends in a backslash.  The text before the newline is
            # optional, so an empty comment ("#" directly followed by a
            # newline) matches, and "\n" is excluded from the last-char
            # class so the rule never runs on into the following line.
            (r'(?:.*[^\\\n])?\n', Comment, '#pop'),
            # backslash-newline: the comment continues on the next line
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        # Returns the result of shebang_matches for a "tcl" interpreter.
        return shebang_matches(text, r'(tcl)')
def gen_crystalstrings_rules():
    """Assemble the lexer states that tokenize Crystal string-like literals.

    Returns a dict mapping state names to rule lists: the ``'strings'``
    entry state, the ``'simple-*'`` states for quote-delimited literals, and
    for each bracket family ``cb``/``sb``/``pa``/``ab`` the states
    ``<family>-intp-string``, ``<family>-string`` and ``<family>-regex``.
    """

    def intp_regex_callback(self, match, ctx):
        # Groups: 1 = opening "%r" + delimiter, 3 = body,
        # 4 = closing delimiter followed by the [imsx]* flags.
        body_start = match.start(3)
        yield match.start(1), String.Regex, match.group(1)
        # The body is lexed on its own, starting in 'interpolated-regex';
        # offsets are shifted back to positions in the enclosing text.
        inner = LexerContext(match.group(3), 0, ['interpolated-regex'])
        for offset, token, value in self.get_tokens_unprocessed(context=inner):
            yield body_start + offset, token, value
        yield match.start(4), String.Regex, match.group(4)
        ctx.pos = match.end()

    def intp_string_callback(self, match, ctx):
        # Same scheme as above, with the body starting in
        # 'interpolated-string'.
        body_start = match.start(3)
        yield match.start(1), String.Other, match.group(1)
        inner = LexerContext(match.group(3), 0, ['interpolated-string'])
        for offset, token, value in self.get_tokens_unprocessed(context=inner):
            yield body_start + offset, token, value
        yield match.start(4), String.Other, match.group(4)
        ctx.pos = match.end()

    # Symbols, char literals and quote openers; the %-literal rules are
    # appended further down.
    entry_rules = [
        (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
        (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
        (r":'(\\\\|\\'|[^'])*'", String.Symbol),
        # This allows arbitrary text after '\ for simplicity
        (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
        (r':"', String.Symbol, 'simple-sym'),
        # Crystal doesn't have "symbol:"s but this simplifies function args
        (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
        (r'"', String.Double, 'simple-string'),
        (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
    ]
    rules = {'strings': entry_rules}

    # Double-quoted string, double-quoted symbol and backtick bodies.
    quoted_kinds = (
        ('string', String.Double, '"'),
        ('sym', String.Symbol, '"'),
        ('backtick', String.Backtick, '`'),
    )
    for kind, token, closer in quoted_kinds:
        # symbols include 'string-escaped'; the other two kinds include
        # 'string-intp-escaped'
        if kind == 'sym':
            escape_state = 'string-escaped'
        else:
            escape_state = 'string-intp-escaped'
        rules['simple-' + kind] = [
            include(escape_state),
            (r'[^\\' + closer + '#]+', token),
            (r'[\\#]', token),
            (closer, token, '#pop'),
        ]

    # Bracket-delimited %-literals: (opening regex, closing regex,
    # character-class body, state-name prefix).
    bracket_families = (
        ('\\{', '\\}', '{}', 'cb'),
        ('\\[', '\\]', '\\[\\]', 'sb'),
        ('\\(', '\\)', '()', 'pa'),
        ('<', '>', '<>', 'ab'),
    )
    for opener, closer, cc, family in bracket_families:
        escaped_delim = r'\\[\\' + cc + ']'
        lone_special = r'[\\#' + cc + ']'
        plain_run = r'[^\\#' + cc + ']+'

        intp_state = family + '-intp-string'
        raw_state = family + '-string'
        regex_state = family + '-regex'

        rules[intp_state] = [
            # note: built from the opening regex, unlike the other states
            (r'\\[' + opener + ']', String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            include('string-intp-escaped'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        rules[raw_state] = [
            (escaped_delim, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        rules[regex_state] = [
            (escaped_delim, String.Regex),
            (opener, String.Regex, '#push'),
            (closer + '[imsx]*', String.Regex, '#pop'),
            include('string-intp'),
            (lone_special, String.Regex),
            (plain_run, String.Regex),
        ]
        # Entry rules for this family, in the order intp / raw / regex.
        entry_rules.extend([
            (r'%' + opener, String.Other, intp_state),
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            (r'%[wi]' + opener, String.Other, raw_state),
            (r'%r' + opener, String.Regex, regex_state),
        ])

    # these must come after %<brace>!
    entry_rules.extend([
        # %r regex with an arbitrary delimiter
        (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
         intp_regex_callback),
        # regular fancy strings with qsw
        (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
        # special forms of fancy strings after operators or
        # in method calls with braces
        (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # lookbehinds are fixed-width, so the same form is repeated
        # for occurrences at the start of a line
        (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # all regular fancy strings without qsw
        (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
    ])

    return rules
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    # IGNORECASE: every pattern below matches regardless of letter case
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # horizontal whitespace (newlines are handled at the end)
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'task|protected', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text,
                                             Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (words((
                'Address', 'Byte', 'Boolean', 'Character', 'Controlled',
                'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type',
                'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer',
                'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive',
                'Reference_Type', 'Short_Float', 'Short_Integer',
                'Short_Short_Float', 'Short_Short_Integer', 'String',
                'Wide_Character', 'Wide_String'), suffix=r'\b'),
             Keyword.Type),
            # Word operators.  "else" after "or" is optional, mirroring
            # "and" / "and then"; when it was mandatory a plain "or" never
            # matched here and fell through to the reserved-word list.
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'generic|private', Keyword.Declaration),
            (r'package', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            # "name : declare|begin|loop|for|while" -- a labelled statement
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (words((
                'abort', 'abs', 'abstract', 'accept', 'access', 'aliased',
                'all', 'array', 'at', 'begin', 'body', 'case', 'constant',
                'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif',
                'end', 'entry', 'exception', 'exit', 'interface', 'for',
                'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of',
                'or', 'others', 'out', 'overriding', 'pragma', 'protected',
                'raise', 'range', 'record', 'renames', 'requeue', 'return',
                'reverse', 'select', 'separate', 'subtype', 'synchronized',
                'task', 'tagged', 'terminate', 'then', 'type', 'until',
                'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            # an identifier; whatever directly follows it is re-lexed by
            # this same lexer
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        # after function / procedure / entry
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        # after "end": optional construct keyword or name, up to ";"
        # (patterns are raw strings so "\w" / "\s" reach the regex engine
        # without relying on invalid string escapes)
        'end': [
            (r'(if|case|record|loop|select)', Keyword.Reserved),
            (r'"[^"]+"|[\w.]+', Name.Function),
            (r'\s+', Text),
            (r';', Punctuation, '#pop'),
        ],
        # after "type NAME" / "subtype NAME"
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'with|and|use', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, ('record_def')),
            (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation),
             '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text,
                                            Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        # after "with" / "use": one dotted name
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        # parenthesized parameter list
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            include('root'),
        ],
        # after "package" (raw strings, see the note on 'end')
        'package': [
            (r'body', Keyword.Declaration),
            (r'is\s+new|renames', Keyword.Reserved),
            (r'is', Keyword.Reserved, '#pop'),
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }
def _multi_escape(entries):
    """Return a ``words`` matcher for *entries* with a one-space suffix."""
    trailing = ' '
    matcher = words(entries, suffix=trailing)
    return matcher
class BoaLexer(RegexLexer):
    """
    Lexer for the `Boa <http://boa.cs.iastate.edu/docs/>`_ language.

    .. versionadded:: 2.4
    """
    name = 'Boa'
    aliases = ['boa']
    filenames = ['*.boa']

    # Reserved words, matched only as whole words.
    reserved = words(
        ('input', 'output', 'of', 'weight', 'before', 'after', 'stop',
         'ifall', 'foreach', 'exists', 'function', 'break', 'switch', 'case',
         'visitor', 'default', 'return', 'visit', 'while', 'if', 'else'),
        suffix=r'\b', prefix=r'\b')

    # Aggregator names, basic type names and boolean literals.
    keywords = words(
        ('bottom', 'collection', 'maximum', 'mean', 'minimum', 'set', 'sum',
         'top', 'string', 'int', 'bool', 'float', 'time', 'false', 'true',
         'array', 'map', 'stack', 'enum', 'type'),
        suffix=r'\b', prefix=r'\b')

    # Capitalised type names highlighted separately from plain identifiers.
    classes = words(
        ('Project', 'ForgeKind', 'CodeRepository', 'Revision',
         'RepositoryKind', 'ChangedFile', 'FileKind', 'ASTRoot', 'Namespace',
         'Declaration', 'Type', 'Method', 'Variable', 'Statement',
         'Expression', 'Modifier', 'StatementKind', 'ExpressionKind',
         'ModifierKind', 'Visibility', 'TypeKind', 'Person', 'ChangeKind'),
        suffix=r'\b', prefix=r'\b')

    operators = ('->', ':=', ':', '=', '<<', '!', '++', '||',
                 '&&', '+', '-', '*', ">", "<")
    string_sep = ('`', '\"')

    # The suffix is a literal "(", so a built-in name is only recognised when
    # it is immediately called, and the parenthesis is part of the match.
    built_in_functions = words(
        (
            # Array functions
            'new',
            'sort',
            # Date & Time functions
            'yearof', 'dayofyear', 'hourof', 'minuteof', 'secondof', 'now',
            'addday', 'addmonth', 'addweek', 'addyear', 'dayofmonth',
            'dayofweek', 'dayofyear', 'formattime', 'trunctoday',
            'trunctohour', 'trunctominute', 'trunctomonth', 'trunctosecond',
            'trunctoyear',
            # Map functions
            'clear', 'haskey', 'keys', 'lookup', 'remove', 'values',
            # Math functions
            'abs', 'acos', 'acosh', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
            'ceil', 'cos', 'cosh', 'exp', 'floor', 'highbit', 'isfinite',
            'isinf', 'isnan', 'isnormal', 'log', 'log10', 'max', 'min',
            'nrand', 'pow', 'rand', 'round', 'sin', 'sinh', 'sqrt', 'tan',
            'tanh', 'trunc',
            # Other functions
            'def', 'hash', 'len',
            # Set functions
            'add', 'contains', 'remove',
            # String functions
            'format', 'lowercase', 'match', 'matchposns', 'matchstrs',
            'regex', 'split', 'splitall', 'splitn', 'strfind', 'strreplace',
            'strrfind', 'substring', 'trim', 'uppercase',
            # Type Conversion functions
            'bool', 'float', 'int', 'string', 'time',
            # Domain-Specific functions
            'getast', 'getsnapshot', 'hasfiletype', 'isfixingrevision',
            'iskind', 'isliteral',
        ),
        prefix=r'\b', suffix=r'\(')

    # Rule order matters: the first matching rule wins, so the keyword-like
    # word lists must precede the generic identifier rule.
    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (reserved, Keyword.Reserved),
            (built_in_functions, Name.Function),
            (keywords, Keyword.Type),
            # NOTE(review): ``Name.Classes`` is not one of the standard token
            # types (the standard one is ``Name.Class``); left unchanged here
            # because existing consumers may rely on it -- confirm.
            (classes, Name.Classes),
            (words(operators), Operator),
            (r'[][(),;{}\\.]', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'`(\\\\|\\`|[^`])*`', String),
            # Only reached for a quote that does not start a complete string
            # (the two rules above take precedence).
            (words(string_sep), String.Delimiter),
            (r'[a-zA-Z_]+', Name.Variable),
            (r'[0-9]+', Number.Integer),
            (r'\s+?', Text),  # Whitespace
        ]
    }
(r'[a-z_-][\w-]*(?=\()', Name.Function), (words(_css_properties + ( 'above', 'absolute', 'always', 'armenian', 'aural', 'auto', 'avoid', 'baseline', 'behind', 'below', 'bidi-override', 'blink', 'block', 'bold', 'bolder', 'both', 'capitalize', 'center-left', 'center-right', 'center', 'circle', 'cjk-ideographic', 'close-quote', 'collapse', 'condensed', 'continuous', 'crop', 'crosshair', 'cross', 'cursive', 'dashed', 'decimal-leading-zero', 'decimal', 'default', 'digits', 'disc', 'dotted', 'double', 'e-resize', 'embed', 'extra-condensed', 'extra-expanded', 'expanded', 'fantasy', 'far-left', 'far-right', 'faster', 'fast', 'fixed', 'georgian', 'groove', 'hebrew', 'help', 'hidden', 'hide', 'higher', 'high', 'hiragana-iroha', 'hiragana', 'icon', 'inherit', 'inline-table', 'inline', 'inset', 'inside', 'invert', 'italic', 'justify', 'katakana-iroha', 'katakana', 'landscape', 'larger', 'large', 'left-side', 'leftwards', 'level', 'lighter', 'line-through', 'list-item', 'loud', 'lower-alpha', 'lower-greek', 'lower-roman', 'lowercase', 'ltr', 'lower', 'low', 'medium', 'message-box', 'middle', 'mix', 'monospace', 'n-resize', 'narrower', 'ne-resize', 'no-close-quote', 'no-open-quote', 'no-repeat', 'none', 'normal', 'nowrap', 'nw-resize', 'oblique', 'once', 'open-quote', 'outset', 'outside', 'overline', 'pointer', 'portrait', 'px', 'relative', 'repeat-x', 'repeat-y', 'repeat', 'rgb', 'ridge', 'right-side', 'rightwards', 's-resize', 'sans-serif', 'scroll', 'se-resize', 'semi-condensed', 'semi-expanded', 'separate', 'serif', 'show', 'silent', 'slow', 'slower', 'small-caps', 'small-caption', 'smaller', 'soft', 'solid', 'spell-out', 'square', 'static', 'status-bar', 'super', 'sw-resize', 'table-caption', 'table-cell', 'table-column', 'table-column-group', 'table-footer-group', 'table-header-group', 'table-row', 'table-row-group', 'text', 'text-bottom', 'text-top', 'thick', 'thin', 'transparent', 'ultra-condensed', 'ultra-expanded', 'underline', 'upper-alpha', 'upper-latin', 
'upper-roman', 'uppercase', 'url', 'visible', 'w-resize', 'wait', 'wider', 'x-fast', 'x-high', 'x-large', 'x-loud', 'x-low', 'x-small', 'x-soft', 'xx-large', 'xx-small', 'yes'), suffix=r'\b'), Name.Constant),
class ElmLexer(RegexLexer):
    """
    For Elm source code.

    .. versionadded:: 2.1
    """

    name = 'Elm'
    url = 'http://elm-lang.org/'
    aliases = ['elm']
    filenames = ['*.elm']
    mimetypes = ['text/x-elm']

    # Lower-case (or underscore) initial, then letters, digits, "_" or "'".
    validName = r'[a-z_][a-zA-Z0-9_\']*'

    # "main " at the start of a line is highlighted like a reserved word.
    specialName = r'^main '

    builtinOps = (
        '~', '||', '|>', '|', '`', '^', '\\', '\'', '>>', '>=', '>', '==',
        '=', '<~', '<|', '<=', '<<', '<-', '<', '::', ':', '/=', '//', '/',
        '..', '.', '->', '-', '++', '+', '*', '&&', '%',
    )

    reservedWords = words((
        'alias', 'as', 'case', 'else', 'if', 'import', 'in',
        'let', 'module', 'of', 'port', 'then', 'type', 'where',
    ), suffix=r'\b')

    tokens = {
        'root': [

            # Comments
            (r'\{-', Comment.Multiline, 'comment'),
            (r'--.*', Comment.Single),

            # Whitespace
            (r'\s+', Whitespace),

            # Strings
            (r'"', String, 'doublequote'),

            # Modules
            (r'^(\s*)(module)(\s*)',
             bygroups(Whitespace, Keyword.Namespace, Whitespace), 'imports'),

            # Imports
            (r'^(\s*)(import)(\s*)',
             bygroups(Whitespace, Keyword.Namespace, Whitespace), 'imports'),

            # Shaders
            (r'\[glsl\|.*', Name.Entity, 'shader'),

            # Keywords
            (reservedWords, Keyword.Reserved),

            # Types
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),

            # Main
            (specialName, Keyword.Reserved),

            # Prefix Operators
            (words((builtinOps), prefix=r'\(', suffix=r'\)'), Name.Function),

            # Infix Operators
            (words(builtinOps), Name.Function),

            # Numbers
            include('numbers'),

            # Variable Names
            (validName, Name.Variable),

            # Parens
            (r'[,()\[\]{}]', Punctuation),

        ],

        # Block comments nest: every "{-" pushes this state again and every
        # "-}" pops one level.
        'comment': [
            (r'-(?!\})', Comment.Multiline),
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[^-}]', Comment.Multiline),
            (r'-\}', Comment.Multiline, '#pop'),
            # A "}" that is not the tail of "-}" is ordinary comment text.
            # Without this rule nothing in the state matches it and the
            # lexer emits an Error token inside a valid comment.
            (r'\}', Comment.Multiline),
        ],

        'doublequote': [
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\[nrfvb\\"]', String.Escape),
            (r'[^"]', String),
            (r'"', String, '#pop'),
        ],

        # Entered after "module"/"import"; consumes one dotted name and
        # returns to the previous state.
        'imports': [
            (r'\w+(\.\w+)*', Name.Class, '#pop'),
        ],

        'numbers': [
            (r'_?\d+\.(?=\d+)', Number.Float),
            (r'_?\d+', Number.Integer),
        ],

        # Everything between "[glsl|" and "|]" is emitted as Name.Entity.
        'shader': [
            (r'\|(?!\])', Name.Entity),
            (r'\|\]', Name.Entity, '#pop'),
            (r'(.*)(\n)', bygroups(Name.Entity, Whitespace)),
        ],
    }
class FactorLexer(RegexLexer):
    """
    Lexer for the `Factor <http://factorcode.org>`_ language.

    .. versionadded:: 1.4
    """
    name = 'Factor'
    aliases = ['factor']
    filenames = ['*.factor']
    mimetypes = ['text/x-factor']

    flags = re.MULTILINE | re.UNICODE

    # Every builtin word list below uses ``suffix=r'\s'``: a word is only
    # recognised when followed by a whitespace character, and that character
    # is part of the match.
    builtin_kernel = words((
        '-rot', '2bi', '2bi@', '2bi*', '2curry', '2dip', '2drop', '2dup',
        '2keep', '2nip', '2over', '2tri', '2tri@', '2tri*', '3bi', '3curry',
        '3dip', '3drop', '3dup', '3keep', '3tri', '4dip', '4drop', '4dup',
        '4keep', '<wrapper>', '=', '>boolean', 'clone', '?', '?execute',
        '?if', 'and', 'assert', 'assert=', 'assert?', 'bi', 'bi-curry',
        'bi-curry@', 'bi-curry*', 'bi@', 'bi*', 'boa', 'boolean', 'boolean?',
        'both?', 'build', 'call', 'callstack', 'callstack>array',
        'callstack?', 'clear', '(clone)', 'compose', 'compose?', 'curry',
        'curry?', 'datastack', 'die', 'dip', 'do', 'drop', 'dup', 'dupd',
        'either?', 'eq?', 'equal?', 'execute', 'hashcode', 'hashcode*',
        'identity-hashcode', 'identity-tuple', 'identity-tuple?', 'if',
        'if*', 'keep', 'loop', 'most', 'new', 'nip', 'not', 'null', 'object',
        'or', 'over', 'pick', 'prepose', 'retainstack', 'rot', 'same?',
        'swap', 'swapd', 'throw', 'tri', 'tri-curry', 'tri-curry@',
        'tri-curry*', 'tri@', 'tri*', 'tuple', 'tuple?', 'unless', 'unless*',
        'until', 'when', 'when*', 'while', 'with', 'wrapper', 'wrapper?',
        'xor'), suffix=r'\s')

    builtin_assocs = words((
        '2cache', '<enum>', '>alist', '?at', '?of', 'assoc', 'assoc-all?',
        'assoc-any?', 'assoc-clone-like', 'assoc-combine', 'assoc-diff',
        'assoc-diff!', 'assoc-differ', 'assoc-each', 'assoc-empty?',
        'assoc-filter', 'assoc-filter!', 'assoc-filter-as', 'assoc-find',
        'assoc-hashcode', 'assoc-intersect', 'assoc-like', 'assoc-map',
        'assoc-map-as', 'assoc-partition', 'assoc-refine', 'assoc-size',
        'assoc-stack', 'assoc-subset?', 'assoc-union', 'assoc-union!',
        'assoc=', 'assoc>map', 'assoc?', 'at', 'at+', 'at*', 'cache',
        'change-at', 'clear-assoc', 'delete-at', 'delete-at*', 'enum',
        'enum?', 'extract-keys', 'inc-at', 'key?', 'keys', 'map>assoc',
        'maybe-set-at', 'new-assoc', 'of', 'push-at', 'rename-at', 'set-at',
        'sift-keys', 'sift-values', 'substitute', 'unzip', 'value-at',
        'value-at*', 'value?', 'values', 'zip'), suffix=r'\s')

    builtin_combinators = words((
        '2cleave', '2cleave>quot', '3cleave', '3cleave>quot', '4cleave',
        '4cleave>quot', 'alist>quot', 'call-effect', 'case', 'case-find',
        'case>quot', 'cleave', 'cleave>quot', 'cond', 'cond>quot',
        'deep-spread>quot', 'execute-effect', 'linear-case-quot', 'no-case',
        'no-case?', 'no-cond', 'no-cond?', 'recursive-hashcode',
        'shallow-spread>quot', 'spread', 'to-fixed-point', 'wrong-values',
        'wrong-values?'), suffix=r'\s')

    builtin_math = words((
        '-', '/', '/f', '/i', '/mod', '2/', '2^', '<', '<=', '<fp-nan>', '>',
        '>=', '>bignum', '>fixnum', '>float', '>integer', '(all-integers?)',
        '(each-integer)', '(find-integer)', '*', '+', '?1+', 'abs', 'align',
        'all-integers?', 'bignum', 'bignum?', 'bit?', 'bitand', 'bitnot',
        'bitor', 'bits>double', 'bits>float', 'bitxor', 'complex',
        'complex?', 'denominator', 'double>bits', 'each-integer', 'even?',
        'find-integer', 'find-last-integer', 'fixnum', 'fixnum?', 'float',
        'float>bits', 'float?', 'fp-bitwise=', 'fp-infinity?',
        'fp-nan-payload', 'fp-nan?', 'fp-qnan?', 'fp-sign', 'fp-snan?',
        'fp-special?', 'if-zero', 'imaginary-part', 'integer',
        'integer>fixnum', 'integer>fixnum-strict', 'integer?', 'log2',
        'log2-expects-positive', 'log2-expects-positive?', 'mod', 'neg',
        'neg?', 'next-float', 'next-power-of-2', 'number', 'number=',
        'number?', 'numerator', 'odd?', 'out-of-fixnum-range',
        'out-of-fixnum-range?', 'power-of-2?', 'prev-float', 'ratio',
        'ratio?', 'rational', 'rational?', 'real', 'real-part', 'real?',
        'recip', 'rem', 'sgn', 'shift', 'sq', 'times', 'u<', 'u<=', 'u>',
        'u>=', 'unless-zero', 'unordered?', 'when-zero', 'zero?'),
        suffix=r'\s')

    builtin_sequences = words((
        '1sequence', '2all?', '2each', '2map', '2map-as', '2map-reduce',
        '2reduce', '2selector', '2sequence', '3append', '3append-as',
        '3each', '3map', '3map-as', '3sequence', '4sequence', '<repetition>',
        '<reversed>', '<slice>', '?first', '?last', '?nth', '?second',
        '?set-nth', 'accumulate', 'accumulate!', 'accumulate-as', 'all?',
        'any?', 'append', 'append!', 'append-as', 'assert-sequence',
        'assert-sequence=', 'assert-sequence?', 'binary-reduce',
        'bounds-check', 'bounds-check?', 'bounds-error', 'bounds-error?',
        'but-last', 'but-last-slice', 'cartesian-each', 'cartesian-map',
        'cartesian-product', 'change-nth', 'check-slice',
        'check-slice-error', 'clone-like', 'collapse-slice', 'collector',
        'collector-for', 'concat', 'concat-as', 'copy', 'count', 'cut',
        'cut-slice', 'cut*', 'delete-all', 'delete-slice', 'drop-prefix',
        'each', 'each-from', 'each-index', 'empty?', 'exchange', 'filter',
        'filter!', 'filter-as', 'find', 'find-from', 'find-index',
        'find-index-from', 'find-last', 'find-last-from', 'first', 'first2',
        'first3', 'first4', 'flip', 'follow', 'fourth', 'glue', 'halves',
        'harvest', 'head', 'head-slice', 'head-slice*', 'head*', 'head?',
        'if-empty', 'immutable', 'immutable-sequence',
        'immutable-sequence?', 'immutable?', 'index', 'index-from',
        'indices', 'infimum', 'infimum-by', 'insert-nth', 'interleave',
        'iota', 'iota-tuple', 'iota-tuple?', 'join', 'join-as', 'last',
        'last-index', 'last-index-from', 'length', 'lengthen', 'like',
        'longer', 'longer?', 'longest', 'map', 'map!', 'map-as', 'map-find',
        'map-find-last', 'map-index', 'map-integers', 'map-reduce',
        'map-sum', 'max-length', 'member-eq?', 'member?', 'midpoint@',
        'min-length', 'mismatch', 'move', 'new-like', 'new-resizable',
        'new-sequence', 'non-negative-integer-expected',
        'non-negative-integer-expected?', 'nth', 'nths', 'pad-head',
        'pad-tail', 'padding', 'partition', 'pop', 'pop*', 'prefix',
        'prepend', 'prepend-as', 'produce', 'produce-as', 'product', 'push',
        'push-all', 'push-either', 'push-if', 'reduce', 'reduce-index',
        'remove', 'remove!', 'remove-eq', 'remove-eq!', 'remove-nth',
        'remove-nth!', 'repetition', 'repetition?', 'replace-slice',
        'replicate', 'replicate-as', 'rest', 'rest-slice', 'reverse',
        'reverse!', 'reversed', 'reversed?', 'second', 'selector',
        'selector-for', 'sequence', 'sequence-hashcode', 'sequence=',
        'sequence?', 'set-first', 'set-fourth', 'set-last', 'set-length',
        'set-nth', 'set-second', 'set-third', 'short', 'shorten', 'shorter',
        'shorter?', 'shortest', 'sift', 'slice', 'slice-error',
        'slice-error?', 'slice?', 'snip', 'snip-slice', 'start', 'start*',
        'subseq', 'subseq?', 'suffix', 'suffix!', 'sum', 'sum-lengths',
        'supremum', 'supremum-by', 'surround', 'tail', 'tail-slice',
        'tail-slice*', 'tail*', 'tail?', 'third', 'trim', 'trim-head',
        'trim-head-slice', 'trim-slice', 'trim-tail', 'trim-tail-slice',
        'unclip', 'unclip-last', 'unclip-last-slice', 'unclip-slice',
        'unless-empty', 'virtual-exemplar', 'virtual-sequence',
        'virtual-sequence?', 'virtual@', 'when-empty'), suffix=r'\s')

    builtin_namespaces = words((
        '+@', 'change', 'change-global', 'counter', 'dec', 'get',
        'get-global', 'global', 'inc', 'init-namespaces', 'initialize',
        'is-global', 'make-assoc', 'namespace', 'namestack', 'off', 'on',
        'set', 'set-global', 'set-namestack', 'toggle', 'with-global',
        'with-scope', 'with-variable', 'with-variables'), suffix=r'\s')

    builtin_arrays = words((
        '1array', '2array', '3array', '4array', '<array>', '>array', 'array',
        'array?', 'pair', 'pair?', 'resize-array'), suffix=r'\s')

    builtin_io = words((
        '(each-stream-block-slice)', '(each-stream-block)',
        '(stream-contents-by-block)', '(stream-contents-by-element)',
        '(stream-contents-by-length-or-block)',
        '(stream-contents-by-length)', '+byte+', '+character+',
        'bad-seek-type', 'bad-seek-type?', 'bl', 'contents', 'each-block',
        'each-block-size', 'each-block-slice', 'each-line', 'each-morsel',
        'each-stream-block', 'each-stream-block-slice', 'each-stream-line',
        'error-stream', 'flush', 'input-stream', 'input-stream?',
        'invalid-read-buffer', 'invalid-read-buffer?', 'lines', 'nl',
        'output-stream', 'output-stream?', 'print', 'read', 'read-into',
        'read-partial', 'read-partial-into', 'read-until', 'read1', 'readln',
        'seek-absolute', 'seek-absolute?', 'seek-end', 'seek-end?',
        'seek-input', 'seek-output', 'seek-relative', 'seek-relative?',
        'stream-bl', 'stream-contents', 'stream-contents*', 'stream-copy',
        'stream-copy*', 'stream-element-type', 'stream-flush',
        'stream-length', 'stream-lines', 'stream-nl', 'stream-print',
        'stream-read', 'stream-read-into', 'stream-read-partial',
        'stream-read-partial-into', 'stream-read-partial-unsafe',
        'stream-read-unsafe', 'stream-read-until', 'stream-read1',
        'stream-readln', 'stream-seek', 'stream-seekable?', 'stream-tell',
        'stream-write', 'stream-write1', 'tell-input', 'tell-output',
        'with-error-stream', 'with-error-stream*', 'with-error>output',
        'with-input-output+error-streams',
        'with-input-output+error-streams*', 'with-input-stream',
        'with-input-stream*', 'with-output-stream', 'with-output-stream*',
        'with-output>error', 'with-output+error-stream',
        'with-output+error-stream*', 'with-streams', 'with-streams*',
        'write', 'write1'), suffix=r'\s')

    builtin_strings = words((
        '1string', '<string>', '>string', 'resize-string', 'string',
        'string?'), suffix=r'\s')

    builtin_vectors = words((
        '1vector', '<vector>', '>vector', '?push', 'vector', 'vector?'),
        suffix=r'\s')

    builtin_continuations = words((
        '<condition>', '<continuation>', '<restart>', 'attempt-all',
        'attempt-all-error', 'attempt-all-error?', 'callback-error-hook',
        'callcc0', 'callcc1', 'cleanup', 'compute-restarts', 'condition',
        'condition?', 'continuation', 'continuation?', 'continue',
        'continue-restart', 'continue-with', 'current-continuation',
        'error', 'error-continuation', 'error-in-thread', 'error-thread',
        'ifcc', 'ignore-errors', 'in-callback?', 'original-error', 'recover',
        'restart', 'restart?', 'restarts', 'rethrow', 'rethrow-restarts',
        'return', 'return-continuation', 'thread-error-hook',
        'throw-continue', 'throw-restarts', 'with-datastack',
        'with-return'), suffix=r'\s')

    # Rules are tried in order and the first match wins, so the order of the
    # entries in each state is significant.
    tokens = {
        'root': [
            # factor allows a file to start with a shebang
            (r'#!.*$', Comment.Preproc),
            # anything else is handled by 'base' (pushed without consuming)
            default('base'),
        ],
        'base': [
            (r'\s+', Text),

            # defining words
            (r'((?:MACRO|MEMO|TYPED)?:[:]?)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function)),
            (r'(M:[:]?)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Function)),
            (r'(C:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Class)),
            (r'(GENERIC:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function)),
            (r'(HOOK:|GENERIC#)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Function)),
            (r'\(\s', Name.Function, 'stackeffect'),
            (r';\s', Keyword),

            # imports and namespaces
            (r'(USING:)(\s+)',
             bygroups(Keyword.Namespace, Text), 'vocabs'),
            (r'(USE:|UNUSE:|IN:|QUALIFIED:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(QUALIFIED-WITH:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text,
                      Name.Namespace)),
            (r'(FROM:|EXCLUDE:)(\s+)(\S+)(\s+=>\s)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text),
             'words'),
            (r'(RENAME:)(\s+)(\S+)(\s+)(\S+)(\s+=>\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Namespace, Text, Name.Function)),
            (r'(ALIAS:|TYPEDEF:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Function)),
            (r'(DEFER:|FORGET:|POSTPONE:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function)),

            # tuples and classes
            (r'(TUPLE:|ERROR:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class), 'slots'),
            (r'(TUPLE:|ERROR:|BUILTIN:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class), 'slots'),
            (r'(MIXIN:|UNION:|INTERSECTION:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class)),
            (r'(PREDICATE:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class)),
            # "C:" constructors are already covered by the identical rule in
            # the "defining words" group above, so no second copy is needed.
            (r'(INSTANCE:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class)),
            (r'(SLOT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)),
            (r'(SINGLETON:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'SINGLETONS:', Keyword, 'classes'),

            # other syntax
            (r'(CONSTANT:|SYMBOL:|MAIN:|HELP:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function)),
            (r'SYMBOLS:\s', Keyword, 'words'),
            (r'SYNTAX:\s', Keyword),
            (r'ALIEN:\s', Keyword),
            (r'(STRUCT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'(FUNCTION:)(\s+\S+\s+)(\S+)(\s+\(\s+[^\)]+\)\s)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text)),
            (r'(FUNCTION-ALIAS:)(\s+)(\S+)(\s+\S+\s+)(\S+)(\s+\(\s+[^\)]+\)\s)',
             bygroups(Keyword.Namespace, Text, Name.Function, Text,
                      Name.Function, Text)),

            # vocab.private
            (r'(?:<PRIVATE|PRIVATE>)\s', Keyword.Namespace),

            # strings
            (r'"""\s+(?:.|\n)*?\s+"""', String),
            (r'"(?:\\\\|\\"|[^"])*"', String),
            (r'\S+"\s+(?:\\\\|\\"|[^"])*"', String),
            (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char),

            # comments
            (r'!\s+.*$', Comment),
            (r'#!\s+.*$', Comment),
            (r'/\*\s+(?:.|\n)*?\s\*/\s', Comment),

            # boolean constants
            (r'[tf]\s', Name.Constant),

            # symbols and literals
            (r'[\\$]\s+\S+', Name.Constant),
            (r'M\\\s+\S+\s+\S+', Name.Constant),

            # numbers
            (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s', Number),
            (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number),
            (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number),
            (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number),
            (r'0b[01]+\s', Number.Bin),
            (r'0o[0-7]+\s', Number.Oct),
            (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number),
            (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number),

            # keywords
            (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s',
             Keyword),

            # builtins
            (builtin_kernel, Name.Builtin),
            (builtin_assocs, Name.Builtin),
            (builtin_combinators, Name.Builtin),
            (builtin_math, Name.Builtin),
            (builtin_sequences, Name.Builtin),
            (builtin_namespaces, Name.Builtin),
            (builtin_arrays, Name.Builtin),
            (builtin_io, Name.Builtin),
            (builtin_strings, Name.Builtin),
            (builtin_vectors, Name.Builtin),
            (builtin_continuations, Name.Builtin),

            # everything else is text
            (r'\S+', Text),
        ],
        # Entered on "( "; nests on a further "( " and pops on ") ".
        'stackeffect': [
            (r'\s+', Text),
            (r'\(\s+', Name.Function, 'stackeffect'),
            (r'\)\s', Name.Function, '#pop'),
            (r'--\s', Name.Function),
            (r'\S+', Name.Variable),
        ],
        # The four list states below all end at "; ".
        'slots': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)',
             bygroups(Text, Name.Variable, Text)),
            (r'\S+', Name.Variable),
        ],
        'vocabs': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Namespace),
        ],
        'classes': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Class),
        ],
        'words': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Function),
        ],
    }
class NedLexer(RegexLexer):
    """
    Lexer for ``*.ned`` files.

    NOTE(review): the keyword set (``simple``, ``module``, ``network``,
    ``gates``, ``submodules``, ``connections``) suggests this targets the
    OMNeT++ NED language -- confirm before stating it in user-facing docs.
    """
    name = 'ned'
    filenames = ['*.ned']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(true|false)\b', Name.Builtin),
            (r'(<-->|-->|<--|\.\.)', Keyword),
            (r'(bool|double|int|xml)\b', Keyword.Type),
            (r'(inout|input|output)\b', Keyword.Type),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(("channel", "channelinterface", "simple", "module",
                    "network", "moduleinterface"), suffix=r'\b'), Keyword),
            (words(("parameters", "gates", "types", "submodules",
                    "connections"), suffix=r'\b'), Keyword),
            (words(("volatile", "allowunconnected", "extends", "for", "if",
                    "import", "like", "package", "property"),
                   suffix=r'\b'), Keyword),
            (words(("sizeof", "const", "default", "ask", "this", "index",
                    "typename", "xmldoc"), suffix=r'\b'), Keyword),
            (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta",
                    "binomial", "cauchy", "ceil", "chi_square", "cos",
                    "erlang_k", "exp", "exponential", "fabs", "floor",
                    "fmod", "gamma_d", "genk_exponential",
                    "genk_intuniform", "genk_normal", "genk_truncnormal",
                    "genk_uniform", "geometric", "hypergeometric", "hypot",
                    "intuniform", "log", "log10", "lognormal", "max", "min",
                    "negbinomial", "normal", "pareto_shifted", "poisson",
                    "pow", "simTime", "sin", "sqrt", "student_t", "tan",
                    "triang", "truncnormal", "uniform", "weibull", "xml",
                    "xmldoc"), suffix=r'\b'), Name.Builtin),
            # Raw strings: "\w" in a plain literal is an invalid escape
            # sequence and triggers a warning on newer Python versions.
            (r'@[a-zA-Z_]\w*', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;{]*)(\{)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations
            (r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
             r'([a-zA-Z_]\w*)'             # method name
             r'(\s*\([^;]*?\))'            # signature
             r'([^;]*)(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            # Nothing above matched: lex a single statement.
            default('statement'),
        ],
        # A statement runs up to the next ";" and then returns to 'root'.
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        # Function body: braces nest via '#push' / '#pop'.
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ]
    }
class CobolLexer(RegexLexer):
    """
    Lexer for OpenCOBOL code.

    .. versionadded:: 1.6
    """
    name = 'COBOL'
    aliases = ['cobol']
    filenames = ['*.cob', '*.COB', '*.cpy', '*.CPY']
    mimetypes = ['text/x-cobol']
    flags = re.IGNORECASE | re.MULTILINE

    # Data Types: by PICTURE and USAGE
    # Operators: **, *, +, -, /, <, >, <=, >=, =, <>
    # Logical (?): NOT, AND, OR

    # Reserved words:
    # http://opencobol.add1tocobol.com/#reserved-words
    # Intrinsics:
    # http://opencobol.add1tocobol.com/#does-opencobol-implement-any-intrinsic-functions

    # All word rules share the same delimiters: a word must be preceded by
    # start-of-line or a character that is neither a word character nor "-",
    # and followed (after optional whitespace) by end-of-line or such a
    # character.  The first matching rule wins, so a word listed in several
    # groups gets the token type of the earliest group.
    tokens = {
        'root': [
            include('comment'),
            include('strings'),
            include('core'),
            include('nums'),
            (r'[a-z0-9]([\w\-]*[a-z0-9]+)?', Name.Variable),
            # (r'[\s]+', Text),
            (r'[ \t]+', Text),
        ],
        'comment': [
            (r'(^.{6}[*/].*\n|^.{6}|\*>.*\n)', Comment),
        ],
        'core': [
            # Figurative constants
            (r'(^|(?<=[^\w\-]))(ALL\s+)?'
             r'((ZEROES)|(HIGH-VALUE|LOW-VALUE|QUOTE|SPACE|ZERO)(S)?)'
             r'\s*($|(?=[^\w\-]))',
             Name.Constant),

            # Reserved words STATEMENTS and other bolds
            (words((
                'ACCEPT', 'ADD', 'ALLOCATE', 'CALL', 'CANCEL', 'CLOSE',
                'COMPUTE', 'CONFIGURATION', 'CONTINUE', 'DATA', 'DELETE',
                'DISPLAY', 'DIVIDE', 'DIVISION', 'ELSE', 'END', 'END-ACCEPT',
                'END-ADD', 'END-CALL', 'END-COMPUTE', 'END-DELETE',
                'END-DISPLAY', 'END-DIVIDE', 'END-EVALUATE', 'END-IF',
                'END-MULTIPLY', 'END-OF-PAGE', 'END-PERFORM', 'END-READ',
                'END-RETURN', 'END-REWRITE', 'END-SEARCH', 'END-START',
                'END-STRING', 'END-SUBTRACT', 'END-UNSTRING', 'END-WRITE',
                'ENVIRONMENT', 'EVALUATE', 'EXIT', 'FD', 'FILE',
                'FILE-CONTROL', 'FOREVER', 'FREE', 'GENERATE', 'GO',
                'GOBACK', 'IDENTIFICATION', 'IF', 'INITIALIZE', 'INITIATE',
                'INPUT-OUTPUT', 'INSPECT', 'INVOKE', 'I-O-CONTROL',
                'LINKAGE', 'LOCAL-STORAGE', 'MERGE', 'MOVE', 'MULTIPLY',
                'OPEN', 'PERFORM', 'PROCEDURE', 'PROGRAM-ID', 'RAISE',
                'READ', 'RELEASE', 'RESUME', 'RETURN', 'REWRITE', 'SCREEN',
                'SD', 'SEARCH', 'SECTION', 'SET', 'SORT', 'START', 'STOP',
                'STRING', 'SUBTRACT', 'SUPPRESS', 'TERMINATE', 'THEN',
                'UNLOCK', 'UNSTRING', 'USE', 'VALIDATE', 'WORKING-STORAGE',
                'WRITE'),
                prefix=r'(^|(?<=[^\w\-]))',
                suffix=r'\s*($|(?=[^\w\-]))'),
             Keyword.Reserved),

            # Reserved words
            # NOTE: keep a comma after every entry -- adjacent string
            # literals are silently concatenated into one word.
            (words((
                'ACCESS', 'ADDRESS', 'ADVANCING', 'AFTER', 'ALL',
                'ALPHABET', 'ALPHABETIC', 'ALPHABETIC-LOWER',
                'ALPHABETIC-UPPER', 'ALPHANUMERIC', 'ALPHANUMERIC-EDITED',
                'ALSO', 'ALTER', 'ALTERNATE', 'ANY', 'ARE', 'AREA', 'AREAS',
                'ARGUMENT-NUMBER', 'ARGUMENT-VALUE', 'AS', 'ASCENDING',
                'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC',
                'AUTOTERMINATE', 'BACKGROUND-COLOR', 'BASED', 'BEEP',
                'BEFORE', 'BELL', 'BLANK', 'BLINK', 'BLOCK', 'BOTTOM', 'BY',
                'BYTE-LENGTH', 'CHAINING', 'CHARACTER', 'CHARACTERS',
                'CLASS', 'CODE', 'CODE-SET', 'COL', 'COLLATING', 'COLS',
                'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', 'COMMIT',
                'COMMON', 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL',
                'CONTROLS', 'CONVERTING', 'COPY', 'CORR', 'CORRESPONDING',
                'COUNT', 'CRT', 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY',
                'DAY-OF-WEEK', 'DE', 'DEBUGGING', 'DECIMAL-POINT',
                'DECLARATIVES', 'DEFAULT', 'DELIMITED', 'DELIMITER',
                'DEPENDING', 'DESCENDING', 'DETAIL', 'DISK', 'DOWN',
                'DUPLICATES', 'DYNAMIC', 'EBCDIC', 'ENTRY',
                'ENVIRONMENT-NAME', 'ENVIRONMENT-VALUE', 'EOL', 'EOP', 'EOS',
                'ERASE', 'ERROR', 'ESCAPE', 'EXCEPTION', 'EXCLUSIVE',
                'EXTEND', 'EXTERNAL', 'FILE-ID', 'FILLER', 'FINAL', 'FIRST',
                'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', 'FOOTING', 'FOR',
                'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', 'FUNCTION',
                'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', 'HEADING',
                'HIGHLIGHT', 'I-O', 'ID', 'IGNORE', 'IGNORING', 'IN',
                'INDEX', 'INDEXED', 'INDICATE', 'INITIAL', 'INITIALIZED',
                'INPUT', 'INTO', 'INTRINSIC', 'INVALID', 'IS', 'JUST',
                'JUSTIFIED', 'KEY', 'LABEL', 'LAST', 'LEADING', 'LEFT',
                'LENGTH', 'LIMIT', 'LIMITS', 'LINAGE', 'LINAGE-COUNTER',
                'LINE', 'LINES', 'LOCALE', 'LOCK', 'LOWLIGHT', 'MANUAL',
                'MEMORY', 'MINUS', 'MODE', 'MULTIPLE', 'NATIONAL',
                'NATIONAL-EDITED', 'NATIVE', 'NEGATIVE', 'NEXT', 'NO',
                'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC',
                'NUMERIC-EDITED', 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF',
                'OMITTED', 'ON', 'ONLY', 'OPTIONAL', 'ORDER', 'ORGANIZATION',
                'OTHER', 'OUTPUT', 'OVERFLOW', 'OVERLINE', 'PACKED-DECIMAL',
                'PADDING', 'PAGE', 'PARAGRAPH', 'PLUS', 'POINTER',
                'POSITION', 'POSITIVE', 'PRESENT', 'PREVIOUS', 'PRINTER',
                'PRINTING', 'PROCEDURE-POINTER', 'PROCEDURES', 'PROCEED',
                'PROGRAM', 'PROGRAM-POINTER', 'PROMPT', 'QUOTE', 'QUOTES',
                'RANDOM', 'RD', 'RECORD', 'RECORDING', 'RECORDS',
                'RECURSIVE', 'REDEFINES', 'REEL', 'REFERENCE', 'RELATIVE',
                'REMAINDER', 'REMOVAL', 'RENAMES', 'REPLACING', 'REPORT',
                'REPORTING', 'REPORTS', 'REPOSITORY', 'REQUIRED', 'RESERVE',
                'RETURNING', 'REVERSE-VIDEO', 'REWIND', 'RIGHT', 'ROLLBACK',
                'ROUNDED', 'RUN', 'SAME', 'SCROLL', 'SECURE',
                'SEGMENT-LIMIT', 'SELECT', 'SENTENCE', 'SEPARATE',
                'SEQUENCE', 'SEQUENTIAL', 'SHARING', 'SIGN', 'SIGNED',
                'SIGNED-INT', 'SIGNED-LONG', 'SIGNED-SHORT', 'SIZE',
                'SORT-MERGE', 'SOURCE', 'SOURCE-COMPUTER', 'SPECIAL-NAMES',
                'STANDARD', 'STANDARD-1', 'STANDARD-2', 'STATUS', 'SUM',
                'SYMBOLIC', 'SYNC', 'SYNCHRONIZED', 'TALLYING', 'TAPE',
                'TEST', 'THROUGH', 'THRU', 'TIME', 'TIMES', 'TO', 'TOP',
                'TRAILING', 'TRANSFORM', 'TYPE', 'UNDERLINE', 'UNIT',
                'UNSIGNED', 'UNSIGNED-INT', 'UNSIGNED-LONG',
                'UNSIGNED-SHORT', 'UNTIL', 'UP', 'UPDATE', 'UPON', 'USAGE',
                'USING', 'VALUE', 'VALUES', 'VARYING', 'WAIT', 'WHEN',
                'WITH', 'WORDS', 'YYYYDDD', 'YYYYMMDD'),
                prefix=r'(^|(?<=[^\w\-]))',
                suffix=r'\s*($|(?=[^\w\-]))'),
             Keyword.Pseudo),

            # inactive reserved words
            (words((
                'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC',
                'ATTRIBUTE', 'B-AND', 'B-NOT', 'B-OR', 'B-XOR', 'BIT',
                'BOOLEAN', 'CD', 'CENTER', 'CF', 'CH', 'CHAIN', 'CLASS-ID',
                'CLASSIFICATION', 'COMMUNICATION', 'CONDITION',
                'DATA-POINTER', 'DESTINATION', 'DISABLE', 'EC', 'EGI', 'EMI',
                'ENABLE', 'END-RECEIVE', 'ENTRY-CONVENTION', 'EO', 'ESI',
                'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', 'FLOAT-BINARY-16',
                'FLOAT-BINARY-34', 'FLOAT-BINARY-7', 'FLOAT-DECIMAL-16',
                'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT',
                'FUNCTION-POINTER', 'GET', 'GROUP-USAGE', 'IMPLEMENTS',
                'INFINITY', 'INHERITS', 'INTERFACE', 'INTERFACE-ID',
                'INVOKE', 'LC_ALL', 'LC_COLLATE', 'LC_CTYPE', 'LC_MESSAGES',
                'LC_MONETARY', 'LC_NUMERIC', 'LC_TIME', 'LINE-COUNTER',
                'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', 'NORMAL',
                'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE',
                'PAGE-COUNTER', 'PF', 'PH', 'PROPERTY', 'PROTOTYPE', 'PURGE',
                'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', 'RELATION',
                'REPLACE', 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME',
                'RETRY', 'RF', 'RH', 'SECONDS', 'SEGMENT', 'SELF', 'SEND',
                'SOURCES', 'STATEMENT', 'STEP', 'STRONG', 'SUB-QUEUE-1',
                'SUB-QUEUE-2', 'SUB-QUEUE-3', 'SUPER', 'SYMBOL',
                'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', 'TYPEDEF',
                'UCS-4', 'UNIVERSAL', 'USER-DEFAULT', 'UTF-16', 'UTF-8',
                'VAL-STATUS', 'VALID', 'VALIDATE', 'VALIDATE-STATUS'),
                prefix=r'(^|(?<=[^\w\-]))',
                suffix=r'\s*($|(?=[^\w\-]))'),
             Error),

            # Data Types
            (r'(^|(?<=[^\w\-]))'
             r'(PIC\s+.+?(?=(\s|\.\s))|PICTURE\s+.+?(?=(\s|\.\s))|'
             r'(COMPUTATIONAL)(-[1-5X])?|(COMP)(-[1-5X])?|'
             r'BINARY-C-LONG|'
             r'BINARY-CHAR|BINARY-DOUBLE|BINARY-LONG|BINARY-SHORT|'
             r'BINARY)\s*($|(?=[^\w\-]))', Keyword.Type),

            # Operators
            (r'(\*\*|\*|\+|-|/|<=|>=|<|>|==|/=|=)', Operator),

            # (r'(::)', Keyword.Declaration),

            (r'([(),;:&%.])', Punctuation),

            # Intrinsics
            (r'(^|(?<=[^\w\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|'
             r'CHAR|COMBINED-DATETIME|CONCATENATE|COS|CURRENT-DATE|'
             r'DATE-OF-INTEGER|DATE-TO-YYYYMMDD|DAY-OF-INTEGER|DAY-TO-YYYYDDD|'
             r'EXCEPTION-(?:FILE|LOCATION|STATEMENT|STATUS)|EXP10|EXP|E|'
             r'FACTORIAL|FRACTION-PART|INTEGER-OF-(?:DATE|DAY|PART)|INTEGER|'
             r'LENGTH|LOCALE-(?:DATE|TIME(?:-FROM-SECONDS)?)|LOG(?:10)?|'
             r'LOWER-CASE|MAX|MEAN|MEDIAN|MIDRANGE|MIN|MOD|NUMVAL(?:-C)?|'
             r'ORD(?:-MAX|-MIN)?|PI|PRESENT-VALUE|RANDOM|RANGE|REM|REVERSE|'
             r'SECONDS-FROM-FORMATTED-TIME|SECONDS-PAST-MIDNIGHT|SIGN|SIN|SQRT|'
             r'STANDARD-DEVIATION|STORED-CHAR-LENGTH|SUBSTITUTE(?:-CASE)?|'
             r'SUM|TAN|TEST-DATE-YYYYMMDD|TEST-DAY-YYYYDDD|TRIM|'
             r'UPPER-CASE|VARIANCE|WHEN-COMPILED|YEAR-TO-YYYY)\s*'
             r'($|(?=[^\w\-]))', Name.Function),

            # Booleans
            (r'(^|(?<=[^\w\-]))(true|false)\s*($|(?=[^\w\-]))', Name.Builtin),
            # Comparing Operators
            (r'(^|(?<=[^\w\-]))(equal|equals|ne|lt|le|gt|ge|'
             r'greater|less|than|not|and|or)\s*($|(?=[^\w\-]))',
             Operator.Word),
        ],

        # \"[^\"\n]*\"|\'[^\'\n]*\'
        'strings': [
            # apparently strings can be delimited by EOL if they are continued
            # in the next line
            (r'"[^"\n]*("|\n)', String.Double),
            (r"'[^'\n]*('|\n)", String.Single),
        ],

        'nums': [
            (r'\d+(\s*|\.$|$)', Number.Integer),
            (r'[+-]?\d*\.\d+(E[-+]?\d+)?', Number.Float),
            (r'[+-]?\d+\.\d*(E[-+]?\d+)?', Number.Float),
        ],
    }
class MonteLexer(RegexLexer):
    """
    Lexer for the `Monte <https://monte.readthedocs.io/>`_ programming language.

    The word lists and sub-patterns used below (``_methods``,
    ``_declarations``, ``_keywords``, ``_operators``, ``_constants``,
    ``_guards``, ``_safeScope``, ``_identifier``, ``_escape_pattern``) are
    module-level names defined outside this class.

    .. versionadded:: 2.2
    """
    name = 'Monte'
    aliases = ['monte']
    filenames = ['*.mt']

    tokens = {
        'root': [
            # Comments
            (r'#[^\n]*\n', Comment),

            # Docstrings
            # Non-greedy so the match stops at the first closing `*/`;
            # (?s) lets `.` cross newlines so a docstring may span lines.
            (r'(?s)/\*\*.*?\*/', String.Doc),

            # `var` declarations
            (r'\bvar\b', Keyword.Declaration, 'var'),

            # `interface` declarations
            (r'\binterface\b', Keyword.Declaration, 'interface'),

            # method declarations
            (words(_methods, prefix='\\b', suffix='\\b'),
             Keyword, 'method'),

            # All other declarations
            (words(_declarations, prefix='\\b', suffix='\\b'),
             Keyword.Declaration),

            # Keywords
            (words(_keywords, prefix='\\b', suffix='\\b'), Keyword),

            # Literals
            ('[+-]?0x[_0-9a-fA-F]+', Number.Hex),
            (r'[+-]?[_0-9]+\.[_0-9]*([eE][+-]?[_0-9]+)?', Number.Float),
            ('[+-]?[_0-9]+', Number.Integer),
            # The opening quote carries the same token type as the closing
            # quote emitted from 'charEnd'.
            ("'", String.Char, 'char'),
            ('"', String.Double, 'string'),

            # Quasiliterals
            ('`', String.Backtick, 'ql'),

            # Operators
            (words(_operators), Operator),

            # Verb operators
            (_identifier + '=', Operator.Word),

            # Safe scope constants
            (words(_constants, prefix='\\b', suffix='\\b'),
             Keyword.Pseudo),

            # Safe scope guards
            (words(_guards, prefix='\\b', suffix='\\b'), Keyword.Type),

            # All other safe scope names
            (words(_safeScope, prefix='\\b', suffix='\\b'),
             Name.Builtin),

            # Identifiers
            (_identifier, Name),

            # Punctuation
            (r'\(|\)|\{|\}|\[|\]|:|,', Punctuation),

            # Whitespace
            (' +', Whitespace),

            # Definite lexer errors
            ('=', Error),
        ],
        'char': [
            # It is definitely an error to have a char of width == 0.
            # Pop back to whichever state opened the literal instead of
            # stacking a fresh 'root' on top of it.
            ("'", Error, '#pop'),
            (_escape_pattern, String.Escape, 'charEnd'),
            ('.', String.Char, 'charEnd'),
        ],
        'charEnd': [
            # Leaves both 'charEnd' and 'char'.
            ("'", String.Char, '#pop:2'),
            # It is definitely an error to have a char of width > 1.
            ('.', Error),
        ],
        # The state of things coming into an interface.
        'interface': [
            (' +', Whitespace),
            (_identifier, Name.Class, '#pop'),
            include('root'),
        ],
        # The state of things coming into a method.
        'method': [
            (' +', Whitespace),
            (_identifier, Name.Function, '#pop'),
            include('root'),
        ],
        'string': [
            # Return to the enclosing state; pushing 'root' here would bury
            # an enclosing 'qlNest' so its closing brace could never pop it.
            ('"', String.Double, '#pop'),
            (_escape_pattern, String.Escape),
            (r'\n', String.Double),
            ('.', String.Double),
        ],
        'ql': [
            # Same reasoning as for 'string': pop instead of pushing 'root'.
            ('`', String.Backtick, '#pop'),
            (r'\$' + _escape_pattern, String.Escape),
            (r'\$\$', String.Escape),
            (r'@@', String.Escape),
            (r'\$\{', String.Interpol, 'qlNest'),
            (r'@\{', String.Interpol, 'qlNest'),
            (r'\$' + _identifier, Name),
            ('@' + _identifier, Name),
            ('.', String.Backtick),
        ],
        'qlNest': [
            # Must be tried before the generic punctuation rule in 'root'.
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        # The state of things immediately following `var`.
        'var': [
            (' +', Whitespace),
            (_identifier, Name.Variable, '#pop'),
            include('root'),
        ],
    }
class VerilogLexer(RegexLexer):
    """
    For verilog source code with preprocessor directives.

    .. versionadded:: 1.4
    """
    name = 'verilog'
    aliases = ['verilog', 'v']
    filenames = ['*.v']
    mimetypes = ['text/x-verilog']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            (r'^\s*`define', Comment.Preproc, 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'[{}#@]', Punctuation),
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            # Based literals: an optional size followed by the base marker.
            # The size group is optional (`?`) rather than an alternative
            # (`|`), otherwise every plain decimal would be claimed by the
            # first of these rules and tagged as hexadecimal.
            (r'([0-9]+)?(\'h)[0-9a-fA-F]+', Number.Hex),
            (r'([0-9]+)?(\'b)[01]+', Number.Bin),
            (r'([0-9]+)?(\'d)[0-9]+', Number.Integer),
            (r'([0-9]+)?(\'o)[0-7]+', Number.Oct),
            (r'\'[01xz]', Number),
            (r'\d+[Ll]?', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.;\']', Punctuation),

            (r'^(\s*)(package)(\s+)',
             bygroups(Text, Keyword.Namespace, Text)),
            (r'^(\s*)(import)(\s+)',
             bygroups(Text, Keyword.Namespace, Text), 'import'),

            (words((
                'always', 'always_comb', 'always_ff', 'always_latch', 'and',
                'assign', 'automatic', 'begin', 'break', 'buf', 'bufif0',
                'bufif1', 'case', 'casex', 'casez', 'cmos', 'const',
                'continue', 'deassign', 'default', 'defparam', 'disable',
                'do', 'edge', 'else', 'end', 'endcase', 'endfunction',
                'endgenerate', 'endmodule', 'endpackage', 'endprimitive',
                'endspecify', 'endtable', 'endtask', 'enum', 'event',
                'final', 'for', 'force', 'forever', 'fork', 'function',
                'generate', 'genvar', 'highz0', 'highz1', 'if', 'initial',
                'inout', 'input', 'integer', 'join', 'large', 'localparam',
                'macromodule', 'medium', 'module', 'nand', 'negedge', 'nmos',
                'nor', 'not', 'notif0', 'notif1', 'or', 'output', 'packed',
                'parameter', 'pmos', 'posedge', 'primitive', 'pull0',
                'pull1', 'pulldown', 'pullup', 'rcmos', 'ref', 'release',
                'repeat', 'return', 'rnmos', 'rpmos', 'rtran', 'rtranif0',
                'rtranif1', 'scalared', 'signed', 'small', 'specify',
                'specparam', 'strength', 'string', 'strong0', 'strong1',
                'struct', 'table', 'task', 'tran', 'tranif0', 'tranif1',
                'type', 'typedef', 'unsigned', 'var', 'vectored', 'void',
                'wait', 'weak0', 'weak1', 'while', 'xnor', 'xor'),
                suffix=r'\b'),
             Keyword),

            # Known compiler directives.  These must be tried before the
            # generic back-tick rule below, which would otherwise match
            # every directive first.
            (words((
                'accelerate', 'autoexpand_vectornets', 'celldefine',
                'default_nettype', 'else', 'elsif', 'endcelldefine',
                'endif', 'endprotect', 'endprotected', 'expand_vectornets',
                'ifdef', 'ifndef', 'include', 'noaccelerate',
                'noexpand_vectornets', 'noremove_gatenames',
                'noremove_netnames', 'nounconnected_drive', 'protect',
                'protected', 'remove_gatenames', 'remove_netnames',
                'resetall', 'timescale', 'unconnected_drive', 'undef'),
                prefix=r'`', suffix=r'\b'),
             Comment.Preproc),

            # Any other back-tick name.
            (r'`[a-zA-Z_]\w*', Name.Constant),

            (words((
                'bits', 'bitstoreal', 'bitstoshortreal', 'countdrivers',
                'display', 'fclose', 'fdisplay', 'finish', 'floor',
                'fmonitor', 'fopen', 'fstrobe', 'fwrite', 'getpattern',
                'history', 'incsave', 'input', 'itor', 'key', 'list', 'log',
                'monitor', 'monitoroff', 'monitoron', 'nokey', 'nolog',
                'printtimescale', 'random', 'readmemb', 'readmemh',
                'realtime', 'realtobits', 'reset', 'reset_count',
                'reset_value', 'restart', 'rtoi', 'save', 'scale', 'scope',
                'shortrealtobits', 'showscopes', 'showvariables',
                'showvars', 'sreadmemb', 'sreadmemh', 'stime', 'stop',
                'strobe', 'time', 'timeformat', 'write'),
                prefix=r'\$', suffix=r'\b'),
             Name.Builtin),

            # 'wor' and 'shortreal' are separate entries; without the comma
            # between them Python concatenates adjacent string literals
            # into one bogus word.
            (words((
                'byte', 'shortint', 'int', 'longint', 'integer', 'time',
                'bit', 'logic', 'reg', 'supply0', 'supply1', 'tri',
                'triand', 'trior', 'tri0', 'tri1', 'trireg', 'uwire',
                'wire', 'wand', 'wor', 'shortreal', 'real', 'realtime'),
                suffix=r'\b'),
             Keyword.Type),
            (r'[a-zA-Z_]\w*:(?!:)', Name.Label),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            # A newline preceded by a backslash keeps the macro open.
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'import': [
            (r'[\w:]+\*?', Name.Namespace, '#pop'),
        ],
    }

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples from the base lexer,
        re-tagging plain ``Name`` tokens whose text is all upper case as
        ``Name.Constant``.
        """
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            # Convention: mark all upper case names as constants
            if token is Name and value.isupper():
                token = Name.Constant
            yield index, token, value
class FireLexer(CFamilyLexer):
    """
    For `Fire Script <https://github.com/rbrich/xcikit>`_.

    Replaces the ``keywords``, ``types`` and ``statements`` states of the
    base C-family lexer and adds a ``raw_string`` state for triple-quoted
    strings.

    .. versionadded:: 2.0
    """
    name = 'Fire'
    aliases = ('fire', )
    filenames = ('*.fire', )
    mimetypes = ('text/x-fire-script', )

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'statements': [
            include('keywords'),
            include('types'),

            # String and character literals, each with an optional `b` affix.
            (r'(b?)(""")',
             bygroups(String.Affix, String.Double),
             'raw_string'),
            (r'(b?)(")',
             bygroups(String.Affix, String.Double),
             'string'),
            (r"(b?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),

            # Numeric literals.
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUuBb]*', Number.Hex),
            (r'0o[0-7]+[LlUuBb]*', Number.Oct),
            (r'0b[01]+[LlUuBb]*', Number.Bin),
            (r'\d+[LlUuBb]*', Number.Integer),

            # A comment terminator with no opener.
            (r'\*/', Error),

            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),

            # `name:` is a label unless the colon is doubled.
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),

            # Lower-case initial: plain name; upper-case initial: class name.
            (r'[a-z_]\w*', Name),
            (r'[A-Z]\w*', Name.Class),
        ],
        'raw_string': [
            (r'"""', String.Double, '#pop'),
            (r'\\"""+', String.Escape),
            # backslash must be parsed one at a time
            (r'\\', String.Double),
            # other characters, including newlines
            (r'[^\\"]+', String.Double),
        ],
        'keywords': [
            (words(('void', 'false', 'true'), suffix=r'\b'),
             Keyword.Constant),
            (words(('stdin', 'stdout', 'stderr', 'null'), suffix=r'\b'),
             Name.Builtin),
            (words((
                'catch', 'class', 'else', 'fun', 'if', 'import',
                'instance', 'in', 'match', 'module', 'then', 'try', 'type',
                'with',
            ), suffix=r'\b'),
             Keyword.Reserved),
        ],
        'types': [
            (words((
                'Void', 'Bool', 'Byte', 'Char', 'Int', 'Int32', 'Int64',
                'Float', 'Float32', 'Float64', 'String'), suffix=r'\b'),
             Keyword.Type),
        ],
    }
class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    The class-level pattern fragments below are concatenated into the rules
    of the ``tokens`` table.

    .. versionadded:: 2.2
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']

    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + ')'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    datatypeMod = (
        r'_('
        # packedTypes
        r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
        r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
        r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
        # baseTypes
        r'u8|s8|u16|s16|u32|s32|u64|s64|'
        r'b128|b8|b16|b32|b64|b1|'
        r'f16|f32|f64|'
        # opaqueType
        r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            # The more specific numeric forms are tried before plain digits.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
        ],
        'comments': [
            # (?s) lets `.` match newlines, so a block comment that spans
            # several lines is recognised as a single token.
            (r'(?s)/\*.*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect',
                'signature', 'decl', 'kernel', 'function',
                'enablebreakexceptions', 'enabledetectexceptions',
                'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'),
             Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr',
                'laneid', 'maxcuid', 'maxwaveid', 'packetid',
                'setdetectexcept', 'waveid', 'workitemflatabsid',
                'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract',
                'ncos', 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin',
                'nsqrt', 'gridgroups', 'gridsize', 'not', 'sqrt',
                'workgroupid', 'workgroupsize', 'workitemabsid',
                'workitemid', 'ceil', 'floor', 'rint', 'trunc', 'add',
                'bitmask', 'borrow', 'carry', 'copysign', 'div', 'rem',
                'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract',
                'bitselect', 'shuffle', 'cmov', 'bitalign', 'bytealign',
                'lerp', 'nfma', 'mul', 'mulhi', 'mul24hi', 'mul24',
                'mad24', 'mad24hi', 'bitinsert', 'combine', 'expand',
                'lda', 'mov', 'pack', 'unpack', 'packcvt', 'unpackcvt',
                'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st', '_eq',
                '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne',
                '_slt', '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ',
                '_sneu', '_sltu', '_sleu', '_sgtu', '_sgeu', 'atomic',
                '_ld', '_st', '_cas', '_add', '_and', '_exch', '_max',
                '_min', '_or', '_sub', '_wrapdec', '_wrapinc', '_xor',
                'ret', 'cvt', '_readonly', '_kernarg', '_global', 'br',
                'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2',
                '_v3', '_v4', '_1d', '_2d', '_3d', '_1da', '_2da', '_1db',
                '_2ddepth', '_2dadepth', '_width', '_height', '_depth',
                '_array', '_channelorder', '_channeltype', 'querysampler',
                '_coord', '_filter', '_addressing', 'barrier',
                'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf',
                'activelaneid', 'activelanecount', 'activelanemask',
                'activelanepermute', 'call', 'scall', 'icall', 'alloca',
                'packetcompletionsig', 'addqueuewriteindex',
                'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private',
                'group', 'spill', 'arg', '_upi', '_downi', '_zeroi',
                '_neari', '_upi_sat', '_downi_sat', '_zeroi_sat',
                '_neari_sat', '_supi', '_sdowni', '_szeroi', '_sneari',
                '_supi_sat', '_sdowni_sat', '_szeroi_sat', '_sneari_sat',
                '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')),
             Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
class GeneratedObjectiveCVariant(baselexer):
    """
    Implements Objective-C syntax on top of an existing C family lexer.

    ``baselexer``, ``_oc_keywords`` and ``_oc_message`` come from the
    enclosing scope, outside this class.
    """

    tokens = {
        'statements': [
            (r'@"', String, 'string'),
            (r'@(YES|NO)', Number),
            (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'@0[0-7]+[Ll]?', Number.Oct),
            (r'@\d+[Ll]?', Number.Integer),
            (r'@\(', Literal, 'literal_number'),
            (r'@\[', Literal, 'literal_array'),
            (r'@\{', Literal, 'literal_dictionary'),
            (words((
                '@selector', '@private', '@protected', '@public', '@encode',
                '@synchronized', '@try', '@throw', '@catch', '@finally',
                '@end', '@property', '@synthesize', '__bridge',
                '__bridge_transfer', '__autoreleasing', '__block', '__weak',
                '__strong', 'weak', 'strong', 'copy', 'retain', 'assign',
                'unsafe_unretained', 'atomic', 'nonatomic', 'readonly',
                'readwrite', 'setter', 'getter', 'typeof', 'in', 'out',
                'inout', 'release', 'class', '@dynamic', '@optional',
                '@required', '@autoreleasepool'), suffix=r'\b'),
             Keyword),
            (words((
                'id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'),
             Keyword.Type),
            # Boxed boolean literals end at a word boundary; requiring a
            # literal newline here would miss them everywhere else.
            (r'@(true|false|YES|NO)\b', Name.Builtin),
            (r'(YES|NO|nil|self|super)\b', Name.Builtin),
            # Carbon types
            (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b',
             Keyword.Type),
            # Carbon built-ins
            (r'(TRUE|FALSE)\b', Name.Builtin),
            (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
             ('#pop', 'oc_classname')),
            (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
             ('#pop', 'oc_forward_classname')),
            # @ can also prefix other expressions like @{...} or @(...)
            (r'@', Punctuation),
            inherit,
        ],
        'oc_classname': [
            # interface definition that inherits
            (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
             bygroups(Name.Class, Text, Name.Class, Text, Punctuation),
             ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
             bygroups(Name.Class, Text, Name.Class), '#pop'),
            # interface definition for a category
            (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
             bygroups(Name.Class, Text, Name.Label, Text, Punctuation),
             ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
             bygroups(Name.Class, Text, Name.Label), '#pop'),
            # simple interface / implementation
            (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
             bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')),
            (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
        ],
        'oc_forward_classname': [
            (r'([a-zA-Z$_][\w$]*)(\s*,\s*)',
             bygroups(Name.Class, Text), 'oc_forward_classname'),
            (r'([a-zA-Z$_][\w$]*)(\s*;?)',
             bygroups(Name.Class, Text), '#pop')
        ],
        'oc_ivars': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'root': [
            # methods
            (r'^([-+])(\s*)'                         # method marker
             r'(\(.*?\))?(\s*)'                      # return type
             r'([a-zA-Z$_][\w$]*:?)',        # begin of method name
             bygroups(Punctuation, Text, using(this),
                      Text, Name.Function),
             'method'),
            inherit,
        ],
        'method': [
            include('whitespace'),
            # TODO unsure if ellipses are allowed elsewhere, see
            # discussion in Issue 789
            (r',', Punctuation),
            (r'\.\.\.', Punctuation),
            (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
             bygroups(using(this), Text, Name.Variable)),
            (r'[a-zA-Z$_][\w$]*:', Name.Function),
            (';', Punctuation, '#pop'),
            (r'\{', Punctuation, 'function'),
            default('#pop'),
        ],
        'literal_number': [
            (r'\(', Punctuation, 'literal_number_inner'),
            (r'\)', Literal, '#pop'),
            include('statement'),
        ],
        'literal_number_inner': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('statement'),
        ],
        'literal_array': [
            (r'\[', Punctuation, 'literal_array_inner'),
            (r'\]', Literal, '#pop'),
            include('statement'),
        ],
        'literal_array_inner': [
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            include('statement'),
        ],
        'literal_dictionary': [
            (r'\}', Literal, '#pop'),
            include('statement'),
        ],
    }

    def analyse_text(text):
        """
        Score how likely ``text`` is Objective-C.

        Returns 1.0 when ``_oc_keywords`` matches, 0.8 for an ``@"`` string
        literal or an ``_oc_message`` match, 0.7 for an ``@`` followed by
        digits, and 0 otherwise.  The checks run in the order written, so
        the first hit decides the score.
        """
        if _oc_keywords.search(text):
            return 1.0
        elif '@"' in text:  # strings
            return 0.8
        elif re.search('@[0-9]+', text):
            return 0.7
        elif _oc_message.search(text):
            return 0.8
        return 0

    def get_tokens_unprocessed(self, text):
        """
        Yield the base lexer's ``(index, token, value)`` triples, re-tagging
        ``Name`` and ``Name.Class`` tokens whose text appears in one of the
        Cocoa builtin tables as ``Name.Builtin.Pseudo``.
        """
        # Imported here rather than at module level, as in the original.
        from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
            COCOA_PROTOCOLS, COCOA_PRIMITIVES

        for index, token, value in \
                baselexer.get_tokens_unprocessed(self, text):
            if token is Name or token is Name.Class:
                if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                   or value in COCOA_PRIMITIVES:
                    token = Name.Builtin.Pseudo

            yield index, token, value
class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR Body'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []

    tokens = {
        # Small shared states, pulled in through include() below.
        'integer': [
            (r'-?[0-9]+', Number.Integer),
        ],
        'float': [
            (r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float),
        ],
        'global': [
            (r'\@[a-zA-Z0-9_.]+', Name.Variable.Global),
        ],

        'root': [
            # Attributes on basic blocks
            (words(('liveins', 'successors'), suffix=':'), Keyword),

            # Basic Block Labels
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:',
             Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:',
             Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),

            # Stack references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),

            # Subreg indices
            (r'%subreg\.\w+', Name),

            # Virtual registers
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),

            # Reference to LLVM-IR global
            include('global'),

            # Reference to Intrinsic
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),

            # Comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge',
                    'ult', 'ule'),
                   prefix=r'intpred\(', suffix=r'\)'),
             Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'),
                   prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),

            # Physical registers
            (r'\$\w+', String.Single),

            # Assignment operator
            (r'=', Operator),

            # gMIR Opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),

            # Target independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),

            # Flags
            (words(('killed', 'implicit')), Keyword),

            # ConstantInt values
            (r'i[0-9]+ +', Keyword.Type, 'constantint'),

            # ConstantFloat values
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),

            # Bare immediates
            include('integer'),

            # MMO's
            (r':: *', Operator, 'mmo'),

            # MIR Comments
            (r';.*', Comment),

            # If we get here, assume it's a target instruction
            (r'[a-zA-Z0-9_]+', Name),

            # Everything else that isn't highlighted
            (r'[(), \n]+', Text),
        ],

        # The integer constant from a ConstantInt value
        'constantint': [
            include('integer'),
            # Zero-width match: leave as soon as anything else follows.
            (r'(?=.)', Text, '#pop'),
        ],

        # The floating point constant from a ConstantFloat value
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg': [
            # The bank or class if there is one
            (r' *:(?!:)', Keyword, ('#pop', 'vreg_bank_or_class')),
            # The LLT if there is one
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_bank_or_class': [
            # The unassigned bank/class
            (r' *_', Name.Variable.Magic),
            (r' *[a-zA-Z0-9_]+', Name.Variable),
            # The LLT if there is one
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_type': [
            # Scalar and pointer types
            (r' *[sp][0-9]+', Keyword.Type),
            (r' *<[0-9]+ *x *[sp][0-9]+>', Keyword.Type),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],

        'mmo': [
            (r'\(', Text),
            (r' +', Text),
            (words(('load', 'store', 'on', 'into', 'from', 'align',
                    'monotonic', 'acquire', 'release', 'acq_rel',
                    'seq_cst')),
             Keyword),
            # IR references
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
    }
class WhileyLexer(RegexLexer):
    """
    Lexer for the Whiley programming language.

    .. versionadded:: 2.2
    """
    name = 'Whiley'
    filenames = ['*.whiley']
    aliases = ['whiley']
    mimetypes = ['text/x-whiley']

    # See the language specification:
    # http://whiley.org/download/WhileyLanguageSpec.pdf

    tokens = {
        # Body of a double-quoted string; entered from 'root'.
        'string': [
            (r'"', String, '#pop'),
            (r'\\[btnfr]', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\.', String),
            (r'[^\\"]+', String),
        ],

        'root': [
            # Whitespace
            (r'\s+', Text),

            # Comments
            (r'//.*', Comment.Single),
            # don't parse empty comment as doc comment
            (r'/\*\*/', Comment.Multiline),
            (r'(?s)/\*\*.*?\*/', String.Doc),
            (r'(?s)/\*.*?\*/', Comment.Multiline),

            # Keywords
            (words((
                'if', 'else', 'while', 'for', 'do', 'return',
                'switch', 'case', 'default', 'break', 'continue',
                'requires', 'ensures', 'where', 'assert', 'assume',
                'all', 'no', 'some', 'in', 'is', 'new',
                'throw', 'try', 'catch', 'debug', 'skip', 'fail',
                'finite', 'total'), suffix=r'\b'),
             Keyword.Reserved),
            (words((
                'function', 'method', 'public', 'private', 'protected',
                'export', 'native'), suffix=r'\b'),
             Keyword.Declaration),
            # "constant" & "type" are not keywords unless used in declarations
            (r'(constant|type)(\s+)([a-zA-Z_]\w*)(\s+)(is)\b',
             bygroups(Keyword.Declaration, Text, Name, Text,
                      Keyword.Reserved)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(bool|byte|int|real|any|void)\b', Keyword.Type),
            # "from" is not a keyword unless used with import
            (r'(import)(\s+)(\*)([^\S\n]+)(from)\b',
             bygroups(Keyword.Namespace, Text, Punctuation, Text,
                      Keyword.Namespace)),
            (r'(import)(\s+)([a-zA-Z_]\w*)([^\S\n]+)(from)\b',
             bygroups(Keyword.Namespace, Text, Name, Text,
                      Keyword.Namespace)),
            (r'(package|import)\b', Keyword.Namespace),

            # standard library: https://github.com/Whiley/WhileyLibs/
            (words((
                # types defined in whiley.lang.Int
                'i8', 'i16', 'i32', 'i64',
                'u8', 'u16', 'u32', 'u64',
                'uint', 'nat',

                # whiley.lang.Any
                'toString'), suffix=r'\b'),
             Name.Builtin),

            # byte literal
            (r'[01]+b', Number.Bin),

            # decimal literal
            (r'[0-9]+\.[0-9]+', Number.Float),
            # match "1." but not ranges like "3..5"
            (r'[0-9]+\.(?!\.)', Number.Float),

            # integer literal
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            # character literal
            (r"""'[^\\]'""", String.Char),
            (r"""(')(\\['"\\btnfr])(')""",
             bygroups(String.Char, String.Escape, String.Char)),

            # string literal
            (r'"', String, 'string'),

            # operators and punctuation
            (r'[{}()\[\],.;]', Punctuation),
            (r'[+\-*/%&|<>^!~@=:?'
             # unicode operators
             r'\u2200\u2203\u2205\u2282\u2286\u2283\u2287'
             r'\u222A\u2229\u2264\u2265\u2208\u2227\u2228'
             r']',
             Operator),

            # identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
class CoqLexer(RegexLexer):
    """
    For the `Coq <http://coq.inria.fr/>`_ theorem prover.

    The keyword tuples, ``keyopts`` and the operator character classes are
    class attributes that feed the rules of the ``root`` state.

    .. versionadded:: 1.5
    """

    name = 'Coq'
    aliases = ['coq']
    filenames = ['*.v']
    mimetypes = ['text/x-coq']

    keywords1 = (
        # Vernacular commands
        'Section', 'Module', 'End', 'Require', 'Import', 'Export', 'Variable',
        'Variables', 'Parameter', 'Parameters', 'Axiom', 'Hypothesis',
        'Hypotheses', 'Notation', 'Local', 'Tactic', 'Reserved', 'Scope',
        'Open', 'Close', 'Bind', 'Delimit', 'Definition', 'Let', 'Ltac',
        'Fixpoint', 'CoFixpoint', 'Morphism', 'Relation', 'Implicit',
        'Arguments', 'Set', 'Unset', 'Contextual', 'Strict', 'Prenex',
        'Implicits', 'Inductive', 'CoInductive', 'Record', 'Structure',
        'Canonical', 'Coercion', 'Theorem', 'Lemma', 'Corollary',
        'Proposition', 'Fact', 'Remark', 'Example', 'Proof', 'Goal', 'Save',
        'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite', 'View', 'Search',
        'Show', 'Print', 'Printing', 'All', 'Graph', 'Projections', 'inside',
        'outside', 'Check', 'Global', 'Instance', 'Class', 'Existing',
        'Universe', 'Polymorphic', 'Monomorphic', 'Context'
    )
    keywords2 = (
        # Gallina
        'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct',
        'match', 'end', 'in', 'return', 'let', 'if', 'is', 'then', 'else',
        'for', 'of', 'nosimpl', 'with', 'as',
    )
    keywords3 = (
        # Sorts
        'Type', 'Prop',
    )
    keywords4 = (
        # Tactics
        'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf',
        'intro', 'intros', 'generalize', 'rename', 'pattern', 'after',
        'destruct', 'induction', 'using', 'refine', 'inversion', 'injection',
        'rewrite', 'congr', 'unlock', 'compute', 'ring', 'field', 'replace',
        'fold', 'unfold', 'change', 'cutrewrite', 'simpl', 'have', 'suff',
        'wlog', 'suffices', 'without', 'loss', 'nat_norm', 'assert', 'cut',
        'trivial', 'revert', 'bool_congr', 'nat_congr', 'symmetry',
        'transitivity', 'auto', 'split', 'left', 'right', 'autorewrite',
        'tauto', 'setoid_rewrite', 'intuition', 'eauto', 'eapply',
        'econstructor', 'etransitivity', 'constructor', 'erewrite', 'red',
        'cbv', 'lazy', 'vm_compute', 'native_compute', 'subst',
    )
    keywords5 = (
        # Terminators
        'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega',
        'assumption', 'solve', 'contradiction', 'discriminate',
        'congruence',
    )
    keywords6 = (
        # Control
        'do', 'last', 'first', 'try', 'idtac', 'repeat',
    )
    # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
    # 'downto', 'else', 'end', 'exception', 'external', 'false',
    # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
    # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
    # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
    # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
    # 'type', 'val', 'virtual', 'when', 'while', 'with'
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-', r'-\.',
        '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<', '<-',
        '<->', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~', '=>',
        r'/\\', r'\\/', r'\{\|', r'\|\}',
        u'Π', u'λ',
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'nat', 'bool', 'string', 'ascii', 'list')

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\(\*', Comment, 'comment'),
            (words(keywords1, prefix=r'\b', suffix=r'\b'), Keyword.Namespace),
            (words(keywords2, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords3, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(keywords4, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(keywords5, prefix=r'\b', suffix=r'\b'), Keyword.Pseudo),
            (words(keywords6, prefix=r'\b', suffix=r'\b'), Keyword.Reserved),
            # (r'\b([A-Z][\w\']*)(\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            # The tuple is reversed before being joined into an alternation.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Prefixed and exponent forms are tried before the plain integer
            # rule: every one of them starts with a digit, so listing the
            # integer rule first would make them unreachable.
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            # The fractional part starts with a literal dot.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            # Comments nest: each opener pushes, each closer pops.
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^"]+', String.Double),
            # A doubled quote stays inside the string.
            (r'""', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
            default('#pop')
        ],
    }

    def analyse_text(text):
        """
        Return ``True`` when ``text`` begins with a ``(*`` comment opener;
        otherwise fall through and return ``None``.
        """
        if text.startswith('(*'):
            return True
class GDScriptLexer(RegexLexer):
    """
    For `GDScript <https://www.godotengine.org>`_ source code (the scripting
    language of the Godot engine).
    """

    name = 'GDScript'
    aliases = ['gdscript', 'gd']
    filenames = ['*.gd']
    mimetypes = ['text/x-gdscript', 'application/x-gdscript']

    def innerstring_rules(ttype):
        """Return the rules shared by every string body state.

        Called at class-definition time (hence no ``self``); *ttype* is the
        token type given to the plain text of the string.
        """
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Text, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Text, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|$]',
             Operator),
            # These two must precede include('keywords'): the keyword list
            # also contains 'func' and 'class' and would otherwise always win,
            # leaving the 'funcname' / 'classname' states unreachable.
            (r'(func)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            include('keywords'),
            include('builtins'),
            # raw strings: no 'stringescape' state is combined in
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings: escapes are recognised via 'stringescape'
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'and', 'in', 'not', 'or', 'as', 'breakpoint', 'class',
                'class_name', 'extends', 'is', 'func', 'setget', 'signal',
                'tool', 'const', 'enum', 'export', 'onready', 'static', 'var',
                'break', 'continue', 'if', 'elif', 'else', 'for', 'pass',
                'return', 'match', 'while', 'remote', 'master', 'puppet',
                'remotesync', 'mastersync', 'puppetsync'),
                suffix=r'\b'), Keyword),
        ],
        'builtins': [
            (words((
                'Color8', 'ColorN', 'abs', 'acos', 'asin', 'assert', 'atan',
                'atan2', 'bytes2var', 'ceil', 'char', 'clamp', 'convert',
                'cos', 'cosh', 'db2linear', 'decimals', 'dectime', 'deg2rad',
                'dict2inst', 'ease', 'exp', 'floor', 'fmod', 'fposmod',
                'funcref', 'hash', 'inst2dict', 'instance_from_id', 'is_inf',
                'is_nan', 'lerp', 'linear2db', 'load', 'log', 'max', 'min',
                'nearest_po2', 'pow', 'preload', 'print', 'print_stack',
                'printerr', 'printraw', 'prints', 'printt', 'rad2deg',
                'rand_range', 'rand_seed', 'randf', 'randi', 'randomize',
                'range', 'round', 'seed', 'sign', 'sin', 'sinh', 'sqrt',
                'stepify', 'str', 'str2var', 'tan', 'tanh', 'type_exist',
                'typeof', 'var2bytes', 'var2str', 'weakref', 'yield'),
                prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
            (r'((?<!\.)(self|false|true)|(PI|TAU|NAN|INF))\b',
             Name.Builtin.Pseudo),
            # Every entry needs its own comma: adjacent string literals are
            # silently concatenated ('NodePath' 'Vector2' -> 'NodePathVector2').
            (words((
                'bool', 'int', 'float', 'String', 'NodePath', 'Vector2',
                'Rect2', 'Transform2D', 'Vector3', 'Rect3', 'Plane', 'Quat',
                'Basis', 'Transform', 'Color', 'RID', 'Object', 'Dictionary',
                'Array', 'PoolByteArray', 'PoolIntArray', 'PoolRealArray',
                'PoolStringArray', 'PoolVector2Array', 'PoolVector3Array',
                'PoolColorArray', 'null',
            ), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin.Type),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+j?', Number.Integer),
        ],
        'name': [
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape),
        ],
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double'),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single'),
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double),
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single),
        ],
    }
def keywords(*args):
    """Pass *args* to ``words`` with ``\\b`` as both prefix and suffix."""
    boundary = r'\b'
    return words(args, prefix=boundary, suffix=boundary)
class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    .. versionadded:: 2.2
    """
    # Syntax from syntax/sas.vim by James Kidd <*****@*****.**>

    name = 'SAS'
    aliases = ['sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    flags = re.IGNORECASE | re.MULTILINE

    # Macro names; matched with a leading '%' in the 'keywords' state.
    builtins_macros = (
        "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
        "display", "do", "else", "end", "eval", "global", "goto", "if",
        "index", "input", "keydef", "label", "left", "length", "let",
        "local", "lowcase", "macro", "mend", "nrquote", "nrstr", "put",
        "qleft", "qlowcase", "qscan", "qsubstr", "qsysfunc", "qtrim",
        "quote", "qupcase", "scan", "str", "substr", "superq", "syscall",
        "sysevalf", "sysexec", "sysfunc", "sysget", "syslput", "sysprod",
        "sysrc", "sysrput", "then", "to", "trim", "unquote", "until",
        "upcase", "verify", "while", "window")

    builtins_conditionals = (
        "do", "if", "then", "else", "end", "until", "while")

    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4",
        "catname", "continue", "datalines", "datalines4", "delete", "delim",
        "delimiter", "display", "dm", "drop", "endsas", "error", "file",
        "filename", "footnote", "format", "goto", "in", "infile", "informat",
        "input", "keep", "label", "leave", "length", "libname", "link",
        "list", "lostcard", "merge", "missing", "modify", "options", "output",
        "out", "page", "put", "redirect", "remove", "rename", "replace",
        "retain", "return", "select", "set", "skip", "startsas", "stop",
        "title", "update", "waitsas", "where", "window", "x", "systask")

    builtins_sql = (
        "add", "and", "alter", "as", "cascade", "check", "create",
        "delete", "describe", "distinct", "drop", "foreign", "from",
        "group", "having", "index", "insert", "into", "in", "key", "like",
        "message", "modify", "msgtype", "not", "null", "on", "or",
        "order", "primary", "references", "reset", "restrict", "select",
        "set", "table", "unique", "update", "validate", "view", "where")

    # Function names; only matched when directly followed by '('.
    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
        "attrn", "band", "betainv", "blshift", "bnot", "bor",
        "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
        "close", "cnonct", "collate", "compbl", "compound",
        "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
        "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb",
        "depdbsl", "depsl", "depsyd",
        "deptab", "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
        "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
        "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
        "fexist", "fget", "fileexist", "filename", "fileref",
        "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
        "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
        "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
        "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
        "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
        "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
        "kurtosis", "lag", "lbound", "left", "length", "lgamma",
        "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
        "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
        "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
        "normal", "note", "npv", "open", "ordinal", "pathname",
        "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam",
        "probhypr", "probit", "probnegb", "probnorm", "probt",
        "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
        "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
        "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
        "rewind", "right", "round", "saving", "scan", "sdf", "second",
        "sign", "sin", "sinh", "skewness", "soundex", "spedis",
        "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
        "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
        "tnonct", "today", "translate", "tranwrd", "trigamma",
        "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
        "varfmt", "varinfmt", "varlabel", "varlen", "varname",
        "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
        "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
        "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
        "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel",
        "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
        "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
        "zipnamel", "zipstate")

    tokens = {
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            # fallback: consume one character so lexing always advances
            (r'.', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        'comments': [
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*(.|\n)*?;', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]',
             Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        'data': [
            # Non-greedy on purpose: the raw data block stops at the FIRST
            # line holding only ';'.  A greedy match would run to the last
            # such line in the file and swallow any code in between.
            (r'(.|\n)*?^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            include('general'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_sql,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_conditionals,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_macros,
                   prefix=r'%',
                   suffix=r'\b'),
             Name.Builtin),
            (words(builtins_functions,
                   prefix=r'\b',
                   suffix=r'\('),
             Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-z_]\w{0,31}\.?', Name.Variable),
            (r'%[a-z_]\w{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            ('\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes
            # (r'&', Name.Variable, 'validvar'),
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            (r'&', Name.Variable, 'validvar'),
            (r'[^$&"\\]+', String),
            (r'[$"\\]', String),
        ],
        'validvar': [
            (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
             Number),
        ],
        'special': [
            (r'(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)',
             Keyword.Constant),
        ],
        # 'operators': [
        #     (r'(-|=|<=|>=|<|>|<>|&|!=|'
        #      r'\||\*|\+|\^|/|!|~|~=)', Operator)
        # ],
    }
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    .. versionadded:: 0.9
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
    mimetypes = ['text/x-erlang']

    keywords = (
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    )

    builtins = (  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    )

    operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
    word_operators = (
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    )

    # Regex fragments reused by several rules below.
    atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_]\w*)'

    esc_char_re = r'[bdefnrstv\'"\\]'
    esc_octal_re = r'[0-7][0-7]?[0-7]?'
    esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
    esc_ctrl_re = r'\^[a-zA-Z]'
    escape_re = r'(?:\\(?:' + esc_char_re + r'|' + esc_octal_re + r'|' + esc_hex_re + r'|' + esc_ctrl_re + r'))'

    macro_re = r'(?:' + variable_re + r'|' + atom_re + r')'

    # Radix prefix of a Base#Digits integer literal (2..36).
    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            # The float rule must come before the integer rule; otherwise the
            # integer rule consumes the digits before the '.' and this rule
            # never fires.  The '.' is escaped so it cannot match an arbitrary
            # character, and an optional exponent (e.g. 2.3e-3) is accepted.
            (r'[+-]?\d+\.\d+(?:[eE][+-]?\d+)?', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?' + macro_re, Name.Constant),
            (r'\$(?:' + escape_re + r'|\\[ %]|[^\\])', String.Char),
            # '#record' optionally followed by '.field'; the group is
            # non-capturing ('(?:'), not an optional colon ('(:?').
            (r'#' + atom_re + r'(?:\.' + atom_re + r')?', Name.Label),

            # Erlang script shebang
            (r'\A#!.+\n', Comment.Hashbang),

            # EEP 43: Maps
            # http://www.erlang.org/eeps/eep-0043.html
            (r'#\{', Punctuation, 'map_key'),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
        'map_key': [
            include('root'),
            (r'=>', Punctuation, 'map_val'),
            (r':=', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            # zero-width: leave the '}' for 'map_key' to consume
            (r'(?=\})', Punctuation, '#pop'),
        ],
    }
def gen_rubystrings_rules():
    """Build and return the dict of string-related lexer states for Ruby.

    The returned dict holds a 'strings' state plus the 'simple-*' and
    per-bracket '*-intp-string', '*-string' and '*-regex' states it refers to.
    """

    def make_delimited_callback(ttype, substate):
        # Returns a lexer callback that emits the opening delimiter (group 1)
        # and closing delimiter (group 4) as `ttype`, and re-lexes the body
        # (group 3) in `substate`.
        def callback(self, match, ctx):
            yield match.start(1), ttype, match.group(1)
            body_ctx = LexerContext(match.group(3), 0, [substate])
            for offset, token, value in self.get_tokens_unprocessed(context=body_ctx):
                yield match.start(3) + offset, token, value
            yield match.start(4), ttype, match.group(4)
            ctx.pos = match.end()
        return callback

    # closing delimiter of the regex variant may carry [mixounse]* flags
    intp_regex_callback = make_delimited_callback(String.Regex, 'interpolated-regex')
    intp_string_callback = make_delimited_callback(String.Other, 'interpolated-string')

    string_rules = [
        # easy ones
        (r'\:@{0,2}[a-zA-Z_]\w*[\!\?]?', String.Symbol),
        (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
        (r":'(\\\\|\\'|[^'])*'", String.Symbol),
        (r"'(\\\\|\\'|[^'])*'", String.Single),
        (r':"', String.Symbol, 'simple-sym'),
        (r'([a-zA-Z_]\w*)(:)(?!:)',
         bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
        (r'"', String.Double, 'simple-string'),
        (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
    ]
    states = {'strings': string_rules}

    # double-quoted string and symbol
    simple_kinds = (
        ('string', String.Double, '"'),
        ('sym', String.Symbol, '"'),
        ('backtick', String.Backtick, '`'),
    )
    for kind, token, closer in simple_kinds:
        states['simple-' + kind] = [
            include('string-intp-escaped'),
            (r'[^\\' + closer + r'#]+', token),
            (r'[\\#]', token),
            (closer, token, '#pop'),
        ]

    # braced quoted strings
    bracket_kinds = (
        ('\\{', '\\}', 'cb'),
        ('\\[', '\\]', 'sb'),
        ('\\(', '\\)', 'pa'),
        ('<', '>', 'ab'),
    )
    for opener, closer, tag in bracket_kinds:
        pair = opener + closer
        escaped_bracket = r'\\[\\' + pair + ']'
        lone_special = r'[\\#' + pair + ']'
        plain_run = r'[^\\#' + pair + ']+'

        intp_state = tag + '-intp-string'
        states[intp_state] = [
            (escaped_bracket, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            include('string-intp-escaped'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        string_rules.append((r'%[QWx]?' + opener, String.Other, intp_state))

        raw_state = tag + '-string'
        states[raw_state] = [
            (escaped_bracket, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        string_rules.append((r'%[qsw]' + opener, String.Other, raw_state))

        regex_state = tag + '-regex'
        states[regex_state] = [
            (escaped_bracket, String.Regex),
            (opener, String.Regex, '#push'),
            (closer + '[mixounse]*', String.Regex, '#pop'),
            include('string-intp'),
            (lone_special, String.Regex),
            (plain_run, String.Regex),
        ]
        string_rules.append((r'%r' + opener, String.Regex, regex_state))

    # these must come after %<brace>!
    string_rules.extend([
        # %r regex
        (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
         intp_regex_callback),
        # regular fancy strings with qsw
        (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
        (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
        # special forms of fancy strings after operators or
        # in method calls with braces
        (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # and because of fixed width lookbehinds the whole thing a
        # second time for line startings...
        (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # all regular fancy strings without qsw
        (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
    ])

    return states
class SolidityLexer(RegexLexer):
    """
    For Solidity source code.

    .. versionadded:: 2.5
    """

    name = 'Solidity'
    aliases = ['solidity']
    filenames = ['*.sol']
    mimetypes = []

    flags = re.MULTILINE | re.UNICODE

    # Elementary type names, optionally followed by a bit width (8..256 in
    # steps of 8); reused by two rules in 'root'.
    datatype = (
        r'\b(address|bool|(?:(?:bytes|hash|int|string|uint)(?:8|16|24|32|40|48|56|64'
        r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208'
        r'|216|224|232|240|248|256)?))\b'
    )

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            (r'\bpragma\s+solidity\b', Keyword, 'pragma'),
            (r'\b(contract)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword, Whitespace, Name.Entity)),
            # typed declaration: type, optional visibility, variable name
            (datatype + r'(\s+)((?:external|public|internal|private)\s+)?' +
             r'([a-zA-Z_]\w*)',
             bygroups(Keyword.Type, Whitespace, Keyword, Name.Variable)),
            (r'\b(enum|event|function|struct)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword.Type, Whitespace, Name.Variable)),
            (r'\b(msg|block|tx)\.([A-Za-z_][a-zA-Z0-9_]*)\b', Keyword),
            (words((
                'block', 'break', 'constant', 'constructor', 'continue',
                'contract', 'do', 'else', 'external', 'false', 'for',
                'function', 'if', 'import', 'inherited', 'internal', 'is',
                'library', 'mapping', 'memory', 'modifier', 'msg', 'new',
                'payable', 'private', 'public', 'require', 'return',
                'returns', 'struct', 'suicide', 'throw', 'this', 'true',
                'tx', 'var', 'while'), prefix=r'\b', suffix=r'\b'),
             Keyword.Type),
            (words(('keccak256',), prefix=r'\b', suffix=r'\b'), Name.Builtin),
            (datatype, Keyword.Type),
            include('constants'),
            (r'[a-zA-Z_]\w*', Text),
            (r'[!<=>+*/-]', Operator),
            (r'[.;:{}(),\[\]]', Punctuation),
        ],
        'comments': [
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # unterminated block comment: runs to the end of the input
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'constants': [
            (r'("(\\"|.)*?")', String.Double),
            (r"('(\\'|.)*?')", String.Single),
            (r'\b0[xX][0-9a-fA-F]+\b', Number.Hex),
            (r'\b\d+\b', Number.Decimal),
        ],
        'pragma': [
            include('whitespace'),
            include('comments'),
            # Version constraint: accept the comparison operators
            # ^ ~ > >= < <= = in front of a three-part version number, so
            # that e.g. '<=0.8.0' or '>0.4.0' do not yield error tokens.
            (r'(\^|~|>=?|<=?|=)(\s*)(\d+\.\d+\.\d+)',
             bygroups(Operator, Whitespace, Keyword)),
            (r';', Punctuation, '#pop'),
        ],
        'whitespace': [
            # \s+ already covers newlines, so no separate '\n' rule is needed
            (r'\s+', Whitespace),
        ],
    }
def gen_crystalstrings_rules():
    """Build and return the dict of string-related lexer states for Crystal.

    The returned dict holds a 'strings' state plus the 'simple-*' and
    per-bracket '*-intp-string', '*-string' and '*-regex' states it refers to.
    """

    def make_delimited_callback(ttype, substate):
        # Returns a lexer callback that emits the opening delimiter (group 1)
        # and closing delimiter (group 4) as `ttype`, and re-lexes the body
        # (group 3) in `substate`.
        def callback(self, match, ctx):
            yield match.start(1), ttype, match.group(1)
            body_ctx = LexerContext(match.group(3), 0, [substate])
            for offset, token, value in self.get_tokens_unprocessed(context=body_ctx):
                yield match.start(3) + offset, token, value
            yield match.start(4), ttype, match.group(4)
            ctx.pos = match.end()
        return callback

    # closing delimiter of the regex variant may carry [imsx]* flags
    intp_regex_callback = make_delimited_callback(String.Regex, 'interpolated-regex')
    intp_string_callback = make_delimited_callback(String.Other, 'interpolated-string')

    string_rules = [
        (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
        (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
        (r":'(\\\\|\\'|[^'])*'", String.Symbol),
        # This allows arbitrary text after '\ for simplicity
        (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
        (r':"', String.Symbol, 'simple-sym'),
        # Crystal doesn't have "symbol:"s but this simplifies function args
        (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
        (r'"', String.Double, 'simple-string'),
        (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
    ]
    states = {'strings': string_rules}

    # double-quoted string and symbol
    simple_kinds = (
        ('string', String.Double, '"'),
        ('sym', String.Symbol, '"'),
        ('backtick', String.Backtick, '`'),
    )
    for kind, token, closer in simple_kinds:
        if kind == 'sym':
            escape_state = 'string-escaped'
        else:
            escape_state = 'string-intp-escaped'
        states['simple-' + kind] = [
            include(escape_state),
            (r'[^\\' + closer + r'#]+', token),
            (r'[\\#]', token),
            (closer, token, '#pop'),
        ]

    # braced quoted strings
    bracket_kinds = (
        ('\\{', '\\}', '{}', 'cb'),
        ('\\[', '\\]', '\\[\\]', 'sb'),
        ('\\(', '\\)', '()', 'pa'),
        ('<', '>', '<>', 'ab'),
    )
    for opener, closer, charclass, tag in bracket_kinds:
        escaped_bracket = r'\\[\\' + charclass + ']'
        lone_special = r'[\\#' + charclass + ']'
        plain_run = r'[^\\#' + charclass + ']+'

        intp_state = tag + '-intp-string'
        states[intp_state] = [
            # only an escaped opening bracket is matched here
            (r'\\[' + opener + ']', String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            include('string-intp-escaped'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        string_rules.append((r'%' + opener, String.Other, intp_state))

        raw_state = tag + '-string'
        states[raw_state] = [
            (escaped_bracket, String.Other),
            (opener, String.Other, '#push'),
            (closer, String.Other, '#pop'),
            (lone_special, String.Other),
            (plain_run, String.Other),
        ]
        # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
        string_rules.append((r'%[wi]' + opener, String.Other, raw_state))

        regex_state = tag + '-regex'
        states[regex_state] = [
            (escaped_bracket, String.Regex),
            (opener, String.Regex, '#push'),
            (closer + '[imsx]*', String.Regex, '#pop'),
            include('string-intp'),
            (lone_special, String.Regex),
            (plain_run, String.Regex),
        ]
        string_rules.append((r'%r' + opener, String.Regex, regex_state))

    # these must come after %<brace>!
    string_rules.extend([
        # %r regex
        (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
         intp_regex_callback),
        # regular fancy strings with qsw
        (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
        # special forms of fancy strings after operators or
        # in method calls with braces
        (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # and because of fixed width lookbehinds the whole thing a
        # second time for line startings...
        (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
         bygroups(Text, String.Other, None)),
        # all regular fancy strings without qsw
        (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
         intp_string_callback),
    ])

    return states
words( ( "azimuth", "background-attachment", "background-color", "background-image", "background-position", "background-repeat", "background", "border-bottom-color", "border-bottom-style", "border-bottom-width", "border-left-color", "border-left-style", "border-left-width", "border-right", "border-right-color", "border-right-style", "border-right-width", "border-top-color", "border-top-style", "border-top-width", "border-bottom", "border-collapse", "border-left", "border-width", "border-color", "border-spacing", "border-style", "border-top", "border", "caption-side", "clear", "clip", "color", "content", "counter-increment", "counter-reset", "cue-after", "cue-before", "cue", "cursor", "direction", "display", "elevation", "empty-cells", "float", "font-family", "font-size", "font-size-adjust", "font-stretch", "font-style", "font-variant", "font-weight", "font", "height", "letter-spacing", "line-height", "list-style-type", "list-style-image", "list-style-position", "list-style", "margin-bottom", "margin-left", "margin-right", "margin-top", "margin", "marker-offset", "marks", "max-height", "max-width", "min-height", "min-width", "opacity", "orphans", "outline", "outline-color", "outline-style", "outline-width", "overflow", "padding-bottom", "padding-left", "padding-right", "padding-top", "padding", "page", "page-break-after", "page-break-before", "page-break-inside", "pause-after", "pause-before", "pause", "pitch", "pitch-range", "play-during", "position", "quotes", "richness", "right", "size", "speak-header", "speak-numeral", "speak-punctuation", "speak", "speech-rate", "stress", "table-layout", "text-align", "text-decoration", "text-indent", "text-shadow", "text-transform", "top", "unicode-bidi", "vertical-align", "visibility", "voice-family", "volume", "white-space", "widows", "width", "word-spacing", "z-index", "bottom", "left", "above", "absolute", "always", "armenian", "aural", "auto", "avoid", "baseline", "behind", "below", "bidi-override", "blink", "block", 
"bold", "bolder", "both", "capitalize", "center-left", "center-right", "center", "circle", "cjk-ideographic", "close-quote", "collapse", "condensed", "continuous", "crop", "crosshair", "cross", "cursive", "dashed", "decimal-leading-zero", "decimal", "default", "digits", "disc", "dotted", "double", "e-resize", "embed", "extra-condensed", "extra-expanded", "expanded", "fantasy", "far-left", "far-right", "faster", "fast", "fixed", "georgian", "groove", "hebrew", "help", "hidden", "hide", "higher", "high", "hiragana-iroha", "hiragana", "icon", "inherit", "inline-table", "inline", "inset", "inside", "invert", "italic", "justify", "katakana-iroha", "katakana", "landscape", "larger", "large", "left-side", "leftwards", "level", "lighter", "line-through", "list-item", "loud", "lower-alpha", "lower-greek", "lower-roman", "lowercase", "ltr", "lower", "low", "medium", "message-box", "middle", "mix", "monospace", "n-resize", "narrower", "ne-resize", "no-close-quote", "no-open-quote", "no-repeat", "none", "normal", "nowrap", "nw-resize", "oblique", "once", "open-quote", "outset", "outside", "overline", "pointer", "portrait", "px", "relative", "repeat-x", "repeat-y", "repeat", "rgb", "ridge", "right-side", "rightwards", "s-resize", "sans-serif", "scroll", "se-resize", "semi-condensed", "semi-expanded", "separate", "serif", "show", "silent", "slow", "slower", "small-caps", "small-caption", "smaller", "soft", "solid", "spell-out", "square", "static", "status-bar", "super", "sw-resize", "table-caption", "table-cell", "table-column", "table-column-group", "table-footer-group", "table-header-group", "table-row", "table-row-group", "text", "text-bottom", "text-top", "thick", "thin", "transparent", "ultra-condensed", "ultra-expanded", "underline", "upper-alpha", "upper-latin", "upper-roman", "uppercase", "url", "visible", "w-resize", "wait", "wider", "x-fast", "x-high", "x-large", "x-loud", "x-low", "x-small", "x-soft", "xx-large", "xx-small", "yes", ), suffix=r"\b", ),