def format(self, extension=None):
    """Write pretty HTML logs."""
    M = self.M
    # pygments lexing setup:
    # (pygments HTML-formatter handles HTML-escaping)
    import pygments
    from pygments.lexers import IrcLogsLexer
    from pygments.formatters import HtmlFormatter
    import pygments.token as token
    from pygments.lexer import bygroups
    # Don't do any encoding in this function with pygments.
    # That's only right before the i/o functions in the Config
    # object.
    # NB: the pygments option is named "outencoding"; an
    # "output_encoding" keyword would be silently ignored.
    formatter = HtmlFormatter(lineanchors='l', full=True,
                              style=M.config.pygmentizeStyle,
                              outencoding=self.M.config.output_codec)
    Lexer = IrcLogsLexer
    Lexer.tokens['msg'][1:1] = \
        [  # match: #topic commands
           (r"(\#topic[ \t\f\v]*)(.*\n)",
            bygroups(token.Keyword, token.Generic.Heading), '#pop'),
           # match: #command (others)
           (r"(\#[^\s]+[ \t\f\v]*)(.*\n)",
            bygroups(token.Keyword, token.Generic.Strong), '#pop'),
        ]
    lexer = Lexer()
    #from rkddp.interact import interact ; interact()
    out = pygments.highlight("\n".join(M.lines), lexer, formatter)
    return out
def string_rules(state): return [ (r'(")((?:[^\r\n"\\]|(?:\\.))+)(")', bygroups(Text, String, Text), state), (r'(")((?:[^\r\n"\\]|(?:\\.))+)', bygroups(Text, String), state), (r"(')((?:[^\r\n'\\]|(?:\\.))+)(')", bygroups(Text, String, Text), state), (r"(')((?:[^\r\n'\\]|(?:\\.))+)", bygroups(Text, String), state), (r'([^\s\'\\]|(\\.))+', String, state) ]
def gen_elixir_sigil_rules(): # all valid sigil terminators (excluding heredocs) terminators = [ (r'\{', r'\}', 'cb'), (r'\[', r'\]', 'sb'), (r'\(', r'\)', 'pa'), (r'<', r'>', 'ab'), (r'/', r'/', 'slas'), (r'\|', r'\|', 'pipe'), ('"', '"', 'quot'), ("'", "'", 'apos'), ] # heredocs have slightly different rules triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] token = String.Other states = {'sigils': []} for term, name in triquotes: states['sigils'] += [ (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-intp')), (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-no-intp')), ] states[name + '-end'] = [ (r'[a-zA-Z]+', token, '#pop'), default('#pop'), ] states[name + '-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_interpol'), ] states[name + '-no-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_no_interpol'), ] for lterm, rterm, name in terminators: states['sigils'] += [ (r'~[a-z]' + lterm, token, name + '-intp'), (r'~[A-Z]' + lterm, token, name + '-no-intp'), ] states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) states[name + '-no-intp'] = \ gen_elixir_sigstr_rules(rterm, token, interpol=False) return states
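# Sketch: inspect the state table the generator above builds (assumes the
# gen_elixir_sigstr_rules/include/default helpers from the surrounding
# pygments.lexers.erlang module are in scope). Besides the shared 'sigils'
# entry rules, each terminator name gets paired '-intp'/'-no-intp' states,
# and the heredoc forms get '-end' states as well.
sigil_states = gen_elixir_sigil_rules()
assert 'sigils' in sigil_states
assert 'cb-intp' in sigil_states and 'cb-no-intp' in sigil_states
assert 'triquot-end' in sigil_states and 'triapos-intp' in sigil_states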
def format(self, extension=None):
    """Write pretty HTML logs."""
    M = self.M
    # pygments lexing setup:
    # (pygments HTML-formatter handles HTML-escaping)
    import re
    import pygments
    from pygments.lexers import IrcLogsLexer
    from pygments.formatters import HtmlFormatter
    import pygments.token as token
    from pygments.lexer import bygroups
    # Don't do any encoding in this function with pygments.
    # That's only right before the i/o functions in the Config
    # object.
    formatter = HtmlFormatter(
        lineanchors='l', full=True,
        style=M.config.pygmentizeStyle,
        outencoding=self.M.config.output_codec)
    Lexer = IrcLogsLexer
    Lexer.tokens['msg'][1:1] = \
        [  # match: #topic commands
           (r"(\#topic[ \t\f\v]*)(.*\n)",
            bygroups(token.Keyword, token.Generic.Heading), '#pop'),
           # match: #command (others)
           (r"(\#[^\s]+[ \t\f\v]*)(.*\n)",
            bygroups(token.Keyword, token.Generic.Strong), '#pop'),
        ]
    lexer = Lexer()
    #from rkddp.interact import interact ; interact()
    out = pygments.highlight("\n".join(M.lines), lexer, formatter)
    # Hack it to add "pre { white-space: pre-wrap; }", which makes
    # it wrap the pygments html logs.  I think that in a newer
    # version of pygments, the "prestyles" HtmlFormatter option
    # would do this, but I want to maintain compatibility with
    # lenny.  Thus, I do these substitution hacks to add the
    # format in.  Thanks to a comment on the blog of Francis
    # Giannaros (http://francis.giannaros.org) for the suggestion
    # and instructions for how.
    out, n = re.subn(r"(\n\s*pre\s*\{[^}]+;\s*)(\})",
                     r"\1\n white-space: pre-wrap;\2", out, count=1)
    if n == 0:
        out = re.sub(r"(\n\s*</style>)",
                     r"\npre { white-space: pre-wrap; }\1", out, count=1)
    return out
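# Minimal demonstration of the pre-wrap fallback above: the first pattern
# rewrites an existing "pre { ... }" CSS rule in place; only if none is
# found does the second pattern append a rule just before </style>.
import re
_css = "<style>\npre { line-height: 125%; }\n</style>"
_fixed, _n = re.subn(r"(\n\s*pre\s*\{[^}]+;\s*)(\})",
                     r"\1\n      white-space: pre-wrap;\2", _css, count=1)
if _n == 0:
    _fixed = re.sub(r"(\n\s*</style>)",
                    r"\npre { white-space: pre-wrap; }\1", _fixed, count=1)
assert "white-space: pre-wrap" in _fixed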
def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _ws=_ws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' % (_nl, _ws, _nl, _ws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ]
def _make_call_state(compound, _label=_label, _label_compound=_label_compound): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(:?)(%s)' % (_label_compound if compound else _label), bygroups(Punctuation, Name.Label), '#pop')) return state
def gen_elixir_string_rules(name, symbol, token): states = {} states['string_' + name] = [ (r'[^#%s\\]+' % (symbol,), token), include('escapes'), (r'\\.', token), (r'(%s)' % (symbol,), bygroups(token), "#pop"), include('interpol') ] return states
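# Sketch: each call returns a one-entry state table keyed 'string_<name>'
# (assumes pygments.lexer.include/bygroups are in scope, as in the
# surrounding module). For Elixir's double-quoted strings:
from pygments.token import String
double_quoted = gen_elixir_string_rules('double', '"', String.Double)
assert list(double_quoted) == ['string_double']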
def _objdump_lexer_tokens(asm_lexer): """ Common objdump lexer tokens to wrap an ASM lexer. """ hex_re = r'[0-9A-Za-z]' return { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('('+hex_re+'+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(asm_lexer))), # Code line with ascii ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes (r'\t\.\.\.$', Text), # Relocation line # (With offset) (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), (r'[^\n]+\n', Other) ] }
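# A minimal sketch of how such a helper is consumed: the disassembly column
# is delegated to the wrapped ASM lexer. GasLexer (AT&T syntax) ships with
# pygments; the class name below is illustrative, and the surrounding
# module's pygments.lexer imports (using, bygroups) are assumed in scope.
from pygments.lexer import RegexLexer
from pygments.lexers.asm import GasLexer

class GasObjdumpLexer(RegexLexer):  # illustrative name
    name = 'gas-objdump'
    aliases = ['gas-objdump']
    tokens = _objdump_lexer_tokens(GasLexer)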
def _make_label_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' % (_label_compound if compound else _label, _string, _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl, _punct, r')' if compound else ''), bygroups(Name.Label, Comment.Single), '#pop')) return state
def _make_follow_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _space=_space, _start_label=_start_label, _token=_token, _token_compound=_token_compound, _ws=_ws): suffix = '/compound' if compound else '' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'%s([%s]*)(%s)(.*)' % (_start_label, _ws, _label_compound if compound else _label), bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)), include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'), (r'\|\|?|&&?', Punctuation, '#pop'), include('text') ] return state
def get_lexer_tokens(tag_highlighting=False, project_support=False):
    """Return the tokens needed for RegexLexer

    :param tag_highlighting: if True we support tag highlighting. See
        AdaLexerWithTags documentation
    :type tag_highlighting: bool
    :param project_support: if True, support additional keywords associated
        with project files.
    :type project_support: bool

    :return: a dictionary following the structure required by RegexLexer
    :rtype: dict
    """
    if project_support:
        project_pattern = r'project\s+|'
        project_pattern2 = r'project|'
    else:
        project_pattern = r''
        project_pattern2 = r''

    result = {
        'root': [
            # Comments
            (r'--.*$', Comment),
            # Character literal
            (r"'.'", String.Char),
            # Strings
            (r'"[^"]*"', String),
            # Numeric
            # Based literal
            (r'[0-9][0-9_]*#[0-9a-f][0-9a-f_]*#(E[\+-]?[0-9][0-9_]*)?',
             Number.Integer),
            (r'[0-9][0-9_]*#[0-9a-f][0-9a-f_]*'
             r'\.[0-9a-f][0-9a-f_]*#(E[\+-]?[0-9][0-9_]*)?', Number.Float),
            # Decimal literal
            (r'[0-9][0-9_]*\.[0-9][0-9_]*(E[\+-]?[0-9][0-9_]*)?',
             Number.Float),
            (r'[0-9][0-9_]*(E[\+-]?[0-9][0-9_]*)?', Number.Integer),
            # Match use and with statements
            # The first part of the pattern is to be sure we don't match
            # for/use constructs.
            (r'(\n\s*|;\s*)(with|use)(\s+[\w\.]+)',
             bygroups(Punctuation, Keyword.Reserved, Name.Namespace)),
            # Match procedure, package and function declarations
            (r'end\s+(if|loop|record)', Keyword),
            (r'(package(?:\s+body)?\s+|' + project_pattern +
             r'function\s+|end\s+|procedure\s+)([\w\.]+)',
             bygroups(Keyword, Name.Function)),
            # Ada 2012 standard attributes, GNAT specific ones and
            # Spark 2014 ones ('Update and 'Loop_Entry)
            # (reversed order to avoid having for example Max before
            # Max_Alignment_For_Allocation).
            (r'\'(Write|Width|Wide_Width|Wide_Wide_Width|Wide_Wide_Value|'
             r'Wide_Wide_Image|Wide_Value|Wide_Image|Word_Size|Wchar_T_Size|'
             r'Version|Value_Size|Value|Valid_Scalars|VADS_Size|Valid|Val|'
             r'Update|Unrestricted_Access|Universal_Literal_String|'
             r'Unconstrained_Array|Unchecked_Access|Unbiased_Rounding|'
             r'Truncation|Type_Class|To_Address|Tick|Terminated|'
             r'Target_Name|Tag|System_Allocator_Alignment|Succ|Stub_Type|'
             r'Stream_Size|Storage_Unit|Storage_Size|Storage_Pool|Small|Size|'
             r'Simple_Storage_Pool|Signed_Zeros|Scaling|Scale|'
             r'Scalar_Storage_Order|Safe_Last|Safe_Large|Safe_First|'
             r'Safe_Emax|Rounding|Round|Result|Remainder|Ref|Read|'
             r'Range_Length|Range|Priority|Pred|'
             r'Position|Pos|Pool_Address|Passed_By_Reference|Partition_Id|'
             r'Overlaps_Storage|Output|Old|Object_Size|Null_Parameter|Modulus|'
             r'Model_Small|Model_Mantissa|Model_Epsilon|Model_Emin|Model|Mod|'
             r'Min|Mechanism_Code|Maximum_Alignment|'
             r'Max_Size_In_Storage_Elements|Max_Priority|'
             r'Max_Interrupt_Priority|Max_Alignment_For_Allocation|'
             r'Max|Mantissa|Machine_Size|Machine_Rounds|Machine_Rounding|'
             r'Machine_Radix|Machine_Overflows|Machine_Mantissa|Machine_Emin|'
             r'Machine_Emax|Machine|Loop_Entry|Length|Leading_Part|'
             r'Last_Valid|Last_Bit|Last|Large|Invalid_Value|Integer_Value|'
             r'Input|Image|Img|Identity|Has_Same_Storage|Has_Discriminants|'
             r'Has_Access_Values|Fraction|Fore|Floor|Fixed_Value|First_Valid|'
             r'First_Bit|First|External_Tag|Exponent|Epsilon|Enum_Val|'
             r'Enum_Rep|Enabled|Emax|Elaborated|Elab_Subp_Body|Elab_Spec|'
             r'Elab_Body|Descriptor_Size|Digits|Denorm|Delta|Definite|'
             r'Default_Bit_Order|Count|Copy_Sign|Constrained|'
             r'Compose|Component_Size|Compiler_Version|Code_Address|Class|'
             r'Ceiling|Caller|Callable|Body_Version|Bit_Order|Bit_Position|'
             r'Bit|Base|Asm_Output|Asm_Input|Alignment|Aft|Adjacent|'
             r'Address_Size|Address|Access|Abort_Signal|AST_Entry)',
Name.Attribute), # All Ada2012 reserved words (r'(abort|abstract|abs|accept|access|aliased|all|and|array|at|' r'begin|body|case|constant|declare|delay|delta|digits|do|' r'else|elsif|end|entry|exception|exit|for|function|generic|goto|' r'if|interface|in|is|limited|loop|mod|new|not|null|' r'of|or|others|out|overriding|' + project_pattern2 + r'package|pragma|private|procedure|protected|' r'raise|range|record|rem|renames|requeue|return|reverse|' r'select|separate|some|subtype|synchronized|' r'tagged|task|terminate|then|type|until|use|when|while|with|xor' r')([\s;,])', bygroups(Keyword.Reserved, Punctuation)), # Two characters operators (r'=>|\.\.|\*\*|:=|/=|>=|<=|<<|>>|<>', Operator), # One character operators (r'&|\'|\(|\)|\*|\+|-|\.|/|:|<|=|>|\|', Operator), (r',|;', Punctuation), # Spaces (r'\s+', Text), # Builtin values (r'False|True', Keyword.Constant), # Identifiers (r'[\w\.]+', Name)], } # Insert tag highlighting before identifiers if tag_highlighting: result['root'].insert(-1, (r'\[[\w ]*\]', Name.Tag)) return result
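# Sketch of wiring the table above into a concrete lexer, roughly what the
# AdaLexerWithTags mentioned in the docstring would look like (class name
# and metadata here are illustrative, not the originals; the module's
# pygments token imports are assumed in scope):
from pygments.lexer import RegexLexer

class TaggedAdaLexer(RegexLexer):
    name = 'TaggedAda'
    aliases = ['tagged-ada']
    filenames = ['*.ads', '*.adb']
    tokens = get_lexer_tokens(tag_highlighting=True)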
# - manual/cmake-buildsystem.7.html (with nested $<..>; relative and absolute paths, "::")

from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator, Punctuation, String, Text, Comment, Generic, Whitespace, Number
from pygments.lexer import bygroups

# Notes on regular expressions below:
# - [\.\+-] are needed for string constants like gtk+-2.0
# - Unix paths are recognized by '/'; support for Windows paths may be added if needed
# - (\\.) allows for \-escapes (used in manual/cmake-language.7)
# - $<..$<..$>..> nested occurrence in cmake-buildsystem
# - Nested variable evaluations are only supported in a limited capacity.
#   Only one level of nesting is supported and at most one nested variable
#   can be present.

CMakeLexer.tokens["root"] = [
    (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Function, Text, Name.Function), '#push'),  # fctn(
    (r'\(', Name.Function, '#push'),
    (r'\)', Name.Function, '#pop'),
    (r'\[', Punctuation, '#push'),
    (r'\]', Punctuation, '#pop'),
    (r'[|;,.=*\-]', Punctuation),
    (r'\\\\', Punctuation),  # used in commands/source_group
    (r'[:]', Operator),
    (r'[<>]=', Punctuation),  # used in FindPkgConfig.cmake
    (r'\$<', Operator, '#push'),  # $<...>
    (r'<[^<|]+?>(\w*\.\.\.)?', Name.Variable),  # <expr>
    (r'(\$\w*\{)([^\}\$]*)?(?:(\$\w*\{)([^\}]+?)(\}))?([^\}]*?)(\})',  # ${..} $ENV{..}, possibly nested
     bygroups(Operator, Name.Tag, Operator, Name.Tag, Operator, Name.Tag, Operator)),
    (r'([A-Z]+\{)(.+?)(\})', bygroups(Operator, Name.Tag, Operator)),  # DATA{ ...}
    (r'[a-z]+(@|(://))((\\.)|[\w.+-:/\\])+', Name.Attribute),  # URL, git@, ...
    (r'/\w[\w\.\+-/\\]*', Name.Attribute),  # absolute path
tokens = {"root": [(r"`[^`]*?`", String.Backtick), inherit]} ROOT_TOKENS = [ (r"\?", Keyword), (r"\$\w+", Name.Variable), (r"\$\{", Keyword, ("pymode",)), (r"\$\(", Keyword, ("subproc",)), (r"\$\[", Keyword, ("subproc",)), (r"@\(", Keyword, ("pymode",)), inherit, ] PYMODE_TOKENS = [ (r"(.+)(\))", bygroups(using(this), Keyword), "#pop"), (r"(.+)(\})", bygroups(using(this), Keyword), "#pop"), ] SUBPROC_TOKENS = [ (r"(.+)(\))", bygroups(using(XonshSubprocLexer), Keyword), "#pop"), (r"(.+)(\])", bygroups(using(XonshSubprocLexer), Keyword), "#pop"), ] class XonshLexer(PythonLexer): """Xonsh console lexer for pygments.""" name = "Xonsh lexer" aliases = ["xonsh", "xsh"] filenames = ["*.xsh", "*xonshrc"]
def gen_crystalstrings_rules(): def intp_regex_callback(self, match, ctx): yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ['interpolated-regex']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v yield match.start(4), String.Regex, match.group(4) # end[imsx]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): yield match.start(1), String.Other, match.group(1) nctx = LexerContext(match.group(3), 0, ['interpolated-string']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states['strings'] = [ (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol), (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), # This allows arbitrary text after '\ for simplicity (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char), (r':"', String.Symbol, 'simple-sym'), # Crystal doesn't have "symbol:"s but this simplifies function args (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), ] # double-quoted string and symbol for name, ttype, end in ('string', String.Double, '"'), \ ('sym', String.Symbol, '"'), \ ('backtick', String.Backtick, '`'): states['simple-'+name] = [ include('string-escaped' if name == 'sym' else 'string-intp-escaped'), (r'[^\\%s#]+' % end, ttype), (r'[\\#]', ttype), (end, ttype, '#pop'), ] # braced quoted strings for lbrace, rbrace, bracecc, name in \ ('\\{', '\\}', '{}', 'cb'), \ ('\\[', '\\]', '\\[\\]', 'sb'), \ ('\\(', '\\)', '()', 'pa'), \ ('<', '>', '<>', 'ab'): states[name+'-intp-string'] = [ (r'\\[' + lbrace + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), include('string-intp-escaped'), (r'[\\#' + bracecc + ']', String.Other), (r'[^\\#' + bracecc + ']+', String.Other), ] states['strings'].append((r'%' + lbrace, String.Other, name+'-intp-string')) states[name+'-string'] = [ (r'\\[\\' + bracecc + ']', String.Other), (lbrace, String.Other, '#push'), (rbrace, String.Other, '#pop'), (r'[\\#' + bracecc + ']', String.Other), (r'[^\\#' + bracecc + ']+', String.Other), ] # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html states['strings'].append((r'%[wi]' + lbrace, String.Other, name+'-string')) states[name+'-regex'] = [ (r'\\[\\' + bracecc + ']', String.Regex), (lbrace, String.Regex, '#push'), (rbrace + '[imsx]*', String.Regex, '#pop'), include('string-intp'), (r'[\\#' + bracecc + ']', String.Regex), (r'[^\\#' + bracecc + ']+', String.Regex), ] states['strings'].append((r'%r' + lbrace, String.Regex, name+'-regex')) # these must come after %<brace>! states['strings'] += [ # %r regex (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)', intp_regex_callback), # regular fancy strings with qsw (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # and because of fixed width lookbehinds the whole thing a # second time for line startings... (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # all regular fancy strings without qsw (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), ] return states
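# Sketch: the generated table is the 'strings' entry state plus one family
# of helper states per quote style and per bracket pair ('cb', 'sb', 'pa',
# 'ab'). Calling it requires the surrounding module's imports
# (LexerContext, words, CRYSTAL_OPERATORS, include, bygroups).
crystal_states = gen_crystalstrings_rules()
assert 'strings' in crystal_states and 'simple-string' in crystal_states
assert 'cb-intp-string' in crystal_states and 'ab-regex' in crystal_states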
) from pygments.token import ( Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error, ) from pygments.util import get_bool_opt # Local line_re = re.compile('.*?\n') __all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer', 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer', 'IPythonConsoleLexer', 'IPyLexer'] ipython_tokens = [ (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)), (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))), (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)), (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)), (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword, using(BashLexer), Text)), (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)), (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)), (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)), ] def build_ipy_lexer(python3): """Builds IPython lexers depending on the value of `python3`.
class FSharpLexer(RegexLexer): """ For the `F# language <https://fsharp.org/>`_ (version 3.0). .. versionadded:: 1.5 """ name = 'F#' aliases = ['fsharp', 'f#'] filenames = ['*.fs', '*.fsi'] mimetypes = ['text/x-fsharp'] keywords = [ 'abstract', 'as', 'assert', 'base', 'begin', 'class', 'default', 'delegate', 'do!', 'do', 'done', 'downcast', 'downto', 'elif', 'else', 'end', 'exception', 'extern', 'false', 'finally', 'for', 'function', 'fun', 'global', 'if', 'inherit', 'inline', 'interface', 'internal', 'in', 'lazy', 'let!', 'let', 'match', 'member', 'module', 'mutable', 'namespace', 'new', 'null', 'of', 'open', 'override', 'private', 'public', 'rec', 'return!', 'return', 'select', 'static', 'struct', 'then', 'to', 'true', 'try', 'type', 'upcast', 'use!', 'use', 'val', 'void', 'when', 'while', 'with', 'yield!', 'yield', ] # Reserved words; cannot hurt to color them as keywords too. keywords += [ 'atomic', 'break', 'checked', 'component', 'const', 'constraint', 'constructor', 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include', 'method', 'mixin', 'object', 'parallel', 'process', 'protected', 'pure', 'sealed', 'tailcall', 'trait', 'virtual', 'volatile', ] keyopts = [ '!=', '#', '&&', '&', r'\(', r'\)', r'\*', r'\+', ',', r'-\.', '->', '-', r'\.\.', r'\.', '::', ':=', ':>', ':', ';;', ';', '<-', r'<\]', '<', r'>\]', '>', r'\?\?', r'\?', r'\[<', r'\[\|', r'\[', r'\]', '_', '`', r'\{', r'\|\]', r'\|', r'\}', '~', '<@@', '<@', '=', '@>', '@@>', ] operators = r'[!$%&*+\./:<=>?@^|~-]' word_operators = ['and', 'or', 'not'] prefix_syms = r'[!?~]' infix_syms = r'[=<>@^|&+\*/$%-]' primitives = [ 'sbyte', 'byte', 'char', 'nativeint', 'unativeint', 'float32', 'single', 'float', 'double', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64', 'decimal', 'unit', 'bool', 'string', 'list', 'exn', 'obj', 'enum', ] # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or # http://fsharp.org/about/files/spec.pdf for reference. Good luck. 
    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbrafv]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\U[0-9a-fA-F]{8}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'///.*?\n', String.Doc),
            (r'//.*?\n', Comment.Single),
            (r'\(\*(?!\))', Comment, 'comment'),
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'\b(open|module)(\s+)([\w.]+)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'\b(let!?)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Variable)),
            (r'\b(type)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Class)),
            (r'\b(member|override)(\s+)(\w+)(\.)(\w+)',
             bygroups(Keyword, Text, Name, Punctuation, Name.Function)),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n',
             Comment.Preproc),
            (r"[^\W\d][\w']*", Name),
            (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer),
            (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex),
            (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct),
            (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin),
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)?[fFmM]?',
             Number.Float),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'@?"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            # e.g. dictionary index access
            default('#pop'),
        ],
        'comment': [
            (r'[^(*)@"]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            # comments cannot be closed within strings in comments
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'[(*)@]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String),
            include('escape-sequence'),
            (r'\\\n', String),
            (r'\n', String),  # newlines are allowed in any string
            (r'"B?', String, '#pop'),
        ],
        'lstring': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'""', String),
            (r'"B?', String, '#pop'),
        ],
        'tqs': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'"""B?', String, '#pop'),
            (r'"', String),
        ],
    }
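# Usage sketch for the class above (pygments ships an equivalent
# FSharpLexer in pygments.lexers.dotnet):
from pygments import highlight
from pygments.formatters import HtmlFormatter
print(highlight('let add x y = x + y', FSharpLexer(), HtmlFormatter()))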
class ScamlLexer(ExtendedRegexLexer): """ For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala. .. versionadded:: 1.4 """ name = 'Scaml' aliases = ['scaml'] filenames = ['*.scaml'] mimetypes = ['text/x-scaml'] flags = re.IGNORECASE # Scaml does not yet support the " |\n" notation to # wrap long lines. Once it does, use the custom faux # dot instead. # _dot = r'(?: \|\n(?=.* \|)|.)' _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + ')*?\}', using(ScalaLexer)), (r'\[' + _dot + '*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + '*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + '*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
:license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, bygroups, default, include, using, words
from pygments.token import Comment, Error, Keyword, Name, Number, Operator, Punctuation, \
    String, Text, Whitespace

from pygments.lexers._csound_builtins import OPCODES, DEPRECATED_OPCODES
from pygments.lexers.html import HtmlLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.scripting import LuaLexer

__all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer']

newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text))
class CsoundOrchestraLexer(CsoundLexer): """ For `Csound <https://csound.com>`_ orchestras. .. versionadded:: 2.1 """ name = 'Csound Orchestra' aliases = ['csound', 'csound-orc'] filenames = ['*.orc', '*.udo'] user_defined_opcodes = set() def opcode_name_callback(lexer, match): opcode = match.group(0) lexer.user_defined_opcodes.add(opcode) yield match.start(), Name.Function, opcode def name_callback(lexer, match): type_annotation_token = Keyword.Type name = match.group(1) if name in OPCODES or name in DEPRECATED_OPCODES: yield match.start(), Name.Builtin, name elif name in lexer.user_defined_opcodes: yield match.start(), Name.Function, name else: type_annotation_token = Name name_match = re.search(r'^(g?[afikSw])(\w+)', name) if name_match: yield name_match.start(1), Keyword.Type, name_match.group(1) yield name_match.start(2), Name, name_match.group(2) else: yield match.start(), Name, name if match.group(2): yield match.start(2), Punctuation, match.group(2) yield match.start(3), type_annotation_token, match.group(3) tokens = { 'root': [(r'\n', Text), (r'^([ \t]*)(\w+)(:)(?:[ \t]+|$)', bygroups(Text, Name.Label, Punctuation)), include('whitespace and macro uses'), include('preprocessor directives'), (r'\binstr\b', Keyword.Declaration, 'instrument numbers and identifiers'), (r'\bopcode\b', Keyword.Declaration, 'after opcode keyword'), (r'\b(?:end(?:in|op))\b', Keyword.Declaration), include('partial statements')], 'partial statements': [(r'\b(?:0dbfs|A4|k(?:r|smps)|nchnls(?:_i)?|sr)\b', Name.Variable.Global), include('numbers'), (r'\+=|-=|\*=|/=|<<|>>|<=|>=|==|!=|&&|\|\||[~¬]|[=!+\-*/^%&|<>#?:]', Operator), (r'[(),\[\]]', Punctuation), (r'"', String, 'quoted string'), (r'\{\{', String, 'braced string'), (words(( 'do', 'else', 'elseif', 'endif', 'enduntil', 'fi', 'if', 'ithen', 'kthen', 'od', 'then', 'until', 'while', ), prefix=r'\b', suffix=r'\b'), Keyword), (words(('return', 'rireturn'), prefix=r'\b', suffix=r'\b'), Keyword.Pseudo), (r'\b[ik]?goto\b', Keyword, 'goto label'), (r'\b(r(?:einit|igoto)|tigoto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation), 'goto label'), (r'\b(c(?:g|in?|k|nk?)goto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation), ('goto label', 'goto argument')), (r'\b(timout)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation), ('goto label', 'goto argument', 'goto argument')), (r'\b(loop_[gl][et])(\(|\b)', bygroups(Keyword.Pseudo, Punctuation), ('goto label', 'goto argument', 'goto argument', 'goto argument')), (r'\bprintk?s\b', Name.Builtin, 'prints opcode'), (r'\b(?:readscore|scoreline(?:_i)?)\b', Name.Builtin, 'Csound score opcode'), (r'\bpyl?run[it]?\b', Name.Builtin, 'Python opcode'), (r'\blua_(?:exec|opdef)\b', Name.Builtin, 'Lua opcode'), (r'\bp\d+\b', Name.Variable.Instance), (r'\b([A-Z_a-z]\w*)(?:(:)([A-Za-z]))?\b', name_callback)], 'instrument numbers and identifiers': [ include('whitespace and macro uses'), (r'\d+|[A-Z_a-z]\w*', Name.Function), (r'[+,]', Punctuation), (r'\n', Text, '#pop') ], 'after opcode keyword': [ include('whitespace and macro uses'), (r'[A-Z_a-z]\w*', opcode_name_callback, ('#pop', 'opcode type signatures')), (r'\n', Text, '#pop') ], 'opcode type signatures': [ include('whitespace and macro uses'), # https://github.com/csound/csound/search?q=XIDENT+path%3AEngine+filename%3Acsound_orc.lex (r'0|[afijkKoOpPStV\[\]]+', Keyword.Type), (r',', Punctuation), (r'\n', Text, '#pop') ], 'quoted string': [(r'"', String, '#pop'), (r'[^\\"$%)]+', String), include('macro uses'), include('escape sequences'), include('format specifiers'), (r'[\\$%)]', String)], 
'braced string': [(r'\}\}', String, '#pop'), (r'(?:[^\\%)}]|\}(?!\}))+', String), include('escape sequences'), include('format specifiers'), (r'[\\%)]', String)], 'escape sequences': [ # https://github.com/csound/csound/search?q=unquote_string+path%3AEngine+filename%3Acsound_orc_compile.c (r'\\(?:[\\abnrt"]|[0-7]{1,3})', String.Escape) ], # Format specifiers are highlighted in all strings, even though only # fprintks https://csound.com/docs/manual/fprintks.html # fprints https://csound.com/docs/manual/fprints.html # printf/printf_i https://csound.com/docs/manual/printf.html # printks https://csound.com/docs/manual/printks.html # prints https://csound.com/docs/manual/prints.html # sprintf https://csound.com/docs/manual/sprintf.html # sprintfk https://csound.com/docs/manual/sprintfk.html # work with strings that contain format specifiers. In addition, these opcodes’ # handling of format specifiers is inconsistent: # - fprintks, fprints, printks, and prints do accept %a and %A # specifiers, but can’t accept %s specifiers. # - printf, printf_i, sprintf, and sprintfk don’t accept %a and %A # specifiers, but can accept %s specifiers. # See https://github.com/csound/csound/issues/747 for more information. 'format specifiers': [(r'%[#0\- +]*\d*(?:\.\d+)?[diuoxXfFeEgGaAcs]', String.Interpol), (r'%%', String.Escape)], 'goto argument': [ include('whitespace and macro uses'), (r',', Punctuation, '#pop'), include('partial statements') ], 'goto label': [ include('whitespace and macro uses'), (r'\w+', Name.Label, '#pop'), default('#pop') ], 'prints opcode': [ include('whitespace and macro uses'), (r'"', String, 'prints quoted string'), default('#pop') ], 'prints quoted string': [(r'\\\\[aAbBnNrRtT]', String.Escape), (r'%[!nNrRtT]|[~^]{1,2}', String.Escape), include('quoted string')], 'Csound score opcode': [ include('whitespace and macro uses'), (r'"', String, 'quoted string'), (r'\{\{', String, 'Csound score'), (r'\n', Text, '#pop') ], 'Csound score': [(r'\}\}', String, '#pop'), (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer))], 'Python opcode': [ include('whitespace and macro uses'), (r'"', String, 'quoted string'), (r'\{\{', String, 'Python'), (r'\n', Text, '#pop') ], 'Python': [(r'\}\}', String, '#pop'), (r'([^}]+)|\}(?!\})', using(PythonLexer))], 'Lua opcode': [ include('whitespace and macro uses'), (r'"', String, 'quoted string'), (r'\{\{', String, 'Lua'), (r'\n', Text, '#pop') ], 'Lua': [(r'\}\}', String, '#pop'), (r'([^}]+)|\}(?!\})', using(LuaLexer))] }
class CddlLexer(RegexLexer):
    name = "CDDL"
    aliases = ["cddl"]
    filenames = ["*.cddl"]
    mimetypes = ["text/x-cddl"]

    _prelude_types = [
        "any", "b64legacy", "b64url", "bigfloat", "bigint", "bignint",
        "biguint", "bool", "bstr", "bytes", "cbor-any", "decfrac", "eb16",
        "eb64legacy", "eb64url", "encoded-cbor", "false", "float", "float16",
        "float16-32", "float32", "float32-64", "float64", "int", "integer",
        "mime-message", "nil", "nint", "null", "number", "regexp", "tdate",
        "text", "time", "true", "tstr", "uint", "undefined", "unsigned",
        "uri",
    ]

    _controls = [
        ".and", ".bits", ".cbor", ".cborseq", ".default", ".eq", ".ge",
        ".gt", ".le", ".lt", ".ne", ".regexp", ".size", ".within",
    ]

    _re_id = r"""(?x)
        [$@A-Z_a-z]
        (?:[\-\.]*[$@0-9A-Z_a-z]|[$@0-9A-Z_a-z])*
    """

    # While the spec reads more like "an int must not start with 0" we use a
    # lookahead here that says "after a 0 there must be no digit". This makes
    # the '0' the invalid character in '01', which looks nicer when
    # highlighted.
    _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
    _re_int = r"-?" + _re_uint

    flags = re.UNICODE | re.MULTILINE

    tokens = {
        "commentsandwhitespace": [(r"\s+", Text), (r";.+$", Comment.Single)],
        "root": [
            include("commentsandwhitespace"),
            # tag types
            (r"#\d(\.{uint}|)".format(uint=_re_uint), Keyword.Type),
            (r"#", Keyword.Type),  # any
            # occurrence
            (r"({uint}|)(\*)({uint}|)".format(uint=_re_uint),
             bygroups(Number, Operator, Number)),
            (r"\?|\+", Operator),  # occurrence
            (r"\^", Operator),  # cuts
            (r"(\.\.\.|\.\.)", Operator),  # rangeop
            (words(_controls, suffix=r"\b"), Operator.Word),  # ctlops
            # group-to-choice op
            (r"&(?=\s*({groupname}|\())".format(groupname=_re_id), Operator),
            (r"~(?=\s*{})".format(_re_id), Operator),  # unwrap op
            (r"//|/(?!/)", Operator),  # double and single slash
            (r"=>|/==|/=|=", Operator),
            (r"[\[\]{}\(\),<>:]", Punctuation),
            # Bytestrings
            (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
            (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
            (r"'", String.Single, "bstr"),
            # Barewords as member keys (must be matched before values, types,
            # typenames, groupnames). Token type is String as barewords are
            # always interpreted as such.
            (r"({bareword})(\s*)(:)".format(bareword=_re_id),
             bygroups(String, Text, Punctuation)),
            # predefined types
            (words(_prelude_types, prefix=r"(?![\-_$@])\b",
                   suffix=r"\b(?![\-_$@])"), Name.Builtin),
            # user-defined groupnames, typenames
            (_re_id, Name.Class),
            # values
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+|)p[+-]?\d+", Number.Hex),  # hexfloat
            (r"0x[0-9a-fA-F]+", Number.Hex),  # hex
            # Float
            (r"""(?x)
                {int}
                (?=(\.\d|e[+-]?\d))  # lookahead; at least one float-y thing coming?
                (?:\.\d+)?  # fraction
                (?:e[+-]?\d+)?  # and/or exponent
            """.format(int=_re_int), Number.Float),
            # Int
            (_re_int, Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
        ],
        "bstrb64url": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-zA-Z\-_=]+", String.Single),
            (r".", Error),
            # (r";.+$", Token.Other),
        ],
        "bstrh": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-fA-F]+", String.Single),
            (r".", Error),
        ],
        "bstr": [
            (r"'", String.Single, "#pop"),
            (r"\\.", String.Escape),
            (r"[^']", String.Single),
        ],
    }
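# Usage sketch for the lexer above (recent pygments releases ship an
# equivalent CddlLexer):
from pygments import highlight
from pygments.formatters import TerminalFormatter
print(highlight('person = {age: uint, name: tstr}',
                CddlLexer(), TerminalFormatter()))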
class PrologLexer(RegexLexer): """ Lexer for Prolog files. """ name = 'Prolog' aliases = ['prolog'] filenames = ['*.ecl', '*.prolog', '*.pro', '*.pl'] mimetypes = ['text/x-prolog'] flags = re.UNICODE | re.MULTILINE tokens = { 'root': [ (r'/\*', Comment.Multiline, 'nested-comment'), (r'%.*', Comment.Single), # character literal (r'0\'.', String.Char), (r'0b[01]+', Number.Bin), (r'0o[0-7]+', Number.Oct), (r'0x[0-9a-fA-F]+', Number.Hex), # literal with prepended base (r'\d\d?\'[a-zA-Z0-9]+', Number.Integer), (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'\d+', Number.Integer), (r'[\[\](){}|.,;!]', Punctuation), (r':-|-->', Punctuation), (r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|' r'\\[0-7]+\\|\\["\nabcefnrstv]|[^\\"])*"', String.Double), (r"'(?:''|[^'])*'", String.Atom), # quoted atom # Needs to not be followed by an atom. # (r'=(?=\s|[a-zA-Z\[])', Operator), (r'is\b', Operator), (r'(<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])', Operator), (r'(mod|div|not)\b', Operator), (r'_', Keyword), # The don't-care variable (r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)), (r'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' r'(\s*)(:-|-->)', bygroups(Name.Function, Text, Operator)), # function defn (r'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' r'(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*', String.Atom), # atom, characters # This one includes ! (r'[#&*+\-./:<=>?@\\^~\u00a1-\u00bf\u2010-\u303f]+', String.Atom ), # atom, graphics (r'[A-Z_]\w*', Name.Variable), (r'\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text), ], 'nested-comment': [ (r'\*/', Comment.Multiline, '#pop'), (r'/\*', Comment.Multiline, '#push'), (r'[^*/]+', Comment.Multiline), (r'[*/]', Comment.Multiline), ], } def analyse_text(text): return ':-' in text
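# The analyse_text hook above is what pygments' guess_lexer consults: any
# sample containing ':-' scores as Prolog, so snippets like this one are
# typically routed here.
from pygments.lexers import guess_lexer
lexer = guess_lexer('parent(X, Y) :- father(X, Y).')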
class PowerShellLexer(RegexLexer):
    """
    For Windows PowerShell code.

    .. versionadded:: 1.5
    """
    name = 'PowerShell'
    aliases = ['powershell', 'posh', 'ps1', 'psm1']
    filenames = ['*.ps1', '*.psm1']
    mimetypes = ['text/x-powershell']

    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE

    keywords = (
        'while validateset validaterange validatepattern validatelength '
        'validatecount until trap switch return ref process param parameter in '
        'if global: function foreach for finally filter end elseif else '
        'dynamicparam do default continue cmdletbinding break begin alias \\? '
        '% #script #private #local #global mandatory parametersetname position '
        'valuefrompipeline valuefrompipelinebypropertyname '
        'valuefromremainingarguments helpmessage try catch throw').split()

    operators = (
        'and as band bnot bor bxor casesensitive ccontains ceq cge cgt cle '
        'clike clt cmatch cne cnotcontains cnotlike cnotmatch contains '
        'creplace eq exact f file ge gt icontains ieq ige igt ile ilike ilt '
        'imatch ine inotcontains inotlike inotmatch ireplace is isnot le like '
        'lt match ne not notcontains notlike notmatch or regex replace '
        'wildcard').split()

    verbs = (
        'write where wait use update unregister undo trace test tee take '
        'suspend stop start split sort skip show set send select scroll resume '
        'restore restart resolve resize reset rename remove register receive '
        'read push pop ping out new move measure limit join invoke import '
        'group get format foreach export expand exit enter enable disconnect '
        'disable debug cxnew copy convertto convertfrom convert connect '
        'complete compare clear checkpoint aggregate add').split()

    commenthelp = (
        'component description example externalhelp forwardhelpcategory '
        'forwardhelptargetname functionality inputs link '
        'notes outputs parameter remotehelprunspace role synopsis').split()

    tokens = {
        'root': [
            # we need to count pairs of parentheses for correct highlight
            # of '$(...)' blocks in strings
            (r'\(', Punctuation, 'child'),
            (r'\s+', Text),
            (r'^(\s*#[#\s]*)(\.(?:%s))([^\n]*$)' % '|'.join(commenthelp),
             bygroups(Comment, String.Doc, Comment)),
            (r'#[^\n]*?$', Comment),
            (r'(&lt;|<)#', Comment.Multiline, 'multline'),
            (r'@"\n', String.Heredoc, 'heredoc-double'),
            (r"@'\n.*?\n'@", String.Heredoc),
            # escaped syntax
            (r'`[\'"$@-]', Punctuation),
            (r'"', String.Double, 'string'),
            (r"'([^']|'')*'", String.Single),
            (r'(\$|@@|@)((global|script|private|env):)?\w+', Name.Variable),
            (r'(%s)\b' % '|'.join(keywords), Keyword),
            (r'-(%s)\b' % '|'.join(operators), Operator),
            (r'(%s)-[a-z_]\w*\b' % '|'.join(verbs), Name.Builtin),
            (r'\[[a-z_\[][\w. `,\[\]]*\]', Name.Constant),  # .net [type]s
            (r'-[a-z_]\w*', Name),
            (r'\w+', Name),
            (r'[.,;@{}\[\]$()=+*/\\&%!~?^`|<>-]|::', Punctuation),
        ],
        'child': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'multline': [
            (r'[^#&.]+', Comment.Multiline),
            (r'#(>|&gt;)', Comment.Multiline, '#pop'),
            (r'\.(%s)' % '|'.join(commenthelp), String.Doc),
            (r'[#&.]', Comment.Multiline),
        ],
        'string': [
            (r"`[0abfnrtv'\"$`]", String.Escape),
            (r'[^$`"]+', String.Double),
            (r'\$\(', Punctuation, 'child'),
            (r'""', String.Double),
            (r'[`$]', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'heredoc-double': [
            (r'\n"@', String.Heredoc, '#pop'),
            (r'\$\(', Punctuation, 'child'),
            (r'[^@\n]+"]', String.Heredoc),
            (r".", String.Heredoc),
        ]
    }
class TcshLexer(RegexLexer):
    """
    Lexer for tcsh scripts.

    .. versionadded:: 0.10
    """

    name = 'Tcsh'
    aliases = ['tcsh', 'csh']
    filenames = ['*.tcsh', '*.csh']
    mimetypes = ['application/x-csh']

    tokens = {
        'root': [
            include('basic'),
            (r'\$\(', Keyword, 'paren'),
            (r'\$\{#?', Keyword, 'curly'),
            (r'`', String.Backtick, 'backticks'),
            include('data'),
        ],
        'basic': [
            (r'\b(if|endif|else|while|then|foreach|case|default|'
             r'continue|goto|breaksw|end|switch|endsw)\s*\b',
             Keyword),
            (r'\b(alias|alloc|bg|bindkey|break|builtins|bye|caller|cd|chdir|'
             r'complete|dirs|echo|echotc|eval|exec|exit|fg|filetest|getxvers|'
             r'glob|getspath|hashstat|history|hup|inlib|jobs|kill|'
             r'limit|log|login|logout|ls-F|migrate|newgrp|nice|nohup|notify|'
             r'onintr|popd|printenv|pushd|rehash|repeat|rootnode|'
             r'set|shift|sched|setenv|setpath|settc|setty|setxvers|'
             r'source|stop|suspend|telltc|time|'
             r'umask|unalias|uncomplete|unhash|universe|unlimit|unset|unsetenv|'
             r'ver|wait|warp|watchlog|where|which)\s*\b',
             Name.Builtin),
            (r'#.*', Comment),
            (r'\\[\w\W]', String.Escape),
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)),
            (r'[\[\]{}()=]+', Operator),
            (r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
            (r';', Punctuation),
        ],
        'data': [
            (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r'\s+', Text),
            (r'[^=\s\[\]{}()$"\'`\\;#]+', Text),
            (r'\d+(?= |\Z)', Number),
            (r'\$#?(\w+|.)', Name.Variable),
        ],
        'curly': [
            (r'\}', Keyword, '#pop'),
            (r':-', Keyword),
            (r'\w+', Name.Variable),
            (r'[^}:"\'`$]+', Punctuation),
            (r':', Punctuation),
            include('root'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
        'backticks': [
            (r'`', String.Backtick, '#pop'),
            include('root'),
        ],
    }
class BashLexer(RegexLexer): """ Lexer for (ba|k|)sh shell scripts. .. versionadded:: 0.6 """ name = 'Bash' aliases = ['bash', 'sh', 'ksh', 'shell'] filenames = [ '*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass', '.bashrc', 'bashrc', '.bash_*', 'bash_*', 'PKGBUILD' ] mimetypes = ['application/x-sh', 'application/x-shellscript'] tokens = { 'root': [ include('basic'), (r'`', String.Backtick, 'backticks'), include('data'), include('interp'), ], 'interp': [ (r'\$\(\(', Keyword, 'math'), (r'\$\(', Keyword, 'paren'), (r'\$\{#?', String.Interpol, 'curly'), (r'\$#?(\w+|.)', Name.Variable), ], 'basic': [ (r'\b(if|fi|else|while|do|done|for|then|return|function|case|' r'select|continue|until|esac|elif)(\s*)\b', bygroups(Keyword, Text)), (r'\b(alias|bg|bind|break|builtin|caller|cd|command|compgen|' r'complete|declare|dirs|disown|echo|enable|eval|exec|exit|' r'export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|' r'local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|' r'shopt|source|suspend|test|time|times|trap|true|type|typeset|' r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)', Name.Builtin), (r'#.*\n', Comment), (r'\\[\w\W]', String.Escape), (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)), (r'[\[\]{}()=]', Operator), (r'<<<', Operator), # here-string (r'<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2', String), (r'&&|\|\|', Operator), ], 'data': [ (r'(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\$])*"', String.Double), (r'"', String.Double, 'string'), (r"(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), (r"(?s)'.*?'", String.Single), (r';', Punctuation), (r'&', Punctuation), (r'\|', Punctuation), (r'\s+', Text), (r'\d+(?= |\Z)', Number), (r'[^=\s\[\]{}()$"\'`\\<&|;]+', Text), (r'<', Text), ], 'string': [ (r'"', String.Double, '#pop'), (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\$])+', String.Double), include('interp'), ], 'curly': [ (r'\}', String.Interpol, '#pop'), (r':-', Keyword), (r'\w+', Name.Variable), (r'[^}:"\'`$\\]+', Punctuation), (r':', Punctuation), include('root'), ], 'paren': [ (r'\)', Keyword, '#pop'), include('root'), ], 'math': [ (r'\)\)', Keyword, '#pop'), (r'[-+*/%^|&]|\*\*|\|\|', Operator), (r'\d+#\d+', Number), (r'\d+#(?! )', Number), (r'\d+', Number), include('root'), ], 'backticks': [ (r'`', String.Backtick, '#pop'), include('root'), ], } def analyse_text(text): if shebang_matches(text, r'(ba|z|)sh'): return 1 if text.startswith('$ '): return 0.2
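# analyse_text above drives pygments' lexer guessing: a (ba|z|)sh shebang
# scores 1.0 and a leading '$ ' prompt scores 0.2. Sketch:
from pygments.lexers import guess_lexer
lexer = guess_lexer('#!/bin/bash\necho "hello"\n')  # typically BashLexer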
class JadeLexer(ExtendedRegexLexer): """ For Jade markup. Jade is a variant of Scaml, see: http://scalate.fusesource.org/documentation/scaml-reference.html .. versionadded:: 1.4 """ name = 'Jade' aliases = ['jade'] filenames = ['*.jade'] mimetypes = ['text/x-jade'] flags = re.IGNORECASE _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), (r'[\w:-]+', Name.Tag, 'tag'), (r'\|', Text, 'eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + ')*?\}', using(ScalaLexer)), (r'\[' + _dot + '*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + '*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + '*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
class CsoundLexer(RegexLexer): tokens = { 'whitespace': [(r'[ \t]+', Text), (r'/[*](?:.|\n)*?[*]/', Comment.Multiline), (r'(?:;|//).*$', Comment.Single), (r'(\\)(\n)', bygroups(Whitespace, Text))], 'preprocessor directives': [(r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc), (r'#includestr', Comment.Preproc, 'includestr directive'), (r'#include', Comment.Preproc, 'include directive'), (r'#[ \t]*define', Comment.Preproc, 'define directive'), (r'#(?:ifn?def|undef)\b', Comment.Preproc, 'macro directive')], 'include directive': [include('whitespace'), (r'([^ \t]).*?\1', String, '#pop')], 'includestr directive': [include('whitespace'), (r'"', String, ('#pop', 'quoted string'))], 'define directive': [ (r'\n', Text), include('whitespace'), (r'([A-Z_a-z]\w*)(\()', bygroups(Comment.Preproc, Punctuation), ('#pop', 'macro parameter name list')), (r'[A-Z_a-z]\w*', Comment.Preproc, ('#pop', 'before macro body')) ], 'macro parameter name list': [ include('whitespace'), (r'[A-Z_a-z]\w*', Comment.Preproc), (r"['#]", Punctuation), (r'\)', Punctuation, ('#pop', 'before macro body')) ], 'before macro body': [(r'\n', Text), include('whitespace'), (r'#', Punctuation, ('#pop', 'macro body'))], 'macro body': [(r'(?:\\(?!#)|[^#\\]|\n)+', Comment.Preproc), (r'\\#', Comment.Preproc), (r'(?<!\\)#', Punctuation, '#pop')], 'macro directive': [include('whitespace'), (r'[A-Z_a-z]\w*', Comment.Preproc, '#pop')], 'macro uses': [(r'(\$[A-Z_a-z]\w*\.?)(\()', bygroups(Comment.Preproc, Punctuation), 'macro parameter value list'), (r'\$[A-Z_a-z]\w*(?:\.|\b)', Comment.Preproc)], 'macro parameter value list': [ (r'(?:[^\'#"{()]|\{(?!\{))+', Comment.Preproc), (r"['#]", Punctuation), (r'"', String, 'macro parameter value quoted string'), (r'\{\{', String, 'macro parameter value braced string'), (r'\(', Comment.Preproc, 'macro parameter value parenthetical'), (r'\)', Punctuation, '#pop') ], 'macro parameter value quoted string': [(r"\\[#'()]", Comment.Preproc), (r"[#'()]", Error), include('quoted string')], 'macro parameter value braced string': [(r"\\[#'()]", Comment.Preproc), (r"[#'()]", Error), include('braced string')], 'macro parameter value parenthetical': [ (r'(?:[^\\()]|\\\))+', Comment.Preproc), (r'\(', Comment.Preproc, '#push'), (r'\)', Comment.Preproc, '#pop') ], 'whitespace and macro uses': [include('whitespace'), include('macro uses')], 'numbers': [(r'\d+[Ee][+-]?\d+|(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?', Number.Float), (r'(0[Xx])([0-9A-Fa-f]+)', bygroups(Keyword.Type, Number.Hex)), (r'\d+', Number.Integer)], 'quoted string': [(r'"', String, '#pop'), (r'[^"$]+', String), include('macro uses'), (r'[$]', String)], 'braced string': [ # Do nothing. This must be defined in subclasses. ] }
def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % 
suffix) ]
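# Sketch of how the _make_*_state helpers above get assembled into a token
# table, mirroring pygments' MSDOS batch lexer (only the paired plain and
# '/compound' states are shown; the module-level _punct/_nl/... constants
# are assumed to be in scope):
tokens_sketch = {
    'root': _make_begin_state(False),
    'root/compound': _make_begin_state(True),
    'follow': _make_follow_state(False),
    'follow/compound': _make_follow_state(True),
    'redirect': _make_redirect_state(False),
    'redirect/compound': _make_redirect_state(True),
    'call': _make_call_state(False),
    'call/compound': _make_call_state(True),
    'label': _make_label_state(False),
    'label/compound': _make_label_state(True),
}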
class AADLLexer(RegexLexer): """ Pygments parser for AADL models. See <http://www.aadl.info> for more details. """ name = 'AADL' aliases = ['aadl'] filenames = ['*.aadl'] mimetypes = ['text/x-aadl'] flags = re.MULTILINE | re.DOTALL | re.IGNORECASE iden_rex = r'[a-zA-Z_][a-zA-Z0-9_\.]*' class_iden_rex = r'(' + iden_rex + r')(::)(' + iden_rex + r')' definition_rex = r'(' + iden_rex + r')' + r'(\s*:\s*)\b' component_category = r'(abstract|data|subprogram|subprogram\s+group|thread|thread\s+group|process|memory|processor|bus|device|virtual\s+processor|virtual\s+bus|system)\b' with_tuple = (r'(with)(\s+)', bygroups(Keyword, Whitespace), 'with-list') text_tuple = (r'([^\S\n]+)', Text) terminator_tuple = (r'(;)(\s*)', bygroups(Punctuation, Whitespace), '#pop') comment_tuple = (r'(--[^\n]*\n)', Comment.Single) comment_whitespace_tuple = (r'(--[^\n]*\n)(\s+)', bygroups(Comment.Single, Whitespace)) accesses_tuple = ( r'(bus|subprogram|subprogram\s+group|data)(\s+)(access)\b', bygroups(Keyword, Whitespace, Keyword)) features_tuple = ( r'(feature|port|event\s+port|data\s+port|event\s+data\s+port|feature\s+group)\b', Keyword) tokens = { 'packageOrSystem': [ text_tuple, (r'(implementation)(\s+)(' + iden_rex + r')', bygroups(Name.Class, Whitespace, Name.Class), '#pop'), (iden_rex, Name.Class, '#pop'), ], 'annex': [ (r'(\s*)(' + iden_rex + r')(\s*)({\*\*.*\*\*})(\s*)(;)', bygroups(Whitespace, Name.Class, Whitespace, Comment.Multiline, Whitespace, Punctuation)), ], 'with-list': [ (r'\s*(,)\s*', Punctuation), (r'[a-zA-Z_]\w*', Name.Namespace), terminator_tuple, ], 'alias-body': [ (component_category, Keyword.Declaration), (r'(\s+)', Whitespace), (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity)), terminator_tuple, ], 'package-declaration': [ text_tuple, (r'(implementation)', Keyword.Declaration), (r'(' + iden_rex + r')(;)', bygroups(Name.Class, Punctuation), '#pop'), (class_iden_rex + r'(;)', bygroups(Name.Class, Punctuation, Name.Entity, Punctuation), '#pop'), (r'(' + iden_rex + r')(\s*)(extends)(\s*)', bygroups(Name.Class, Whitespace, Keyword.Declaration, Whitespace)), (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity), '#pop'), (iden_rex, Name.Class, '#pop'), ], 'declaration': [ text_tuple, (r'(in|out|event|data)', Keyword), (r'(provides|requires)', Keyword), features_tuple, accesses_tuple, (r'(flow|path|thread|subprogram)', Keyword), (component_category, Keyword), (class_iden_rex, bygroups(Name.Class, Punctuation, Name)), (r'(' + iden_rex + r')(\s*)(->|<-|<->)(\s*)(' + iden_rex + r')', bygroups(Name, Whitespace, Operator, Whitespace, Name.Variable)), (iden_rex, Name.Function), (r'({)(\s+)', bygroups(Punctuation, Whitespace), 'property-constant-declaration'), (r'}', Punctuation), terminator_tuple, ], 'applies-to': [ text_tuple, (r'\(', Punctuation), (r'\s*(,)\s*', Punctuation), (r'\s*(\*\*)\s*', Operator), features_tuple, accesses_tuple, (component_category, Keyword), (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity)), (r'(' + iden_rex + r')', Name.Class), (r'(\{)(' + iden_rex + r')(\})', bygroups(Punctuation, Name.Class, Punctuation)), (r'\)', Punctuation), (r';', Punctuation, '#pop:2'), ], 'property-value': [ (r'(true|false)', Keyword.Constant), (r'\(', Punctuation), (r'\)', Punctuation), (r',', Punctuation), (r'[0-9]+\.[0-9]*', Number.Float), (r'[0-9]+', Number.Integer), (r'(reference)(\s*)(\()(' + iden_rex + ')(\))', bygroups(Keyword.Declaration, Whitespace, Punctuation, Name.Variable.Instance, Punctuation)), (r'"[^"]*"', Literal.String.Double), 
(r'(\s*)(\.\.)(\s+)', bygroups(Whitespace, Operator, Whitespace)), (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Variable)), (r'(\s*)(applies)(\s+)(to)(\s+)', bygroups(Whitespace, Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace), 'applies-to'), (r'(' + iden_rex + r')', Name.Constant), (r'(\[)(\s*)', bygroups(Punctuation, Whitespace), 'record_term'), (r'(\s+)', Whitespace), terminator_tuple, ], 'record_term': [ (r'(' + iden_rex + r')(\s*)(=>)(\s*)', bygroups(Name.Class, Whitespace, Operator, Whitespace), 'property-value'), (r'(\])', Punctuation, '#pop'), ], 'property-section-property-value': [ include('property-value'), terminator_tuple, ], 'property-constant-value': [ include('property-value'), (r'(;)(\s+)', bygroups(Punctuation, Whitespace), '#pop:2') ], 'aggregate-property-constant-list': [ (r'(' + iden_rex + r')(\s*)(=>)(\s*)', bygroups(Name.Class, Whitespace, Operator, Whitespace)), (r'\s*;\s*', Punctuation), include('property-value'), (r'(\]\s*;)(\s+)', bygroups(Punctuation, Whitespace), '#pop:2'), ], 'property-declaration': [ comment_tuple, (r'(inherit|list\s+of)', Keyword.Declaration), # aadl property types (r'(aadlboolean|aadlinteger|aadlreal|aadlstring|enumeration|range\s+of|classifier|reference|record)', Keyword.Type), (r'(,|\(|\)|\+|-|\.\.|:|;)', Punctuation), (r'(units)(\s*)(\()', bygroups(Keyword.Declaration, Whitespace, Punctuation), 'units-list'), (r'[0-9]+', Number.Integer), features_tuple, accesses_tuple, (component_category, Keyword.Type), (r'(=>)(\s*)', bygroups(Operator, Whitespace), 'applies-to-property-value'), (r'(applies)(\s+)(to)(\s+)', bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace), 'applies-to'), (class_iden_rex, Name.Class), (r'(' + iden_rex + r')', Name.Class), (r'(\s+)', Whitespace), ], 'units-list': [ comment_tuple, (r'(' + iden_rex + r')', Name.Class), (r'(,|\*|=>)', Punctuation), (r'(\s+)', Whitespace), (r'(\))', Punctuation, '#pop'), ], 'applies-to-property-value': [ (r'(applies)(\s+)(to)(\s+)', bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace), 'applies-to'), include('property-value'), ], 'property-constant-declaration': [ text_tuple, (class_iden_rex + r'(\s*)(=>)(\s*)(\[)(\s*)', bygroups(Name.Class, Punctuation, Name.Constant, Whitespace, Operator, Whitespace, Punctuation, Whitespace), 'aggregate-property-constant-list'), (r'(' + iden_rex + r')(\s*)(=>)(\s*)(\[)(\s*)', bygroups(Name.Class, Whitespace, Operator, Whitespace, Punctuation, Whitespace), 'aggregate-property-constant-list'), (class_iden_rex + r'(\s*)(=>)(\s*)', bygroups(Name.Class, Punctuation, Name.Constant, Whitespace, Operator, Whitespace), 'property-constant-value'), (r'(' + iden_rex + r')(\s*)(=>)(\s*)', bygroups(Name.Class, Whitespace, Operator, Whitespace), 'property-constant-value'), ], 'property-set': [ comment_tuple, with_tuple, (r'(' + iden_rex + r')(\s+)(is)(\s+)', bygroups(Name.Class, Whitespace, Keyword.Namespace, Whitespace)), (definition_rex + r'(constant)', bygroups(Name.Variable.Global, Punctuation, Keyword), 'property-constant-declaration'), (definition_rex, bygroups(Name.Variable.Global, Punctuation), 'property-declaration'), (r'(end)(\s+)(' + iden_rex + r')(;)', bygroups(Keyword.Namespace, Whitespace, Name.Class, Punctuation), '#pop'), (r'(\s+)', Whitespace), ], 'property-section': [ text_tuple, comment_whitespace_tuple, (class_iden_rex + r'(\s*)(=>)(\s*)', bygroups(Name.Class, Punctuation, Name.Entity, Whitespace, Operator, Whitespace), 'property-section-property-value'), (r'(' + iden_rex + 
r')(\s*)(=>)(\s*)', bygroups(Name.Class, Whitespace, Operator, Whitespace), 'property-section-property-value'), (r'(\*\*})(\s*)(;)', bygroups(Punctuation, Whitespace, Punctuation), '#pop'), (r'([\s+])', Whitespace), (r'', Whitespace, '#pop'), ], 'call-section': [ text_tuple, comment_whitespace_tuple, (r'(' + iden_rex + r')(\s*)(:)(\s*)({)(\s*)', bygroups(Name.Class, Whitespace, Punctuation, Whitespace, Punctuation, Whitespace)), (definition_rex, bygroups(Name.Variable, Punctuation), 'declaration'), (r'}', Punctuation), terminator_tuple, ], 'id-or-classid': [ (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity), '#pop'), (r'(' + iden_rex + r')', Name.Entity, '#pop'), ], 'semicolon': [ (r'(\s*)(;)', bygroups(Whitespace, Punctuation), '#pop'), ], 'emv2-annex': [ (r'(use)(\s+)(types|type\s+equivalence|mappings|behavior)(\s+)', bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace, Whitespace), ('semicolon', 'id-or-classid')), (r'(error\s+propagations)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'emv2-propagations'), (r'(component\s+error\s+behavior)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'emv2-component'), (r'(\*\*})(\s*)(;)', bygroups(Punctuation, Whitespace, Punctuation), '#pop'), (r'(\s+)', Whitespace), ], 'emv2-propagations': [ (r'(not|in|out|propagation)', Keyword.Namespace), (r'(:|{|\*|::|}|;)', Punctuation), (r'(end\s+propagations)(\s*)(;)', bygroups(Keyword.Namespace, Whitespace, Punctuation), '#pop'), (r'(' + iden_rex + r')', Name.Entity), (r'(\s+)', Whitespace), ], 'emv2-component': [ (r'(use)(\s+)(transformations)(\s+)', bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace, Whitespace), ('semicolon', 'id-or-classid')), (r'(events|transitions|propagations|detections|mode\s+mappings)', Keyword.Namespace), (r'(all|noerror)', Keyword.Constant), (r'(:|;|{|}|\(|\))', Punctuation), (r'(-\[)(\s*)', bygroups(Punctuation, Whitespace), 'emv2-error-condition'), (r'(end\s+component)(\s*)(;)', bygroups(Keyword.Namespace, Whitespace, Punctuation), '#pop'), (r'(' + iden_rex + r')', Name.Entity), (r'(\s+)', Whitespace), ], 'emv2-error-condition': [ (r'(and|ormore|orless|or)', Keyword.Constant), (r'(\(|\)|\{|\}|::)', Punctuation), (r'[0-9]+', Number.Integer), (r'(\]->)', Punctuation, '#pop'), (r'(' + iden_rex + r')', Name.Entity), (r'(\s+)', Whitespace), ], 'root': [ (r'(\n\s*|\t)', Whitespace), comment_tuple, (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'packageOrSystem'), (r'(public|private)', Keyword.Namespace), # import_declaration with_tuple, # alias_declaration (r'(' + iden_rex + r')(\s+)(renames)(\s+)', bygroups(Name.Namespace, Whitespace, Keyword, Whitespace), 'alias-body'), (r'(annex)(\s+)(EMV2)(\s*)({\*\*)', bygroups(Keyword.Namespace, Whitespace, Name.Namespace, Whitespace, Punctuation), 'emv2-annex'), (r'(annex)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'annex'), (component_category + r'(\s+)', bygroups(Keyword.Type, Whitespace), 'package-declaration'), (r'(calls)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'call-section'), (r'(subcomponents|connections|features|flows)(\s+)', bygroups(Keyword.Namespace, Whitespace)), (definition_rex, bygroups(Name.Variable, Punctuation), 'declaration'), (r'(properties)(\s*)', bygroups(Keyword.Namespace, Whitespace), 'property-section'), (r'(end)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'package-declaration'), (r'(property\s+set)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'property-set'), (r'(\s+)', Whitespace), ] }
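# A hedged usage sketch (not part of the original module): run a tiny,
# made-up AADL fragment through the lexer defined above. Any pygments
# formatter works; HtmlFormatter is an arbitrary choice here.
from pygments import highlight
from pygments.formatters import HtmlFormatter

aadl_sample = (
    "package example\n"
    "public\n"
    "  system server\n"
    "  end server;\n"
    "end example;\n"
)
print(highlight(aadl_sample, AADLLexer(), HtmlFormatter()))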
def gen_crystalstrings_rules(): def intp_regex_callback(self, match, ctx): yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ["interpolated-regex"]) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Regex, match.group(4) # end[imsx]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): yield match.start(1), String.Other, match.group(1) nctx = LexerContext(match.group(3), 0, ["interpolated-string"]) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3) + i, t, v yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states["strings"] = [ (r"\:@{0,2}[a-zA-Z_]\w*[!?]?", String.Symbol), (words(CRYSTAL_OPERATORS, prefix=r"\:@{0,2}"), String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), # This allows arbitrary text after '\ for simplicity (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char), (r':"', String.Symbol, "simple-sym"), # Crystal doesn't have "symbol:"s but this simplifies function args (r"([a-zA-Z_]\w*)(:)(?!:)", bygroups(String.Symbol, Punctuation)), (r'"', String.Double, "simple-string"), (r"(?<!\.)`", String.Backtick, "simple-backtick"), ] # double-quoted string and symbol for name, ttype, end in ( ("string", String.Double, '"'), ("sym", String.Symbol, '"'), ("backtick", String.Backtick, "`"), ): states["simple-" + name] = [ include("string-escaped" if name == "sym" else "string-intp-escaped"), (r"[^\\%s#]+" % end, ttype), (r"[\\#]", ttype), (end, ttype, "#pop"), ] # braced quoted strings for lbrace, rbrace, bracecc, name in ( ("\\{", "\\}", "{}", "cb"), ("\\[", "\\]", "\\[\\]", "sb"), ("\\(", "\\)", "()", "pa"), ("<", ">", "<>", "ab"), ): states[name + "-intp-string"] = [ (r"\\[" + lbrace + "]", String.Other), (lbrace, String.Other, "#push"), (rbrace, String.Other, "#pop"), include("string-intp-escaped"), (r"[\\#" + bracecc + "]", String.Other), (r"[^\\#" + bracecc + "]+", String.Other), ] states["strings"].append((r"%" + lbrace, String.Other, name + "-intp-string")) states[name + "-string"] = [ (r"\\[\\" + bracecc + "]", String.Other), (lbrace, String.Other, "#push"), (rbrace, String.Other, "#pop"), (r"[\\#" + bracecc + "]", String.Other), (r"[^\\#" + bracecc + "]+", String.Other), ] # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html states["strings"].append((r"%[wi]" + lbrace, String.Other, name + "-string")) states[name + "-regex"] = [ (r"\\[\\" + bracecc + "]", String.Regex), (lbrace, String.Regex, "#push"), (rbrace + "[imsx]*", String.Regex, "#pop"), include("string-intp"), (r"[\\#" + bracecc + "]", String.Regex), (r"[^\\#" + bracecc + "]+", String.Regex), ] states["strings"].append((r"%r" + lbrace, String.Regex, name + "-regex")) # these must come after %<brace>! states["strings"] += [ # %r regex (r"(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)", intp_regex_callback), # regular fancy strings with qsw (r"(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces (r"(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)", bygroups(Text, String.Other, None)), # and because of fixed width lookbehinds the whole thing a # second time for line startings... (r"^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)", bygroups(Text, String.Other, None)), # all regular fancy strings without qsw (r"(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback), ] return states
class BetterTypeScriptLexer(RegexLexer): """ For `TypeScript <https://www.typescriptlang.org/>`_ source code. """ name = 'TypeScript' aliases = ['ts'] filenames = ['*.ts'] mimetypes = ['text/x-typescript'] flags = re.DOTALL tokens = { 'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline)], 'slashstartsregex': [ include('commentsandwhitespace'), ( r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop' ), (r'(?=/)', Text, ('#pop', 'badregex')), (r'', Text, '#pop') ], 'badregex': [(r'\n', Text, '#pop')], 'typeexp': [ (r'[a-zA-Z0-9_?.$]+', Keyword.Type), (r'\s+', Text), (r'[|]', Text), (r'\n', Text, "#pop"), (r';', Text, "#pop"), (r'', Text, "#pop"), ], 'root': [ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), ( r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex' ), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), ( r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' r'throw|try|catch|finally|new|delete|typeof|instanceof|void|' r'this)\b', Keyword, 'slashstartsregex' ), ( r'(var|let|const|with|function)\b', Keyword.Declaration, 'slashstartsregex' ), ( r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|' r'extends|final|float|goto|implements|import|int|interface|long|native|' r'package|private|protected|public|short|static|super|synchronized|throws|' r'transient|volatile)\b', Keyword.Reserved ), (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant), ( r'(Array|Boolean|Date|Error|Function|Math|netscape|' r'Number|Object|Packages|RegExp|String|sun|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin ), # Match stuff like: module name {...} ( r'\b(module)(\s*)(\s*[a-zA-Z0-9_?.$][\w?.$]*)(\s*)', bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex' ), # Match variable type keywords (r'\b(string|bool|number)\b', Keyword.Type), # Match stuff like: constructor (r'\b(constructor|declare|interface|as|AS)\b', Keyword.Reserved), # Match stuff like: super(argument, list) ( r'(super)(\s*)\(([a-zA-Z0-9,_?.$\s]+\s*)\)', bygroups(Keyword.Reserved, Text), 'slashstartsregex' ), # Match stuff like: function() {...} (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'), # Match stuff like: (function: return type) ( r'([a-zA-Z0-9_?.$][\w?.$]*)(\s*:\s*)', bygroups(Name.Other, Text), 'typeexp' ), # Match stuff like: type Foo = Bar | Baz ( r'\b(type)(\s*)([a-zA-Z0-9_?.$]+)(\s*)(=)(\s*)', bygroups( Keyword.Reserved, Text, Name.Other, Text, Operator, Text ), 'typeexp' ), (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), ] }
class TAPLexer(RegexLexer): """ For Test Anything Protocol (TAP) output. .. versionadded:: 2.1 """ name = 'TAP' aliases = ['tap'] filenames = ['*.tap'] tokens = { 'root': [ # A TAP version may be specified. (r'^TAP version \d+\n', Name.Namespace), # Specify a plan with a plan line. (r'^1..\d+', Keyword.Declaration, 'plan'), # A test failure (r'^(not ok)([^\S\n]*)(\d*)', bygroups(Generic.Error, Text, Number.Integer), 'test'), # A test success (r'^(ok)([^\S\n]*)(\d*)', bygroups(Keyword.Reserved, Text, Number.Integer), 'test'), # Diagnostics start with a hash. (r'^#.*\n', Comment), # TAP's version of an abort statement. (r'^Bail out!.*\n', Generic.Error), # TAP ignores any unrecognized lines. (r'^.*\n', Text), ], 'plan': [ # Consume whitespace (but not newline). (r'[^\S\n]+', Text), # A plan may have a directive with it. (r'#', Comment, 'directive'), # Or it could just end. (r'\n', Comment, '#pop'), # Anything else is wrong. (r'.*\n', Generic.Error, '#pop'), ], 'test': [ # Consume whitespace (but not newline). (r'[^\S\n]+', Text), # A test may have a directive with it. (r'#', Comment, 'directive'), (r'\S+', Text), (r'\n', Text, '#pop'), ], 'directive': [ # Consume whitespace (but not newline). (r'[^\S\n]+', Comment), # Extract todo items. (r'(?i)\bTODO\b', Comment.Preproc), # Extract skip items. (r'(?i)\bSKIP\S*', Comment.Preproc), (r'\S+', Comment), (r'\n', Comment, '#pop:2'), ], }
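# Illustrative only: tokenize a short TAP transcript to see the rules above in
# action -- 'ok' lines come out as Keyword.Reserved, 'not ok' as Generic.Error,
# and the '# TODO' directive as Comment.Preproc.
tap_sample = (
    "TAP version 13\n"
    "1..2\n"
    "ok 1 - addition works\n"
    "not ok 2 - subtraction # TODO implement\n"
)
for ttype, value in TAPLexer().get_tokens(tap_sample):
    if value.strip():
        print(ttype, repr(value))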
class RstLexer(RegexLexer): """ For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup. .. versionadded:: 0.7 Additional options accepted: `handlecodeblocks` Highlight the contents of ``.. sourcecode:: language``, ``.. code:: language`` and ``.. code-block:: language`` directives with a lexer for the given language (default: ``True``). .. versionadded:: 0.8 """ name = 'reStructuredText' aliases = ['rst', 'rest', 'restructuredtext'] filenames = ['*.rst', '*.rest'] mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] flags = re.MULTILINE def _handle_sourcecode(self, match): from pygments.lexers import get_lexer_by_name # section header yield match.start(1), Punctuation, match.group(1) yield match.start(2), Text, match.group(2) yield match.start(3), Operator.Word, match.group(3) yield match.start(4), Punctuation, match.group(4) yield match.start(5), Text, match.group(5) yield match.start(6), Keyword, match.group(6) yield match.start(7), Text, match.group(7) # lookup lexer if wanted and existing lexer = None if self.handlecodeblocks: try: lexer = get_lexer_by_name(match.group(6).strip()) except ClassNotFound: pass indention = match.group(8) indention_size = len(indention) code = (indention + match.group(9) + match.group(10) + match.group(11)) # no lexer for this language. handle it like it was a code block if lexer is None: yield match.start(8), String, code return # highlight the lines with the lexer. ins = [] codelines = code.splitlines(True) code = '' for line in codelines: if len(line) > indention_size: ins.append((len(code), [(0, Text, line[:indention_size])])) code += line[indention_size:] else: code += line for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): yield item # from docutils.parsers.rst.states closers = u'\'")]}>\u2019\u201d\xbb!?' 
unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0' end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' % (re.escape(unicode_delimiters), re.escape(closers))) tokens = { 'root': [ # Heading with overline (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' r'(.+)(\n)(\1)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text, Generic.Heading, Text)), # Plain heading (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), # Bulleted lists (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), # Numbered lists (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), # Numbered, but keep words at BOL from becoming lists (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', bygroups(Text, Number, using(this, state='inline'))), (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', bygroups(Text, Number, using(this, state='inline'))), # Line blocks (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', _handle_sourcecode), # A directive (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, using(this, state='inline'))), # A reference target (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A footnote/citation target (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A substitution def (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, Punctuation, Text, using(this, state='inline'))), # Comments (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), # Field list (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)), (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text, Name.Function)), # Definition list (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', bygroups(using(this, state='inline'), using(this, state='inline'))), # Code blocks (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)', bygroups(String.Escape, Text, String, String, Text, String)), include('inline'), ], 'inline': [ (r'\\.', Text), # escape (r'``', String, 'literal'), # code ( r'(`.+?)(<.+?>)(`__?)', # reference with inline target bygroups(String, String.Interpol, String)), (r'`.+?`__?', String), # reference (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', bygroups(Name.Variable, Name.Attribute)), # role (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', bygroups(Name.Attribute, Name.Variable)), # role (content first) (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis (r'\*.+?\*', Generic.Emph), # Emphasis (r'\[.*?\]_', String), # Footnote or citation (r'<.+?>', Name.Tag), # Hyperlink (r'[^\\\n\[*`:]+', Text), (r'.', Text), ], 'literal': [ (r'[^`]+', String), (r'``' + end_string_suffix, String, '#pop'), (r'`', String), ] } def __init__(self, **options): self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) RegexLexer.__init__(self, **options) def analyse_text(text): if text[:2] == '..' 
and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                         # has two lines
                p1 * 2 + 1 == p2 and            # they are the same length
                text[p1 + 1] in '-=' and        # the next line both starts and ends with
                text[p1 + 1] == text[p2 - 1]):  # ...a sufficiently high header
            return 0.5
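# Hedged sketch (not from the original source): the `handlecodeblocks` option
# documented above controls whether ".. code-block:: <lang>" bodies are
# re-lexed with the named language's lexer.
rst_src = ".. code-block:: python\n\n   print('hi')\n\n"
nested = RstLexer()                        # handlecodeblocks=True by default
plain = RstLexer(handlecodeblocks=False)   # directive body stays plain String
print(len(list(nested.get_tokens(rst_src))),
      len(list(plain.get_tokens(rst_src))))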
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
# =============================================================================

import os
import re

# Monkey patch for pygments reporting an error when generator expressions are
# used.
# https://bitbucket.org/birkenfeld/pygments-main/issue/942/cmake-generator-expressions-not-handled
from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator
from pygments.lexer import bygroups

CMakeLexer.tokens["args"].append(("(\\$<)(.+?)(>)",
                                  bygroups(Operator, Name.Variable, Operator)))

# Monkey patch for sphinx generating invalid content for qcollectiongenerator
# https://bitbucket.org/birkenfeld/sphinx/issue/1435/qthelp-builder-should-htmlescape-keywords
from sphinx.util.pycompat import htmlescape
from sphinx.builders.qthelp import QtHelpBuilder

old_build_keywords = QtHelpBuilder.build_keywords

def new_build_keywords(self, title, refs, subitems):
    old_items = old_build_keywords(self, title, refs, subitems)
    new_items = []
    for item in old_items:
        before, rest = item.split('ref="', 1)
        ref, after = rest.split('"')
        # Re-assemble each keyword entry with the reference HTML-escaped.
        new_items.append(before + 'ref="' + htmlescape(ref) + '"' + after)
    return new_items

QtHelpBuilder.build_keywords = new_build_keywords
class BooLexer(RegexLexer): """ For `Boo <http://boo.codehaus.org/>`_ source code. """ name = 'Boo' aliases = ['boo'] filenames = ['*.boo'] mimetypes = ['text/x-boo'] tokens = { 'root': [ (r'\s+', Text), (r'(#|//).*$', Comment.Single), (r'/[*]', Comment.Multiline, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'/(\\\\|\\/|[^/\s])/', String.Regex), (r'@/(\\\\|\\/|[^/])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' r'partial|private|protected|public|return|set|static|' r'struct|transient|virtual|yield|super|and|break|cast|' r'continue|elif|else|ensure|except|for|given|goto|if|in|' r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|' r'while|from|as)\b', Keyword), (r'def(?=\s+\(.*?\))', Keyword), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|' r'assert|checked|enumerate|filter|getter|len|lock|map|' r'matrix|max|min|normalArrayIndexing|print|property|range|' r'rawArrayIndexing|required|typeof|unchecked|using|' r'yieldAll|zip)\b', Name.Builtin), (r'"""(\\\\|\\"|.*?)"""', String.Double), (r'"(\\\\|\\"|[^"]*?)"', String.Double), (r"'(\\\\|\\'|[^']*?)'", String.Single), (r'[a-zA-Z_]\w*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'[0-9][0-9.]*(ms?|d|h|s)', Number), (r'0\d+', Number.Oct), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer), ], 'comment': [ ('/[*]', Comment.Multiline, '#push'), ('[*]/', Comment.Multiline, '#pop'), ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline) ], 'funcname': [ (r'[a-zA-Z_]\w*', Name.Function, '#pop') ], 'classname': [ (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'namespace': [ (r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop') ] }
class NedLexer(RegexLexer): name = 'ned' filenames = ['*.ned'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'(L?)(")', bygroups(String.Affix, String), 'string'), (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(true|false)\b', Name.Builtin), (r'(<-->|-->|<--|\.\.)', Keyword), (r'(bool|double|int|xml)\b', Keyword.Type), (r'(inout|input|output)\b', Keyword.Type), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words(("channel", "channelinterface", "simple", "module", "network", "moduleinterface"), suffix=r'\b'), Keyword), (words( ("parameters", "gates", "types", "submodules", "connections"), suffix=r'\b'), Keyword), (words(("volatile", "allowunconnected", "extends", "for", "if", "import", "like", "package", "property"), suffix=r'\b'), Keyword), (words(("sizeof", "const", "default", "ask", "this", "index", "typename", "xmldoc"), suffix=r'\b'), Keyword), (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta", "binomial", "cauchy", "ceil", "chi_square", "cos", "erlang_k", "exp", "exponential", "fabs", "floor", "fmod", "gamma_d", "genk_exponential", "genk_intuniform", "genk_normal", "genk_truncnormal", "genk_uniform", "geometric", "hypergeometric", "hypot", "intuniform", "log", "log10", "lognormal", "max", "min", "negbinomial", "normal", "pareto_shifted", "poisson", "pow", "simTime", "sin", "sqrt", "student_t", "tan", "triang", "truncnormal", "uniform", "weibull", "xml", "xmldoc"), suffix=r'\b'), Name.Builtin), ('@[a-zA-Z_]\w*', Name.Builtin), ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ] }
class CSharpLexer(RegexLexer): """ For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. .. versionadded:: 0.8 """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': r'@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in levels.items(): tokens[levelname] = { 'root': [ # method names (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*].*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" 
r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|as|async|await|base|break|by|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|let|lock|new|null|on|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial|yield|add|remove|value|alias|ascending|' r'descending|from|group|into|orderby|select|thenby|where|' r'join|equals)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|' r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [ (cs_ident, Name.Class, '#pop'), default('#pop'), ], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'), ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
import re

from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, PerlLexer, PythonLexer,
    Python3Lexer, RubyLexer, TexLexer,
)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r'(\%+)(\w+)\s+(\.*)(\n)',
     bygroups(Operator, Keyword, using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
def build_ipy_lexer(python3): """Builds IPython lexers depending on the value of `python3`. The lexer inherits from an appropriate Python lexer and then adds information about IPython specific keywords (i.e. magic commands, shell commands, etc.) Parameters ---------- python3 : bool If `True`, then build an IPython lexer from a Python 3 lexer. """ # It would be nice to have a single IPython lexer class which takes # a boolean `python3`. But since there are two Python lexer classes, # we will also have two IPython lexer classes. if python3: PyLexer = Python3Lexer name = 'IPython3' aliases = ['ipython3'] doc = """IPython3 Lexer""" else: PyLexer = PythonLexer name = 'IPython' aliases = ['ipython2', 'ipython'] doc = """IPython Lexer""" ipython_tokens = [ (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))), (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))), (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))), (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))), (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))), (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))), (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))), (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))), (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))), (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)), (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))), (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)), (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)), (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword, using(BashLexer), Text)), (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)), (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)), (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)), (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)), ] tokens = PyLexer.tokens.copy() tokens['root'] = ipython_tokens + tokens['root'] attrs = {'name': name, 'aliases': aliases, 'filenames': [], '__doc__': doc, 'tokens': tokens} return type(name, (PyLexer,), attrs)
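# Presumed usage of the factory above: build the two concrete classes once at
# import time and instantiate them like any other pygments lexer.
IPythonLexer = build_ipy_lexer(python3=False)
IPython3Lexer = build_ipy_lexer(python3=True)
print(IPython3Lexer.name, IPython3Lexer.aliases)
print(list(IPython3Lexer().get_tokens('%timeit x = 1\n'))[:3])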
:license: BSD, see LICENSE for details. """ import re from pygments.lexer import RegexLexer, bygroups, default, include, using, words from pygments.token import Comment, Error, Keyword, Name, Number, Operator, Punctuation, \ String, Text, Whitespace from pygments.lexers._csound_builtins import OPCODES, DEPRECATED_OPCODES from pygments.lexers.html import HtmlLexer from pygments.lexers.python import PythonLexer from pygments.lexers.scripting import LuaLexer __all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer'] newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text)) class CsoundLexer(RegexLexer): tokens = { 'whitespace': [ (r'[ \t]+', Text), (r'/[*](?:.|\n)*?[*]/', Comment.Multiline), (r'(?:;|//).*$', Comment.Single), (r'(\\)(\n)', bygroups(Whitespace, Text)) ], 'preprocessor directives': [ (r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc), (r'#include', Comment.Preproc, 'include directive'), (r'#[ \t]*define', Comment.Preproc, 'define directive'),
class NemerleLexer(RegexLexer): """ For `Nemerle <http://nemerle.org>`_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. .. versionadded:: 1.5 """ name = 'Nemerle' aliases = ['nemerle'] filenames = ['*.n'] mimetypes = ['text/x-nemerle'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': r'@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), 'full': ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in levels.items(): tokens[levelname] = { 'root': [ # method names (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment.Single), (r'/[*].*?[*]/', Comment.Multiline), (r'\n', Text), (r'\$\s*"', String, 'splice-string'), (r'\$\s*<#', String, 'splice-string2'), (r'<#', String, 'recursive-string'), (r'(<\[)\s*(' + cs_ident + ':)?', Keyword), (r'\]\>', Keyword), # quasiquotation only (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b.*?\n', Comment.Preproc), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, Keyword)), (r'(abstract|and|as|base|catch|def|delegate|' r'enum|event|extern|false|finally|' r'fun|implements|interface|internal|' r'is|macro|match|matches|module|mutable|new|' r'null|out|override|params|partial|private|' r'protected|public|ref|sealed|static|' r'syntax|this|throw|true|try|type|typeof|' r'virtual|volatile|when|where|with|' r'assert|assert2|async|break|checked|continue|do|else|' r'ensures|for|foreach|if|late|lock|new|nolate|' r'otherwise|regexp|repeat|requires|return|surroundwith|' r'unchecked|unless|using|while|yield)\b', Keyword), (r'(global)(::)', bygroups(Keyword, Punctuation)), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort|void|array|list)\b\??', Keyword.Type), (r'(:>?)\s*(' + cs_ident + r'\??)', bygroups(Punctuation, Keyword.Type)), (r'(class|struct|variant|module)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [ (cs_ident, 
Name.Class, '#pop') ], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ], 'splice-string': [ (r'[^"$]', String), (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'\\"', String), (r'"', String, '#pop') ], 'splice-string2': [ (r'[^#<>$]', String), (r'\$' + cs_ident, Name), (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), (r'<#', String, '#push'), (r'#>', String, '#pop') ], 'recursive-string': [ (r'[^#<>]', String), (r'<#', String, '#push'), (r'#>', String, '#pop') ], 'splice-string-content': [ (r'if|match', Keyword), (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation), (cs_ident, Name), (r'\d+', Number), (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options)
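# Same `unicodelevel` machinery as the C# lexer earlier in this file; a
# hedged one-liner showing construction with an explicit level.
nemerle = NemerleLexer(unicodelevel='basic')  # 'none' | 'basic' | 'full'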
class DtdLexer(RegexLexer): """ A lexer for DTDs (Document Type Definitions). .. versionadded:: 1.5 """ flags = re.MULTILINE | re.DOTALL name = 'DTD' aliases = ['dtd'] filenames = ['*.dtd'] mimetypes = ['application/xml-dtd'] tokens = { 'root': [ include('common'), (r'(<!ELEMENT)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'element'), (r'(<!ATTLIST)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'attlist'), (r'(<!ENTITY)(\s+)(\S+)', bygroups(Keyword, Text, Name.Entity), 'entity'), (r'(<!NOTATION)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'notation'), ( r'(<!\[)([^\[\s]+)(\s*)(\[)', # conditional sections bygroups(Keyword, Name.Entity, Text, Keyword)), (r'(<!DOCTYPE)(\s+)([^>\s]+)', bygroups(Keyword, Text, Name.Tag)), (r'PUBLIC|SYSTEM', Keyword.Constant), (r'[\[\]>]', Keyword), ], 'common': [ (r'\s+', Text), (r'(%|&)[^;]*;', Name.Entity), ('<!--', Comment, 'comment'), (r'[(|)*,?+]', Operator), (r'"[^"]*"', String.Double), (r'\'[^\']*\'', String.Single), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'element': [ include('common'), (r'EMPTY|ANY|#PCDATA', Keyword.Constant), (r'[^>\s|()?+*,]+', Name.Tag), (r'>', Keyword, '#pop'), ], 'attlist': [ include('common'), (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION', Keyword.Constant), (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant), (r'xml:space|xml:lang', Keyword.Reserved), (r'[^>\s|()?+*,]+', Name.Attribute), (r'>', Keyword, '#pop'), ], 'entity': [ include('common'), (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant), (r'[^>\s|()?+*,]+', Name.Entity), (r'>', Keyword, '#pop'), ], 'notation': [ include('common'), (r'SYSTEM|PUBLIC', Keyword.Constant), (r'[^>\s|()?+*,]+', Name.Attribute), (r'>', Keyword, '#pop'), ], } def analyse_text(text): if not looks_like_xml(text) and \ ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text): return 0.8
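# Illustrative check (sample input made up): analyse_text above is the hook
# that pygments' guess_lexer() consults; DTD declarations in text that does
# not otherwise look like XML should score 0.8.
dtd_doc = '<!ELEMENT note (to, from, body)>\n<!ATTLIST note id ID #REQUIRED>\n'
print(DtdLexer.analyse_text(dtd_doc))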
def check_lexer(lexer_name, cls, mod_path, min_level, output_stream=sys.stdout): #print lexer_name #print cls().tokens has_errors = False bygroups_callback = func_code(bygroups(1)) for state, pats in cls().tokens.items(): if not isinstance(pats, list): # This is for Inform7Lexer print(lexer_name, 'WEIRD', file=output_stream) return output_stream for i, pat in enumerate(pats): if hasattr(pat, 'state'): # new 'default' continue try: if isinstance(pat[0], Future): pat = (pat[0].get(),) + pat[1:] reg = Regex.get_parse_tree(pat[0], cls.flags) except TypeError: # Doesn't support _inherit yet. continue except Exception: try: print(pat[0], cls, file=output_stream) except: pass raise # Special problem: display an error if count of args to # bygroups(...) doesn't match the number of capture groups if callable(pat[1]) and func_code(pat[1]) is bygroups_callback: by_groups = func_closure(pat[1]) else: by_groups = None if ONLY_FUNC: errs = [] getattr(regexlint.checkers, ONLY_FUNC)(reg, errs) else: errs = run_all_checkers(reg, by_groups) # Special case for empty string, since it needs action. manual_check_for_empty_string_match(reg, errs, pat) errs.sort(key=lambda k: (k[1], k[0])) if errs: #print "Errors in", lexer_name, state, "pattern", i for num, severity, pos1, text in errs: if severity < min_level: continue # Only set this if we're going to output something -- # otherwise the [Lexer] OK won't print has_errors = True foo = find_offending_line(mod_path, lexer_name, state, i, pos1) if foo: line = 'L' + str(foo[0]) else: line = 'pat#' + str(i+1) print('%s%s:%s:%s:%s: %s' % ( logging.getLevelName(severity)[0], num, lexer_name, state, line, text), file=output_stream) if foo: mark(*(foo + (output_stream,))) else: mark_str(pos1, pos1+1, pat[0], output_stream) if not has_errors: print(lexer_name, 'OK', file=output_stream) return output_stream
def check_lexer(lexer_name, cls, mod_path, min_level, output_stream=sys.stdout): #print lexer_name #print cls().tokens has_errors = False bygroups_callback = bygroups(1).func_code for state, pats in cls().tokens.iteritems(): if not isinstance(pats, list): # This is for Inform7Lexer print >>output_stream, lexer_name, 'WEIRD' return output_stream for i, pat in enumerate(pats): if hasattr(pat, 'state'): # new 'default' continue try: if isinstance(pat[0], Future): pat = (pat[0].get(),) + pat[1:] reg = Regex.get_parse_tree(pat[0], cls.flags) except TypeError: # Doesn't support _inherit yet. continue except Exception: try: print >>output_stream, pat[0], cls except: pass raise # Special problem: display an error if count of args to # bygroups(...) doesn't match the number of capture groups if callable(pat[1]) and pat[1].func_code is bygroups_callback: by_groups = pat[1].func_closure[0].cell_contents else: by_groups = None if ONLY_FUNC: errs = [] getattr(regexlint.checkers, ONLY_FUNC)(reg, errs) else: errs = run_all_checkers(reg, by_groups) # Special case for empty string, since it needs action. manual_check_for_empty_string_match(reg, errs, pat) errs.sort(key=lambda k: (k[1], k[0])) if errs: #print "Errors in", lexer_name, state, "pattern", i for num, severity, pos1, text in errs: if severity < min_level: continue # Only set this if we're going to output something -- # otherwise the [Lexer] OK won't print has_errors = True foo = find_offending_line(mod_path, lexer_name, state, i, pos1) if foo: line = 'L' + str(foo[0]) else: line = 'pat#' + str(i+1) print >>output_stream, '%s%s:%s:%s:%s: %s' % ( logging.getLevelName(severity)[0], num, lexer_name, state, line, text) if foo: mark(*(foo + (output_stream,))) else: # Substract one for closing quote start = len(consistent_repr(pat[0][:pos1])) - 1 end = len(consistent_repr(pat[0][:pos1+1])) - 1 if start == end: # This handles the case where pos1 points to the end # of the string. Regex "|" with pos1 = 1. end += 1 assert end > start text, start, end = shorten(repr(pat[0]), start, end) mark(-1, start, end, text, output_stream) if not has_errors: print >>output_stream, lexer_name, "OK" return output_stream
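# Hedged driver sketch for the checker above (matching the Python 3 variant's
# signature); the lexer name, class, and module path are illustrative
# arguments, not values from the original script, and running it assumes the
# regexlint environment this module imports from.
import sys
import logging
from pygments.lexers.python import PythonLexer

check_lexer('PythonLexer', PythonLexer,
            'pygments/lexers/python.py',   # hypothetical on-disk path
            min_level=logging.WARNING,
            output_stream=sys.stdout)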
class GherkinLexer(RegexLexer): """ For `Gherkin <http://github.com/aslakhellesoy/gherkin/>` syntax. .. versionadded:: 1.2 """ name = 'Gherkin' aliases = ['cucumber', 'gherkin'] filenames = ['*.feature'] mimetypes = ['text/x-gherkin'] feature_keywords = '^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$' feature_element_keywords = '^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' examples_keywords = '^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$' step_keywords = '^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai 
|Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' tokens = { 'comments': [ (r'^\s*#.*$', Comment), ], 'feature_elements': [ (step_keywords, Keyword, "step_content_stack"), include('comments'), (r"(\s|.)", Name.Function), ], 'feature_elements_on_stack': [ (step_keywords, Keyword, "#pop:2"), include('comments'), (r"(\s|.)", Name.Function), ], 'examples_table': [ (r"\s+\|", Keyword, 'examples_table_header'), include('comments'), (r"(\s|.)", Name.Function), ], 'examples_table_header': [ (r"\s+\|\s*$", Keyword, "#pop:2"), include('comments'), (r"\\\|", Name.Variable), (r"\s*\|", Keyword), (r"[^|]", Name.Variable), ], 'scenario_sections_on_stack': [ (feature_element_keywords, bygroups(Name.Function, Keyword, Keyword, Name.Function), "feature_elements_on_stack"), ], 'narrative': [ include('scenario_sections_on_stack'), include('comments'), (r"(\s|.)", Name.Function), ], 'table_vars': [ (r'(<[^>]+>)', Name.Variable), ], 'numbers': [ (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', String), ], 'string': [ include('table_vars'), (r'(\s|.)', String), ], 'py_string': [ (r'"""', Keyword, "#pop"), include('string'), ], 'step_content_root': [ (r"$", Keyword, "#pop"), include('step_content'), ], 'step_content_stack': [ (r"$", Keyword, "#pop:2"), include('step_content'), ], 'step_content': [ (r'"', Name.Function, "double_string"), include('table_vars'), include('numbers'), include('comments'), (r'(\s|.)', Name.Function), ], 'table_content': [ (r"\s+\|\s*$", Keyword, "#pop"), include('comments'), (r"\\\|", String), (r"\s*\|", Keyword), include('string'), ], 'double_string': [ (r'"', Name.Function, "#pop"), include('string'), ], 'root': [ (r'\n', Name.Function), include('comments'), (r'"""', Keyword, "py_string"), (r'\s+\|', Keyword, 'table_content'), (r'"', Name.Function, "double_string"), include('table_vars'), include('numbers'), (r'(\s*)(@[^@\r\n\t ]+)', bygroups(Name.Function, Name.Tag)), (step_keywords, bygroups(Name.Function, Keyword), 'step_content_root'), (feature_keywords, bygroups(Keyword, Keyword, Name.Function), 'narrative'), (feature_element_keywords, bygroups(Name.Function, Keyword, Keyword, Name.Function), 'feature_elements'), (examples_keywords, bygroups(Name.Function, Keyword, Keyword, Name.Function), 'examples_table'), (r'(\s|.)', Name.Function), ] }
class MarkdownLexer(RegexLexer): """ For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup. .. versionadded:: 2.2 """ name = 'markdown' aliases = ['md'] filenames = ['*.md'] mimetypes = ["text/x-markdown"] flags = re.MULTILINE def _handle_codeblock(self, match): """ match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks """ from pygments.lexers import get_lexer_by_name # section header yield match.start(1), String, match.group(1) yield match.start(2), String, match.group(2) yield match.start(3), Text, match.group(3) # lookup lexer if wanted and existing lexer = None if self.handlecodeblocks: try: lexer = get_lexer_by_name(match.group(2).strip()) except ClassNotFound: pass code = match.group(4) # no lexer for this language. handle it like it was a code block if lexer is None: yield match.start(4), String, code else: for item in do_insertions([], lexer.get_tokens_unprocessed(code)): yield item yield match.start(5), String, match.group(5) tokens = { 'root': [ # heading with pound prefix (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), # task list (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', bygroups(Text, Keyword, Keyword, using(this, state='inline'))), # bulleted lists (r'^(\s*)([*-])(\s)(.+\n)', bygroups(Text, Keyword, Text, using(this, state='inline'))), # numbered lists (r'^(\s*)([0-9]+\.)( .+\n)', bygroups(Text, Keyword, using(this, state='inline'))), # quote (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), # text block (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), # code block with language (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), include('inline'), ], 'inline': [ # escape (r'\\.', Text), # italics (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)), # bold # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)), # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)), # strikethrough (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)), # inline code (r'`[^`]+`', String.Backtick), # mentions and topics (twitter and github stuff) (r'[@#][\w/:]+', Name.Entity), # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png) (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), # reference-style links, e.g.: # [an example][id] # [id]: http://example.com/ (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', bygroups(Text, Name.Label, Text, Name.Attribute)), # general text, must come last! (r'[^\\\s]+', Text), (r'.', Text), ], } def __init__(self, **options): self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) RegexLexer.__init__(self, **options)
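# Sketch (assumed, mirroring the RST example earlier): a fenced block with a
# language tag is delegated to that language's lexer via _handle_codeblock,
# while a bare ``` fence stays a plain text block.
md_src = "# Title\n```python\nx = 1\n```\n"
for ttype, value in MarkdownLexer().get_tokens(md_src):
    if value.strip():
        print(ttype, repr(value))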
class VbNetLexer(RegexLexer):
    """
    For `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba']  # (?)

    uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \
               '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                 'Cf', 'Mn', 'Mc') + ']*'

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            (r'\n', Text),
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#Else|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum',
             Comment.Preproc),
            (r'[(){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            (words((
                'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case',
                'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl',
                'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng',
                'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare',
                'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else',
                'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False',
                'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo',
                'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let',
                'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase',
                'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing',
                'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator',
                'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides',
                'ParamArray', 'Partial', 'Private', 'Protected', 'Public',
                'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume',
                'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single',
                'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To',
                'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While',
                'Widening', 'With', 'WithEvents', 'WriteOnly'),
                prefix=r'(?<!\.)', suffix=r'\b'), Keyword),
            (r'(?<!\.)End\b', Keyword, 'end'),
            (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
            (r'(?<!\.)(Function|Sub|Property)(\s+)',
             bygroups(Keyword, Text), 'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)',
             bygroups(Keyword, Text), 'classname'),
            (r'(?<!\.)(Module|Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>\[\]]', Operator),
            ('"', String, 'string'),
            (r'_\n', Text),  # Line continuation (must be before Name)
            (uni_name + '[%&@!#$]?', Name),
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)(F[+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer),
        ],
        'string': [
            (r'""', String),
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        'dim': [
            (uni_name, Name.Variable, '#pop'),
            default('#pop'),  # any other syntax
        ],
        'funcname': [
            (uni_name, Name.Function, '#pop'),
        ],
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        'namespace': [
            (uni_name, Name.Namespace),
            (r'\.', Name.Namespace),
            default('#pop'),
        ],
        'end': [
            (r'\s+', Text),
            (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b',
             Keyword, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        if re.search(r'^\s*(#If|Module|Namespace)', text, re.MULTILINE):
            return 0.5
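# Quick sanity check of the guesser above (a sketch): pygments' LexerMeta
# wraps analyse_text so it can be called directly on the class and always
# returns a float in [0.0, 1.0].
if __name__ == '__main__':
    print(VbNetLexer.analyse_text('Module Hello\nEnd Module\n'))    # 0.5
    print(VbNetLexer.analyse_text('int main(void) { return 0; }'))  # 0.0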
class ThingsDBLexer(RegexLexer):
    """
    Lexer for the ThingsDB programming language.

    .. versionadded:: 2.9
    """
    name = 'ThingsDB'
    aliases = ['ti', 'thingsdb']
    filenames = ['*.ti']

    tokens = {
        'root': [
            include('expression'),
        ],
        'expression': [
            include('comments'),
            include('whitespace'),

            # numbers
            (r'[-+]?0b[01]+', Number.Bin),
            (r'[-+]?0o[0-7]+', Number.Oct),
            (r'([-+]?0x[0-9a-fA-F]+)', Number.Hex),
            (r'[-+]?[0-9]+', Number.Integer),
            (r'[-+]?((inf|nan)([^0-9A-Za-z_]|$)|[0-9]*\.[0-9]+(e[+-][0-9]+)?)',
             Number.Float),

            # strings
            (r'(?:"(?:[^"]*)")+', String.Double),
            (r"(?:'(?:[^']*)')+", String.Single),

            # literals
            (r'(true|false|nil)\b', Keyword.Constant),

            # regular expressions
            (r'(/[^/\\]*(?:\\.[^/\\]*)*/i?)', String.Regex),

            # thing id's
            (r'#[0-9]+', Comment.Preproc),

            # name, assignments and functions
            include('names'),

            (r'[(){}\[\],;]', Punctuation),
            (r'[+\-*/%&|<>^!~@=:?]', Operator),
        ],
        'names': [
            (r'(\.)'
             r'(add|call|contains|del|endswith|extend|filter|find|findindex|'
             r'get|has|id|indexof|keys|len|lower|map|pop|push|remove|set|sort|'
             r'splice|startswith|test|unwrap|upper|values|wrap)'
             r'(\()',
             bygroups(Name.Function, Name.Function, Punctuation), 'arguments'),
            (r'(array|assert|assert_err|auth_err|backup_info|backups_info|'
             r'bad_data_err|bool|closure|collection_info|collections_info|'
             r'counters|deep|del_backup|del_collection|del_expired|del_node|'
             r'del_procedure|del_token|del_type|del_user|err|float|'
             r'forbidden_err|grant|int|isarray|isascii|isbool|isbytes|iserr|'
             r'isfloat|isinf|isint|islist|isnan|isnil|israw|isset|isstr|'
             r'isthing|istuple|isutf8|lookup_err|max_quota_err|mod_type|new|'
             r'new_backup|new_collection|new_node|new_procedure|new_token|'
             r'new_type|new_user|node_err|node_info|nodes_info|now|'
             r'num_arguments_err|operation_err|overflow_err|procedure_doc|'
             r'procedure_info|procedures_info|raise|refs|rename_collection|'
             r'rename_user|reset_counters|return|revoke|run|set_log_level|set|'
             r'set_quota|set_type|shutdown|str|syntax_err|thing|try|type|'
             r'type_err|type_count|type_info|types_info|user_info|users_info|'
             r'value_err|wse|zero_div_err)'
             r'(\()',
             bygroups(Name.Function, Punctuation), 'arguments'),
            (r'(\.[A-Za-z_][0-9A-Za-z_]*)'
             r'(\s*)(=)',
             bygroups(Name.Attribute, Text, Operator)),
            (r'\.[A-Za-z_][0-9A-Za-z_]*', Name.Attribute),
            (r'([A-Za-z_][0-9A-Za-z_]*)(\s*)(=)',
             bygroups(Name.Variable, Text, Operator)),
            (r'[A-Za-z_][0-9A-Za-z_]*', Name.Variable),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//(.*?)\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'arguments': [
            include('expression'),
            (',', Punctuation),
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
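# Sketch: dump the token stream for a small ThingsDB snippet (input is
# illustrative only); '.filter(' routes through the 'names' state into
# 'arguments'.
if __name__ == '__main__':
    from pygments import lex
    code = '// count things\n.things.filter(|t| (t.id() > 41));\n'
    for tok, value in lex(code, ThingsDBLexer()):
        print(tok, repr(value))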
class LimboLexer(RegexLexer):
    """
    Lexer for `Limbo programming language <http://www.vitanuova.com/inferno/limbo.html>`_

    TODO:
        - maybe implement better var declaration highlighting
        - some simple syntax error highlighting

    .. versionadded:: 2.0
    """
    name = 'Limbo'
    aliases = ['limbo']
    filenames = ['*.b']
    mimetypes = ['text/limbo']

    tokens = {
        'whitespace': [
            (r'^(\s*)([a-zA-Z_]\w*:(\s*)\n)', bygroups(Text, Name.Label)),
            (r'\n', Text),
            (r'\s+', Text),
            (r'#(\n|(.|\n)*?[^\\]\n)', Comment.Single),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\', String),  # stray backslash
        ],
        'statements': [
            (r'"', String, 'string'),
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])', Number.Float),
            (r'16r[0-9a-fA-F]+', Number.Hex),
            (r'8r[0-7]+', Number.Oct),
            (r'((([1-3]\d)|([2-9]))r)?(\d+)', Number.Integer),
            (r'[()\[\],.]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]|(->)|(<-)|(=>)|(::)', Operator),
            (r'(alt|break|case|continue|cyclic|do|else|exit|'
             r'for|hd|if|implement|import|include|len|load|or|'
             r'pick|return|spawn|tagof|tl|to|while)\b', Keyword),
            (r'(byte|int|big|real|string|array|chan|list|adt'
             r'|fn|ref|of|module|self|type)\b', Keyword.Type),
            (r'(con|iota|nil)\b', Keyword.Constant),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            default('statement'),
        ],
    }

    def analyse_text(text):
        # Any limbo module implements something
        if re.search(r'^implement \w+;', text, re.MULTILINE):
            return 0.7
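# Regression sketch for the keyword alternation fixed above: 'for' and
# 'pick' must tokenize as standalone keywords (input invented).
if __name__ == '__main__':
    from pygments.token import Keyword
    toks = list(LimboLexer().get_tokens('for x; pick y;\n'))
    assert (Keyword, 'for') in toks and (Keyword, 'pick') in toks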
from pygments.lexer import RegexLexer, bygroups from pygments.token import * # See http://pygments.org/docs/lexerdevelopment/ for background. ROOT_TOKENS = [ (r'\"[^\"]*\"', Literal.String), (r'##.*\n', Comment), (r'#.*\n', Comment.Single), (r'[$]+([A-Za-z][A-Za-z0-9_:]*)?', Name.Variable), (r'[@]+([A-Za-z][A-Za-z0-9_:]*)?', Name.Class), (r'[A-Za-z0-9_]+:', Name.Tag), (r'(\.)([A-Za-z][A-Za-z0-9_]*)', bygroups(Text, Name.Function)), (r'(:=|=>)', Punctuation), (r'(\.?)([!+=<>/*%-]+)', bygroups(Text, Operator)), (r'\b(true|false|null)\b', Keyword.Constant), (r'\b(type|def|var|import)\b', Keyword.Declaration), (r'\b[A-Za-z][A-Za-z0-9_]+\b', Keyword), (r'[0-9]+', Number), (r'.', Text) ] class NeutrinoLexer(RegexLexer): name = 'Neutrino' aliases = ['neutrino'] filenames = ['*.n'] tokens = {'root': ROOT_TOKENS}
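# Sketch: highlight a toy Neutrino snippet in the terminal (the input is
# invented purely to exercise the rules above).
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    src = '## file header\ndef $greeting := "hello";\n'
    print(highlight(src, NeutrinoLexer(), TerminalFormatter()))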
class TerraformLexer(RegexLexer):
    """
    Lexer for `terraform .tf files <https://www.terraform.io/>`_.

    .. versionadded:: 2.1
    """

    name = 'Terraform'
    aliases = ['terraform', 'tf']
    filenames = ['*.tf']
    mimetypes = ['application/x-tf', 'application/x-terraform']

    embedded_keywords = ('ingress', 'egress', 'listener', 'default',
                         'connection', 'alias', 'terraform', 'tags', 'vars',
                         'config', 'lifecycle', 'timeouts')

    tokens = {
        'root': [
            include('string'),
            include('punctuation'),
            include('curly'),
            include('basic'),
            include('whitespace'),
            (r'[0-9]+', Number),
        ],
        'basic': [
            (words(('true', 'false'), prefix=r'\b', suffix=r'\b'),
             Keyword.Type),
            (r'\s*/\*', Comment.Multiline, 'comment'),
            (r'\s*#.*\n', Comment.Single),
            (r'(.*?)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)),
            (words(('variable', 'resource', 'provider', 'provisioner',
                    'module', 'backend', 'table_s', 'output'),
                   prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved, 'function'),
            (words(embedded_keywords, prefix=r'\b', suffix=r'\b'),
             Keyword.Declaration),
            (r'\$\{', String.Interpol, 'var_builtin'),
        ],
        'function': [
            (r'(\s+)(".*")(\s+)', bygroups(Text, String, Text)),
            include('punctuation'),
            include('curly'),
        ],
        'var_builtin': [
            (r'\$\{', String.Interpol, '#push'),
            (words(('concat', 'file', 'join', 'lookup', 'element'),
                   prefix=r'\b', suffix=r'\b'), Name.Builtin),
            include('string'),
            include('punctuation'),
            (r'\s+', Text),
            (r'\}', String.Interpol, '#pop'),
        ],
        'string': [
            (r'(".*")', bygroups(String.Double)),
        ],
        'punctuation': [
            (r'[\[\](),.]', Punctuation),
        ],
        # Keep this separate from punctuation - we sometimes want to use
        # different tokens for { }
        'curly': [
            (r'\{', Text.Punctuation),
            (r'\}', Text.Punctuation),
        ],
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),
        ],
    }
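# Sketch: tokenize a minimal Terraform stanza (illustrative input only).
if __name__ == '__main__':
    from pygments import lex
    tf = 'variable "region" {\n  default = "us-east-1"\n}\n'
    for tok, value in lex(tf, TerraformLexer()):
        print(tok, repr(value))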
def _stringescapes(lexer, match, ctx): lexer._start = match.group(3) lexer._end = match.group(5) return bygroups(Keyword.Reserved, Text, String.Escape, Text, String.Escape)(lexer, match, ctx)
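# Hypothetical mini-lexer (not part of the original module) showing how a
# callback like _stringescapes is typically wired in: it must live in an
# ExtendedRegexLexer, which passes the extra ctx argument, and it stashes
# the two matched delimiters on the lexer instance for later states.
from pygments.lexer import ExtendedRegexLexer
from pygments.token import Keyword, String, Text

class _EscapesDemoLexer(ExtendedRegexLexer):
    name = 'escapes-demo'
    tokens = {
        'root': [
            (r'(stringescapes)(\s+)(\S)(\s+)(\S)', _stringescapes),
            (r'\s+', Text),
        ],
    }

if __name__ == '__main__':
    lx = _EscapesDemoLexer()
    list(lx.get_tokens('stringescapes { }'))  # consume to run the lexer
    print(lx._start, lx._end)                 # -> { }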
class VCLLexer(RegexLexer):
    """
    For Varnish Configuration Language (VCL).

    .. versionadded:: 2.2
    """
    name = 'VCL'
    aliases = ['vcl']
    filenames = ['*.vcl']
    mimetypes = ['text/x-vclsrc']

    def analyse_text(text):
        # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
        # that this is VCL
        if text.startswith('vcl 4.0;'):
            return 1.0
        # Skip over comments and blank lines
        # This is accurate enough that returning 0.9 is reasonable.
        # Almost no VCL files start without some comments.
        elif '\nvcl 4.0;' in text[:1000]:
            return 0.9

    tokens = {
        'probe': [
            include('whitespace'),
            include('comments'),
            (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'acl': [
            include('whitespace'),
            include('comments'),
            (r'[!/]+', Operator),
            (r';', Punctuation),
            (r'\d+', Number),
            (r'\}', Punctuation, '#pop'),
        ],
        'backend': [
            include('whitespace'),
            (r'(\.probe)(\s*=\s*)(\w+)(;)',
             bygroups(Name.Attribute, Operator, Name.Variable.Global,
                      Punctuation)),
            (r'(\.probe)(\s*=\s*)(\{)',
             bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
            (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'statements': [
            (r'(\d\.)?\d+[sdwhmy]', Literal.Date),
            (r'(\d\.)?\d+ms', Literal.Date),
            (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
             r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
             r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
            (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
             r'miss|fetch|restart)\b', Name.Constant),
            (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b',
             Name.Variable),
            (words((
                'obj.status', 'req.hash_always_miss', 'beresp.backend',
                'req.esi_level', 'req.can_gzip', 'beresp.ttl',
                'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity',
                'req.hash_ignore_busy', 'obj.reason', 'req.xid',
                'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age',
                'local.ip', 'beresp.uncacheable', 'req.method',
                'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts',
                'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid',
                'bereq.between_bytes_timeout', 'req.esi',
                'bereq.first_byte_timeout', 'bereq.method',
                'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status',
                'beresp.do_gunzip', 'beresp.storage_hint',
                'resp.is_streaming', 'beresp.do_stream', 'req_top.method',
                'bereq.backend', 'beresp.backend.name', 'beresp.status',
                'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason',
                'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi',
                'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname',
                'remote.ip', 'req.backend_hint', 'server.identity',
                'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip',
                'bereq.uncacheable'), suffix=r'\b'),
             Name.Variable),
            (r'[!%&+*\-,/<.}{>=|~]+', Operator),
            (r'[();]', Punctuation),
            (r'[,]+', Punctuation),
            (words(('hash_data', 'regsub', 'regsuball', 'if', 'else',
                    'elsif', 'elif', 'synth', 'synthetic', 'ban', 'return',
                    'set', 'unset', 'import', 'include', 'new', 'rollback',
                    'call'), suffix=r'\b'),
             Keyword),
            (r'storage\.\w+\.\w+\b', Name.Variable),
            (words(('true', 'false')), Name.Builtin),
            (r'\d+\b', Number),
            (r'(backend)(\s+\w+)(\s*\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
            (r'(probe\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
            (r'(acl\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
            (r'(vcl )(4.0)(;)$',
             bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
            (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
             bygroups(Keyword, Name.Function, Punctuation)),
            (r'([a-zA-Z_]\w*)'
             r'(\.)'
             r'([a-zA-Z_]\w*)'
             r'(\s*\(.*\))',
             bygroups(Name.Function, Punctuation, Name.Function, using(this))),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'[^"\n]+', String),  # all other characters
        ],
        'multistring': [
            (r'[^"}]', String),
            (r'"\}', String, '#pop'),
            (r'["}]', String),
        ],
        'whitespace': [
            (r'L?"', String, 'string'),
            (r'\{"', String, 'multistring'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('statements'),
            (r'\s+', Text),
        ],
    }
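# Sketch: exercise the guesser above; a file that opens with the
# 'vcl 4.0;' pragma is treated as a certain match.
if __name__ == '__main__':
    print(VCLLexer.analyse_text('vcl 4.0;\nsub vcl_recv { return (pass); }\n'))  # 1.0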
def gen_rubystrings_rules(): def intp_regex_callback(self, match, ctx): yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ['interpolated-regex']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v yield match.start(4), String.Regex, match.group(4) # end[mixounse]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): yield match.start(1), String.Other, match.group(1) nctx = LexerContext(match.group(3), 0, ['interpolated-string']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states['strings'] = [ # easy ones (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), ] # double-quoted string and symbol for name, ttype, end in ('string', String.Double, '"'), \ ('sym', String.Symbol, '"'), \ ('backtick', String.Backtick, '`'): states['simple-'+name] = [ include('string-intp-escaped'), (r'[^\\%s#]+' % end, ttype), (r'[\\#]', ttype), (end, ttype, '#pop'), ] # braced quoted strings for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \ ('\\[', '\\]', 'sb'), \ ('\\(', '\\)', 'pa'), \ ('<', '>', 'ab'): states[name+'-intp-string'] = [ (r'\\[\\' + lbrace + rbrace + ']', String.Other), (r'(?<!\\)' + lbrace, String.Other, '#push'), (r'(?<!\\)' + rbrace, String.Other, '#pop'), include('string-intp-escaped'), (r'[\\#' + lbrace + rbrace + ']', String.Other), (r'[^\\#' + lbrace + rbrace + ']+', String.Other), ] states['strings'].append((r'%[QWx]?' + lbrace, String.Other, name+'-intp-string')) states[name+'-string'] = [ (r'\\[\\' + lbrace + rbrace + ']', String.Other), (r'(?<!\\)' + lbrace, String.Other, '#push'), (r'(?<!\\)' + rbrace, String.Other, '#pop'), (r'[\\#' + lbrace + rbrace + ']', String.Other), (r'[^\\#' + lbrace + rbrace + ']+', String.Other), ] states['strings'].append((r'%[qsw]' + lbrace, String.Other, name+'-string')) states[name+'-regex'] = [ (r'\\[\\' + lbrace + rbrace + ']', String.Regex), (r'(?<!\\)' + lbrace, String.Regex, '#push'), (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'), include('string-intp'), (r'[\\#' + lbrace + rbrace + ']', String.Regex), (r'[^\\#' + lbrace + rbrace + ']+', String.Regex), ] states['strings'].append((r'%r' + lbrace, String.Regex, name+'-regex')) # these must come after %<brace>! states['strings'] += [ # %r regex (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)', intp_regex_callback), # regular fancy strings with qsw (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other), (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # and because of fixed width lookbehinds the whole thing a # second time for line startings... (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), # all regular fancy strings without qsw (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)', intp_string_callback), ] return states
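# Sketch: the generated rules above are the machinery behind pygments' own
# RubyLexer, so running a few %-literals through it shows them in action.
if __name__ == '__main__':
    from pygments import lex
    from pygments.lexers import RubyLexer
    for tok, value in lex('%w{one two} %r{ab+}i :sym\n', RubyLexer()):
        print(tok, repr(value))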
class BlitzBasicLexer(RegexLexer): """ For `BlitzBasic <http://blitzbasic.com>`_ source code. .. versionadded:: 2.0 """ name = 'BlitzBasic' aliases = ['blitzbasic', 'b3d', 'bplus'] filenames = ['*.bb', '*.decls'] mimetypes = ['text/x-bb'] bb_sktypes = r'@{1,2}|[#$%]' bb_name = r'[a-z]\w*' bb_var = (r'(%s)(?:([ \t]*)(%s)|([ \t]*)([.])([ \t]*)(?:(%s)))?') % \ (bb_name, bb_sktypes, bb_name) flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ # Text (r'[ \t]+', Text), # Comments (r";.*?\n", Comment.Single), # Data types ('"', String.Double, 'string'), # Numbers (r'[0-9]+\.[0-9]*(?!\.)', Number.Float), (r'\.[0-9]+(?!\.)', Number.Float), (r'[0-9]+', Number.Integer), (r'\$[0-9a-f]+', Number.Hex), (r'\%[10]+', Number.Bin), # Other (words(('Shl', 'Shr', 'Sar', 'Mod', 'Or', 'And', 'Not', 'Abs', 'Sgn', 'Handle', 'Int', 'Float', 'Str', 'First', 'Last', 'Before', 'After'), prefix=r'\b', suffix=r'\b'), Operator), (r'([+\-*/~=<>^])', Operator), (r'[(),:\[\]\\]', Punctuation), (r'\.([ \t]*)(%s)' % bb_name, Name.Label), # Identifiers (r'\b(New)\b([ \t]+)(%s)' % (bb_name), bygroups(Keyword.Reserved, Text, Name.Class)), (r'\b(Gosub|Goto)\b([ \t]+)(%s)' % (bb_name), bygroups(Keyword.Reserved, Text, Name.Label)), (r'\b(Object)\b([ \t]*)([.])([ \t]*)(%s)\b' % (bb_name), bygroups(Operator, Text, Punctuation, Text, Name.Class)), (r'\b%s\b([ \t]*)(\()' % bb_var, bygroups(Name.Function, Text, Keyword.Type, Text, Punctuation, Text, Name.Class, Text, Punctuation)), (r'\b(Function)\b([ \t]+)%s' % bb_var, bygroups(Keyword.Reserved, Text, Name.Function, Text, Keyword.Type, Text, Punctuation, Text, Name.Class)), (r'\b(Type)([ \t]+)(%s)' % (bb_name), bygroups(Keyword.Reserved, Text, Name.Class)), # Keywords (r'\b(Pi|True|False|Null)\b', Keyword.Constant), (r'\b(Local|Global|Const|Field|Dim)\b', Keyword.Declaration), (words(('End', 'Return', 'Exit', 'Chr', 'Len', 'Asc', 'New', 'Delete', 'Insert', 'Include', 'Function', 'Type', 'If', 'Then', 'Else', 'ElseIf', 'EndIf', 'For', 'To', 'Next', 'Step', 'Each', 'While', 'Wend', 'Repeat', 'Until', 'Forever', 'Select', 'Case', 'Default', 'Goto', 'Gosub', 'Data', 'Read', 'Restore'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), # Final resolve (for variable names and such) # (r'(%s)' % (bb_name), Name.Variable), (bb_var, bygroups(Name.Variable, Text, Keyword.Type, Text, Punctuation, Text, Name.Class)), ], 'string': [ (r'""', String.Double), (r'"C?', String.Double, '#pop'), (r'[^"]+', String.Double), ], }
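# Sketch: a small BlitzBasic function through the lexer (input invented);
# the '#' sigil after a name is picked up as a Keyword.Type via bb_var.
if __name__ == '__main__':
    from pygments import lex
    src = 'Function Add#(a#, b#)\n\tReturn a#+b#\nEnd Function\n'
    for tok, value in lex(src, BlitzBasicLexer()):
        print(tok, repr(value))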
'upper-alpha', 'upper-latin', 'upper-roman', 'uppercase', 'url', 'visible', 'w-resize', 'wait', 'wider', 'x-fast', 'x-high', 'x-large', 'x-loud', 'x-low', 'x-small', 'x-soft', 'xx-large', 'xx-small', 'yes'), suffix=r'\b'), Name.Constant), (words(_color_keywords, suffix=r'\b'), Name.Entity), (words(( 'black', 'silver', 'gray', 'white', 'maroon', 'red', 'purple', 'fuchsia', 'green', 'lime', 'olive', 'yellow', 'navy', 'blue', 'teal', 'aqua'), suffix=r'\b'), Name.Builtin), (r'\!(important|default)', Name.Exception), (r'(true|false)', Name.Pseudo), (r'(and|or|not)', Operator.Word), (r'/\*', Comment.Multiline, 'inline-comment'), (r'//[^\n]*', Comment.Single), (r'\#[a-z0-9]{1,6}', Number.Hex), (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)), (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)), (r'#\{', String.Interpol, 'interpolation'), (r'[~^*!&%<>|+=@:,./?-]+', Operator), (r'[\[\]()]+', Punctuation), (r'"', String.Double, 'string-double'), (r"'", String.Single, 'string-single'), (r'[a-z_-][\w-]*', Name), ], 'interpolation': [ (r'\}', String.Interpol, '#pop'), include('value'), ], 'selector': [
class MonkeyLexer(RegexLexer):
    """
    For `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_
    source code.

    .. versionadded:: 1.6
    """

    name = 'Monkey'
    aliases = ['monkey']
    filenames = ['*.monkey']
    mimetypes = ['text/x-monkey']

    name_variable = r'[a-z_]\w*'
    name_function = r'[A-Z]\w*'
    name_constant = r'[A-Z_][A-Z0-9_]*'
    name_class = r'[A-Z]\w*'
    name_module = r'[a-z0-9_]*'

    keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)'
    # ? == Bool // % == Int // # == Float // $ == String
    keyword_type_special = r'[?%#$]'

    flags = re.MULTILINE

    tokens = {
        'root': [
            # Text
            (r'\s+', Text),
            # Comments
            (r"'.*", Comment),
            (r'(?i)^#rem\b', Comment.Multiline, 'comment'),
            # preprocessor directives
            (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b',
             Comment.Preproc),
            # preprocessor variable (any line starting with '#' that is not
            # a directive)
            (r'^#', Comment.Preproc, 'variables'),
            # String
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\$[0-9a-fA-Z]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Native data types
            (r'\b%s\b' % keyword_type, Keyword.Type),
            # Exception handling
            (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved),
            (r'Throwable', Name.Exception),
            # Builtins
            (r'(?i)\b(?:Null|True|False)\b', Name.Builtin),
            (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo),
            (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant),
            # Keywords
            (r'(?i)^(Import)(\s+)(.*)(\n)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text)),
            (r'(?i)^Strict\b.*\n', Keyword.Reserved),
            (r'(?i)(Const|Local|Global|Field)(\s+)',
             bygroups(Keyword.Declaration, Text), 'variables'),
            (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)',
             bygroups(Keyword.Reserved, Text), 'classname'),
            (r'(?i)(Function|Method)(\s+)',
             bygroups(Keyword.Reserved, Text), 'funcname'),
            (r'(?i)(?:End|Return|Public|Private|Extern|Property|'
             r'Final|Abstract)\b', Keyword.Reserved),
            # Flow Control stuff
            (r'(?i)(?:If|Then|Else|ElseIf|EndIf|'
             r'Select|Case|Default|'
             r'While|Wend|'
             r'Repeat|Until|Forever|'
             r'For|To|Until|Step|EachIn|Next|'
             r'Exit|Continue)\s+', Keyword.Reserved),
            # not used yet
            (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved),
            # Array
            (r'[\[\]]', Punctuation),
            # Other
            (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator),
            (r'(?i)(?:Not|Mod|Shl|Shr|And|Or)', Operator.Word),
            (r'[(){}!#,.:]', Punctuation),
            # catch the rest
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_function, Name.Function),
            (r'%s\b' % name_variable, Name.Variable),
        ],
        'funcname': [
            (r'(?i)%s\b' % name_function, Name.Function),
            (r':', Punctuation, 'classname'),
            (r'\s+', Text),
            (r'\(', Punctuation, 'variables'),
            (r'\)', Punctuation, '#pop')
        ],
        'classname': [
            (r'%s\.' % name_module, Name.Namespace),
            (r'%s\b' % keyword_type, Keyword.Type),
            (r'%s\b' % name_class, Name.Class),
            # array (of given size)
            (r'(\[)(\s*)(\d*)(\s*)(\])',
             bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)),
            # generics
            (r'\s+(?!<)', Text, '#pop'),
            (r'<', Punctuation, '#push'),
            (r'>', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            default('#pop')
        ],
        'variables': [
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_variable, Name.Variable),
            (r'%s' % keyword_type_special, Keyword.Type),
            (r'\s+', Text),
            (r':', Punctuation, 'classname'),
            (r',', Punctuation, '#push'),
            default('#pop')
        ],
        'string': [
            (r'[^"~]+', String.Double),
            (r'~q|~n|~r|~t|~z|~~', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'comment': [
            (r'(?i)^#rem.*?', Comment.Multiline, "#push"),
            (r'(?i)^#end.*?', Comment.Multiline, "#pop"),
            (r'\n', Comment.Multiline),
            (r'.+', Comment.Multiline),
        ],
    }
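# Sketch: a minimal Monkey program through the lexer (input invented).
if __name__ == '__main__':
    from pygments import lex
    src = 'Function Main:Int()\n\tPrint "hello"\n\tReturn 0\nEnd\n'
    for tok, value in lex(src, MonkeyLexer()):
        print(tok, repr(value))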
import os
import re

# Monkey patch for pygments reporting an error when generator expressions are
# used.
# https://bitbucket.org/birkenfeld/pygments-main/issue/942/cmake-generator-expressions-not-handled
from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator
from pygments.lexer import bygroups

CMakeLexer.tokens["args"].append(('(\\$<)(.+?)(>)',
                                  bygroups(Operator, Name.Variable, Operator)))

# Monkey patch for sphinx generating invalid content for qcollectiongenerator
# https://bitbucket.org/birkenfeld/sphinx/issue/1435/qthelp-builder-should-htmlescape-keywords
from sphinx.util.pycompat import htmlescape
from sphinx.builders.qthelp import QtHelpBuilder
old_build_keywords = QtHelpBuilder.build_keywords

def new_build_keywords(self, title, refs, subitems):
    old_items = old_build_keywords(self, title, refs, subitems)
    new_items = []
    for item in old_items:
        before, rest = item.split("ref=\"", 1)
        ref, after = rest.split("\"")
        if ("<" in ref and ">" in ref):
            new_items.append(before + "ref=\"" + htmlescape(ref) + "\"" + after)
        else:
            # keywords that need no escaping pass through unchanged
            new_items.append(item)
    return new_items

# install the patched implementation
QtHelpBuilder.build_keywords = new_build_keywords
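# Sketch: with the patch above applied before CMakeLexer is first used,
# generator expressions such as $<TARGET_FILE:foo> get their own tokens
# (this assumes the same pygments/sphinx versions the patches target).
if __name__ == '__main__':
    from pygments import lex
    cmake = 'add_custom_command(COMMAND $<TARGET_FILE:foo>)\n'
    for tok, value in lex(cmake, CMakeLexer()):
        print(tok, repr(value))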
#------------------------------------------------------------------------------
# Copyright (c) 2013-2018, Nucleic Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#------------------------------------------------------------------------------
from pygments.lexer import ExtendedRegexLexer, bygroups
from pygments.lexers.agile import Python3Lexer
from pygments.token import Text, Keyword, Name, Punctuation, Operator

ENAMLDEF_START = (
    r'^(enamldef)([ \t]+)([a-zA-Z_][a-zA-Z0-9_]*)([ \t]*)(\()',
    bygroups(Keyword, Text, Name.Class, Text, Punctuation),
    'enamldef_base',
)

ENAMLDEF_BASE = (
    r'(\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*)',
    bygroups(Text, Text, Text),
    'enamldef_end',
)

ENAMLDEF_END = (
    r'(\))([ \t]*)(:)([ \t]*\n)',
    bygroups(Punctuation, Text, Punctuation, Text),
    '#pop:2',
)
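# Hypothetical wiring sketch (the real Enaml lexer defines many more
# states): the three rule tuples above chain root -> 'enamldef_base' ->
# 'enamldef_end', whose '#pop:2' returns to root.
class _EnamlDefDemoLexer(ExtendedRegexLexer):
    name = 'enamldef-demo'
    tokens = {
        'root': [ENAMLDEF_START, (r'.+\n', Text), (r'\n', Text)],
        'enamldef_base': [ENAMLDEF_BASE],
        'enamldef_end': [ENAMLDEF_END],
    }

if __name__ == '__main__':
    for tok, value in _EnamlDefDemoLexer().get_tokens('enamldef Foo(Bar):\n'):
        print(tok, repr(value))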
from pygments.lexer import inherit, bygroups, using, this
from pygments.lexers.shell import BashLexer
from pygments.lexers.python import PythonLexer
from pygments.token import Keyword, Name, String
from xonsh.tokenize import SearchPath  # assumed import path, as in xonsh's pyghooks


class XonshSubprocLexer(BashLexer):
    """Lexer for xonsh subproc mode."""

    name = 'Xonsh subprocess lexer'

    tokens = {'root': [(SearchPath, String.Backtick), inherit, ]}


ROOT_TOKENS = [(r'\?', Keyword),
               (r'\$\w+', Name.Variable),
               (r'\$\{', Keyword, ('pymode', )),
               (r'[\!\$]\(', Keyword, ('subproc', )),
               (r'[\!\$]\[', Keyword, ('subproc', )),
               (r'@\$\(', Keyword, ('subproc', )),
               (r'@\(', Keyword, ('pymode', )),
               inherit,
               ]

PYMODE_TOKENS = [(r'(.+)(\))', bygroups(using(this), Keyword), '#pop'),
                 (r'(.+)(\})', bygroups(using(this), Keyword), '#pop'),
                 ]

SUBPROC_TOKENS = [
    (r'(.+)(\))', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
    (r'(.+)(\])', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
]


class XonshLexer(PythonLexer):
    """Xonsh console lexer for pygments."""

    name = 'Xonsh lexer'
    aliases = ['xonsh', 'xsh']
    filenames = ['*.xsh', '*xonshrc']

    # NOTE: wiring assumed from context; the token tables above are
    # otherwise unused. ${...} routes to 'pymode', $(...)/$[...] to
    # 'subproc'.
    tokens = {'root': ROOT_TOKENS,
              'pymode': PYMODE_TOKENS,
              'subproc': SUBPROC_TOKENS}
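# Sketch: with the token tables wired in, subprocess-mode constructs such
# as $(...) route into the 'subproc' state (requires xonsh installed so
# that SearchPath and the Bash sublexer resolve).
if __name__ == '__main__':
    from pygments import lex
    for tok, value in lex('print($(ls -l))\n', XonshLexer()):
        print(tok, repr(value))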
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + ')*?\}', using(RubyLexer)),
            (r'\[' + _dot + '*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + '*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + '*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
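# Sketch: a one-line Haml template through the lexer (relies on the
# _indentation and _starts_block helpers plus RubyLexer from the
# surrounding module, as in pygments' html.py).
if __name__ == '__main__':
    from pygments import lex
    for tok, value in lex("%p.greeting= 'hello'\n", HamlLexer()):
        print(tok, repr(value))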