Example #1
def format(self, extension=None):
    """Write pretty HTML logs."""
    M = self.M
    # pygments lexing setup:
    # (the pygments HTML formatter handles HTML escaping)
    import pygments
    from pygments.lexers import IrcLogsLexer
    from pygments.formatters import HtmlFormatter
    import pygments.token as token
    from pygments.lexer import bygroups
    # Don't do any encoding in this function with pygments.
    # That's done only right before the I/O functions in the Config
    # object.
    formatter = HtmlFormatter(lineanchors='l',
                              full=True, style=M.config.pygmentizeStyle,
                              output_encoding=self.M.config.output_codec)
    Lexer = IrcLogsLexer
    Lexer.tokens['msg'][1:1] = \
        [  # match:   #topic commands
            (r"(\#topic[ \t\f\v]*)(.*\n)",
             bygroups(token.Keyword, token.Generic.Heading), '#pop'),
            # match:   #command   (others)
            (r"(\#[^\s]+[ \t\f\v]*)(.*\n)",
             bygroups(token.Keyword, token.Generic.Strong), '#pop'),
        ]
    lexer = Lexer()
    # from rkddp.interact import interact; interact()
    out = pygments.highlight("\n".join(M.lines), lexer, formatter)
    return out
Example #2
from pygments.lexer import bygroups, default, include
from pygments.token import String, Text

def string_rules(state):
    return [
        (r'(")((?:[^\r\n"\\]|(?:\\.))+)(")',
         bygroups(Text, String, Text), state),

        (r'(")((?:[^\r\n"\\]|(?:\\.))+)', bygroups(Text, String), state),

        (r"(')((?:[^\r\n'\\]|(?:\\.))+)(')",
         bygroups(Text, String, Text), state),

        (r"(')((?:[^\r\n'\\]|(?:\\.))+)", bygroups(Text, String), state),

        (r'([^\s\'\\]|(\\.))+', String, state)
    ]

def gen_elixir_sigil_rules():
    # all valid sigil terminators (excluding heredocs)
    terminators = [
        (r'\{', r'\}', 'cb'),
        (r'\[', r'\]', 'sb'),
        (r'\(', r'\)', 'pa'),
        (r'<', r'>', 'ab'),
        (r'/', r'/', 'slas'),
        (r'\|', r'\|', 'pipe'),
        ('"', '"', 'quot'),
        ("'", "'", 'apos'),
    ]

    # heredocs have slightly different rules
    triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

    token = String.Other
    states = {'sigils': []}

    for term, name in triquotes:
        states['sigils'] += [
            (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                (name + '-end', name + '-intp')),
            (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                (name + '-end', name + '-no-intp')),
        ]

        states[name + '-end'] = [
            (r'[a-zA-Z]+', token, '#pop'),
            default('#pop'),
        ]
        states[name + '-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ]
        states[name + '-no-intp'] = [
            (r'^\s*' + term, String.Heredoc, '#pop'),
            include('heredoc_no_interpol'),
        ]

    for lterm, rterm, name in terminators:
        states['sigils'] += [
            (r'~[a-z]' + lterm, token, name + '-intp'),
            (r'~[A-Z]' + lterm, token, name + '-no-intp'),
        ]
        states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token)
        states[name + '-no-intp'] = \
            gen_elixir_sigstr_rules(rterm, token, interpol=False)

    return states
Example #4
def format(self, extension=None):
    """Write pretty HTML logs."""
    M = self.M
    import re
    # pygments lexing setup:
    # (the pygments HTML formatter handles HTML escaping)
    import pygments
    from pygments.lexers import IrcLogsLexer
    from pygments.formatters import HtmlFormatter
    import pygments.token as token
    from pygments.lexer import bygroups
    # Don't do any encoding in this function with pygments.
    # That's done only right before the I/O functions in the Config
    # object.
    formatter = HtmlFormatter(
        lineanchors='l',
        full=True,
        style=M.config.pygmentizeStyle,
        outencoding=self.M.config.output_codec)
    Lexer = IrcLogsLexer
    Lexer.tokens['msg'][1:1] = \
        [  # match:   #topic commands
            (r"(\#topic[ \t\f\v]*)(.*\n)",
             bygroups(token.Keyword, token.Generic.Heading), '#pop'),
            # match:   #command   (others)
            (r"(\#[^\s]+[ \t\f\v]*)(.*\n)",
             bygroups(token.Keyword, token.Generic.Strong), '#pop'),
        ]
    lexer = Lexer()
    # from rkddp.interact import interact; interact()
    out = pygments.highlight("\n".join(M.lines), lexer, formatter)
    # Hack it to add "pre { white-space: pre-wrap; }", which makes the
    # pygments HTML logs wrap.  In a newer version of pygments the
    # "prestyles" HtmlFormatter option would do this, but I want to
    # maintain compatibility with lenny.  Thus, I do these substitution
    # hacks to add the format in.  Thanks to a comment on the blog of
    # Francis Giannaros (http://francis.giannaros.org) for the
    # suggestion and instructions for how.
    out, n = re.subn(
        r"(\n\s*pre\s*\{[^}]+;\s*)(\})",
        r"\1\n      white-space: pre-wrap;\2",
        out,
        count=1)
    if n == 0:
        out = re.sub(r"(\n\s*</style>)",
                     r"\npre { white-space: pre-wrap; }\1",
                     out,
                     count=1)
    return out
Example #5
def _make_redirect_state(compound,
                         _core_token_compound=_core_token_compound,
                         _nl=_nl, _punct=_punct, _stoken=_stoken,
                         _string=_string, _space=_space,
                         _variable=_variable, _ws=_ws):
    stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                       (_punct, _string, _variable, _core_token_compound))
    return [
        (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' %
         (_nl, _ws, _nl, _ws),
         bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
        (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
         (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken),
         bygroups(Number.Integer, Punctuation, using(this, state='text')))
    ]
Example #6
def _make_call_state(compound, _label=_label,
                     _label_compound=_label_compound):
    state = []
    if compound:
        state.append((r'(?=\))', Text, '#pop'))
    state.append((r'(:?)(%s)' % (_label_compound if compound else _label),
                  bygroups(Punctuation, Name.Label), '#pop'))
    return state
def gen_elixir_string_rules(name, symbol, token):
    states = {}
    states['string_' + name] = [
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        (r'\\.', token),
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol')
    ]
    return states
def _objdump_lexer_tokens(asm_lexer):
    """
    Common objdump lexer tokens to wrap an ASM lexer.
    """
    hex_re = r'[0-9A-Za-z]'
    return {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
                bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
                bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            ('('+hex_re+'+)( )(<)(.*?)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation)),
            # Code line with disassembled instructions
            ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text,
                         using(asm_lexer))),
            # Code line with ascii
            ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction
            ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
                bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes
            (r'\t\.\.\.$', Text),
            # Relocation line
            # (With offset)
            (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant)),
            (r'[^\n]+\n', Other)
        ]
    }
Example #9
def _make_label_state(compound, _label=_label,
                      _label_compound=_label_compound, _nl=_nl,
                      _punct=_punct, _string=_string, _variable=_variable):
    state = []
    if compound:
        state.append((r'(?=\))', Text, '#pop'))
    state.append((r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' %
                  (_label_compound if compound else _label, _string,
                   _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl,
                   _punct, r')' if compound else ''),
                  bygroups(Name.Label, Comment.Single), '#pop'))
    return state
Example #10
def _make_follow_state(compound, _label=_label,
                       _label_compound=_label_compound, _nl=_nl,
                       _space=_space, _start_label=_start_label,
                       _token=_token, _token_compound=_token_compound,
                       _ws=_ws):
    suffix = '/compound' if compound else ''
    state = []
    if compound:
        state.append((r'(?=\))', Text, '#pop'))
    state += [
        (r'%s([%s]*)(%s)(.*)' %
         (_start_label, _ws, _label_compound if compound else _label),
         bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
        include('redirect%s' % suffix),
        (r'(?=[%s])' % _nl, Text, '#pop'),
        (r'\|\|?|&&?', Punctuation, '#pop'),
        include('text')
    ]
    return state
Example #11
from pygments.lexer import bygroups
from pygments.token import (Comment, Keyword, Name, Number, Operator,
                            Punctuation, String, Text)

def get_lexer_tokens(tag_highlighting=False, project_support=False):
    """Return the tokens needed for RegexLexer

    :param tag_highlighting: if True we support tag highlighting. See
        AdaLexerWithTags documentation
    :type tag_highlighting: bool
    :param project_support: if True support additional keywords associated
        with project files.
    :type project_support: bool

    :return: a dictionary following the structure required by RegexLexer
    :rtype: dict
    """
    if project_support:
        project_pattern = r'project\s+|'
        project_pattern2 = r'project|'
    else:
        project_pattern = r''
        project_pattern2 = r''

    result = {
        'root': [
            # Comments
            (r'--.*$', Comment),
            # Character literal
            (r"'.'", String.Char),
            # Strings
            (r'"[^"]*"', String),
            # Numeric
            # Based literal
            (r'[0-9][0-9_]*#[0-9a-f][0-9a-f_]*#(E[\+-]?[0-9][0-9_]*)?',
             Number.Integer),
            (r'[0-9][0-9_]*#[0-9a-f][0-9a-f_]*'
             r'\.[0-9a-f][0-9a-f_]*#(E[\+-]?[0-9][0-9_]*)?', Number.Float),
            # Decimal literal
            (r'[0-9][0-9_]*\.[0-9][0-9_]*(E[\+-]?[0-9][0-9_]*)?', Number.Float),
            (r'[0-9][0-9_]*(E[\+-]?[0-9][0-9_]*)?', Number.Integer),
            # Match use and with statements
            # The first part of the pattern makes sure we don't match
            # for/use constructs.
            (r'(\n\s*|;\s*)(with|use)(\s+[\w\.]+)',
             bygroups(Punctuation, Keyword.Reserved, Name.Namespace)),
            # Match procedure, package and function declarations
            (r'end\s+(if|loop|record)', Keyword),
            (r'(package(?:\s+body)?\s+|' + project_pattern +
             r'function\s+|end\s+|procedure\s+)([\w\.]+)',
             bygroups(Keyword, Name.Function)),
            # Ada 2012 standard attributes, GNAT specific ones and
            # Spark 2014 ones ('Update and 'Loop_Entry)
            # (reversed order to avoid having for
            #  example Max before Max_Alignment_For_Allocation).
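            # (For example: alternation is ordered and the rule below has no
            #  trailing word boundary, so listing Max before
            #  Max_Alignment_For_Allocation would make the input
            #  'Max_Alignment_For_Allocation stop matching after 'Max.)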
            (r'\'(Write|Width|Wide_Width|Wide_Wide_Width|Wide_Wide_Value|'
             r'Wide_Wide_Image|Wide_Value|Wide_Image|Word_Size|Wchar_T_Size|'
             r'Version|Value_Size|Value|Valid_Scalars|VADS_Size|Valid|Val|'
             r'Update|Unrestricted_Access|Universal_Literal_String|'
             r'Unconstrained_Array|Unchecked_Access|Unbiased_Rounding|'
             r'Truncation|Type_Class|To_Address|Tick|Terminated|'
             r'Target_Name|Tag|System_Allocator_Alignment|Succ|Stub_Type|'
             r'Stream_Size|Storage_Unit|Storage_Size|Storage_Pool|Small|Size|'
             r'Simple_Storage_Pool|Signed_Zeros|Scaling|Scale|'
             r'Scalar_Storage_Order|Safe_Last|Safe_Large|Safe_First|'
             r'Safe_Emax|Rounding|Round|Result|Remainder|Ref|Read|'
             r'Range_Length|Range|Priority|Pred|'
             r'Position|Pos|Pool_Address|Passed_By_Reference|Partition_Id|'
             r'Overlaps_Storage|Output|Old|Object_Size|Null_Parameter|Modulus|'
             r'Model_Small|Model_Mantissa|Model_Epsilon|Model_Emin|Model|Mod|'
             r'Min|Mechanism_Code|Maximum_Alignment|'
             r'Max_Size_In_Storage_Elements|Max_Priority|'
             r'Max_Interrupt_Priority|Max_Alignment_For_Allocation|'
             r'Max|Mantissa|Machine_Size|Machine_Rounds|Machine_Rounding|'
             r'Machine_Radix|Machine_Overflows|Machine_Mantissa|Machine_Emin|'
             r'Machine_Emax|Machine|Loop_Entry|Length|Length|Leading_Part|'
             r'Last_Valid|Last_Bit|Last|Large|Invalid_Value|Integer_Value|'
             r'Input|Image|Img|Identity|Has_Same_Storage|Has_Discriminants|'
             r'Has_Access_Values|Fraction|Fore|Floor|Fixed_Value|First_Valid|'
             r'First_Bit|First|External_Tag|Exponent|Epsilon|Enum_Val|'
             r'Enum_Rep|Enabled|Emax|Elaborated|Elab_Subp_Body|Elab_Spec|'
             r'Elab_Body|Descriptor_Size|Digits|Denorm|Delta|Definite|'
             r'Default_Bit_Order|Count|Copy_Sign|Constrained|'
             r'Compose|Component_Size|Compiler_Version|Code_Address|Class|'
             r'Ceiling|Caller|Callable|Body_Version|Bit_Order|Bit_Position|'
             r'Bit|Base|Asm_Output|Asm_Input|Alignment|Aft|Adjacent|'
             r'Address_Size|Address|Access|Abort_Signal|AST_Entry)',
             Name.Attribute),
            # All Ada2012 reserved words
            (r'(abort|abstract|abs|accept|access|aliased|all|and|array|at|'
             r'begin|body|case|constant|declare|delay|delta|digits|do|'
             r'else|elsif|end|entry|exception|exit|for|function|generic|goto|'
             r'if|interface|in|is|limited|loop|mod|new|not|null|'
             r'of|or|others|out|overriding|' + project_pattern2 +
             r'package|pragma|private|procedure|protected|'
             r'raise|range|record|rem|renames|requeue|return|reverse|'
             r'select|separate|some|subtype|synchronized|'
             r'tagged|task|terminate|then|type|until|use|when|while|with|xor'
             r')([\s;,])',
             bygroups(Keyword.Reserved, Punctuation)),
            # Two characters operators
            (r'=>|\.\.|\*\*|:=|/=|>=|<=|<<|>>|<>', Operator),
            # One character operators
            (r'&|\'|\(|\)|\*|\+|-|\.|/|:|<|=|>|\|', Operator),
            (r',|;', Punctuation),
            # Spaces
            (r'\s+', Text),
            # Builtin values
            (r'False|True', Keyword.Constant),
            # Identifiers
            (r'[\w\.]+', Name)], }

    # Insert tag highlighting before identifiers
    if tag_highlighting:
        result['root'].insert(-1, (r'\[[\w ]*\]', Name.Tag))

    return result
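
A minimal sketch (not part of the snippet itself) of how the returned
dictionary plugs into a RegexLexer subclass; the class name and metadata
below are illustrative assumptions:

from pygments.lexer import RegexLexer

class AdaProjectLexer(RegexLexer):
    # Hypothetical lexer built from the token table above, with both
    # optional features enabled.
    name = 'AdaProject'
    aliases = ['ada-project']
    filenames = ['*.gpr']
    tokens = get_lexer_tokens(tag_highlighting=True, project_support=True)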
Example #12
# - manual/cmake-buildsystem.7.html (with nested $<..>; relative and absolute paths, "::")

from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator, Punctuation, String, Text, Comment, Generic, Whitespace, Number
from pygments.lexer import bygroups

# Notes on regular expressions below:
# - [\.\+-] are needed for string constants like gtk+-2.0
# - Unix paths are recognized by '/'; support for Windows paths may be added if needed
# - (\\.) allows for \-escapes (used in manual/cmake-language.7)
# - $<..$<..$>..> nested occurrence in cmake-buildsystem
# - Nested variable evaluations are only supported in a limited capacity:
#   only one level of nesting, and at most one nested variable can be present.
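# For example, in ${prefix_${suffix}_name} the ${..} rule below captures the
# one nested ${suffix} through its single optional inner group; a second
# nested variable inside the same ${..} would not be matched.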

CMakeLexer.tokens["root"] = [
  (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Function, Text, Name.Function), '#push'),     # fctn(
  (r'\(', Name.Function, '#push'),
  (r'\)', Name.Function, '#pop'),
  (r'\[', Punctuation, '#push'),
  (r'\]', Punctuation, '#pop'),
  (r'[|;,.=*\-]', Punctuation),
  (r'\\\\', Punctuation),                                   # used in commands/source_group
  (r'[:]', Operator),
  (r'[<>]=', Punctuation),                                  # used in FindPkgConfig.cmake
  (r'\$<', Operator, '#push'),                              # $<...>
  (r'<[^<|]+?>(\w*\.\.\.)?', Name.Variable),                # <expr>
  (r'(\$\w*\{)([^\}\$]*)?(?:(\$\w*\{)([^\}]+?)(\}))?([^\}]*?)(\})',  # ${..} $ENV{..}, possibly nested
    bygroups(Operator, Name.Tag, Operator, Name.Tag, Operator, Name.Tag, Operator)),
  (r'([A-Z]+\{)(.+?)(\})', bygroups(Operator, Name.Tag, Operator)),  # DATA{ ...}
  (r'[a-z]+(@|(://))((\\.)|[\w.+-:/\\])+', Name.Attribute),          # URL, git@, ...
  (r'/\w[\w\.\+-/\\]*', Name.Attribute),                    # absolute path
Example #13
    tokens = {"root": [(r"`[^`]*?`", String.Backtick), inherit]}


ROOT_TOKENS = [
    (r"\?", Keyword),
    (r"\$\w+", Name.Variable),
    (r"\$\{", Keyword, ("pymode",)),
    (r"\$\(", Keyword, ("subproc",)),
    (r"\$\[", Keyword, ("subproc",)),
    (r"@\(", Keyword, ("pymode",)),
    inherit,
]

PYMODE_TOKENS = [
    (r"(.+)(\))", bygroups(using(this), Keyword), "#pop"),
    (r"(.+)(\})", bygroups(using(this), Keyword), "#pop"),
]

SUBPROC_TOKENS = [
    (r"(.+)(\))", bygroups(using(XonshSubprocLexer), Keyword), "#pop"),
    (r"(.+)(\])", bygroups(using(XonshSubprocLexer), Keyword), "#pop"),
]


class XonshLexer(PythonLexer):
    """Xonsh console lexer for pygments."""

    name = "Xonsh lexer"
    aliases = ["xonsh", "xsh"]
    filenames = ["*.xsh", "*xonshrc"]
Example #14
    def gen_crystalstrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[' + lbrace + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states['strings'].append((r'%[wi]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states
Example #15
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']

ipython_tokens = [
  (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
  (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
  (r"(%%?)(\w+)(\?\??)$",  bygroups(Operator, Keyword, Operator)),
  (r"\b(\?\??)(\s*)$",  bygroups(Operator, Text)),
  (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                       using(BashLexer), Text)),
  (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
  (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
  (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
  (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
  (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.
Example #16
class FSharpLexer(RegexLexer):
    """
    For the `F# language <https://fsharp.org/>`_ (version 3.0).

    .. versionadded:: 1.5
    """

    name = 'F#'
    aliases = ['fsharp', 'f#']
    filenames = ['*.fs', '*.fsi']
    mimetypes = ['text/x-fsharp']

    keywords = [
        'abstract', 'as', 'assert', 'base', 'begin', 'class', 'default',
        'delegate', 'do!', 'do', 'done', 'downcast', 'downto', 'elif', 'else',
        'end', 'exception', 'extern', 'false', 'finally', 'for', 'function',
        'fun', 'global', 'if', 'inherit', 'inline', 'interface', 'internal',
        'in', 'lazy', 'let!', 'let', 'match', 'member', 'module', 'mutable',
        'namespace', 'new', 'null', 'of', 'open', 'override', 'private', 'public',
        'rec', 'return!', 'return', 'select', 'static', 'struct', 'then', 'to',
        'true', 'try', 'type', 'upcast', 'use!', 'use', 'val', 'void', 'when',
        'while', 'with', 'yield!', 'yield',
    ]
    # Reserved words; cannot hurt to color them as keywords too.
    keywords += [
        'atomic', 'break', 'checked', 'component', 'const', 'constraint',
        'constructor', 'continue', 'eager', 'event', 'external', 'fixed',
        'functor', 'include', 'method', 'mixin', 'object', 'parallel',
        'process', 'protected', 'pure', 'sealed', 'tailcall', 'trait',
        'virtual', 'volatile',
    ]
    keyopts = [
        '!=', '#', '&&', '&', r'\(', r'\)', r'\*', r'\+', ',', r'-\.',
        '->', '-', r'\.\.', r'\.', '::', ':=', ':>', ':', ';;', ';', '<-',
        r'<\]', '<', r'>\]', '>', r'\?\?', r'\?', r'\[<', r'\[\|', r'\[', r'\]',
        '_', '`', r'\{', r'\|\]', r'\|', r'\}', '~', '<@@', '<@', '=', '@>', '@@>',
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'or', 'not']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = [
        'sbyte', 'byte', 'char', 'nativeint', 'unativeint', 'float32', 'single',
        'float', 'double', 'int8', 'uint8', 'int16', 'uint16', 'int32',
        'uint32', 'int64', 'uint64', 'decimal', 'unit', 'bool', 'string',
        'list', 'exn', 'obj', 'enum',
    ]

    # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or
    # http://fsharp.org/about/files/spec.pdf for reference.  Good luck.

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbrafv]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\U[0-9a-fA-F]{8}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)',
             Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'///.*?\n', String.Doc),
            (r'//.*?\n', Comment.Single),
            (r'\(\*(?!\))', Comment, 'comment'),

            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),

            (r'\b(open|module)(\s+)([\w.]+)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'\b(let!?)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Variable)),
            (r'\b(type)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Class)),
            (r'\b(member|override)(\s+)(\w+)(\.)(\w+)',
             bygroups(Keyword, Text, Name, Punctuation, Name.Function)),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n',
             Comment.Preproc),

            (r"[^\W\d][\w']*", Name),

            (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer),
            (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex),
            (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct),
            (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin),
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)?[fFmM]?',
             Number.Float),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'@?"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            # e.g. dictionary index access
            default('#pop'),
        ],
        'comment': [
            (r'[^(*)@"]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            # comments cannot be closed within strings in comments
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'[(*)@]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String),
            include('escape-sequence'),
            (r'\\\n', String),
            (r'\n', String),  # newlines are allowed in any string
            (r'"B?', String, '#pop'),
        ],
        'lstring': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'""', String),
            (r'"B?', String, '#pop'),
        ],
        'tqs': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'"""B?', String, '#pop'),
            (r'"', String),
        ],
    }
Example #17
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + ')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + '*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + '*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + '*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #18
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, bygroups, default, include, using, words
from pygments.token import Comment, Error, Keyword, Name, Number, Operator, Punctuation, \
    String, Text, Whitespace
from pygments.lexers._csound_builtins import OPCODES, DEPRECATED_OPCODES
from pygments.lexers.html import HtmlLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.scripting import LuaLexer

__all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer']

newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text))


class CsoundLexer(RegexLexer):
    tokens = {
        'whitespace': [(r'[ \t]+', Text),
                       (r'/[*](?:.|\n)*?[*]/', Comment.Multiline),
                       (r'(?:;|//).*$', Comment.Single),
                       (r'(\\)(\n)', bygroups(Whitespace, Text))],
        'preprocessor directives':
        [(r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc),
         (r'#includestr', Comment.Preproc, 'includestr directive'),
         (r'#include', Comment.Preproc, 'include directive'),
         (r'#[ \t]*define', Comment.Preproc, 'define directive'),
         (r'#(?:ifn?def|undef)\b', Comment.Preproc, 'macro directive')],
        'include directive':
Example #19
class CsoundOrchestraLexer(CsoundLexer):
    """
    For `Csound <https://csound.com>`_ orchestras.

    .. versionadded:: 2.1
    """

    name = 'Csound Orchestra'
    aliases = ['csound', 'csound-orc']
    filenames = ['*.orc', '*.udo']

    user_defined_opcodes = set()

    def opcode_name_callback(lexer, match):
        opcode = match.group(0)
        lexer.user_defined_opcodes.add(opcode)
        yield match.start(), Name.Function, opcode

    def name_callback(lexer, match):
        type_annotation_token = Keyword.Type

        name = match.group(1)
        if name in OPCODES or name in DEPRECATED_OPCODES:
            yield match.start(), Name.Builtin, name
        elif name in lexer.user_defined_opcodes:
            yield match.start(), Name.Function, name
        else:
            type_annotation_token = Name
            name_match = re.search(r'^(g?[afikSw])(\w+)', name)
            if name_match:
                yield name_match.start(1), Keyword.Type, name_match.group(1)
                yield name_match.start(2), Name, name_match.group(2)
            else:
                yield match.start(), Name, name

        if match.group(2):
            yield match.start(2), Punctuation, match.group(2)
            yield match.start(3), type_annotation_token, match.group(3)

    tokens = {
        'root': [(r'\n', Text),
                 (r'^([ \t]*)(\w+)(:)(?:[ \t]+|$)',
                  bygroups(Text, Name.Label, Punctuation)),
                 include('whitespace and macro uses'),
                 include('preprocessor directives'),
                 (r'\binstr\b', Keyword.Declaration,
                  'instrument numbers and identifiers'),
                 (r'\bopcode\b', Keyword.Declaration, 'after opcode keyword'),
                 (r'\b(?:end(?:in|op))\b', Keyword.Declaration),
                 include('partial statements')],
        'partial statements':
        [(r'\b(?:0dbfs|A4|k(?:r|smps)|nchnls(?:_i)?|sr)\b',
          Name.Variable.Global),
         include('numbers'),
         (r'\+=|-=|\*=|/=|<<|>>|<=|>=|==|!=|&&|\|\||[~¬]|[=!+\-*/^%&|<>#?:]',
          Operator), (r'[(),\[\]]', Punctuation),
         (r'"', String, 'quoted string'), (r'\{\{', String, 'braced string'),
         (words((
             'do',
             'else',
             'elseif',
             'endif',
             'enduntil',
             'fi',
             'if',
             'ithen',
             'kthen',
             'od',
             'then',
             'until',
             'while',
         ),
                prefix=r'\b',
                suffix=r'\b'), Keyword),
         (words(('return', 'rireturn'), prefix=r'\b',
                suffix=r'\b'), Keyword.Pseudo),
         (r'\b[ik]?goto\b', Keyword, 'goto label'),
         (r'\b(r(?:einit|igoto)|tigoto)(\(|\b)',
          bygroups(Keyword.Pseudo, Punctuation), 'goto label'),
         (r'\b(c(?:g|in?|k|nk?)goto)(\(|\b)',
          bygroups(Keyword.Pseudo,
                   Punctuation), ('goto label', 'goto argument')),
         (r'\b(timout)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
          ('goto label', 'goto argument', 'goto argument')),
         (r'\b(loop_[gl][et])(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
          ('goto label', 'goto argument', 'goto argument', 'goto argument')),
         (r'\bprintk?s\b', Name.Builtin, 'prints opcode'),
         (r'\b(?:readscore|scoreline(?:_i)?)\b', Name.Builtin,
          'Csound score opcode'),
         (r'\bpyl?run[it]?\b', Name.Builtin, 'Python opcode'),
         (r'\blua_(?:exec|opdef)\b', Name.Builtin, 'Lua opcode'),
         (r'\bp\d+\b', Name.Variable.Instance),
         (r'\b([A-Z_a-z]\w*)(?:(:)([A-Za-z]))?\b', name_callback)],
        'instrument numbers and identifiers': [
            include('whitespace and macro uses'),
            (r'\d+|[A-Z_a-z]\w*', Name.Function), (r'[+,]', Punctuation),
            (r'\n', Text, '#pop')
        ],
        'after opcode keyword': [
            include('whitespace and macro uses'),
            (r'[A-Z_a-z]\w*', opcode_name_callback,
             ('#pop', 'opcode type signatures')), (r'\n', Text, '#pop')
        ],
        'opcode type signatures': [
            include('whitespace and macro uses'),

            # https://github.com/csound/csound/search?q=XIDENT+path%3AEngine+filename%3Acsound_orc.lex
            (r'0|[afijkKoOpPStV\[\]]+', Keyword.Type),
            (r',', Punctuation),
            (r'\n', Text, '#pop')
        ],
        'quoted string': [(r'"', String, '#pop'), (r'[^\\"$%)]+', String),
                          include('macro uses'),
                          include('escape sequences'),
                          include('format specifiers'), (r'[\\$%)]', String)],
        'braced string': [(r'\}\}', String, '#pop'),
                          (r'(?:[^\\%)}]|\}(?!\}))+', String),
                          include('escape sequences'),
                          include('format specifiers'), (r'[\\%)]', String)],
        'escape sequences': [
            # https://github.com/csound/csound/search?q=unquote_string+path%3AEngine+filename%3Acsound_orc_compile.c
            (r'\\(?:[\\abnrt"]|[0-7]{1,3})', String.Escape)
        ],
        # Format specifiers are highlighted in all strings, even though only
        #   fprintks        https://csound.com/docs/manual/fprintks.html
        #   fprints         https://csound.com/docs/manual/fprints.html
        #   printf/printf_i https://csound.com/docs/manual/printf.html
        #   printks         https://csound.com/docs/manual/printks.html
        #   prints          https://csound.com/docs/manual/prints.html
        #   sprintf         https://csound.com/docs/manual/sprintf.html
        #   sprintfk        https://csound.com/docs/manual/sprintfk.html
        # work with strings that contain format specifiers. In addition, these opcodes’
        # handling of format specifiers is inconsistent:
        #   - fprintks, fprints, printks, and prints do accept %a and %A
        #     specifiers, but can’t accept %s specifiers.
        #   - printf, printf_i, sprintf, and sprintfk don’t accept %a and %A
        #     specifiers, but can accept %s specifiers.
        # See https://github.com/csound/csound/issues/747 for more information.
        'format specifiers':
        [(r'%[#0\- +]*\d*(?:\.\d+)?[diuoxXfFeEgGaAcs]', String.Interpol),
         (r'%%', String.Escape)],
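        # For example, in a string such as "%5.2f%%" the "%5.2f" matches the
        # String.Interpol rule above and the "%%" matches the String.Escape
        # rule.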
        'goto argument': [
            include('whitespace and macro uses'), (r',', Punctuation, '#pop'),
            include('partial statements')
        ],
        'goto label': [
            include('whitespace and macro uses'), (r'\w+', Name.Label, '#pop'),
            default('#pop')
        ],
        'prints opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'prints quoted string'),
            default('#pop')
        ],
        'prints quoted string': [(r'\\\\[aAbBnNrRtT]', String.Escape),
                                 (r'%[!nNrRtT]|[~^]{1,2}', String.Escape),
                                 include('quoted string')],
        'Csound score opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'), (r'\{\{', String, 'Csound score'),
            (r'\n', Text, '#pop')
        ],
        'Csound score': [(r'\}\}', String, '#pop'),
                         (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer))],
        'Python opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'), (r'\{\{', String, 'Python'),
            (r'\n', Text, '#pop')
        ],
        'Python': [(r'\}\}', String, '#pop'),
                   (r'([^}]+)|\}(?!\})', using(PythonLexer))],
        'Lua opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'), (r'\{\{', String, 'Lua'),
            (r'\n', Text, '#pop')
        ],
        'Lua': [(r'\}\}', String, '#pop'),
                (r'([^}]+)|\}(?!\})', using(LuaLexer))]
    }
Example #20
class CddlLexer(RegexLexer):
    name = "CDDL"
    aliases = ["cddl"]
    filenames = ["*.cddl"]
    mimetypes = ["text/x-cddl"]

    _prelude_types = [
        "any",
        "b64legacy",
        "b64url",
        "bigfloat",
        "bigint",
        "bignint",
        "biguint",
        "bool",
        "bstr",
        "bytes",
        "cbor-any",
        "decfrac",
        "eb16",
        "eb64legacy",
        "eb64url",
        "encoded-cbor",
        "false",
        "float",
        "float16",
        "float16-32",
        "float32",
        "float32-64",
        "float64",
        "int",
        "integer",
        "mime-message",
        "nil",
        "nint",
        "null",
        "number",
        "regexp",
        "tdate",
        "text",
        "time",
        "true",
        "tstr",
        "uint",
        "undefined",
        "unsigned",
        "uri",
    ]

    _controls = [
        ".and",
        ".bits",
        ".cbor",
        ".cborseq",
        ".default",
        ".eq",
        ".ge",
        ".gt",
        ".le",
        ".lt",
        ".ne",
        ".regexp",
        ".size",
        ".within",
    ]

    _re_id = r"""(?x)
        [$@A-Z_a-z]
        (?:[\-\.]*[$@0-9A-Z_a-z]|[$@0-9A-Z_a-z])*
    """

    # While the spec reads more like "an int must not start with 0", we use a
    # lookahead here that says "after a 0 there must be no digit". This makes
    # the '0' the invalid character in '01', which looks nicer when highlighted.
    _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
    _re_int = r"-?" + _re_uint
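    # For example, _re_uint fails to match "01" entirely (the lookahead
    # rejects a "0" followed by a digit), so the "0" is emitted as an error
    # token while the "1" still lexes as an integer.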

    flags = re.UNICODE | re.MULTILINE

    tokens = {
        "commentsandwhitespace": [(r"\s+", Text), (r";.+$", Comment.Single)],
        "root": [
            include("commentsandwhitespace"),
            # tag types
            (r"#\d(\.{uint}|)".format(uint=_re_uint), Keyword.Type),
            (r"#", Keyword.Type),  # any
            # occurrence
            (
                r"({uint}|)(\*)({uint}|)".format(uint=_re_uint),
                bygroups(Number, Operator, Number),
            ),
            (r"\?|\+", Operator),  # occurrence
            (r"\^", Operator),  # cuts
            (r"(\.\.\.|\.\.)", Operator),  # rangeop
            (words(_controls, suffix=r"\b"), Operator.Word),  # ctlops
            # into choice op
            (r"&(?=\s*({groupname}|\())".format(groupname=_re_id), Operator),
            (r"~(?=\s*{})".format(_re_id), Operator),  # unwrap op
            (r"//|/(?!/)", Operator),  # double und single slash
            (r"=>|/==|/=|=", Operator),
            (r"[\[\]{}\(\),<>:]", Punctuation),
            # Bytestrings
            (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
            (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
            (r"'", String.Single, "bstr"),
            # Barewords as member keys (must be matched before values, types, typenames, groupnames).
            # Token type is String as barewords are always interpreted as such.
            (
                r"({bareword})(\s*)(:)".format(bareword=_re_id),
                bygroups(String, Text, Punctuation),
            ),
            # predefined types
            (
                words(_prelude_types,
                      prefix=r"(?![\-_$@])\b",
                      suffix=r"\b(?![\-_$@])"),
                Name.Builtin,
            ),
            # user-defined groupnames, typenames
            (_re_id, Name.Class),
            # values
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+|)p[+-]?\d+",
             Number.Hex),  # hexfloat
            (r"0x[0-9a-fA-F]+", Number.Hex),  # hex
            # Float
            (
                r"""(?x)
                {int}
                (?=(\.\d|e[+-]?\d)) # lookahead; at least one float-y thing coming?
                (?:\.\d+)?          # fraction
                (?:e[+-]?\d+)?      # and/or exponent
                """.format(int=_re_int),
                Number.Float,
            ),
            # Int
            (_re_int, Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
        ],
        "bstrb64url": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-zA-Z\-_=]+", String.Single),
            (r".", Error),
            # (r";.+$", Token.Other),
        ],
        "bstrh": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-fA-F]+", String.Single),
            (r".", Error),
        ],
        "bstr": [
            (r"'", String.Single, "#pop"),
            (r"\\.", String.Escape),
            (r"[^']", String.Single),
        ],
    }
class PrologLexer(RegexLexer):
    """
    Lexer for Prolog files.
    """
    name = 'Prolog'
    aliases = ['prolog']
    filenames = ['*.ecl', '*.prolog', '*.pro', '*.pl']
    mimetypes = ['text/x-prolog']

    flags = re.UNICODE | re.MULTILINE

    tokens = {
        'root': [
            (r'/\*', Comment.Multiline, 'nested-comment'),
            (r'%.*', Comment.Single),
            # character literal
            (r'0\'.', String.Char),
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            # literal with prepended base
            (r'\d\d?\'[a-zA-Z0-9]+', Number.Integer),
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer),
            (r'[\[\](){}|.,;!]', Punctuation),
            (r':-|-->', Punctuation),
            (r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|'
             r'\\[0-7]+\\|\\["\nabcefnrstv]|[^\\"])*"', String.Double),
            (r"'(?:''|[^'])*'", String.Atom),  # quoted atom
            # Needs to not be followed by an atom.
            # (r'=(?=\s|[a-zA-Z\[])', Operator),
            (r'is\b', Operator),
            (r'(<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])',
             Operator),
            (r'(mod|div|not)\b', Operator),
            (r'_', Keyword),  # The don't-care variable
            (r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)),
            (r'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
             r'(\s*)(:-|-->)', bygroups(Name.Function, Text,
                                        Operator)),  # function defn
            (r'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
             r'(\s*)(\()', bygroups(Name.Function, Text, Punctuation)),
            (r'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             r'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*',
             String.Atom),  # atom, characters
            # This one includes !
            (r'[#&*+\-./:<=>?@\\^~\u00a1-\u00bf\u2010-\u303f]+', String.Atom
             ),  # atom, graphics
            (r'[A-Z_]\w*', Name.Variable),
            (r'\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text),
        ],
        'nested-comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            (r'/\*', Comment.Multiline, '#push'),
            (r'[^*/]+', Comment.Multiline),
            (r'[*/]', Comment.Multiline),
        ],
    }

    def analyse_text(text):
        return ':-' in text
Example #22
class PowerShellLexer(RegexLexer):
    """
    For Windows PowerShell code.

    .. versionadded:: 1.5
    """
    name = 'PowerShell'
    aliases = ['powershell', 'posh', 'ps1', 'psm1']
    filenames = ['*.ps1', '*.psm1']
    mimetypes = ['text/x-powershell']

    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE

    keywords = (
        'while validateset validaterange validatepattern validatelength '
        'validatecount until trap switch return ref process param parameter in '
        'if global: function foreach for finally filter end elseif else '
        'dynamicparam do default continue cmdletbinding break begin alias \\? '
        '% #script #private #local #global mandatory parametersetname position '
        'valuefrompipeline valuefrompipelinebypropertyname '
        'valuefromremainingarguments helpmessage try catch throw').split()

    operators = (
        'and as band bnot bor bxor casesensitive ccontains ceq cge cgt cle '
        'clike clt cmatch cne cnotcontains cnotlike cnotmatch contains '
        'creplace eq exact f file ge gt icontains ieq ige igt ile ilike ilt '
        'imatch ine inotcontains inotlike inotmatch ireplace is isnot le like '
        'lt match ne not notcontains notlike notmatch or regex replace '
        'wildcard').split()

    verbs = (
        'write where wait use update unregister undo trace test tee take '
        'suspend stop start split sort skip show set send select scroll resume '
        'restore restart resolve resize reset rename remove register receive '
        'read push pop ping out new move measure limit join invoke import '
        'group get format foreach export expand exit enter enable disconnect '
        'disable debug cxnew copy convertto convertfrom convert connect '
        'complete compare clear checkpoint aggregate add').split()

    commenthelp = (
        'component description example externalhelp forwardhelpcategory '
        'forwardhelptargetname functionality inputs link '
        'notes outputs parameter remotehelprunspace role synopsis').split()

    tokens = {
        'root': [
            # we need to count pairs of parentheses for correct highlight
            # of '$(...)' blocks in strings
            (r'\(', Punctuation, 'child'),
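            # For example, in "$(1 + (2 * 3))" each '(' pushes another
            # 'child' state and its matching ')' pops it, so the closing '"'
            # is still handled by the surrounding string rule.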
            (r'\s+', Text),
            (r'^(\s*#[#\s]*)(\.(?:%s))([^\n]*$)' % '|'.join(commenthelp),
             bygroups(Comment, String.Doc, Comment)),
            (r'#[^\n]*?$', Comment),
            (r'(&lt;|<)#', Comment.Multiline, 'multline'),
            (r'@"\n', String.Heredoc, 'heredoc-double'),
            (r"@'\n.*?\n'@", String.Heredoc),
            # escaped syntax
            (r'`[\'"$@-]', Punctuation),
            (r'"', String.Double, 'string'),
            (r"'([^']|'')*'", String.Single),
            (r'(\$|@@|@)((global|script|private|env):)?\w+', Name.Variable),
            (r'(%s)\b' % '|'.join(keywords), Keyword),
            (r'-(%s)\b' % '|'.join(operators), Operator),
            (r'(%s)-[a-z_]\w*\b' % '|'.join(verbs), Name.Builtin),
            (r'\[[a-z_\[][\w. `,\[\]]*\]', Name.Constant),  # .net [type]s
            (r'-[a-z_]\w*', Name),
            (r'\w+', Name),
            (r'[.,;@{}\[\]$()=+*/\\&%!~?^`|<>-]|::', Punctuation),
        ],
        'child': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'multline': [
            (r'[^#&.]+', Comment.Multiline),
            (r'#(>|&gt;)', Comment.Multiline, '#pop'),
            (r'\.(%s)' % '|'.join(commenthelp), String.Doc),
            (r'[#&.]', Comment.Multiline),
        ],
        'string': [
            (r"`[0abfnrtv'\"$`]", String.Escape),
            (r'[^$`"]+', String.Double),
            (r'\$\(', Punctuation, 'child'),
            (r'""', String.Double),
            (r'[`$]', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'heredoc-double': [
            (r'\n"@', String.Heredoc, '#pop'),
            (r'\$\(', Punctuation, 'child'),
            (r'[^@\n]+"]', String.Heredoc),
            (r".", String.Heredoc),
        ]
    }
Example #23
class TcshLexer(RegexLexer):
    """
    Lexer for tcsh scripts.

    .. versionadded:: 0.10
    """

    name = 'Tcsh'
    aliases = ['tcsh', 'csh']
    filenames = ['*.tcsh', '*.csh']
    mimetypes = ['application/x-csh']

    tokens = {
        'root': [
            include('basic'),
            (r'\$\(', Keyword, 'paren'),
            (r'\$\{#?', Keyword, 'curly'),
            (r'`', String.Backtick, 'backticks'),
            include('data'),
        ],
        'basic': [
            (r'\b(if|endif|else|while|then|foreach|case|default|'
             r'continue|goto|breaksw|end|switch|endsw)\s*\b', Keyword),
            (r'\b(alias|alloc|bg|bindkey|break|builtins|bye|caller|cd|chdir|'
             r'complete|dirs|echo|echotc|eval|exec|exit|fg|filetest|getxvers|'
             r'glob|getspath|hashstat|history|hup|inlib|jobs|kill|'
             r'limit|log|login|logout|ls-F|migrate|newgrp|nice|nohup|notify|'
             r'onintr|popd|printenv|pushd|rehash|repeat|rootnode|popd|pushd|'
             r'set|shift|sched|setenv|setpath|settc|setty|setxvers|shift|'
             r'source|stop|suspend|source|suspend|telltc|time|'
             r'umask|unalias|uncomplete|unhash|universe|unlimit|unset|unsetenv|'
             r'ver|wait|warp|watchlog|where|which)\s*\b', Name.Builtin),
            (r'#.*', Comment),
            (r'\\[\w\W]', String.Escape),
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)),
            (r'[\[\]{}()=]+', Operator),
            (r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
            (r';', Punctuation),
        ],
        'data': [
            (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r'\s+', Text),
            (r'[^=\s\[\]{}()$"\'`\\;#]+', Text),
            (r'\d+(?= |\Z)', Number),
            (r'\$#?(\w+|.)', Name.Variable),
        ],
        'curly': [
            (r'\}', Keyword, '#pop'),
            (r':-', Keyword),
            (r'\w+', Name.Variable),
            (r'[^}:"\'`$]+', Punctuation),
            (r':', Punctuation),
            include('root'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
        'backticks': [
            (r'`', String.Backtick, '#pop'),
            include('root'),
        ],
    }
Example #24
class BashLexer(RegexLexer):
    """
    Lexer for (ba|k|)sh shell scripts.

    .. versionadded:: 0.6
    """

    name = 'Bash'
    aliases = ['bash', 'sh', 'ksh', 'shell']
    filenames = [
        '*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass', '.bashrc', 'bashrc',
        '.bash_*', 'bash_*', 'PKGBUILD'
    ]
    mimetypes = ['application/x-sh', 'application/x-shellscript']

    tokens = {
        'root': [
            include('basic'),
            (r'`', String.Backtick, 'backticks'),
            include('data'),
            include('interp'),
        ],
        'interp': [
            (r'\$\(\(', Keyword, 'math'),
            (r'\$\(', Keyword, 'paren'),
            (r'\$\{#?', String.Interpol, 'curly'),
            (r'\$#?(\w+|.)', Name.Variable),
        ],
        'basic': [
            (r'\b(if|fi|else|while|do|done|for|then|return|function|case|'
             r'select|continue|until|esac|elif)(\s*)\b',
             bygroups(Keyword, Text)),
            (r'\b(alias|bg|bind|break|builtin|caller|cd|command|compgen|'
             r'complete|declare|dirs|disown|echo|enable|eval|exec|exit|'
             r'export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|'
             r'local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|'
             r'shopt|source|suspend|test|time|times|trap|true|type|typeset|'
             r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)', Name.Builtin),
            (r'#.*\n', Comment),
            (r'\\[\w\W]', String.Escape),
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)),
            (r'[\[\]{}()=]', Operator),
            (r'<<<', Operator),  # here-string
            (r'<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
            (r'&&|\|\|', Operator),
        ],
        'data': [
            (r'(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\$])*"', String.Double),
            (r'"', String.Double, 'string'),
            (r"(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r"(?s)'.*?'", String.Single),
            (r';', Punctuation),
            (r'&', Punctuation),
            (r'\|', Punctuation),
            (r'\s+', Text),
            (r'\d+(?= |\Z)', Number),
            (r'[^=\s\[\]{}()$"\'`\\<&|;]+', Text),
            (r'<', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\$])+', String.Double),
            include('interp'),
        ],
        'curly': [
            (r'\}', String.Interpol, '#pop'),
            (r':-', Keyword),
            (r'\w+', Name.Variable),
            (r'[^}:"\'`$\\]+', Punctuation),
            (r':', Punctuation),
            include('root'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
        'math': [
            (r'\)\)', Keyword, '#pop'),
            (r'[-+*/%^|&]|\*\*|\|\|', Operator),
            (r'\d+#\d+', Number),
            (r'\d+#(?! )', Number),
            (r'\d+', Number),
            include('root'),
        ],
        'backticks': [
            (r'`', String.Backtick, '#pop'),
            include('root'),
        ],
    }

    def analyse_text(text):
        if shebang_matches(text, r'(ba|z|)sh'):
            return 1
        if text.startswith('$ '):
            return 0.2
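A small sketch of what analyse_text buys you: pygments' guess_lexer asks every lexer to score the input, so a bash shebang (or a leading '$ ' prompt, at lower confidence) steers the guess toward BashLexer.

# Hedged sketch: guess_lexer should favour BashLexer for a bash shebang.
from pygments.lexers import guess_lexer

print(guess_lexer('#!/bin/bash\necho "hello"\n'))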
Example #25
class JadeLexer(ExtendedRegexLexer):
    """
    For Jade markup.
    Jade is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Jade'
    aliases = ['jade']
    filenames = ['*.jade']
    mimetypes = ['text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example #26
class CsoundLexer(RegexLexer):
    tokens = {
        'whitespace': [(r'[ \t]+', Text),
                       (r'/[*](?:.|\n)*?[*]/', Comment.Multiline),
                       (r'(?:;|//).*$', Comment.Single),
                       (r'(\\)(\n)', bygroups(Whitespace, Text))],
        'preprocessor directives':
        [(r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc),
         (r'#includestr', Comment.Preproc, 'includestr directive'),
         (r'#include', Comment.Preproc, 'include directive'),
         (r'#[ \t]*define', Comment.Preproc, 'define directive'),
         (r'#(?:ifn?def|undef)\b', Comment.Preproc, 'macro directive')],
        'include directive':
        [include('whitespace'), (r'([^ \t]).*?\1', String, '#pop')],
        'includestr directive':
        [include('whitespace'), (r'"', String, ('#pop', 'quoted string'))],
        'define directive': [
            (r'\n', Text),
            include('whitespace'),
            (r'([A-Z_a-z]\w*)(\()', bygroups(Comment.Preproc, Punctuation),
             ('#pop', 'macro parameter name list')),
            (r'[A-Z_a-z]\w*', Comment.Preproc, ('#pop', 'before macro body'))
        ],
        'macro parameter name list': [
            include('whitespace'), (r'[A-Z_a-z]\w*', Comment.Preproc),
            (r"['#]", Punctuation),
            (r'\)', Punctuation, ('#pop', 'before macro body'))
        ],
        'before macro body': [(r'\n', Text),
                              include('whitespace'),
                              (r'#', Punctuation, ('#pop', 'macro body'))],
        'macro body': [(r'(?:\\(?!#)|[^#\\]|\n)+', Comment.Preproc),
                       (r'\\#', Comment.Preproc),
                       (r'(?<!\\)#', Punctuation, '#pop')],
        'macro directive':
        [include('whitespace'), (r'[A-Z_a-z]\w*', Comment.Preproc, '#pop')],
        'macro uses': [(r'(\$[A-Z_a-z]\w*\.?)(\()',
                        bygroups(Comment.Preproc,
                                 Punctuation), 'macro parameter value list'),
                       (r'\$[A-Z_a-z]\w*(?:\.|\b)', Comment.Preproc)],
        'macro parameter value list': [
            (r'(?:[^\'#"{()]|\{(?!\{))+', Comment.Preproc),
            (r"['#]", Punctuation),
            (r'"', String, 'macro parameter value quoted string'),
            (r'\{\{', String, 'macro parameter value braced string'),
            (r'\(', Comment.Preproc, 'macro parameter value parenthetical'),
            (r'\)', Punctuation, '#pop')
        ],
        'macro parameter value quoted string': [(r"\\[#'()]", Comment.Preproc),
                                                (r"[#'()]", Error),
                                                include('quoted string')],
        'macro parameter value braced string': [(r"\\[#'()]", Comment.Preproc),
                                                (r"[#'()]", Error),
                                                include('braced string')],
        'macro parameter value parenthetical': [
            (r'(?:[^\\()]|\\\))+', Comment.Preproc),
            (r'\(', Comment.Preproc, '#push'), (r'\)', Comment.Preproc, '#pop')
        ],
        'whitespace and macro uses':
        [include('whitespace'), include('macro uses')],
        'numbers':
        [(r'\d+[Ee][+-]?\d+|(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?', Number.Float),
         (r'(0[Xx])([0-9A-Fa-f]+)', bygroups(Keyword.Type, Number.Hex)),
         (r'\d+', Number.Integer)],
        'quoted string': [(r'"', String, '#pop'), (r'[^"$]+', String),
                          include('macro uses'), (r'[$]', String)],
        'braced string': [
            # Do nothing. This must be defined in subclasses.
        ]
    }
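Because 'braced string' is deliberately left empty, a subclass must supply it; a minimal sketch (the rules below are illustrative, not taken from the source) that leans on pygments' per-state token inheritance, so every other state still comes from CsoundLexer:

# Hedged sketch: override only the state the base class leaves empty.
class MyCsoundDialectLexer(CsoundLexer):
    tokens = {
        'braced string': [
            (r'\}\}', String, '#pop'),  # closing delimiter of a {{...}} string
            (r'[^}]+', String),         # string body
            (r'\}', String),            # a lone brace inside the string
        ],
    }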
Example #27
 def _make_begin_state(compound, _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl, _punct=_punct, _string=_string,
                       _space=_space, _start_label=_start_label,
                       _stoken=_stoken, _token_terminator=_token_terminator,
                       _variable=_variable, _ws=_ws):
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     return [
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix),
         (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'),
         (r'@+', Punctuation),
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this, state='text')),
          'follow%s' % suffix),
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'), Keyword),
          ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Punctuation),
          'label%s' % suffix),
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'), Keyword),
          'arithmetic%s' % suffix),
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation),
          'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
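A hedged sketch of how such a factory is typically wired up (pygments' BatchLexer does the equivalent): each generated state exists twice, once for top-level code and once for code inside '(...)' compound statements, selected by the compound flag.

# Hedged sketch: build parallel plain and compound variants of the state.
tokens = {
    'root': _make_begin_state(False),
    'root/compound': _make_begin_state(True),
}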
Example #28
class AADLLexer(RegexLexer):
    """
    Pygments parser for AADL models. See <http://www.aadl.info> for
    more details.
    """

    name = 'AADL'
    aliases = ['aadl']
    filenames = ['*.aadl']
    mimetypes = ['text/x-aadl']

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE

    iden_rex = r'[a-zA-Z_][a-zA-Z0-9_\.]*'
    class_iden_rex = r'(' + iden_rex + r')(::)(' + iden_rex + r')'
    definition_rex = r'(' + iden_rex + r')' + r'(\s*:\s*)\b'
    component_category = r'(abstract|data|subprogram|subprogram\s+group|thread|thread\s+group|process|memory|processor|bus|device|virtual\s+processor|virtual\s+bus|system)\b'

    with_tuple = (r'(with)(\s+)', bygroups(Keyword, Whitespace), 'with-list')
    text_tuple = (r'([^\S\n]+)', Text)
    terminator_tuple = (r'(;)(\s*)', bygroups(Punctuation, Whitespace), '#pop')
    comment_tuple = (r'(--[^\n]*\n)', Comment.Single)
    comment_whitespace_tuple = (r'(--[^\n]*\n)(\s+)',
                                bygroups(Comment.Single, Whitespace))
    accesses_tuple = (
        r'(bus|subprogram|subprogram\s+group|data)(\s+)(access)\b',
        bygroups(Keyword, Whitespace, Keyword))
    features_tuple = (
        r'(feature|port|event\s+port|data\s+port|event\s+data\s+port|feature\s+group)\b',
        Keyword)

    tokens = {
        'packageOrSystem': [
            text_tuple,
            (r'(implementation)(\s+)(' + iden_rex + r')',
             bygroups(Name.Class, Whitespace, Name.Class), '#pop'),
            (iden_rex, Name.Class, '#pop'),
        ],
        'annex': [
            (r'(\s*)(' + iden_rex + r')(\s*)({\*\*.*\*\*})(\s*)(;)',
             bygroups(Whitespace, Name.Class, Whitespace, Comment.Multiline,
                      Whitespace, Punctuation)),
        ],
        'with-list': [
            (r'\s*(,)\s*', Punctuation),
            (r'[a-zA-Z_]\w*', Name.Namespace),
            terminator_tuple,
        ],
        'alias-body': [
            (component_category, Keyword.Declaration),
            (r'(\s+)', Whitespace),
            (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity)),
            terminator_tuple,
        ],
        'package-declaration': [
            text_tuple,
            (r'(implementation)', Keyword.Declaration),
            (r'(' + iden_rex + r')(;)', bygroups(Name.Class,
                                                 Punctuation), '#pop'),
            (class_iden_rex + r'(;)',
             bygroups(Name.Class, Punctuation, Name.Entity,
                      Punctuation), '#pop'),
            (r'(' + iden_rex + r')(\s*)(extends)(\s*)',
             bygroups(Name.Class, Whitespace, Keyword.Declaration,
                      Whitespace)),
            (class_iden_rex, bygroups(Name.Class, Punctuation,
                                      Name.Entity), '#pop'),
            (iden_rex, Name.Class, '#pop'),
        ],
        'declaration': [
            text_tuple,
            (r'(in|out|event|data)', Keyword),
            (r'(provides|requires)', Keyword),
            features_tuple,
            accesses_tuple,
            (r'(flow|path|thread|subprogram)', Keyword),
            (component_category, Keyword),
            (class_iden_rex, bygroups(Name.Class, Punctuation, Name)),
            (r'(' + iden_rex + r')(\s*)(->|<-|<->)(\s*)(' + iden_rex + r')',
             bygroups(Name, Whitespace, Operator, Whitespace, Name.Variable)),
            (iden_rex, Name.Function),
            (r'({)(\s+)', bygroups(Punctuation, Whitespace),
             'property-constant-declaration'),
            (r'}', Punctuation),
            terminator_tuple,
        ],
        'applies-to': [
            text_tuple,
            (r'\(', Punctuation),
            (r'\s*(,)\s*', Punctuation),
            (r'\s*(\*\*)\s*', Operator),
            features_tuple,
            accesses_tuple,
            (component_category, Keyword),
            (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Entity)),
            (r'(' + iden_rex + r')', Name.Class),
            (r'(\{)(' + iden_rex + r')(\})',
             bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\)', Punctuation),
            (r';', Punctuation, '#pop:2'),
        ],
        'property-value': [
            (r'(true|false)', Keyword.Constant),
            (r'\(', Punctuation),
            (r'\)', Punctuation),
            (r',', Punctuation),
            (r'[0-9]+\.[0-9]*', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'(reference)(\s*)(\()(' + iden_rex + r')(\))',
             bygroups(Keyword.Declaration, Whitespace, Punctuation,
                      Name.Variable.Instance, Punctuation)),
            (r'"[^"]*"', Literal.String.Double),
            (r'(\s*)(\.\.)(\s+)', bygroups(Whitespace, Operator, Whitespace)),
            (class_iden_rex, bygroups(Name.Class, Punctuation, Name.Variable)),
            (r'(\s*)(applies)(\s+)(to)(\s+)',
             bygroups(Whitespace, Keyword.Declaration, Whitespace,
                      Keyword.Declaration, Whitespace), 'applies-to'),
            (r'(' + iden_rex + r')', Name.Constant),
            (r'(\[)(\s*)', bygroups(Punctuation, Whitespace), 'record_term'),
            (r'(\s+)', Whitespace),
            terminator_tuple,
        ],
        'record_term': [
            (r'(' + iden_rex + r')(\s*)(=>)(\s*)',
             bygroups(Name.Class, Whitespace, Operator,
                      Whitespace), 'property-value'),
            (r'(\])', Punctuation, '#pop'),
        ],
        'property-section-property-value': [
            include('property-value'),
            terminator_tuple,
        ],
        'property-constant-value': [
            include('property-value'),
            (r'(;)(\s+)', bygroups(Punctuation, Whitespace), '#pop:2')
        ],
        'aggregate-property-constant-list': [
            (r'(' + iden_rex + r')(\s*)(=>)(\s*)',
             bygroups(Name.Class, Whitespace, Operator, Whitespace)),
            (r'\s*;\s*', Punctuation),
            include('property-value'),
            (r'(\]\s*;)(\s+)', bygroups(Punctuation, Whitespace), '#pop:2'),
        ],
        'property-declaration': [
            comment_tuple,
            (r'(inherit|list\s+of)', Keyword.Declaration),
            # aadl property types
            (r'(aadlboolean|aadlinteger|aadlreal|aadlstring|enumeration|range\s+of|classifier|reference|record)',
             Keyword.Type),
            (r'(,|\(|\)|\+|-|\.\.|:|;)', Punctuation),
            (r'(units)(\s*)(\()',
             bygroups(Keyword.Declaration, Whitespace,
                      Punctuation), 'units-list'),
            (r'[0-9]+', Number.Integer),
            features_tuple,
            accesses_tuple,
            (component_category, Keyword.Type),
            (r'(=>)(\s*)', bygroups(Operator,
                                    Whitespace), 'applies-to-property-value'),
            (r'(applies)(\s+)(to)(\s+)',
             bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration,
                      Whitespace), 'applies-to'),
            (class_iden_rex, Name.Class),
            (r'(' + iden_rex + r')', Name.Class),
            (r'(\s+)', Whitespace),
        ],
        'units-list': [
            comment_tuple,
            (r'(' + iden_rex + r')', Name.Class),
            (r'(,|\*|=>)', Punctuation),
            (r'(\s+)', Whitespace),
            (r'(\))', Punctuation, '#pop'),
        ],
        'applies-to-property-value': [
            (r'(applies)(\s+)(to)(\s+)',
             bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration,
                      Whitespace), 'applies-to'),
            include('property-value'),
        ],
        'property-constant-declaration': [
            text_tuple,
            (class_iden_rex + r'(\s*)(=>)(\s*)(\[)(\s*)',
             bygroups(Name.Class, Punctuation, Name.Constant, Whitespace,
                      Operator, Whitespace, Punctuation,
                      Whitespace), 'aggregate-property-constant-list'),
            (r'(' + iden_rex + r')(\s*)(=>)(\s*)(\[)(\s*)',
             bygroups(Name.Class, Whitespace, Operator, Whitespace,
                      Punctuation,
                      Whitespace), 'aggregate-property-constant-list'),
            (class_iden_rex + r'(\s*)(=>)(\s*)',
             bygroups(Name.Class, Punctuation, Name.Constant, Whitespace,
                      Operator, Whitespace), 'property-constant-value'),
            (r'(' + iden_rex + r')(\s*)(=>)(\s*)',
             bygroups(Name.Class, Whitespace, Operator,
                      Whitespace), 'property-constant-value'),
        ],
        'property-set': [
            comment_tuple,
            with_tuple,
            (r'(' + iden_rex + r')(\s+)(is)(\s+)',
             bygroups(Name.Class, Whitespace, Keyword.Namespace, Whitespace)),
            (definition_rex + r'(constant)',
             bygroups(Name.Variable.Global, Punctuation,
                      Keyword), 'property-constant-declaration'),
            (definition_rex, bygroups(Name.Variable.Global,
                                      Punctuation), 'property-declaration'),
            (r'(end)(\s+)(' + iden_rex + r')(;)',
             bygroups(Keyword.Namespace, Whitespace, Name.Class,
                      Punctuation), '#pop'),
            (r'(\s+)', Whitespace),
        ],
        'property-section': [
            text_tuple,
            comment_whitespace_tuple,
            (class_iden_rex + r'(\s*)(=>)(\s*)',
             bygroups(Name.Class, Punctuation, Name.Entity, Whitespace,
                      Operator,
                      Whitespace), 'property-section-property-value'),
            (r'(' + iden_rex + r')(\s*)(=>)(\s*)',
             bygroups(Name.Class, Whitespace, Operator,
                      Whitespace), 'property-section-property-value'),
            (r'(\*\*})(\s*)(;)', bygroups(Punctuation, Whitespace,
                                          Punctuation), '#pop'),
            (r'(\s+)', Whitespace),
            default('#pop'),
        ],
        'call-section': [
            text_tuple,
            comment_whitespace_tuple,
            (r'(' + iden_rex + r')(\s*)(:)(\s*)({)(\s*)',
             bygroups(Name.Class, Whitespace, Punctuation, Whitespace,
                      Punctuation, Whitespace)),
            (definition_rex, bygroups(Name.Variable,
                                      Punctuation), 'declaration'),
            (r'}', Punctuation),
            terminator_tuple,
        ],
        'id-or-classid': [
            (class_iden_rex, bygroups(Name.Class, Punctuation,
                                      Name.Entity), '#pop'),
            (r'(' + iden_rex + r')', Name.Entity, '#pop'),
        ],
        'semicolon': [
            (r'(\s*)(;)', bygroups(Whitespace, Punctuation), '#pop'),
        ],
        'emv2-annex': [
            (r'(use)(\s+)(types|type\s+equivalence|mappings|behavior)(\s+)',
             bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace,
                      Whitespace), ('semicolon', 'id-or-classid')),
            (r'(error\s+propagations)(\s+)',
             bygroups(Keyword.Namespace, Whitespace), 'emv2-propagations'),
            (r'(component\s+error\s+behavior)(\s+)',
             bygroups(Keyword.Namespace, Whitespace), 'emv2-component'),
            (r'(\*\*})(\s*)(;)', bygroups(Punctuation, Whitespace,
                                          Punctuation), '#pop'),
            (r'(\s+)', Whitespace),
        ],
        'emv2-propagations': [
            (r'(not|in|out|propagation)', Keyword.Namespace),
            (r'(:|{|\*|::|}|;)', Punctuation),
            (r'(end\s+propagations)(\s*)(;)',
             bygroups(Keyword.Namespace, Whitespace, Punctuation), '#pop'),
            (r'(' + iden_rex + r')', Name.Entity),
            (r'(\s+)', Whitespace),
        ],
        'emv2-component': [
            (r'(use)(\s+)(transformations)(\s+)',
             bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace,
                      Whitespace), ('semicolon', 'id-or-classid')),
            (r'(events|transitions|propagations|detections|mode\s+mappings)',
             Keyword.Namespace),
            (r'(all|noerror)', Keyword.Constant),
            (r'(:|;|{|}|\(|\))', Punctuation),
            (r'(-\[)(\s*)', bygroups(Punctuation,
                                     Whitespace), 'emv2-error-condition'),
            (r'(end\s+component)(\s*)(;)',
             bygroups(Keyword.Namespace, Whitespace, Punctuation), '#pop'),
            (r'(' + iden_rex + r')', Name.Entity),
            (r'(\s+)', Whitespace),
        ],
        'emv2-error-condition': [
            (r'(and|ormore|orless|or)', Keyword.Constant),
            (r'(\(|\)|\{|\}|::)', Punctuation),
            (r'[0-9]+', Number.Integer),
            (r'(\]->)', Punctuation, '#pop'),
            (r'(' + iden_rex + r')', Name.Entity),
            (r'(\s+)', Whitespace),
        ],
        'root': [
            (r'(\n\s*|\t)', Whitespace),
            comment_tuple,
            (r'(package)(\s+)', bygroups(Keyword.Namespace,
                                         Text), 'packageOrSystem'),
            (r'(public|private)', Keyword.Namespace),
            # import_declaration
            with_tuple,
            # alias_declaration
            (r'(' + iden_rex + r')(\s+)(renames)(\s+)',
             bygroups(Name.Namespace, Whitespace, Keyword,
                      Whitespace), 'alias-body'),
            (r'(annex)(\s+)(EMV2)(\s*)({\*\*)',
             bygroups(Keyword.Namespace, Whitespace, Name.Namespace,
                      Whitespace, Punctuation), 'emv2-annex'),
            (r'(annex)(\s+)', bygroups(Keyword.Namespace,
                                       Whitespace), 'annex'),
            (component_category + r'(\s+)', bygroups(Keyword.Type, Whitespace),
             'package-declaration'),
            (r'(calls)(\s+)', bygroups(Keyword.Namespace,
                                       Whitespace), 'call-section'),
            (r'(subcomponents|connections|features|flows)(\s+)',
             bygroups(Keyword.Namespace, Whitespace)),
            (definition_rex, bygroups(Name.Variable,
                                      Punctuation), 'declaration'),
            (r'(properties)(\s*)', bygroups(Keyword.Namespace,
                                            Whitespace), 'property-section'),
            (r'(end)(\s+)', bygroups(Keyword.Namespace,
                                     Whitespace), 'package-declaration'),
            (r'(property\s+set)(\s+)', bygroups(Keyword.Namespace,
                                                Whitespace), 'property-set'),
            (r'(\s+)', Whitespace),
        ]
    }
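A minimal usage sketch (the AADL source below is illustrative): run a trivial package through the lexer and inspect the resulting token stream.

# Hedged sketch: tokenize a tiny AADL package.
code = '''package demo
public
  system server
  end server;
end demo;
'''
for tok, val in AADLLexer().get_tokens(code):
    print(tok, repr(val))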
Example #29
    def gen_crystalstrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ["interpolated-regex"])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ["interpolated-string"])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3) + i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states["strings"] = [
            (r"\:@{0,2}[a-zA-Z_]\w*[!?]?", String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r"\:@{0,2}"), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, "simple-sym"),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r"([a-zA-Z_]\w*)(:)(?!:)", bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, "simple-string"),
            (r"(?<!\.)`", String.Backtick, "simple-backtick"),
        ]

        # double-quoted string and symbol
        for name, ttype, end in (
            ("string", String.Double, '"'),
            ("sym", String.Symbol, '"'),
            ("backtick", String.Backtick, "`"),
        ):
            states["simple-" + name] = [
                include("string-escaped" if name == "sym" else "string-intp-escaped"),
                (r"[^\\%s#]+" % end, ttype),
                (r"[\\#]", ttype),
                (end, ttype, "#pop"),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in (
            ("\\{", "\\}", "{}", "cb"),
            ("\\[", "\\]", "\\[\\]", "sb"),
            ("\\(", "\\)", "()", "pa"),
            ("<", ">", "<>", "ab"),
        ):
            states[name + "-intp-string"] = [
                (r"\\[" + lbrace + "]", String.Other),
                (lbrace, String.Other, "#push"),
                (rbrace, String.Other, "#pop"),
                include("string-intp-escaped"),
                (r"[\\#" + bracecc + "]", String.Other),
                (r"[^\\#" + bracecc + "]+", String.Other),
            ]
            states["strings"].append((r"%" + lbrace, String.Other, name + "-intp-string"))
            states[name + "-string"] = [
                (r"\\[\\" + bracecc + "]", String.Other),
                (lbrace, String.Other, "#push"),
                (rbrace, String.Other, "#pop"),
                (r"[\\#" + bracecc + "]", String.Other),
                (r"[^\\#" + bracecc + "]+", String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states["strings"].append((r"%[wi]" + lbrace, String.Other, name + "-string"))
            states[name + "-regex"] = [
                (r"\\[\\" + bracecc + "]", String.Regex),
                (lbrace, String.Regex, "#push"),
                (rbrace + "[imsx]*", String.Regex, "#pop"),
                include("string-intp"),
                (r"[\\#" + bracecc + "]", String.Regex),
                (r"[^\\#" + bracecc + "]+", String.Regex),
            ]
            states["strings"].append((r"%r" + lbrace, String.Regex, name + "-regex"))

        # these must come after %<brace>!
        states["strings"] += [
            # %r regex
            (r"(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)", intp_regex_callback),
            # regular fancy strings with qsw
            (r"(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r"(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)", bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r"^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)", bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r"(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)", intp_string_callback),
        ]

        return states
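A hedged sketch of how a generated table like this is consumed: in pygments' RubyLexer and CrystalLexer the class body folds the returned states into the lexer's token table with an update call.

# Hedged sketch (class-body fragment; elided rules marked with a comment):
tokens = {
    'root': [
        include('strings'),
        # ... the lexer's remaining rules ...
    ],
}
tokens.update(gen_crystalstrings_rules())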
Example #30
class BetterTypeScriptLexer(RegexLexer):
    """
    For `TypeScript <https://www.typescriptlang.org/>`_ source code.
    """

    name = 'TypeScript'
    aliases = ['ts']
    filenames = ['*.ts']
    mimetypes = ['text/x-typescript']

    flags = re.DOTALL
    tokens = {
        'commentsandwhitespace': [(r'\s+', Text), (r'<!--', Comment),
                                  (r'//.*?\n', Comment.Single),
                                  (r'/\*.*?\*/', Comment.Multiline)],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (
                r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
                r'([gim]+\b|\B)', String.Regex, '#pop'
            ), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop')
        ],
        'badregex': [(r'\n', Text, '#pop')],
        'typeexp': [
            (r'[a-zA-Z0-9_?.$]+', Keyword.Type),
            (r'\s+', Text),
            (r'[|]', Text),
            (r'\n', Text, "#pop"),
            (r';', Text, "#pop"),
            default('#pop'),
        ],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (
                r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
                r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator,
                'slashstartsregex'
            ),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (
                r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
                r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
                r'this)\b', Keyword, 'slashstartsregex'
            ),
            (
                r'(var|let|const|with|function)\b', Keyword.Declaration,
                'slashstartsregex'
            ),
            (
                r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
                r'extends|final|float|goto|implements|import|int|interface|long|native|'
                r'package|private|protected|public|short|static|super|synchronized|throws|'
                r'transient|volatile)\b', Keyword.Reserved
            ),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            (
                r'(Array|Boolean|Date|Error|Function|Math|netscape|'
                r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
                r'decodeURIComponent|encodeURI|encodeURIComponent|'
                r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
                r'window)\b', Name.Builtin
            ),
            # Match stuff like: module name {...}
            (
                r'\b(module)(\s*)(\s*[a-zA-Z0-9_?.$][\w?.$]*)(\s*)',
                bygroups(Keyword.Reserved, Text, Name.Other,
                         Text), 'slashstartsregex'
            ),
            # Match variable type keywords
            (r'\b(string|bool|number)\b', Keyword.Type),
            # Match stuff like: constructor
            (r'\b(constructor|declare|interface|as|AS)\b', Keyword.Reserved),
            # Match stuff like: super(argument, list)
            (
                r'(super)(\s*)\(([a-zA-Z0-9,_?.$\s]+\s*)\)',
                bygroups(Keyword.Reserved, Text), 'slashstartsregex'
            ),
            # Match stuff like: function() {...}
            (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'),
            # Match stuff like: (function: return type)
            (
                r'([a-zA-Z0-9_?.$][\w?.$]*)(\s*:\s*)',
                bygroups(Name.Other, Text), 'typeexp'
            ),
            # Match stuff like: type Foo = Bar | Baz
            (
                r'\b(type)(\s*)([a-zA-Z0-9_?.$]+)(\s*)(=)(\s*)',
                bygroups(
                    Keyword.Reserved, Text, Name.Other, Text, Operator, Text
                ), 'typeexp'
            ),
            (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
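A quick sketch: a ':' after an identifier switches the lexer into 'typeexp', so the annotation below should come out as Keyword.Type tokens.

# Hedged sketch: type annotations are tokenized via the 'typeexp' state.
for tok, val in BetterTypeScriptLexer().get_tokens('let x: number = 3;\n'):
    print(tok, repr(val))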
Example #31
class TAPLexer(RegexLexer):
    """
    For Test Anything Protocol (TAP) output.

    .. versionadded:: 2.1
    """
    name = 'TAP'
    aliases = ['tap']
    filenames = ['*.tap']

    tokens = {
        'root': [
            # A TAP version may be specified.
            (r'^TAP version \d+\n', Name.Namespace),

            # Specify a plan with a plan line.
            (r'^1\.\.\d+', Keyword.Declaration, 'plan'),

            # A test failure
            (r'^(not ok)([^\S\n]*)(\d*)',
             bygroups(Generic.Error, Text, Number.Integer), 'test'),

            # A test success
            (r'^(ok)([^\S\n]*)(\d*)',
             bygroups(Keyword.Reserved, Text, Number.Integer), 'test'),

            # Diagnostics start with a hash.
            (r'^#.*\n', Comment),

            # TAP's version of an abort statement.
            (r'^Bail out!.*\n', Generic.Error),

            # TAP ignores any unrecognized lines.
            (r'^.*\n', Text),
        ],
        'plan': [
            # Consume whitespace (but not newline).
            (r'[^\S\n]+', Text),

            # A plan may have a directive with it.
            (r'#', Comment, 'directive'),

            # Or it could just end.
            (r'\n', Comment, '#pop'),

            # Anything else is wrong.
            (r'.*\n', Generic.Error, '#pop'),
        ],
        'test': [
            # Consume whitespace (but not newline).
            (r'[^\S\n]+', Text),

            # A test may have a directive with it.
            (r'#', Comment, 'directive'),
            (r'\S+', Text),
            (r'\n', Text, '#pop'),
        ],
        'directive': [
            # Consume whitespace (but not newline).
            (r'[^\S\n]+', Comment),

            # Extract todo items.
            (r'(?i)\bTODO\b', Comment.Preproc),

            # Extract skip items.
            (r'(?i)\bSKIP\S*', Comment.Preproc),
            (r'\S+', Comment),
            (r'\n', Comment, '#pop:2'),
        ],
    }
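A minimal sketch: feed a short TAP report through the lexer; the plan line, the ok/not ok results, and the trailing TODO directive each pass through their own states.

# Hedged sketch: tokenize a four-line TAP stream.
sample = 'TAP version 13\n1..2\nok 1 - first\nnot ok 2 - second # TODO fix\n'
for tok, val in TAPLexer().get_tokens(sample):
    print(tok, repr(val))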
Example #32
class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' %
                         (re.escape(unicode_delimiters), re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text,
                      Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this,
                                                         state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (
                r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
                bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                         # has two lines
                p1 * 2 + 1 == p2 and            # they are the same length
                text[p1 + 1] in '-=' and        # the next line both starts and ends with
                text[p1 + 1] == text[p2 - 1]):  # ...a sufficiently high header
            return 0.5
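A short sketch of the handlecodeblocks option documented above: with it disabled, the nested Python source stays a plain String block instead of being re-highlighted by PythonLexer.

# Hedged sketch: compare nested code handling with the option on and off.
rst = ".. code-block:: python\n\n    print('hi')\n"
nested = RstLexer()                        # default: handlecodeblocks=True
plain = RstLexer(handlecodeblocks=False)   # nested code left as String tokens
print(list(nested.get_tokens(rst))[:8])
print(list(plain.get_tokens(rst))[:8])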
Example #33
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
# =============================================================================
import os
import re

# Monkey patch for pygments reporting an error when generator expressions are
# used.
# https://bitbucket.org/birkenfeld/pygments-main/issue/942/cmake-generator-expressions-not-handled
from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator
from pygments.lexer import bygroups

CMakeLexer.tokens["args"].append(("(\\$<)(.+?)(>)", bygroups(Operator, Name.Variable, Operator)))

# Monkey patch for sphinx generating invalid content for qcollectiongenerator
# https://bitbucket.org/birkenfeld/sphinx/issue/1435/qthelp-builder-should-htmlescape-keywords
from sphinx.util.pycompat import htmlescape
from sphinx.builders.qthelp import QtHelpBuilder

old_build_keywords = QtHelpBuilder.build_keywords


def new_build_keywords(self, title, refs, subitems):
    old_items = old_build_keywords(self, title, refs, subitems)
    new_items = []
    for item in old_items:
        before, rest = item.split('ref="', 1)
        ref, after = rest.split('"')
        # The original snippet breaks off here; the remainder below is
        # reconstructed from CMake's Utilities/Sphinx/conf.py.
        if '<' in ref and '>' in ref:
            new_items.append(before + 'ref="' + htmlescape(ref) + '"' + after)
        else:
            new_items.append(item)
    return new_items


QtHelpBuilder.build_keywords = new_build_keywords
Example #34
class BooLexer(RegexLexer):
    """
    For `Boo <http://boo.codehaus.org/>`_ source code.
    """

    name = 'Boo'
    aliases = ['boo']
    filenames = ['*.boo']
    mimetypes = ['text/x-boo']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|//).*$', Comment.Single),
            (r'/[*]', Comment.Multiline, 'comment'),
            (r'[]{}:(),.;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'/(\\\\|\\/|[^/\s])/', String.Regex),
            (r'@/(\\\\|\\/|[^/])*/', String.Regex),
            (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
            (r'(as|abstract|callable|constructor|destructor|do|import|'
             r'enum|event|final|get|interface|internal|of|override|'
             r'partial|private|protected|public|return|set|static|'
             r'struct|transient|virtual|yield|super|and|break|cast|'
             r'continue|elif|else|ensure|except|for|given|goto|if|in|'
             r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|'
             r'while|from|as)\b', Keyword),
            (r'def(?=\s+\(.*?\))', Keyword),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|'
             r'assert|checked|enumerate|filter|getter|len|lock|map|'
             r'matrix|max|min|normalArrayIndexing|print|property|range|'
             r'rawArrayIndexing|required|typeof|unchecked|using|'
             r'yieldAll|zip)\b', Name.Builtin),
            (r'"""(\\\\|\\"|.*?)"""', String.Double),
            (r'"(\\\\|\\"|[^"]*?)"', String.Double),
            (r"'(\\\\|\\'|[^']*?)'", String.Single),
            (r'[a-zA-Z_]\w*', Name),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            (r'[0-9][0-9.]*(ms?|d|h|s)', Number),
            (r'0\d+', Number.Oct),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer),
        ],
        'comment': [
            ('/[*]', Comment.Multiline, '#push'),
            ('[*]/', Comment.Multiline, '#pop'),
            ('[^/*]', Comment.Multiline),
            ('[*/]', Comment.Multiline)
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'namespace': [
            (r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')
        ]
    }
Example #35
class NedLexer(RegexLexer):
    name = 'ned'
    filenames = ['*.ned']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(true|false)\b', Name.Builtin),
            (r'(<-->|-->|<--|\.\.)', Keyword),
            (r'(bool|double|int|xml)\b', Keyword.Type),
            (r'(inout|input|output)\b', Keyword.Type),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'#[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(("channel", "channelinterface", "simple", "module",
                    "network", "moduleinterface"),
                   suffix=r'\b'), Keyword),
            (words(
                ("parameters", "gates", "types", "submodules", "connections"),
                suffix=r'\b'), Keyword),
            (words(("volatile", "allowunconnected", "extends", "for", "if",
                    "import", "like", "package", "property"),
                   suffix=r'\b'), Keyword),
            (words(("sizeof", "const", "default", "ask", "this", "index",
                    "typename", "xmldoc"),
                   suffix=r'\b'), Keyword),
            (words(("acos", "asin", "atan", "atan2", "bernoulli", "beta",
                    "binomial", "cauchy", "ceil", "chi_square", "cos",
                    "erlang_k", "exp", "exponential", "fabs", "floor", "fmod",
                    "gamma_d", "genk_exponential", "genk_intuniform",
                    "genk_normal", "genk_truncnormal", "genk_uniform",
                    "geometric", "hypergeometric", "hypot", "intuniform",
                    "log", "log10", "lognormal", "max", "min", "negbinomial",
                    "normal", "pareto_shifted", "poisson", "pow", "simTime",
                    "sin", "sqrt", "student_t", "tan", "triang", "truncnormal",
                    "uniform", "weibull", "xml", "xmldoc"),
                   suffix=r'\b'), Name.Builtin),
            (r'@[a-zA-Z_]\w*', Name.Builtin),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ]
    }
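
# The 'function' state above shows pygments' '#push'/'#pop' stack handling
# for nested braces. A stripped-down, runnable sketch of the same mechanism
# (illustrative mini-lexer, assuming only that pygments is installed):
from pygments.lexer import RegexLexer
from pygments.token import Punctuation, Text

class BraceLexer(RegexLexer):
    tokens = {
        'root': [
            (r'\{', Punctuation, 'block'),
            (r'[^{]+', Text),
        ],
        'block': [
            (r'\{', Punctuation, '#push'),  # nested brace: re-enter this state
            (r'\}', Punctuation, '#pop'),   # closing brace: leave one level
            (r'[^{}]+', Text),
        ],
    }

print(list(BraceLexer().get_tokens('a { b { c } }')))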
Beispiel #36
0
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      .. versionadded:: 0.8
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none': r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full': ('@?(?:_|[^' +
                 uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
                 + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
                                        'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    tokens = {}
    token_variants = True

    for levelname, cs_ident in levels.items():
        tokens[levelname] = {
            'root': [
                # method names
                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)'  # return type
                 r'(' + cs_ident + ')'  # method name
                 r'(\s*)(\()',  # signature start
                 bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
                 r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|async|await|base|break|by|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|let|lock|new|null|on|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
                 r'descending|from|group|into|orderby|select|thenby|where|'
                 r'join|equals)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
                 r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'),
            ]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
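
# A quick way to exercise the `unicodelevel` option above (a sketch assuming
# pygments is installed; it drives the shipped CSharpLexer rather than this
# excerpt):
from pygments.lexers import CSharpLexer

lexer = CSharpLexer(unicodelevel='none')  # ASCII-only identifiers, fastest
for token, value in lexer.get_tokens('int x = 0;'):
    print(token, repr(value))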
Beispiel #37
0
import re

# re is needed by line_re below; the extra lexers are used by the cell-magic
# rules inside build_ipy_lexer.
from pygments.lexers import (BashLexer, HtmlLexer, JavascriptLexer, PerlLexer,
                             PythonLexer, Python3Lexer, RubyLexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
  (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword,
                                       using(BashLexer), Text)),
  (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
  (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`.  But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$",  bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$",  bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)
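
# The factory is called once per Python flavour to produce the concrete lexer
# classes, as in IPython's own lexer module:
IPythonLexer = build_ipy_lexer(python3=False)
IPython3Lexer = build_ipy_lexer(python3=True)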

import re

from pygments.lexer import RegexLexer, bygroups, default, include, using, words
from pygments.token import Comment, Error, Keyword, Name, Number, Operator, Punctuation, \
    String, Text, Whitespace
from pygments.lexers._csound_builtins import OPCODES, DEPRECATED_OPCODES
from pygments.lexers.html import HtmlLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.scripting import LuaLexer

__all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer']

newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text))


class CsoundLexer(RegexLexer):
    tokens = {
        'whitespace': [
            (r'[ \t]+', Text),
            (r'/[*](?:.|\n)*?[*]/', Comment.Multiline),
            (r'(?:;|//).*$', Comment.Single),
            (r'(\\)(\n)', bygroups(Whitespace, Text))
        ],

        'preprocessor directives': [
            (r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc),
            (r'#include', Comment.Preproc, 'include directive'),
            (r'#[ \t]*define', Comment.Preproc, 'define directive'),
Beispiel #40
0
class NemerleLexer(RegexLexer):
    """
    For `Nemerle <http://nemerle.org>`_ source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

    .. versionadded:: 1.5
    """

    name = 'Nemerle'
    aliases = ['nemerle']
    filenames = ['*.n']
    mimetypes = ['text/x-nemerle']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none': r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full': ('@?(?:_|[^' +
                 uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
                 + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
                                        'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    tokens = {}
    token_variants = True

    for levelname, cs_ident in levels.items():
        tokens[levelname] = {
            'root': [
                # method names
                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)'  # return type
                 r'(' + cs_ident + ')'  # method name
                 r'(\s*)(\()',  # signature start
                 bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'\$\s*"', String, 'splice-string'),
                (r'\$\s*<#', String, 'splice-string2'),
                (r'<#', String, 'recursive-string'),

                (r'(<\[)\s*(' + cs_ident + ':)?', Keyword),
                (r'\]\>', Keyword),

                # quasiquotation only
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),

                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|and|as|base|catch|def|delegate|'
                 r'enum|event|extern|false|finally|'
                 r'fun|implements|interface|internal|'
                 r'is|macro|match|matches|module|mutable|new|'
                 r'null|out|override|params|partial|private|'
                 r'protected|public|ref|sealed|static|'
                 r'syntax|this|throw|true|try|type|typeof|'
                 r'virtual|volatile|when|where|with|'
                 r'assert|assert2|async|break|checked|continue|do|else|'
                 r'ensures|for|foreach|if|late|lock|new|nolate|'
                 r'otherwise|regexp|repeat|requires|return|surroundwith|'
                 r'unchecked|unless|using|while|yield)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort|void|array|list)\b\??',
                 Keyword.Type),
                (r'(:>?)\s*(' + cs_ident + r'\??)',
                 bygroups(Punctuation, Keyword.Type)),
                (r'(class|struct|variant|module)(\s+)',
                 bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword, Text),
                 'namespace'),
                (cs_ident, Name),
            ],
            'class': [
                (cs_ident, Name.Class, '#pop')
            ],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ],
            'splice-string': [
                (r'[^"$]', String),
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),
                (r'\\"', String),
                (r'"', String, '#pop')
            ],
            'splice-string2': [
                (r'[^#<>$]', String),
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),
                (r'<#', String, '#push'),
                (r'#>', String, '#pop')
            ],
            'recursive-string': [
                (r'[^#<>]', String),
                (r'<#', String, '#push'),
                (r'#>', String, '#pop')
            ],
            'splice-string-content': [
                (r'if|match', Keyword),
                (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation),
                (cs_ident, Name),
                (r'\d+', Number),
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop')
            ]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
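
# The option handling above leans on pygments' get_choice_opt, which validates
# a value against an allowed list and falls back to a default. In isolation
# (a sketch, assuming only pygments):
from pygments.util import get_choice_opt

options = {'unicodelevel': 'full'}
level = get_choice_opt(options, 'unicodelevel', ['none', 'basic', 'full'], 'basic')
print(level)  # 'full'; a value outside the list raises pygments.util.OptionError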
Beispiel #41
0
class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),
            (r'(<!ELEMENT)(\s+)(\S+)', bygroups(Keyword, Text,
                                                Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)', bygroups(Keyword, Text,
                                                Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)', bygroups(Keyword, Text,
                                               Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)', bygroups(Keyword, Text,
                                                 Name.Tag), 'notation'),
            (
                r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
                bygroups(Keyword, Name.Entity, Text, Keyword)),
            (r'(<!DOCTYPE)(\s+)([^>\s]+)', bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],
        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],
        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        if not looks_like_xml(text) and \
           ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8
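
# analyse_text feeds pygments' lexer guessing: guess_lexer() calls it on every
# registered lexer and picks the highest score, so the 0.8 above should win
# for a bare DTD fragment (a sketch, assuming pygments):
from pygments.lexers import guess_lexer

sample = '<!ELEMENT note (to, from, heading, body)>'
print(guess_lexer(sample).name)  # expected: 'DTD'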
Beispiel #42
0
def check_lexer(lexer_name, cls, mod_path, min_level, output_stream=sys.stdout):
    #print lexer_name
    #print cls().tokens
    has_errors = False

    bygroups_callback = func_code(bygroups(1))
    for state, pats in cls().tokens.items():
        if not isinstance(pats, list):
            # This is for Inform7Lexer
            print(lexer_name, 'WEIRD', file=output_stream)
            return output_stream

        for i, pat in enumerate(pats):
            if hasattr(pat, 'state'):
                # new 'default'
                continue

            try:
                if isinstance(pat[0], Future):
                    pat = (pat[0].get(),) + pat[1:]
                reg = Regex.get_parse_tree(pat[0], cls.flags)
            except TypeError:
                # Doesn't support _inherit yet.
                continue
            except Exception:
                try:
                    print(pat[0], cls, file=output_stream)
                except: pass
                raise
            # Special problem: display an error if count of args to
            # bygroups(...) doesn't match the number of capture groups
            if callable(pat[1]) and func_code(pat[1]) is bygroups_callback:
                by_groups = func_closure(pat[1])
            else:
                by_groups = None

            if ONLY_FUNC:
                errs = []
                getattr(regexlint.checkers, ONLY_FUNC)(reg, errs)
            else:
                errs = run_all_checkers(reg, by_groups)
                # Special case for empty string, since it needs action.
                manual_check_for_empty_string_match(reg, errs, pat)

            errs.sort(key=lambda k: (k[1], k[0]))
            if errs:
                #print "Errors in", lexer_name, state, "pattern", i
                for num, severity, pos1, text in errs:
                    if severity < min_level: continue

                    # Only set this if we're going to output something --
                    # otherwise the [Lexer] OK won't print
                    has_errors = True

                    foo = find_offending_line(mod_path, lexer_name, state, i,
                                              pos1)
                    if foo:
                        line = 'L' + str(foo[0])
                    else:
                        line = 'pat#' + str(i+1)
                    print('%s%s:%s:%s:%s: %s' % (
                        logging.getLevelName(severity)[0], num,
                        lexer_name, state, line, text), file=output_stream)
                    if foo:
                        mark(*(foo + (output_stream,)))
                    else:
                        mark_str(pos1, pos1+1, pat[0], output_stream)
    if not has_errors:
        print(lexer_name, 'OK', file=output_stream)

    return output_stream
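
# The bygroups_callback comparison above works because, in CPython, every
# bygroups(...) call returns a fresh closure over one shared code object; the
# checker matches on that code object and then pulls the token arguments back
# out of the closure. In isolation:
from pygments.lexer import bygroups

cb = bygroups(1, 2, 3)
assert cb.__code__ is bygroups(9).__code__   # shared code object
print(cb.__closure__[0].cell_contents)       # (1, 2, 3): the argument tuple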
Beispiel #43
0
def check_lexer(lexer_name, cls, mod_path, min_level, output_stream=sys.stdout):
    #print lexer_name
    #print cls().tokens
    has_errors = False

    bygroups_callback = bygroups(1).func_code
    for state, pats in cls().tokens.iteritems():
        if not isinstance(pats, list):
            # This is for Inform7Lexer
            print >>output_stream, lexer_name, 'WEIRD'
            return output_stream

        for i, pat in enumerate(pats):
            if hasattr(pat, 'state'):
                # new 'default'
                continue

            try:
                if isinstance(pat[0], Future):
                    pat = (pat[0].get(),) + pat[1:]
                reg = Regex.get_parse_tree(pat[0], cls.flags)
            except TypeError:
                # Doesn't support _inherit yet.
                continue
            except Exception:
                try:
                    print >>output_stream, pat[0], cls
                except: pass
                raise
            # Special problem: display an error if count of args to
            # bygroups(...) doesn't match the number of capture groups
            if callable(pat[1]) and pat[1].func_code is bygroups_callback:
                by_groups = pat[1].func_closure[0].cell_contents
            else:
                by_groups = None

            if ONLY_FUNC:
                errs = []
                getattr(regexlint.checkers, ONLY_FUNC)(reg, errs)
            else:
                errs = run_all_checkers(reg, by_groups)
                # Special case for empty string, since it needs action.
                manual_check_for_empty_string_match(reg, errs, pat)

            errs.sort(key=lambda k: (k[1], k[0]))
            if errs:
                #print "Errors in", lexer_name, state, "pattern", i
                for num, severity, pos1, text in errs:
                    if severity < min_level: continue

                    # Only set this if we're going to output something --
                    # otherwise the [Lexer] OK won't print
                    has_errors = True

                    foo = find_offending_line(mod_path, lexer_name, state, i,
                                              pos1)
                    if foo:
                        line = 'L' + str(foo[0])
                    else:
                        line = 'pat#' + str(i+1)
                    print >>output_stream, '%s%s:%s:%s:%s: %s' % (
                        logging.getLevelName(severity)[0], num,
                        lexer_name, state, line, text)
                    if foo:
                        mark(*(foo + (output_stream,)))
                    else:
                        # Subtract one for the closing quote
                        start = len(consistent_repr(pat[0][:pos1])) - 1
                        end = len(consistent_repr(pat[0][:pos1+1])) - 1
                        if start == end:
                            # This handles the case where pos1 points to the end
                            # of the string. Regex "|" with pos1 = 1.
                            end += 1
                        assert end > start
                        text, start, end = shorten(repr(pat[0]), start, end)
                        mark(-1, start, end, text, output_stream)
    if not has_errors:
        print >>output_stream, lexer_name, "OK"

    return output_stream
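
# The start/end arithmetic above maps an index in the raw pattern onto the
# matching column inside repr(pattern), so the marker lands under the right
# character even when escapes widen it. The same trick in isolation (Python 3
# syntax here, unlike the snippet above):
pat = 'a\tb|'
pos1 = 3                            # index of '|' in the raw pattern
start = len(repr(pat[:pos1])) - 1   # subtract one for the closing quote
end = len(repr(pat[:pos1 + 1])) - 1
print(repr(pat))
print(' ' * start + '^' * max(end - start, 1))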
Beispiel #44
0
class GherkinLexer(RegexLexer):
    """
    For `Gherkin <http://github.com/aslakhellesoy/gherkin/>`_ syntax.

    .. versionadded:: 1.2
    """
    name = 'Gherkin'
    aliases = ['cucumber', 'gherkin']
    filenames = ['*.feature']
    mimetypes = ['text/x-gherkin']

    feature_keywords = '^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
    feature_element_keywords = '^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
    examples_keywords = '^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
    step_keywords = '^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )'

    tokens = {
        'comments': [
            (r'^\s*#.*$', Comment),
        ],
        'feature_elements': [
            (step_keywords, Keyword, "step_content_stack"),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        'feature_elements_on_stack': [
            (step_keywords, Keyword, "#pop:2"),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        'examples_table': [
            (r"\s+\|", Keyword, 'examples_table_header'),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        'examples_table_header': [
            (r"\s+\|\s*$", Keyword, "#pop:2"),
            include('comments'),
            (r"\\\|", Name.Variable),
            (r"\s*\|", Keyword),
            (r"[^|]", Name.Variable),
        ],
        'scenario_sections_on_stack': [
            (feature_element_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), "feature_elements_on_stack"),
        ],
        'narrative': [
            include('scenario_sections_on_stack'),
            include('comments'),
            (r"(\s|.)", Name.Function),
        ],
        'table_vars': [
            (r'(<[^>]+>)', Name.Variable),
        ],
        'numbers': [
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', String),
        ],
        'string': [
            include('table_vars'),
            (r'(\s|.)', String),
        ],
        'py_string': [
            (r'"""', Keyword, "#pop"),
            include('string'),
        ],
        'step_content_root': [
            (r"$", Keyword, "#pop"),
            include('step_content'),
        ],
        'step_content_stack': [
            (r"$", Keyword, "#pop:2"),
            include('step_content'),
        ],
        'step_content': [
            (r'"', Name.Function, "double_string"),
            include('table_vars'),
            include('numbers'),
            include('comments'),
            (r'(\s|.)', Name.Function),
        ],
        'table_content': [
            (r"\s+\|\s*$", Keyword, "#pop"),
            include('comments'),
            (r"\\\|", String),
            (r"\s*\|", Keyword),
            include('string'),
        ],
        'double_string': [
            (r'"', Name.Function, "#pop"),
            include('string'),
        ],
        'root': [
            (r'\n', Name.Function),
            include('comments'),
            (r'"""', Keyword, "py_string"),
            (r'\s+\|', Keyword, 'table_content'),
            (r'"', Name.Function, "double_string"),
            include('table_vars'),
            include('numbers'),
            (r'(\s*)(@[^@\r\n\t ]+)', bygroups(Name.Function, Name.Tag)),
            (step_keywords, bygroups(Name.Function,
                                     Keyword), 'step_content_root'),
            (feature_keywords, bygroups(Keyword, Keyword,
                                        Name.Function), 'narrative'),
            (feature_element_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), 'feature_elements'),
            (examples_keywords,
             bygroups(Name.Function, Keyword, Keyword,
                      Name.Function), 'examples_table'),
            (r'(\s|.)', Name.Function),
        ]
    }
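
# To see the keyword tables in action, run the lexer over a small feature
# file (a sketch, assuming the GherkinLexer shipped with pygments):
from pygments.lexers import GherkinLexer

feature = 'Feature: Login\n  Scenario: Happy path\n    Given a valid user\n'
for token, value in GherkinLexer().get_tokens(feature):
    print(token, repr(value))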
Beispiel #45
0
class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph,
                                                      Text)),
            # bold
            # warning: the following rule eats internal tags, e.g. in **foo _bar_ baz** the "bar" is not italicized
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))',
             bygroups(Text, Generic.Strong, None, Text)),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
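
# The effect of handlecodeblocks is easy to observe: with the default True,
# _handle_codeblock delegates the fenced body to the looked-up sub-lexer; with
# False the lookup is skipped and the body comes out as a plain String run
# (a sketch, assuming the shipped MarkdownLexer):
from pygments.lexers import MarkdownLexer

md = 'Intro\n\n```python\nprint(1)\n```\n'
for token, value in MarkdownLexer(handlecodeblocks=False).get_tokens(md):
    print(token, repr(value))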
Beispiel #46
0
class VbNetLexer(RegexLexer):
    """
    For
    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba']  # (?)

    uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \
               '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                 'Cf', 'Mn', 'Mc') + ']*'

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            (r'\n', Text),
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#Else|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum',
             Comment.Preproc),
            (r'[(){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            (words((
                'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case',
                'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl',
                'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng',
                'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare',
                'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else',
                'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False',
                'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo',
                'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let',
                'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase',
                'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing',
                'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator',
                'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides',
                'ParamArray', 'Partial', 'Private', 'Protected', 'Public',
                'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume',
                'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single',
                'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To',
                'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While',
                'Widening', 'With', 'WithEvents', 'WriteOnly'),
                prefix=r'(?<!\.)', suffix=r'\b'), Keyword),
            (r'(?<!\.)End\b', Keyword, 'end'),
            (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
            (r'(?<!\.)(Function|Sub|Property)(\s+)',
             bygroups(Keyword, Text), 'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)',
             bygroups(Keyword, Text), 'classname'),
            (r'(?<!\.)(Module|Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>\[\]]',
             Operator),
            ('"', String, 'string'),
            (r'_\n', Text),  # Line continuation  (must be before Name)
            (uni_name + '[%&@!#$]?', Name),
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)(F[+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer),
        ],
        'string': [
            (r'""', String),
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        'dim': [
            (uni_name, Name.Variable, '#pop'),
            default('#pop'),  # any other syntax
        ],
        'funcname': [
            (uni_name, Name.Function, '#pop'),
        ],
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        'namespace': [
            (uni_name, Name.Namespace),
            (r'\.', Name.Namespace),
            default('#pop'),
        ],
        'end': [
            (r'\s+', Text),
            (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b',
             Keyword, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        if re.search(r'^\s*(#If|Module|Namespace)', text, re.MULTILINE):
            return 0.5
Beispiel #47
0
class ThingsDBLexer(RegexLexer):
    """
    Lexer for the ThingsDB programming language.

    .. versionadded:: 2.9
    """
    name = 'ThingsDB'
    aliases = ['ti', 'thingsdb']
    filenames = ['*.ti']

    tokens = {
        'root': [
            include('expression'),
        ],
        'expression': [
            include('comments'),
            include('whitespace'),

            # numbers
            (r'[-+]?0b[01]+', Number.Bin),
            (r'[-+]?0o[0-7]+', Number.Oct),
            (r'([-+]?0x[0-9a-fA-F]+)', Number.Hex),
            (r'[-+]?[0-9]+', Number.Integer),
            (r'[-+]?((inf|nan)([^0-9A-Za-z_]|$)|[0-9]*\.[0-9]+(e[+-][0-9]+)?)',
             Number.Float),

            # strings
            (r'(?:"(?:[^"]*)")+', String.Double),
            (r"(?:'(?:[^']*)')+", String.Single),

            # literals
            (r'(true|false|nil)\b', Keyword.Constant),

            # regular expressions
            (r'(/[^/\\]*(?:\\.[^/\\]*)*/i?)', String.Regex),

            # thing IDs
            (r'#[0-9]+', Comment.Preproc),

            # name, assignments and functions
            include('names'),
            (r'[(){}\[\],;]', Punctuation),
            (r'[+\-*/%&|<>^!~@=:?]', Operator),
        ],
        'names': [
            (r'(\.)'
             r'(add|call|contains|del|endswith|extend|filter|find|findindex|'
             r'get|has|id|indexof|keys|len|lower|map|pop|push|remove|set|sort|'
             r'splice|startswith|test|unwrap|upper|values|wrap)'
             r'(\()', bygroups(Name.Function, Name.Function,
                               Punctuation), 'arguments'),
            (r'(array|assert|assert_err|auth_err|backup_info|backups_info|'
             r'bad_data_err|bool|closure|collection_info|collections_info|'
             r'counters|deep|del_backup|del_collection|del_expired|del_node|'
             r'del_procedure|del_token|del_type|del_user|err|float|'
             r'forbidden_err|grant|int|isarray|isascii|isbool|isbytes|iserr|'
             r'isfloat|isinf|isint|islist|isnan|isnil|israw|isset|isstr|'
             r'isthing|istuple|isutf8|lookup_err|max_quota_err|mod_type|new|'
             r'new_backup|new_collection|new_node|new_procedure|new_token|'
             r'new_type|new_user|node_err|node_info|nodes_info|now|'
             r'num_arguments_err|operation_err|overflow_err|procedure_doc|'
             r'procedure_info|procedures_info|raise|refs|rename_collection|'
             r'rename_user|reset_counters|return|revoke|run|set_log_level|set|'
             r'set_quota|set_type|shutdown|str|syntax_err|thing|try|type|'
             r'type_err|type_count|type_info|types_info|user_info|users_info|'
             r'value_err|wse|zero_div_err)'
             r'(\()', bygroups(Name.Function, Punctuation), 'arguments'),
            (r'(\.[A-Za-z_][0-9A-Za-z_]*)'
             r'(\s*)(=)', bygroups(Name.Attribute, Text, Operator)),
            (r'\.[A-Za-z_][0-9A-Za-z_]*', Name.Attribute),
            (r'([A-Za-z_][0-9A-Za-z_]*)(\s*)(=)',
             bygroups(Name.Variable, Text, Operator)),
            (r'[A-Za-z_][0-9A-Za-z_]*', Name.Variable),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//(.*?)\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'arguments': [
            include('expression'),
            (',', Punctuation),
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
Beispiel #48
0
class LimboLexer(RegexLexer):
    """
    Lexer for `Limbo programming language <http://www.vitanuova.com/inferno/limbo.html>`_

    TODO:
        - maybe implement better var declaration highlighting
        - some simple syntax error highlighting

    .. versionadded:: 2.0
    """
    name = 'Limbo'
    aliases = ['limbo']
    filenames = ['*.b']
    mimetypes = ['text/limbo']

    tokens = {
        'whitespace': [
            (r'^(\s*)([a-zA-Z_]\w*:(\s*)\n)',
             bygroups(Text, Name.Label)),
            (r'\n', Text),
            (r'\s+', Text),
            (r'#(\n|(.|\n)*?[^\\]\n)', Comment.Single),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String), # all other characters
            (r'\\', String), # stray backslash
        ],
        'statements': [
            (r'"', String, 'string'),
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])', Number.Float),
            (r'16r[0-9a-fA-F]+', Number.Hex),
            (r'8r[0-7]+', Number.Oct),
            (r'((([1-3]\d)|([2-9]))r)?(\d+)', Number.Integer),
            (r'[()\[\],.]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]|(->)|(<-)|(=>)|(::)', Operator),
            (r'(alt|break|case|continue|cyclic|do|else|exit'
             r'|for|hd|if|implement|import|include|len|load|or'
             r'|pick|return|spawn|tagof|tl|to|while)\b', Keyword),
            (r'(byte|int|big|real|string|array|chan|list|adt'
             r'|fn|ref|of|module|self|type)\b', Keyword.Type),
            (r'(con|iota|nil)\b', Keyword.Constant),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'statement' : [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            default('statement'),
        ],
    }

    def analyse_text(text):
        # Any limbo module implements something
        if re.search(r'^implement \w+;', text, re.MULTILINE):
            return 0.7
Beispiel #49
0
from pygments.lexer import RegexLexer, bygroups
from pygments.token import *


# See http://pygments.org/docs/lexerdevelopment/ for background.
ROOT_TOKENS = [
  (r'\"[^\"]*\"',                      Literal.String),
  (r'##.*\n',                          Comment),
  (r'#.*\n',                           Comment.Single),
  (r'[$]+([A-Za-z][A-Za-z0-9_:]*)?',   Name.Variable),
  (r'[@]+([A-Za-z][A-Za-z0-9_:]*)?',   Name.Class),
  (r'[A-Za-z0-9_]+:',                  Name.Tag),
  (r'(\.)([A-Za-z][A-Za-z0-9_]*)',     bygroups(Text, Name.Function)),
  (r'(:=|=>)',                         Punctuation),
  (r'(\.?)([!+=<>/*%-]+)',             bygroups(Text, Operator)),
  (r'\b(true|false|null)\b',           Keyword.Constant),
  (r'\b(type|def|var|import)\b',       Keyword.Declaration),
  (r'\b[A-Za-z][A-Za-z0-9_]+\b',       Keyword),
  (r'[0-9]+',                          Number),
  (r'.',                               Text)
]


class NeutrinoLexer(RegexLexer):
  name = 'Neutrino'
  aliases = ['neutrino']
  filenames = ['*.n']
  tokens = {'root': ROOT_TOKENS}
Beispiel #50
0
class TerraformLexer(RegexLexer):
    """
    Lexer for `Terraform .tf files <https://www.terraform.io/>`_.

    .. versionadded:: 2.1
    """

    name = 'Terraform'
    aliases = ['terraform', 'tf']
    filenames = ['*.tf']
    mimetypes = ['application/x-tf', 'application/x-terraform']

    embedded_keywords = ('ingress', 'egress', 'listener', 'default',
                         'connection', 'alias', 'terraform', 'tags', 'vars',
                         'config', 'lifecycle', 'timeouts')

    tokens = {
        'root': [
            include('string'),
            include('punctuation'),
            include('curly'),
            include('basic'),
            include('whitespace'),
            (r'[0-9]+', Number),
        ],
        'basic': [
            (words(('true', 'false'), prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (r'\s*/\*', Comment.Multiline, 'comment'),
            (r'\s*#.*\n', Comment.Single),
            (r'(.*?)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)),
            (words(('variable', 'resource', 'provider', 'provisioner', 'module',
                    'backend', 'table_s', 'output'), prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved, 'function'),
            (words(embedded_keywords, prefix=r'\b', suffix=r'\b'),
             Keyword.Declaration),
            (r'\$\{', String.Interpol, 'var_builtin'),
        ],
        'function': [
            (r'(\s+)(".*")(\s+)', bygroups(Text, String, Text)),
            include('punctuation'),
            include('curly'),
        ],
        'var_builtin': [
            (r'\$\{', String.Interpol, '#push'),
            (words(('concat', 'file', 'join', 'lookup', 'element'),
                   prefix=r'\b', suffix=r'\b'), Name.Builtin),
            include('string'),
            include('punctuation'),
            (r'\s+', Text),
            (r'\}', String.Interpol, '#pop'),
        ],
        'string': [
            (r'(".*")', bygroups(String.Double)),
        ],
        'punctuation': [
            (r'[\[\](),.]', Punctuation),
        ],
        # Keep this separate from punctuation - we sometimes want to use
        # different tokens for { }
        'curly': [
            (r'\{', Text.Punctuation),
            (r'\}', Text.Punctuation),
        ],
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),
        ],
    }
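
# words() builds its regex lazily: it returns a pygments Future whose .get()
# resolves to an optimized alternation of the literals, wrapped in the given
# prefix/suffix. A sketch:
import re

from pygments.lexer import words

pattern = words(('variable', 'resource', 'provider'),
                prefix=r'\b', suffix=r'\b').get()
print(re.search(pattern, 'resource "aws_instance" "web" {}').group())  # resource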
Beispiel #51
0
File: dsls.py  Project: axil/blog
 def _stringescapes(lexer, match, ctx):
     lexer._start = match.group(3)
     lexer._end = match.group(5)
     return bygroups(Keyword.Reserved, Text, String.Escape, Text,
                     String.Escape)(lexer, match, ctx)
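
# The callback above stashes the escape delimiters on the lexer, then hands
# token assignment to a ready-made bygroups(...) action by calling it with
# (lexer, match, ctx). A self-contained sketch of that delegation pattern
# (the mini-lexer and rule here are illustrative, not from this project):
from pygments.lexer import RegexLexer, bygroups
from pygments.token import Keyword, Text

class DemoLexer(RegexLexer):
    def _kv(lexer, match, ctx=None):
        # a real callback would record state on `lexer` first, as above
        yield from bygroups(Keyword, Text)(lexer, match, ctx)

    tokens = {'root': [(r'(\w+)(\s*)', _kv)]}

print(list(DemoLexer().get_tokens('set')))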
Beispiel #52
0
class VCLLexer(RegexLexer):
    """
    For Varnish Configuration Language (VCL).

    .. versionadded:: 2.2
    """
    name = 'VCL'
    aliases = ['vcl']
    filenames = ['*.vcl']
    mimetypes = ['text/x-vclsrc']

    def analyse_text(text):
        # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
        # that this is VCL
        if text.startswith('vcl 4.0;'):
            return 1.0
        # Skip over comments and blank lines
        # This is accurate enough that returning 0.9 is reasonable.
        # Almost no VCL files start without some comments.
        elif '\nvcl 4.0;' in text[:1000]:
            return 0.9

    tokens = {
        'probe': [
            include('whitespace'),
            include('comments'),
            (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'acl': [
            include('whitespace'),
            include('comments'),
            (r'[!/]+', Operator),
            (r';', Punctuation),
            (r'\d+', Number),
            (r'\}', Punctuation, '#pop'),
        ],
        'backend': [
            include('whitespace'),
            (r'(\.probe)(\s*=\s*)(\w+)(;)',
             bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)),
            (r'(\.probe)(\s*=\s*)(\{)',
             bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
            (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'statements': [
            (r'(\d\.)?\d+[sdwhmy]', Literal.Date),
            (r'(\d\.)?\d+ms', Literal.Date),
            (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
             r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
             r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
            (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
             r'miss|fetch|restart)\b', Name.Constant),
            (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable),
            (words((
                'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level',
                'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits',
                'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid',
                'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip',
                'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now',
                'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto',
                'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi',
                'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout',
                'beresp.do_gzip',  'resp.status', 'beresp.do_gunzip',
                'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream',
                'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status',
                'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries',
                'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip',
                'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint',
                'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304',
                'server.ip', 'bereq.uncacheable'), suffix=r'\b'),
             Name.Variable),
            (r'[!%&+*\-,/<.}{>=|~]+', Operator),
            (r'[();]', Punctuation),

            (r'[,]+', Punctuation),
            (words(('hash_data', 'regsub', 'regsuball', 'if', 'else',
                    'elsif', 'elif', 'synth', 'synthetic', 'ban',
                    'return', 'set', 'unset', 'import', 'include', 'new',
                    'rollback', 'call'), suffix=r'\b'),
             Keyword),
            (r'storage\.\w+\.\w+\b', Name.Variable),
            (words(('true', 'false')), Name.Builtin),
            (r'\d+\b', Number),
            (r'(backend)(\s+\w+)(\s*\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
            (r'(probe\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
            (r'(acl\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
            (r'(vcl )(4\.0)(;)$',
             bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
            (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
                bygroups(Keyword, Name.Function, Punctuation)),
            (r'([a-zA-Z_]\w*)'
             r'(\.)'
             r'([a-zA-Z_]\w*)'
             r'(\s*\(.*\))',
             bygroups(Name.Function, Punctuation, Name.Function, using(this))),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'[^"\n]+', String),  # all other characters
        ],
        'multistring': [
            (r'[^"}]', String),
            (r'"\}', String, '#pop'),
            (r'["}]', String),
        ],
        'whitespace': [
            (r'L?"', String, 'string'),
            (r'\{"', String, 'multistring'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('statements'),
            (r'\s+', Text),
        ],
    }
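A minimal usage sketch, not part of the original example: assuming the usual lexer imports are in place, the class above works like any other Pygments lexer. The VCL snippet is invented.

# Hedged usage sketch: highlight a tiny, made-up VCL config with VCLLexer.
import pygments
from pygments.formatters import HtmlFormatter

vcl_source = 'vcl 4.0;\n\nbackend default {\n    .host = "127.0.0.1";\n}\n'
print(pygments.highlight(vcl_source, VCLLexer(), HtmlFormatter(full=True)))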
Example #53
0
    def gen_rubystrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'([a-zA-Z_]\w*)(:)(?!:)',
             bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
                                    ('\\[', '\\]', 'sb'), \
                                    ('\\(', '\\)', 'pa'), \
                                    ('<', '>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states
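This helper mirrors the string rules of Pygments' bundled RubyLexer, so the generated states can be exercised indirectly by tokenizing a few %-literals with that lexer (a sketch; exact token output depends on the Pygments version):

# Hedged check: Ruby %-literals routed through the full RubyLexer, whose
# 'strings' state is built by a helper like the one above.
from pygments.lexers import RubyLexer

ruby = "%q{plain} + %Q{interp #{1 + 1}} + %r{rege(x|xp)?}i"
for pos, tok, val in RubyLexer().get_tokens_unprocessed(ruby):
    print(pos, tok, repr(val))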
Example #54
0
class BlitzBasicLexer(RegexLexer):
    """
    For `BlitzBasic <http://blitzbasic.com>`_ source code.

    .. versionadded:: 2.0
    """

    name = 'BlitzBasic'
    aliases = ['blitzbasic', 'b3d', 'bplus']
    filenames = ['*.bb', '*.decls']
    mimetypes = ['text/x-bb']

    bb_sktypes = r'@{1,2}|[#$%]'
    bb_name = r'[a-z]\w*'
    bb_var = (r'(%s)(?:([ \t]*)(%s)|([ \t]*)([.])([ \t]*)(?:(%s)))?') % \
             (bb_name, bb_sktypes, bb_name)

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            # Text
            (r'[ \t]+', Text),
            # Comments
            (r";.*?\n", Comment.Single),
            # Data types
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\$[0-9a-f]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Other
            (words(('Shl', 'Shr', 'Sar', 'Mod', 'Or', 'And', 'Not', 'Abs',
                    'Sgn', 'Handle', 'Int', 'Float', 'Str', 'First', 'Last',
                    'Before', 'After'),
                   prefix=r'\b',
                   suffix=r'\b'), Operator),
            (r'([+\-*/~=<>^])', Operator),
            (r'[(),:\[\]\\]', Punctuation),
            (r'\.([ \t]*)(%s)' % bb_name, Name.Label),
            # Identifiers
            (r'\b(New)\b([ \t]+)(%s)' % (bb_name),
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(Gosub|Goto)\b([ \t]+)(%s)' % (bb_name),
             bygroups(Keyword.Reserved, Text, Name.Label)),
            (r'\b(Object)\b([ \t]*)([.])([ \t]*)(%s)\b' % (bb_name),
             bygroups(Operator, Text, Punctuation, Text, Name.Class)),
            (r'\b%s\b([ \t]*)(\()' % bb_var,
             bygroups(Name.Function, Text, Keyword.Type, Text, Punctuation,
                      Text, Name.Class, Text, Punctuation)),
            (r'\b(Function)\b([ \t]+)%s' % bb_var,
             bygroups(Keyword.Reserved, Text, Name.Function, Text,
                      Keyword.Type, Text, Punctuation, Text, Name.Class)),
            (r'\b(Type)([ \t]+)(%s)' % (bb_name),
             bygroups(Keyword.Reserved, Text, Name.Class)),
            # Keywords
            (r'\b(Pi|True|False|Null)\b', Keyword.Constant),
            (r'\b(Local|Global|Const|Field|Dim)\b', Keyword.Declaration),
            (words(('End', 'Return', 'Exit', 'Chr', 'Len', 'Asc', 'New',
                    'Delete', 'Insert', 'Include', 'Function', 'Type', 'If',
                    'Then', 'Else', 'ElseIf', 'EndIf', 'For', 'To', 'Next',
                    'Step', 'Each', 'While', 'Wend', 'Repeat', 'Until',
                    'Forever', 'Select', 'Case', 'Default', 'Goto', 'Gosub',
                    'Data', 'Read', 'Restore'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword.Reserved),
            # Final resolve (for variable names and such)
            # (r'(%s)' % (bb_name), Name.Variable),
            (bb_var,
             bygroups(Name.Variable, Text, Keyword.Type, Text, Punctuation,
                      Text, Name.Class)),
        ],
        'string': [
            (r'""', String.Double),
            (r'"C?', String.Double, '#pop'),
            (r'[^"]+', String.Double),
        ],
    }
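A minimal usage sketch (the BlitzBasic snippet is invented) showing how the lexer above splits source into a token stream:

# Hedged usage sketch: dump the token stream for a few lines of BlitzBasic.
bb_source = 'Function Add#(a#, b#)\n\tReturn a# + b#\nEnd Function\n'
for tok, val in BlitzBasicLexer().get_tokens(bb_source):
    print(tok, repr(val))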
Example #55
0
            'upper-alpha', 'upper-latin', 'upper-roman', 'uppercase', 'url',
            'visible', 'w-resize', 'wait', 'wider', 'x-fast', 'x-high', 'x-large', 'x-loud',
            'x-low', 'x-small', 'x-soft', 'xx-large', 'xx-small', 'yes'), suffix=r'\b'),
         Name.Constant),
        (words(_color_keywords, suffix=r'\b'), Name.Entity),
        (words((
            'black', 'silver', 'gray', 'white', 'maroon', 'red', 'purple', 'fuchsia', 'green',
            'lime', 'olive', 'yellow', 'navy', 'blue', 'teal', 'aqua'), suffix=r'\b'),
         Name.Builtin),
        (r'\!(important|default)', Name.Exception),
        (r'(true|false)', Name.Pseudo),
        (r'(and|or|not)', Operator.Word),
        (r'/\*', Comment.Multiline, 'inline-comment'),
        (r'//[^\n]*', Comment.Single),
        (r'\#[a-z0-9]{1,6}', Number.Hex),
        (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)),
        (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)),
        (r'#\{', String.Interpol, 'interpolation'),
        (r'[~^*!&%<>|+=@:,./?-]+', Operator),
        (r'[\[\]()]+', Punctuation),
        (r'"', String.Double, 'string-double'),
        (r"'", String.Single, 'string-single'),
        (r'[a-z_-][\w-]*', Name),
    ],

    'interpolation': [
        (r'\}', String.Interpol, '#pop'),
        include('value'),
    ],

    'selector': [
Example #56
0
class MonkeyLexer(RegexLexer):
    """
    For
    `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_
    source code.

    .. versionadded:: 1.6
    """

    name = 'Monkey'
    aliases = ['monkey']
    filenames = ['*.monkey']
    mimetypes = ['text/x-monkey']

    name_variable = r'[a-z_]\w*'
    name_function = r'[A-Z]\w*'
    name_constant = r'[A-Z_][A-Z0-9_]*'
    name_class = r'[A-Z]\w*'
    name_module = r'[a-z0-9_]*'

    keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)'
    # ? == Bool // % == Int // # == Float // $ == String
    keyword_type_special = r'[?%#$]'

    flags = re.MULTILINE

    tokens = {
        'root': [
            # Text
            (r'\s+', Text),
            # Comments
            (r"'.*", Comment),
            (r'(?i)^#rem\b', Comment.Multiline, 'comment'),
            # preprocessor directives
            (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b',
             Comment.Preproc),
            # preprocessor variable (any line starting with '#' that is not a directive)
            (r'^#', Comment.Preproc, 'variables'),
            # String
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\$[0-9a-fA-F]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Native data types
            (r'\b%s\b' % keyword_type, Keyword.Type),
            # Exception handling
            (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved),
            (r'Throwable', Name.Exception),
            # Builtins
            (r'(?i)\b(?:Null|True|False)\b', Name.Builtin),
            (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo),
            (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant),
            # Keywords
            (r'(?i)^(Import)(\s+)(.*)(\n)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text)),
            (r'(?i)^Strict\b.*\n', Keyword.Reserved),
            (r'(?i)(Const|Local|Global|Field)(\s+)',
             bygroups(Keyword.Declaration, Text), 'variables'),
            (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)',
             bygroups(Keyword.Reserved, Text), 'classname'),
            (r'(?i)(Function|Method)(\s+)', bygroups(Keyword.Reserved,
                                                     Text), 'funcname'),
            (r'(?i)(?:End|Return|Public|Private|Extern|Property|'
             r'Final|Abstract)\b', Keyword.Reserved),
            # Flow Control stuff
            (r'(?i)(?:If|Then|Else|ElseIf|EndIf|'
             r'Select|Case|Default|'
             r'While|Wend|'
             r'Repeat|Until|Forever|'
             r'For|To|Until|Step|EachIn|Next|'
             r'Exit|Continue)\s+', Keyword.Reserved),
            # not used yet
            (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved),
            # Array
            (r'[\[\]]', Punctuation),
            # Other
            (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator),
            (r'(?i)(?:Not|Mod|Shl|Shr|And|Or)', Operator.Word),
            (r'[(){}!#,.:]', Punctuation),
            # catch the rest
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_function, Name.Function),
            (r'%s\b' % name_variable, Name.Variable),
        ],
        'funcname': [(r'(?i)%s\b' % name_function, Name.Function),
                     (r':', Punctuation, 'classname'), (r'\s+', Text),
                     (r'\(', Punctuation, 'variables'),
                     (r'\)', Punctuation, '#pop')],
        'classname': [
            (r'%s\.' % name_module, Name.Namespace),
            (r'%s\b' % keyword_type, Keyword.Type),
            (r'%s\b' % name_class, Name.Class),
            # array (of given size)
            (r'(\[)(\s*)(\d*)(\s*)(\])',
             bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)),
            # generics
            (r'\s+(?!<)', Text, '#pop'),
            (r'<', Punctuation, '#push'),
            (r'>', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            default('#pop')
        ],
        'variables': [(r'%s\b' % name_constant, Name.Constant),
                      (r'%s\b' % name_variable, Name.Variable),
                      (r'%s' % keyword_type_special, Keyword.Type),
                      (r'\s+', Text), (r':', Punctuation, 'classname'),
                      (r',', Punctuation, '#push'),
                      default('#pop')],
        'string': [
            (r'[^"~]+', String.Double),
            (r'~q|~n|~r|~t|~z|~~', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'comment': [
            (r'(?i)^#rem.*?', Comment.Multiline, "#push"),
            (r'(?i)^#end.*?', Comment.Multiline, "#pop"),
            (r'\n', Comment.Multiline),
            (r'.+', Comment.Multiline),
        ],
    }
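A minimal usage sketch (the Monkey source is invented), rendering with ANSI colors for a terminal:

# Hedged usage sketch: colorize invented Monkey source for the terminal.
from pygments import highlight
from pygments.formatters import TerminalFormatter

monkey_source = 'Function Main:Int()\n\tPrint "Hello"\n\tReturn 0\nEnd\n'
print(highlight(monkey_source, MonkeyLexer(), TerminalFormatter()))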
Example #57
0
import os
import re

# Monkey patch for pygments reporting an error when generator expressions are
# used.
# https://bitbucket.org/birkenfeld/pygments-main/issue/942/cmake-generator-expressions-not-handled
from pygments.lexers import CMakeLexer
from pygments.token import Name, Operator
from pygments.lexer import bygroups
CMakeLexer.tokens["args"].append(('(\\$<)(.+?)(>)',
                                  bygroups(Operator, Name.Variable, Operator)))
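A hedged sanity check (the CMake line is illustrative): after the patch, a generator expression such as $<TARGET_OBJECTS:bar> should lex without error tokens.

# Hedged sanity check: generator expressions should now tokenize cleanly.
cmake_line = 'target_sources(foo PRIVATE $<TARGET_OBJECTS:bar>)\n'
for tok, val in CMakeLexer().get_tokens(cmake_line):
    print(tok, repr(val))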

# Monkey patch for sphinx generating invalid content for qcollectiongenerator
# https://bitbucket.org/birkenfeld/sphinx/issue/1435/qthelp-builder-should-htmlescape-keywords
from sphinx.util.pycompat import htmlescape
from sphinx.builders.qthelp import QtHelpBuilder
old_build_keywords = QtHelpBuilder.build_keywords
def new_build_keywords(self, title, refs, subitems):
  old_items = old_build_keywords(self, title, refs, subitems)
  new_items = []
  for item in old_items:
    before, rest = item.split("ref=\"", 1)
    ref, after = rest.split("\"")
    if ("<" in ref and ">" in ref):
      new_items.append(before + "ref=\"" + htmlescape(ref) + "\"" + after)
    else:
      new_items.append(item)
  return new_items
# Re-install the wrapper so qthelp keyword refs are HTML-escaped.
QtHelpBuilder.build_keywords = new_build_keywords
Example #58
0
#------------------------------------------------------------------------------
# Copyright (c) 2013-2018, Nucleic Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#------------------------------------------------------------------------------
from pygments.lexer import ExtendedRegexLexer, bygroups
from pygments.lexers.agile import Python3Lexer

from pygments.token import Text, Keyword, Name, Punctuation, Operator


ENAMLDEF_START = (
    r'^(enamldef)([ \t]+)([a-zA-Z_][a-zA-Z0-9_]*)([ \t]*)(\()',
    bygroups(Keyword, Text, Name.Class, Text, Punctuation),
    'enamldef_base',
)


ENAMLDEF_BASE = (
    r'(\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*)',
    bygroups(Text, Text, Text),
    'enamldef_end',
)


ENAMLDEF_END = (
    r'(\))([ \t]*)(:)([ \t]*\n)',
    bygroups(Punctuation, Text, Punctuation, Text),
    '#pop:2',
)
Example #59
0
class XonshSubprocLexer(BashLexer):
    """Lexer for xonsh subproc mode."""
    name = 'Xonsh subprocess lexer'
    tokens = {'root': [(SearchPath, String.Backtick), inherit, ]}


ROOT_TOKENS = [(r'\?', Keyword),
               (r'\$\w+', Name.Variable),
               (r'\$\{', Keyword, ('pymode', )),
               (r'[\!\$]\(', Keyword, ('subproc', )),
               (r'[\!\$]\[', Keyword, ('subproc', )),
               (r'@\$\(', Keyword, ('subproc', )),
               (r'@\(', Keyword, ('pymode', )),
               inherit, ]

PYMODE_TOKENS = [(r'(.+)(\))', bygroups(using(this), Keyword), '#pop'),
                 (r'(.+)(\})', bygroups(using(this), Keyword), '#pop'), ]

SUBPROC_TOKENS = [
    (r'(.+)(\))', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
    (r'(.+)(\])', bygroups(using(XonshSubprocLexer), Keyword), '#pop'),
]


class XonshLexer(PythonLexer):
    """Xonsh console lexer for pygments."""

    name = 'Xonsh lexer'
    aliases = ['xonsh', 'xsh']
    filenames = ['*.xsh', '*xonshrc']
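The class body is truncated above; what follows is a sketch of how these token lists are presumably wired into named lexer states (an assumption based on the definitions above, not confirmed by this snippet):

# Sketch only: merge the mode-specific token lists into lexer states.
class XonshLexerSketch(PythonLexer):
    tokens = {
        'root': ROOT_TOKENS,        # top-level xonsh syntax
        'pymode': PYMODE_TOKENS,    # entered via ${ and @(
        'subproc': SUBPROC_TOKENS,  # entered via $( / $[ / !( / ![
    }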
Example #60
0
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
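A minimal usage sketch (the template is invented; assumes the helpers referenced above, such as _indentation, _starts_block, and RubyLexer, are imported as in the full Pygments module):

# Hedged usage sketch: render a tiny Haml template as highlighted HTML.
from pygments import highlight
from pygments.formatters import HtmlFormatter

haml_source = '%p.intro\n  = @greeting\n  / an HTML comment\n'
print(highlight(haml_source, HamlLexer(), HtmlFormatter()))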