Example #1: SQLite console session (SqlLexer)
    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
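
Most of these examples follow the same pattern: prompt or other non-code tokens are recorded as "insertions", the remaining text is accumulated and fed to an inner lexer, and do_insertions weaves the two token streams back together with correct offsets. A minimal, self-contained sketch of that pattern, using only the public Pygments API (the session string and variable names here are illustrative, not from the original code):

    from pygments.lexer import do_insertions
    from pygments.lexers import SqlLexer
    from pygments.token import Generic

    session = 'sqlite> SELECT 1;\n'
    prompt, code = session[:8], session[8:]
    # One insertion: at offset 0 of the code stream, emit the prompt token.
    insertions = [(0, [(0, Generic.Prompt, prompt)])]
    for index, token, value in do_insertions(
            insertions, SqlLexer().get_tokens_unprocessed(code)):
        print(index, token, repr(value))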
Example #2: Erlang shell session (ErlangLexer)
    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))
Example #3: R console session (SLexer)
    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block
                insertions.append(
                    (len(current_code_block), [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    yield from do_insertions(
                        insertions,
                        slexer.get_tokens_unprocessed(current_code_block))
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            yield from do_insertions(
                insertions, slexer.get_tokens_unprocessed(current_code_block))
Example #4: Dylan console session (DylanLexer)
    def get_tokens_unprocessed(self, text):
        dylexer = DylanLexer(**self.options)

        curcode = ''
        insertions = []
        for match in self._line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions,
                        dylexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     dylexer.get_tokens_unprocessed(curcode))
Example #5: Elixir console session with error blocks (ElixirLexer)
    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append(
                    (len(curcode), [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append(
                        (len(curcode), [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions,
                            exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(insertions,
                                     exlexer.get_tokens_unprocessed(curcode))
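
Note how the error branch keeps insertion offsets line-aligned: the error text minus its trailing newline is recorded as a Generic.Error insertion, while the newline itself (line[-1:]) is appended to curcode.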
Example #6: MATLAB session with line continuations (MatlabLexer)
    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []
        continuation = False

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>> '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith('>>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:2])]))
                curcode += line[2:]

            elif line.startswith('???'):

                idx = len(curcode)

                # Without a leading newline the error would be shown on the
                # same line as the preceding output (fix left disabled):
                # line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))
            elif continuation:
                # line_start is the length of the most recent prompt symbol
                line_start = len(insertions[-1][-1][-1])
                # Treat leading spaces of prompt width as a Generic.Prompt
                # token; this keeps code aligned when prompts are stripped,
                # e.g. by some JavaScript.
                if line.startswith(' ' * line_start):
                    insertions.append((len(curcode), [(0, Generic.Prompt,
                                                       line[:line_start])]))
                    curcode += line[line_start:]
                else:
                    curcode += line
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, mlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []

                yield match.start(), Generic.Output, line

            # A line continues only if it ends with '...'; a comment after the
            # ellipsis breaks continuation, while comment lines (starting with
            # %) that end in '...' still continue.
            if line.strip().endswith('...'):
                continuation = True
            else:
                continuation = False

        if curcode:
            yield from do_insertions(insertions,
                                     mlexer.get_tokens_unprocessed(curcode))
Example #7: Python console session with traceback handling
    def get_tokens_unprocessed(self, text):
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions,
                        pylexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if (line.startswith('Traceback (most recent call last):')
                        or re.match('  File "[^"]+", line \\d+\\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex + i, t, v
                        curtb = ''
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     pylexer.get_tokens_unprocessed(curcode))
        if curtb:
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex + i, t, v
Example #8: Shell session with virtualenv prompts and backslash continuations
    def get_tokens_unprocessed(self, text):
        innerlexer = self._innerLexerCls(**self.options)

        pos = 0
        curcode = ''
        insertions = []
        backslash_continuation = False

        for match in line_re.finditer(text):
            line = match.group()
            if backslash_continuation:
                curcode += line
                backslash_continuation = curcode.endswith('\\\n')
                continue

            venv_match = self._venv.match(line)
            if venv_match:
                venv = venv_match.group(1)
                venv_whitespace = venv_match.group(2)
                insertions.append((len(curcode),
                    [(0, Generic.Prompt.VirtualEnv, venv)]))
                if venv_whitespace:
                    insertions.append((len(curcode),
                        [(0, Text, venv_whitespace)]))
                line = line[venv_match.end():]

            m = self._ps1rgx.match(line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
                backslash_continuation = curcode.endswith('\\\n')
            elif line.startswith(self._ps2):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:len(self._ps2)])]))
                curcode += line[len(self._ps2):]
                backslash_continuation = curcode.endswith('\\\n')
            else:
                if insertions:
                    toks = innerlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos+i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(insertions,
                                         innerlexer.get_tokens_unprocessed(curcode)):
                yield pos+i, t, v
Example #9: Embedded CSS block handler
    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from typecode._vendor.pygments.lexers import get_lexer_by_name

        # opening style tag and the newline after it
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # No lexer for this language; handle it as a plain code block.
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)
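
With an empty insertions list, do_insertions([], ...) simply passes the inner lexer's token stream through unchanged; it is used here only to keep the code path uniform with the other handlers.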
Example #10: Fenced code block handler
    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from typecode._vendor.pygments.lexers import get_lexer_by_name

        # opening fence: backticks, language name, newline
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # look up the lexer, if enabled and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # No lexer for this language; handle it as a plain code block.
        if lexer is None:
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)
Example #11: psql console session (PsqlRegexLexer)
    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while True:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt (for examples of the psql command line)
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append(
                        (len(curcode), [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back so the prompt loop processes it
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return
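
The lookahead helper wrapped around the line iterator above is not shown; judging from how it is used (plain iteration plus lines.send(line) to push a line back), a minimal compatible implementation might look like this (an assumption, not necessarily the library's actual code):

    def lookahead(iterable):
        """Wrap an iterator so one item can be pushed back with .send(item)."""
        for item in iterable:
            while True:
                pushed = yield item
                if pushed is None:
                    break          # plain next(): advance to the next item
                yield pushed       # value returned to the .send() caller
                item = pushed      # re-deliver the pushed-back item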
Example #12: Julia REPL session (JuliaLexer)
    def get_tokens_unprocessed(self, text):
        jllexer = JuliaLexer(**self.options)
        start = 0
        curcode = ''
        insertions = []
        output = False
        error = False

        for line in text.splitlines(True):
            if line.startswith('julia>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:6])]))
                curcode += line[6:]
                output = False
                error = False
            elif line.startswith('help?>') or line.startswith('shell>'):
                yield start, Generic.Prompt, line[:6]
                yield start + 6, Text, line[6:]
                output = False
                error = False
            elif line.startswith('      ') and not output:
                insertions.append((len(curcode), [(0, Text, line[:6])]))
                curcode += line[6:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions,
                        jllexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('ERROR: ') or error:
                    yield start, Generic.Error, line
                    error = True
                else:
                    yield start, Generic.Output, line
                output = True
            start += len(line)

        if curcode:
            yield from do_insertions(insertions,
                                     jllexer.get_tokens_unprocessed(curcode))
Example #13: MATLAB session, simpler variant without continuations (MatlabLexer)
    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith('>>'):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:2])]))
                curcode += line[2:]

            elif line.startswith('???'):

                idx = len(curcode)

                # Without a leading newline the error would be shown on the
                # same line as the preceding output (fix left disabled):
                # line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))

            else:
                if curcode:
                    yield from do_insertions(
                        insertions, mlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []

                yield match.start(), Generic.Output, line

        if curcode:
            yield from do_insertions(
                insertions, mlexer.get_tokens_unprocessed(curcode))
Example #14: PHP interactive session (PhpLexer)
    def get_tokens_unprocessed(self, text):
        phplexer = PhpLexer(**self.options)
        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...':
                insertions.append((len(curcode), [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, phplexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     phplexer.get_tokens_unprocessed(curcode))
Example #15: Makefile lexer with comment and preprocessor insertions (BaseMakefileLexer)
    def get_tokens_unprocessed(self, text):
        ins = []
        lines = text.splitlines(True)
        done = ''
        lex = BaseMakefileLexer(**self.options)
        backslashflag = False
        for line in lines:
            if self.r_special.match(line) or backslashflag:
                ins.append((len(done), [(0, Comment.Preproc, line)]))
                backslashflag = line.strip().endswith('\\')
            elif self.r_comment.match(line):
                ins.append((len(done), [(0, Comment, line)]))
            else:
                done += line
        yield from do_insertions(ins, lex.get_tokens_unprocessed(done))
Example #16: Literate source, bird-style and LaTeX-style
    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append(
                        (len(code), [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from typecode._vendor.pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append(
                        (len(code),
                         list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append(
                (len(code), list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions,
                                 self.baselexer.get_tokens_unprocessed(code))
Example #17: Escape-token extraction with a delegating-style lexer
    def get_tokens_unprocessed(self, text):
        # find and remove all the escape tokens (replace with an empty string)
        # this is very similar to DelegatingLexer.get_tokens_unprocessed.
        buffered = ''
        insertions = []
        insertion_buf = []
        for i, t, v in self._find_safe_escape_tokens(text):
            if t is None:
                if insertion_buf:
                    insertions.append((len(buffered), insertion_buf))
                    insertion_buf = []
                buffered += v
            else:
                insertion_buf.append((i, t, v))
        if insertion_buf:
            insertions.append((len(buffered), insertion_buf))
        return do_insertions(insertions,
                             self.lang.get_tokens_unprocessed(buffered))
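
Unlike the console examples, the insertions here carry entire token streams collected from escape sequences, and each insertion offset is the length of the non-escape text buffered so far; the surrounding text is then lexed once as a single buffer.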
Example #18: reStructuredText sourcecode directive handler
    def _handle_sourcecode(self, match):
        from typecode._vendor.pygments.lexers import get_lexer_by_name

        # directive header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # look up the lexer, if enabled and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indentation = match.group(8)
        indentation_size = len(indentation)
        code = (indentation + match.group(9) + match.group(10) + match.group(11))

        # No lexer for this language; handle it as a plain code block.
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indentation_size:
                ins.append((len(code), [(0, Text, line[:indentation_size])]))
                code += line[indentation_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
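
The indentation-stripping loop at the end serves the same purpose as the prompt handling in the console examples: the common indentation of each line is split off into Text insertions, so the inner lexer sees dedented, contiguous code while the emitted tokens still reproduce the original layout.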