Example no. 1
    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions, sql.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            for item in do_insertions(insertions,
                                      sql.get_tokens_unprocessed(curcode)):
                yield item
Example no. 2
    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode))
Example no. 3
    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    erlexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      erlexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 4
    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                              sql.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            for item in do_insertions(insertions,
                                      sql.get_tokens_unprocessed(curcode)):
                yield item
Example no. 5
    def get_tokens_unprocessed(self, text):
        langlexer = self.LangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()

            # prompt
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
                continue
            elif curcode:
                for item in do_insertions(insertions,
                                          langlexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []

            # comments
            m = self._comment_re.match(line)
            if m is not None:
                yield match.start(), Comment.Single, line
                continue

            yield match.start(), Generic.Output, line

        if curcode:
            for item in do_insertions(insertions,
                                      langlexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 6
File: sql.py Project: ZekriSara/pfe
    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            prompt_match = sqlite_prompt_re.match(line)
            if prompt_match is not None:
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:7])]))
                insertions.append((len(curcode), [(7, Whitespace, ' ')]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
Example no. 7
    def get_tokens_unprocessed(self, text):
        assert self.lexer_class
        lexer = self.lexer_class(**self.options)

        curcode = ""
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            prompt = None
            for p in self.prompts:
                if line.startswith(p):
                    prompt = p
            if prompt:
                prompt_len = len(prompt)
                insertions.append((len(curcode), [(0, token.Generic.Prompt,
                                                   line[:prompt_len])]))
                curcode += line[prompt_len:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions, lexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ""
                    insertions = []
                yield match.start(), token.Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      lexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 8
    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))
Example no. 9
    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    for item in do_insertions(insertions,
                          slexer.get_tokens_unprocessed(current_code_block)):
                        yield item
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            for item in do_insertions(insertions,
                    slexer.get_tokens_unprocessed(current_code_block)):
                yield item
Example no. 10
 def get_tokens_unprocessed(self, text):
     phplexer = PhpLexer(**self.options)
     curcode = ''
     insertions = []
     for match in line_re.finditer(text):
         line = match.group()
         if line.startswith(u'>>> ') or line.startswith(u'... '):
             insertions.append(
                 (len(curcode), [(0, Generic.Prompt, line[:4])]))
             curcode += line[4:]
         elif line.rstrip() == u'...':
             insertions.append(
                 (len(curcode), [(0, Generic.Prompt, u'...')]))
             curcode += line[3:]
         else:
             if curcode:
                 for item in do_insertions(
                         insertions,
                         phplexer.get_tokens_unprocessed(curcode)):
                     yield item
                 curcode = ''
                 insertions = []
             yield match.start(), Generic.Output, line
     if curcode:
         for item in do_insertions(
                 insertions, phplexer.get_tokens_unprocessed(curcode)):
             yield item
Example no. 11
    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    for item in do_insertions(
                            insertions, slexer.get_tokens_unprocessed(current_code_block)):
                        yield item
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            for item in do_insertions(
                    insertions, slexer.get_tokens_unprocessed(current_code_block)):
                yield item
Example no. 12
    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith(u'** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        for item in do_insertions(
                                insertions, exlexer.get_tokens_unprocessed(curcode)):
                            yield item
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            for item in do_insertions(
                    insertions, exlexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 13
    def get_tokens_unprocessed(self, text):
        bashlexer = BashLexer(**self.options)

        pos = 0
        curcode = ""
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            m = re.match(
                r"^((?:\(\S+\))?(?:|sh\S*?|\w+\S+[@:]\S+(?:\s+\S+)" r"?|\[\S+[@:][^\n]+\].+)[$#%])(.*\n?)", line
            )
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append((len(curcode), [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
            elif line.startswith(">"):
                insertions.append((len(curcode), [(0, Generic.Prompt, line[:1])]))
                curcode += line[1:]
            else:
                if insertions:
                    toks = bashlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos + i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ""
        if insertions:
            for i, t, v in do_insertions(insertions, bashlexer.get_tokens_unprocessed(curcode)):
                yield pos + i, t, v
Example no. 14
    def get_tokens_unprocessed(self, text):
        rblexer = RubyLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            rblexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      rblexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 15
    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ""
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith(">>"):
                insertions.append((len(curcode), [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith("???"):

                idx = len(curcode)

                # without this, the error would show on the same line as the previous one
                line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))

            else:
                if curcode:
                    for item in do_insertions(insertions, mlexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ""
                    insertions = []

                yield match.start(), Generic.Output, line

        if curcode:  # or item:
            for item in do_insertions(insertions, mlexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 16
    def get_tokens_unprocessed(self, text):
        bashlexer = BashLexer(**self.options)

        pos = 0
        curcode = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            m = re.match(r'^((?:\[?\S+@[^$#%]+)[$#%])(.*\n?)', line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
            else:
                if insertions:
                    toks = bashlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos+i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(insertions,
                                         bashlexer.get_tokens_unprocessed(curcode)):
                yield pos+i, t, v
Example no. 17
    def get_tokens_unprocessed(self, text):
        bashlexer = BashLexer(**self.options)

        pos = 0
        curcode = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            m = re.match(r'^((?:\[?\S+@[^$#%]+)[$#%])(.*\n?)', line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
            else:
                if insertions:
                    toks = bashlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos+i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(insertions,
                                         bashlexer.get_tokens_unprocessed(curcode)):
                yield pos+i, t, v
Example no. 18
    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []
        continuation = False

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>> '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith('>>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:2])]))
                curcode += line[2:]

            elif line.startswith('???'):

                idx = len(curcode)

                # without this, the error would show on the same line as the previous one
                # line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))
            elif continuation:
                # line_start is the length of the most recent prompt symbol
                line_start = len(insertions[-1][-1][-1])
                # Set leading spaces with the length of the prompt to be a generic prompt
                # This keeps code aligned when prompts are removed, say with some Javascript
                if line.startswith(' ' * line_start):
                    insertions.append((len(curcode), [(0, Generic.Prompt,
                                                       line[:line_start])]))
                    curcode += line[line_start:]
                else:
                    curcode += line
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, mlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []

                yield match.start(), Generic.Output, line

            # Does not allow continuation if a comment is included after the ellipses.
            # Continues any line that ends with ..., even comments (lines that start with %)
            if line.strip().endswith('...'):
                continuation = True
            else:
                continuation = False

        if curcode:  # or item:
            yield from do_insertions(insertions,
                                     mlexer.get_tokens_unprocessed(curcode))
Example no. 19
    def get_tokens_unprocessed(self, text):
        if self.python3:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)
        else:
            pylexer = Python2Lexer(**self.options)
            tblexer = Python2TracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith(u'>>> ') or line.startswith(u'... '):
                tb = 0
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == u'...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, u'...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if (line.startswith(u'Traceback (most recent call last):')
                        or re.match(u'  File "[^"]+", line \\d+\\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == u'...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex + i, t, v
                        curtb = ''
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex + i, t, v
Example no. 20
    def get_tokens_unprocessed(self, text):
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith(u'>>> ') or line.startswith(u'... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == u'...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, u'...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions, pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if (line.startswith(u'Traceback (most recent call last):') or
                        re.match(u'  File "[^"]+", line \\d+\\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == u'...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                        curtb = ''
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex+i, t, v
Example no. 21
    def get_tokens_unprocessed(self, text):
        innerlexer = self._innerLexerCls(**self.options)

        pos = 0
        curcode = ''
        insertions = []
        backslash_continuation = False

        for match in line_re.finditer(text):
            line = match.group()
            if backslash_continuation:
                curcode += line
                backslash_continuation = curcode.endswith('\\\n')
                continue

            venv_match = self._venv.match(line)
            if venv_match:
                venv = venv_match.group(1)
                venv_whitespace = venv_match.group(2)
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt.VirtualEnv, venv)]))
                if venv_whitespace:
                    insertions.append(
                        (len(curcode), [(0, Text, venv_whitespace)]))
                line = line[venv_match.end():]

            m = self._ps1rgx.match(line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
                backslash_continuation = curcode.endswith('\\\n')
            elif line.startswith(self._ps2):
                insertions.append((len(curcode), [(0, Generic.Prompt,
                                                   line[:len(self._ps2)])]))
                curcode += line[len(self._ps2):]
                backslash_continuation = curcode.endswith('\\\n')
            else:
                if insertions:
                    toks = innerlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos + i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(
                    insertions, innerlexer.get_tokens_unprocessed(curcode)):
                yield pos + i, t, v
Example no. 22
    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String        , match.group(1)
        yield match.start(2), String        , match.group(2)
        yield match.start(3), Text          , match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name( match.group(2).strip() )
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String        , match.group(5)
Example no. 23
    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)
Example no. 24
    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)
Example no. 25
    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append(
                        (len(curcode), [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return
Example no. 26
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            input_prompt = self.input_prompt.match(line)
            continue_prompt = self.continue_prompt.match(line.rstrip())
            output_prompt = self.output_prompt.match(line)
            if line.startswith("#"):
                insertions.append((len(curcode), [(0, Comment, line)]))
            elif input_prompt is not None:
                insertions.append(
                    (len(curcode), [(0, Other, input_prompt.group())]))
                curcode += line[input_prompt.end():]
            elif continue_prompt is not None:
                insertions.append(
                    (len(curcode), [(0, Other, continue_prompt.group())]))
                curcode += line[continue_prompt.end():]
            elif output_prompt is not None:
                # Use the 'error' token for output.  We should probably make
                # our own token, but error is typically in a bright color like
                # red, so it works fine for our output prompts.
                insertions.append((
                    len(curcode),
                    [(
                        0,
                        Other,  #Generic.Error,
                        output_prompt.group())]))
                curcode += line[output_prompt.end():]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 27
    def get_tokens_unprocessed(self, text):
        pylexer = Python3Lexer(**self.options)
        tblexer = Python3TracebackLexer(**self.options)
        if ">>>" not in text:
            if (text.startswith('Traceback (most recent call last):') or
                        re.match(r'^  File "[^"]+", line \d+\n$', text)):
                yield from tblexer.get_tokens_unprocessed(text)
            else:
                yield from pylexer.get_tokens_unprocessed(text)
            return

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0

        def do_current_code():
            nonlocal curcode
            nonlocal insertions
            if curcode:
                for item in do_insertions(insertions,
                                          pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []

        section = ""
        code = list(line_re.finditer(text))
        while code:
            match = code.pop(0)
            line = match.group()
            if line.startswith(">>>"):
                insertions = []
                insertions.append((0,
                                   [(0, Generic.Prompt, line[:4])]))
                section = line[4:]
                secindex = match.start()
                while code and code[0].group().startswith("   "):
                    line = code.pop(0).group()
                    if not line.strip():
                        break
                    insertions.append((len(section),
                                        [(0, Generic.Prompt, line[:4])]))
                    section += line[4:]
                for i, t, v in do_insertions(insertions,
                                            pylexer.get_tokens_unprocessed(section)):
                    yield secindex+i, t, v
            elif line.startswith('Traceback (most recent call last):') or re.match(r' *File "[^"]+", line \d+\n$', line):
                tb = line
                tbindex = match.start()
                while code and not code[0].group().startswith(">>>"):
                    tb += code.pop(0).group()
                for i, t, v in tblexer.get_tokens_unprocessed(tb):
                    yield tbindex+i, t, v
            else:
                yield match.start(), Generic.Output, line
Example no. 28
 def do_current_code():
     nonlocal curcode
     nonlocal insertions
     if curcode:
         for item in do_insertions(insertions,
                                   pylexer.get_tokens_unprocessed(curcode)):
             yield item
         curcode = ''
         insertions = []
Example no. 29
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...':
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('Traceback (most recent call last):'):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 30
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...':
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('Traceback (most recent call last):'):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 31
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            input_prompt = self.input_prompt.match(line)
            continue_prompt = self.continue_prompt.match(line.rstrip())
            output_prompt = self.output_prompt.match(line)
            if line.startswith("#"):
                insertions.append((len(curcode),
                                   [(0, Comment, line)]))
            elif line.startswith("<warning>"):
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[9:])]))
            elif input_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, input_prompt.group())]))
                curcode += line[input_prompt.end():]
            elif continue_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, continue_prompt.group())]))
                curcode += line[continue_prompt.end():]
            elif output_prompt is not None:
                # Use the 'error' token for output.  We should probably make
                # our own token, but error is typically in a bright color like
                # red, so it works fine for our output prompts.
                insertions.append((len(curcode),
                                   [(0, Generic.Error, output_prompt.group())]))
                curcode += line[output_prompt.end():]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                              pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 32
    def get_tokens_unprocessed(self, text):
        jllexer = JuliaLexer(**self.options)
        start = 0
        curcode = ''
        insertions = []
        output = False
        error = False

        for line in text.splitlines(True):
            if line.startswith('julia>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:6])]))
                curcode += line[6:]
                output = False
                error = False
            elif line.startswith('help?>') or line.startswith('shell>'):
                yield start, Generic.Prompt, line[:6]
                yield start + 6, Text, line[6:]
                output = False
                error = False
            elif line.startswith('      ') and not output:
                insertions.append((len(curcode), [(0, Text, line[:6])]))
                curcode += line[6:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            jllexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('ERROR: ') or error:
                    yield start, Generic.Error, line
                    error = True
                else:
                    yield start, Generic.Output, line
                output = True
            start += len(line)

        if curcode:
            for item in do_insertions(insertions,
                                      jllexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 33
    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>> '):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith('>>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:2])]))
                curcode += line[2:]

            elif line.startswith('???'):

                idx = len(curcode)

                # without this, the error would show on the same line as the previous one
                # line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))

            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            mlexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []

                yield match.start(), Generic.Output, line

        if curcode:  # or item:
            for item in do_insertions(insertions,
                                      mlexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 34
    def get_tokens_unprocessed(self, text):
        innerlexer = self._innerLexerCls(**self.options)

        pos = 0
        curcode = ''
        insertions = []
        backslash_continuation = False

        for match in line_re.finditer(text):
            line = match.group()
            m = re.match(self._ps1rgx, line)
            if backslash_continuation:
                curcode += line
                backslash_continuation = curcode.endswith('\\\n')
            elif m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
                backslash_continuation = curcode.endswith('\\\n')
            elif line.startswith(self._ps2):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:len(self._ps2)])]))
                curcode += line[len(self._ps2):]
                backslash_continuation = curcode.endswith('\\\n')
            else:
                if insertions:
                    toks = innerlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos+i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(insertions,
                                         innerlexer.get_tokens_unprocessed(curcode)):
                yield pos+i, t, v
Example no. 35
    def get_tokens_unprocessed(self, text):
        jllexer = JuliaLexer(**self.options)
        start = 0
        curcode = ''
        insertions = []
        output = False
        error = False

        for line in text.splitlines(True):
            if line.startswith('julia>'):
                insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
                curcode += line[6:]
                output = False
                error = False
            elif line.startswith('help?>') or line.startswith('shell>'):
                yield start, Generic.Prompt, line[:6]
                yield start + 6, Text, line[6:]
                output = False
                error = False
            elif line.startswith('      ') and not output:
                insertions.append((len(curcode), [(0, Text, line[:6])]))
                curcode += line[6:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions, jllexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('ERROR: ') or error:
                    yield start, Generic.Error, line
                    error = True
                else:
                    yield start, Generic.Output, line
                output = True
            start += len(line)

        if curcode:
            for item in do_insertions(
                    insertions, jllexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 36
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            input_prompt = self.input_prompt.match(line)
            continue_prompt = self.continue_prompt.match(line.rstrip())
            output_prompt = self.output_prompt.match(line)
            if line.startswith("#"):
                insertions.append((len(curcode), [(0, Comment, line)]))
            elif input_prompt is not None:
                insertions.append((len(curcode), [(0, Generic.Prompt,
                                                   input_prompt.group())]))
                curcode += line[input_prompt.end():]
            elif continue_prompt is not None:
                insertions.append((len(curcode), [(0, Generic.Prompt,
                                                   continue_prompt.group())]))
                curcode += line[continue_prompt.end():]
            elif output_prompt is not None:
                insertions.append((len(curcode), [(0, Generic.Output,
                                                   output_prompt.group())]))
                curcode += line[output_prompt.end():]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions,
                            pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
Example no. 37
    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            input_prompt = self.input_prompt.match(line)
            continue_prompt = self.continue_prompt.match(line.rstrip())
            output_prompt = self.output_prompt.match(line)
            if line.startswith("#"):
                insertions.append((len(curcode),
                                   [(0, Comment, line)]))
            elif input_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, input_prompt.group())]))
                curcode += line[input_prompt.end():]
            elif continue_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, continue_prompt.group())]))
                curcode += line[continue_prompt.end():]
            elif output_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Output, output_prompt.group())]))
                curcode += line[output_prompt.end():]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                              pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
	def get_tokens_unprocessed(self, text):
		bashlexer = shell.BashLexer(**self.options)
		pos = 0
		curcode = ''
		insertions = []
		for match in line_re.finditer(text):
			line = match.group()
			m = re.match(r'^((?:[^@#$%\s]+@[^]#$%\s]+(?: ?:? ?\[?[^]#$%]+\]?)? *)?[$#%])(.*\n?)', line)
			if line.startswith('#'):
				yield match.start(), Comment, line
				insertions = []
				curcode = ''
			elif m:
				# To support output lexers (say diff output), the output
				# needs to be broken by prompts whenever the output lexer
				# changes.
				if not insertions:
					pos = match.start()
				insertions.append((len(curcode),
				                  [(0, Generic.Prompt, m.group(1))]))
				curcode += m.group(2)
			elif line.startswith('>'):
				insertions.append((len(curcode),
				                  [(0, Generic.Prompt, line[:1])]))
				curcode += line[1:]
			else:
				if insertions:
					toks = bashlexer.get_tokens_unprocessed(curcode)
					for i, t, v in do_insertions(insertions, toks):
						yield pos+i, t, v
				yield match.start(), Generic.Output, line
				insertions = []
				curcode = ''
		if insertions:
			for i, t, v in do_insertions(insertions,
			                             bashlexer.get_tokens_unprocessed(curcode)):
				yield pos+i, t, v
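
All of the console lexers above follow the same recipe: strip the prompt from each input line, remember the prompt as an insertion keyed by its offset into the stripped code, lex the stripped code in one pass, and let do_insertions() weave the prompt tokens back in at the right places. A minimal standalone sketch of that recipe follows; the session text, prompt string and print loop are invented for illustration and are not taken from any snippet above.

# Minimal sketch of the insertion pattern shared by the console lexers above.
from pygments.lexer import do_insertions
from pygments.lexers import PythonLexer
from pygments.token import Generic

session = ">>> x = 1\n>>> print(x)\n"   # invented sample session
prompt = ">>> "

curcode = ""
insertions = []
for line in session.splitlines(True):
    # record the prompt at the current offset into the prompt-stripped code
    insertions.append((len(curcode), [(0, Generic.Prompt, prompt)]))
    curcode += line[len(prompt):]

# lex the stripped code once and merge the prompt tokens back in
for index, token, value in do_insertions(
        insertions, PythonLexer().get_tokens_unprocessed(curcode)):
    print(index, token, repr(value))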
Esempio n. 39
0
    def get_tokens_unprocessed(self, text):
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = bird_re.match(line)
                if m:
                    insertions.append(
                        (len(code), [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append(
                        (len(code),
                         list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append(
                (len(code), list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions,
                                  hslexer.get_tokens_unprocessed(code)):
            yield item
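
In the bird-style branch above, the leading ">" markers become Comment.Special insertions while the text after them is accumulated and lexed as Haskell; unmarked lines are inserted verbatim as Text. A hedged usage sketch, assuming the method belongs to a lexer registered like Pygments' LiterateHaskellLexer (the sample input is invented):

from pygments.lexers import LiterateHaskellLexer

lhs = (
    "Some commentary about the program.\n"
    "\n"
    "> main :: IO ()\n"
    "> main = putStrLn \"hello\"\n"
)
for index, token, value in LiterateHaskellLexer().get_tokens_unprocessed(lhs):
    print(index, token, repr(value))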
Esempio n. 40
0
 def get_tokens_unprocessed(self, text):
     ins = []
     lines = text.splitlines(True)
     done = ''
     lex = BaseMakefileLexer(**self.options)
     backslashflag = False
     for line in lines:
         if self.r_special.match(line) or backslashflag:
             ins.append((len(done), [(0, Comment.Preproc, line)]))
             backslashflag = line.strip().endswith('\\')
         elif self.r_comment.match(line):
             ins.append((len(done), [(0, Comment, line)]))
         else:
             done += line
     yield from do_insertions(ins, lex.get_tokens_unprocessed(done))
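
This wrapper routes directive/conditional lines and comments around a line-oriented base lexer and delegates everything else to it, which is how something like Pygments' MakefileLexer behaves. A hedged usage sketch with an invented makefile:

from pygments.lexers import MakefileLexer

mk = (
    "# build the default target\n"
    "ifeq ($(DEBUG),1)\n"
    "CFLAGS += -g\n"
    "endif\n"
    "all:\n"
    "\t$(CC) $(CFLAGS) -o app main.c\n"
)
for index, token, value in MakefileLexer().get_tokens_unprocessed(mk):
    print(index, token, repr(value))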
Esempio n. 41
0
    def get_tokens_unprocessed(self, text):
        innerlexer = self._innerLexerCls(**self.options)

        pos = 0
        curcode = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            m = re.match(self._ps1rgx, line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = match.start()

                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, m.group(1))]))
                curcode += m.group(2)
            elif line.startswith(self._ps2):
                insertions.append((len(curcode), [(0, Generic.Prompt,
                                                   line[:len(self._ps2)])]))
                curcode += line[len(self._ps2):]
            else:
                if insertions:
                    toks = innerlexer.get_tokens_unprocessed(curcode)
                    for i, t, v in do_insertions(insertions, toks):
                        yield pos + i, t, v
                yield match.start(), Generic.Output, line
                insertions = []
                curcode = ''
        if insertions:
            for i, t, v in do_insertions(
                    insertions, innerlexer.get_tokens_unprocessed(curcode)):
                yield pos + i, t, v
Esempio n. 42
0
 def get_tokens_unprocessed(self, text):
     ins = []
     lines = text.splitlines(True)
     done = ''
     lex = BaseMakefileLexer(**self.options)
     backslashflag = False
     for line in lines:
         if self.r_special.match(line) or backslashflag:
             ins.append((len(done), [(0, Comment.Preproc, line)]))
             backslashflag = line.strip().endswith('\\')
         elif self.r_comment.match(line):
             ins.append((len(done), [(0, Comment, line)]))
         else:
             done += line
     for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
         yield item
Esempio n. 43
0
 def get_tokens_unprocessed(self, text):
     buffered = ''
     insertions = []
     lng_buffer = []
     for i, t, v in self.language_lexer.get_tokens_unprocessed(text):
         if t is Comment.Single and v.startswith(PY_EXEC_SYM) and i == 0:
             if lng_buffer:
                 insertions.append((len(buffered), lng_buffer))
                 lng_buffer = []
             buffered += v
         else:
             lng_buffer.append((i, t, v))
     if lng_buffer:
         insertions.append((len(buffered), lng_buffer))
     return do_insertions(insertions,
                          self.root_lexer.get_tokens_unprocessed(buffered))
Esempio n. 44
0
    def get_tokens_unprocessed(self, text):
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
            yield item
Esempio n. 45
0
 def get_tokens_unprocessed(self, text):
     # find and remove all the escape tokens (replace with an empty string)
     # this is very similar to DelegatingLexer.get_tokens_unprocessed.
     buffered = ''
     insertions = []
     insertion_buf = []
     for i, t, v in self._find_safe_escape_tokens(text):
         if t is None:
             if insertion_buf:
                 insertions.append((len(buffered), insertion_buf))
                 insertion_buf = []
             buffered += v
         else:
             insertion_buf.append((i, t, v))
     if insertion_buf:
         insertions.append((len(buffered), insertion_buf))
     return do_insertions(insertions,
                          self.lang.get_tokens_unprocessed(buffered))
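
The comment above points at DelegatingLexer, and the stock class is the simpler way to get a similar split: lex everything with a "language" lexer and re-lex the spans it marks as Other with a "root" lexer. For comparison only, the combination shown in the Pygments documentation (not part of the example above):

from pygments.lexer import DelegatingLexer
from pygments.lexers import HtmlLexer, PhpLexer

class HtmlPhpLexer(DelegatingLexer):
    # PHP is the language lexer; the spans it emits as Other are re-lexed as HTML.
    def __init__(self, **options):
        super().__init__(HtmlLexer, PhpLexer, **options)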
Esempio n. 46
0
    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item
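
Assuming this handler is wired into a reST lexer the way Pygments' RstLexer does it, a ".. sourcecode::" block is re-indented, delegated to the named lexer, and the stripped indentation is reinserted as Text tokens. A hedged usage sketch with an invented document:

from pygments.lexers import RstLexer

rst = (
    ".. sourcecode:: python\n"
    "\n"
    "    print(\"hello\")\n"
)
for index, token, value in RstLexer(handlecodeblocks=True).get_tokens_unprocessed(rst):
    print(index, token, repr(value))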
Esempio n. 47
0
    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item
Esempio n. 48
0
    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
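
The method above leans on per-instance state that is set up elsewhere in the class. A hedged sketch of the minimum it relies on, with attribute roles inferred from the body (the real class may differ):

from pygments.lexers import PythonLexer, PythonTracebackLexer

class _ConsoleState:
    def __init__(self, **options):
        self.mode = 'output'       # 'input', 'output', or anything else for traceback
        self.buffer = ''           # text accumulated for the current mode
        self.index = 0             # absolute offset of the buffer in the document
        self.insertions = []       # (offset, [(0, token, value), ...]) prompt insertions
        self.pylexer = PythonLexer(**options)
        self.tblexer = PythonTracebackLexer(**options)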
Esempio n. 49
0
    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else:  # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
Esempio n. 50
0
    def get_tokens_unprocessed(self, text):
        lualexer = LuaLexer(**self.options)
        ymllexer = YamlLexer(**self.options)
        shslexer = BashSessionLexer(**self.options)

        curcode = ''
        insertions = []
        code = False
        prompt_len = 0

        curyml = ''
        yml = False
        ymlcnt = 0

        curshs = ''
        shs = False

        for match in line_re.finditer(text):
            line = match.group()
            # print line
            # print code, yml, shs

            # First part - if output starts from '$ ' then it's BASH session
            # - We must only check that we're not inside of YAML
            # code can't start with '$ '
            # if output (not inside YAML) starts with '$ ' - it's not our problem
            # Also, we can match multiline commands only if line ends with '\'
            check_shs = (line.startswith('$ ') and not yml) or shs
            if check_shs:
                curshs += line
                if line.endswith('\\'):
                    shs = True
                    continue
                for item in shslexer.get_tokens_unprocessed(curshs):
                    yield item
                curshs = ''
                shs = False
                continue

            # Second part - check for YAML
            # 1) It's begin, means (yml == False) and line.strip() == '---'
            # 2) It's middle. (yml == True) and line.strip() not in ('---', '...')
            # 3) It's end - then (yml == False) and line.strip() == '...']
            check_yml_begin  = (yml == False and line.strip()     in (yml_beg, ))
            # check_yml_middle = (yml == True  and line.strip() not in (yml_beg, yml_end))
            check_yml_end    = (yml == True  and line.strip() == yml_end and ymlcnt == 0)
            if (check_yml_begin or yml):
                # print check_yml_begin, check_yml_middle, check_yml_end
                # Flush previous code buffers
                if (yml is True and line.strip() == yml_beg):
                    ymlcnt += 1
                if (not check_yml_end and line.strip() == yml_end):
                    ymlcnt += 1
                if check_yml_begin and curcode:
                    for item in do_insertions(insertions, lualexer.get_tokens_unprocessed(curcode)):
                        yield item
                    code = False
                    curcode = ''
                    insertions = []
                curyml += line
                # We finished reading YAML output, so push it to user
                if check_yml_end:
                    for item in ymllexer.get_tokens_unprocessed(curyml):
                        yield item
                    curyml = ''
                yml = False if check_yml_end else True
                # print 'yaml gotcha %d' % yml
                continue

            # Third part - check for Tarantool's Lua
            # It's combination of:
            # prompt: tarantool> or localhost> or localhost:{port}>
            # lua: body after prompt + space
            prompt_pos_flexible = find_prompt(line)
            prompt_pos_strict   = prompt_pos_flexible if not code else None
            if prompt_pos_strict:
                prompt_len = prompt_pos_strict + 2

            check_code_begin = bool(prompt_pos_strict)
            check_code_middle = code and line.startswith(' ' * (prompt_len - 2) + '> ')
            check_code_flexible = False
            # e.g. we have two 'tarantool> ' in a row - code is True and
            # check_code_middle is False then we have to do something about it,
            # otherwise it will be like Generic.Output
            if code and check_code_middle is False and bool(prompt_pos_flexible):
                prompt_len = prompt_pos_flexible + 2
                check_code_flexible = True
            if (check_code_begin or check_code_middle or check_code_flexible):
                code = True
                insertions.append((len(curcode), [(0, Generic.Prompt, line[:prompt_len])]))
                curcode += line[prompt_len:]
                continue

            # If it's not something before - then we must check for code
            # and push that line as 'Generic.Output'
            if curcode:
                for item in do_insertions(insertions, lualexer.get_tokens_unprocessed(curcode)):
                    yield item
                code = False
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line

        if curcode:
            for item in do_insertions(insertions, lualexer.get_tokens_unprocessed(curcode)):
                yield item
        if curyml:
            for item in ymllexer.get_tokens_unprocessed(curyml):
                yield item
        if curshs:
            for item in shslexer.get_tokens_unprocessed(curshs):
                yield item
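
The code above assumes a find_prompt() helper defined elsewhere. A hypothetical sketch consistent with the comments ("tarantool>", "localhost>" or "localhost:{port}>" prompts) and with how its return value is used (prompt_len = pos + 2 must span the prompt plus its trailing space), so it returns the index of the '>' character or None; the real helper in the source project may differ:

import re

# hypothetical prompt matcher; the real pattern may be broader
_PROMPT_RE = re.compile(r'^(?:tarantool|localhost(?::\d+)?)>')

def find_prompt(line):
    m = _PROMPT_RE.match(line)
    # index of '>' so that prompt_len = pos + 2 covers e.g. "tarantool> "
    return m.end() - 1 if m else None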
Esempio n. 51
0
    def get_tokens_unprocessed(self, text):
        cl_lexer = CommonLispLexer(**self.options)

        pos = 0
        curcode = ""
        insertions = []
        iterator = line_re.finditer(text)

        match = None
        while True:
            try:
                match = next(iterator)
                line = match.group()
                start = match.start()
            except StopIteration:
                # the line_re expression won't work in the case of the
                # string "a\nb" so make sure that the remainder of the
                # output is processed
                if match and match.end() < len(text):
                    start = match.end()
                    line = text[match.end() :]
                    match = None
                else:
                    return
            m = re.match(r"^((?:[^\s*?>:]*[*?>:]) )(.*\n?)", line)

            if m:
                line = ""
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not insertions:
                    pos = start

                insertions.append((len(curcode), [(0, Generic.Prompt, m.group(1))]))

                # skip parser when line is empty
                if re.match(r"^\s*$", m.group(2)):
                    line = m.group(2)
                else:
                    # read out the whole sexp after the prompt, if it's a
                    # muliline expression then keep reading until there is
                    # a complete expression
                    string_stream = StringIO(m.group(2))
                    reader = sexp_reader()
                    while reader.read(string_stream):
                        try:
                            match = next(iterator)
                            string_stream = StringIO(match.group())
                        except StopIteration:
                            break
                    curcode += reader.sexp()
                    line = reader.rest()

            if insertions:
                # curcode may be empty when a prompt line carried no code
                toks = cl_lexer.get_tokens_unprocessed(curcode) if curcode else iter([])
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            if line:
                yield start, Generic.Output, line
            insertions = []
            curcode = ""
Esempio n. 52
0
    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            while 1:
                try:
                    line = next(lines)
                except StopIteration:
                    # allow the emission of partially collected items
                    # the repl loop will be broken below
                    break

                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    for x in lexer.get_tokens_unprocessed(line):
                        yield x
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            for item in do_insertions(insertions,
                    sql.get_tokens_unprocessed(curcode)):
                yield item

            # Emit the output lines
            out_token = Generic.Output
            while 1:
                try:
                    line = next(lines)
                except StopIteration:
                    return
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                    or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
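
The example above drives lookahead() through next() and pushes a line back with send() so the outer loop can re-read it. A sketch of a push-back iterator consistent with that usage; the helper in the original source may differ:

def lookahead(iterable):
    """Wrap an iterator and allow pushing an item back with send()."""
    for item in iterable:
        while True:
            pushed = yield item
            if pushed is None:
                break
            # yield the pushed-back item to send(), then again to the following next()
            yield pushed
            item = pushed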
Esempio n. 53
0
                    re.match(ur'  File "[^"]+", line \d+\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == u'...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item


class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']