def get_tokens_unprocessed(self, data):
    sql = SqlLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(data):
        line = match.group()
        if line.startswith('sqlite> ') or line.startswith('   ...> '):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:8])]))
            curcode += line[8:]
        else:
            if curcode:
                for item in do_insertions(insertions,
                                          sql.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if line.startswith('SQL error: '):
                yield (match.start(), Generic.Traceback, line)
            else:
                yield (match.start(), Generic.Output, line)
    if curcode:
        for item in do_insertions(insertions,
                                  sql.get_tokens_unprocessed(curcode)):
            yield item

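# --- Added example (not from the original source): a minimal sketch of how
# pygments.lexer.do_insertions weaves prompt tokens into a sub-lexer's token
# stream, which is the pattern every method in this file relies on.  It
# assumes only the public Pygments API; the sample code and prompt are made up.
from pygments.lexer import do_insertions
from pygments.lexers import PythonLexer
from pygments.token import Generic

code = "1 + 1\n"
# One insertion: at offset 0 of the buffered code, emit a prompt token.
insertions = [(0, [(0, Generic.Prompt, ">>> ")])]
for index, token, value in do_insertions(
        insertions, PythonLexer().get_tokens_unprocessed(code)):
    print(index, token, repr(value))
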
def get_tokens_unprocessed(self, text):
    exlexer = ElixirLexer(**self.options)

    curcode = ''
    in_error = False
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith('** '):
            in_error = True
            insertions.append((len(curcode),
                               [(0, Generic.Error, line[:-1])]))
            curcode += line[-1:]
        else:
            m = self._prompt_re.match(line)
            if m is not None:
                in_error = False
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, exlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                token = Generic.Error if in_error else Generic.Output
                yield match.start(), token, line
    if curcode:
        yield from do_insertions(
            insertions, exlexer.get_tokens_unprocessed(curcode))

def get_tokens_unprocessed(self, text):
    erlexer = ErlangLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        m = self._prompt_re.match(line)
        if m is not None:
            end = m.end()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:end])]))
            curcode += line[end:]
        else:
            if curcode:
                for item in do_insertions(insertions,
                                          erlexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if line.startswith('*'):
                yield match.start(), Generic.Traceback, line
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  erlexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    langlexer = self.LangLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()

        # prompt
        m = self._prompt_re.match(line)
        if m is not None:
            end = m.end()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:end])]))
            curcode += line[end:]
            continue
        elif curcode:
            for item in do_insertions(insertions,
                                      langlexer.get_tokens_unprocessed(curcode)):
                yield item
            curcode = ''
            insertions = []

        # comments
        m = self._comment_re.match(line)
        if m is not None:
            yield match.start(), Comment.Single, line
            continue

        yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  langlexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, data):
    sql = SqlLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(data):
        line = match.group()
        prompt_match = sqlite_prompt_re.match(line)
        if prompt_match is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:7])]))
            insertions.append((len(curcode),
                               [(7, Whitespace, ' ')]))
            curcode += line[8:]
        else:
            if curcode:
                yield from do_insertions(
                    insertions, sql.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []
            if line.startswith('SQL error: '):
                yield (match.start(), Generic.Traceback, line)
            else:
                yield (match.start(), Generic.Output, line)
    if curcode:
        yield from do_insertions(insertions,
                                 sql.get_tokens_unprocessed(curcode))

def get_tokens_unprocessed(self, text):
    assert self.lexer_class
    lexer = self.lexer_class(**self.options)

    curcode = ""
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        prompt = None
        for p in self.prompts:
            if line.startswith(p):
                prompt = p
        if prompt:
            prompt_len = len(prompt)
            insertions.append((len(curcode),
                               [(0, token.Generic.Prompt, line[:prompt_len])]))
            curcode += line[prompt_len:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, lexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ""
                insertions = []
            yield match.start(), token.Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  lexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    erlexer = ErlangLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        m = self._prompt_re.match(line)
        if m is not None:
            end = m.end()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:end])]))
            curcode += line[end:]
        else:
            if curcode:
                yield from do_insertions(insertions,
                                         erlexer.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []
            if line.startswith('*'):
                yield match.start(), Generic.Traceback, line
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        yield from do_insertions(insertions,
                                 erlexer.get_tokens_unprocessed(curcode))

def get_tokens_unprocessed(self, text):
    slexer = SLexer(**self.options)

    current_code_block = ''
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith('>') or line.startswith('+'):
            # Colorize the prompt as such,
            # then put rest of line into current_code_block
            insertions.append((len(current_code_block),
                               [(0, Generic.Prompt, line[:2])]))
            current_code_block += line[2:]
        else:
            # We have reached a non-prompt line!
            # If we have stored prompt lines, need to process them first.
            if current_code_block:
                # Weave together the prompts and highlight code.
                for item in do_insertions(insertions,
                                          slexer.get_tokens_unprocessed(current_code_block)):
                    yield item
                # Reset vars for next code block.
                current_code_block = ''
                insertions = []
            # Now process the actual line itself, this is output from R.
            yield match.start(), Generic.Output, line

    # If we happen to end on a code block with nothing after it, need to
    # process the last code block. This is neither elegant nor DRY so
    # should be changed.
    if current_code_block:
        for item in do_insertions(insertions,
                                  slexer.get_tokens_unprocessed(current_code_block)):
            yield item

def get_tokens_unprocessed(self, text):
    phplexer = PhpLexer(**self.options)
    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith(u'>>> ') or line.startswith(u'... '):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:4])]))
            curcode += line[4:]
        elif line.rstrip() == u'...':
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, u'...')]))
            curcode += line[3:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, phplexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  phplexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    exlexer = ElixirLexer(**self.options)

    curcode = ''
    in_error = False
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith(u'** '):
            in_error = True
            insertions.append((len(curcode),
                               [(0, Generic.Error, line[:-1])]))
            curcode += line[-1:]
        else:
            m = self._prompt_re.match(line)
            if m is not None:
                in_error = False
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    for item in do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                token = Generic.Error if in_error else Generic.Output
                yield match.start(), token, line
    if curcode:
        for item in do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    bashlexer = BashLexer(**self.options)

    pos = 0
    curcode = ""
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()
        m = re.match(
            r"^((?:\(\S+\))?(?:|sh\S*?|\w+\S+[@:]\S+(?:\s+\S+)"
            r"?|\[\S+[@:][^\n]+\].+)[$#%])(.*\n?)",
            line
        )
        if m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
        elif line.startswith(">"):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:1])]))
            curcode += line[1:]
        else:
            if insertions:
                toks = bashlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ""
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     bashlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def get_tokens_unprocessed(self, text):
    rblexer = RubyLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        m = self._prompt_re.match(line)
        if m is not None:
            end = m.end()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:end])]))
            curcode += line[end:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, rblexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  rblexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    mlexer = MatlabLexer(**self.options)

    curcode = ""
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()

        if line.startswith(">>"):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:3])]))
            curcode += line[3:]

        elif line.startswith("???"):
            idx = len(curcode)

            # without is showing error on same line as before...?
            line = "\n" + line
            token = (0, Generic.Traceback, line)
            insertions.append((idx, [token]))

        else:
            if curcode:
                for item in do_insertions(insertions,
                                          mlexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ""
                insertions = []

            yield match.start(), Generic.Output, line

    if curcode:  # or item:
        for item in do_insertions(insertions,
                                  mlexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    bashlexer = BashLexer(**self.options)

    pos = 0
    curcode = ''
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()
        m = re.match(r'^((?:\[?\S+@[^$#%]+)[$#%])(.*\n?)', line)
        if m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
        else:
            if insertions:
                toks = bashlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     bashlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def get_tokens_unprocessed(self, text):
    mlexer = MatlabLexer(**self.options)

    curcode = ''
    insertions = []
    continuation = False

    for match in line_re.finditer(text):
        line = match.group()

        if line.startswith('>> '):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:3])]))
            curcode += line[3:]

        elif line.startswith('>>'):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:2])]))
            curcode += line[2:]

        elif line.startswith('???'):
            idx = len(curcode)

            # without is showing error on same line as before...?
            # line = "\n" + line
            token = (0, Generic.Traceback, line)
            insertions.append((idx, [token]))

        elif continuation:
            # line_start is the length of the most recent prompt symbol
            line_start = len(insertions[-1][-1][-1])
            # Set leading spaces with the length of the prompt to be a generic prompt
            # This keeps code aligned when prompts are removed, say with some Javascript
            if line.startswith(' ' * line_start):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:line_start])]))
                curcode += line[line_start:]
            else:
                curcode += line

        else:
            if curcode:
                yield from do_insertions(
                    insertions, mlexer.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []

            yield match.start(), Generic.Output, line

        # Does not allow continuation if a comment is included after the ellipses.
        # Continues any line that ends with ..., even comments (lines that start with %)
        if line.strip().endswith('...'):
            continuation = True
        else:
            continuation = False

    if curcode:  # or item:
        yield from do_insertions(insertions,
                                 mlexer.get_tokens_unprocessed(curcode))

def get_tokens_unprocessed(self, text):
    if self.python3:
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)
    else:
        pylexer = Python2Lexer(**self.options)
        tblexer = Python2TracebackLexer(**self.options)

    curcode = ''
    insertions = []
    curtb = ''
    tbindex = 0
    tb = 0
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith(u'>>> ') or line.startswith(u'... '):
            tb = 0
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:4])]))
            curcode += line[4:]
        elif line.rstrip() == u'...' and not tb:
            # only a new >>> prompt can end an exception block
            # otherwise an ellipsis in place of the traceback frames
            # will be mishandled
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, u'...')]))
            curcode += line[3:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if (line.startswith(u'Traceback (most recent call last):') or
                    re.match(u'  File "[^"]+", line \\d+\\n$', line)):
                tb = 1
                curtb = line
                tbindex = match.start()
            elif line == 'KeyboardInterrupt\n':
                yield match.start(), Name.Class, line
            elif tb:
                curtb += line
                if not (line.startswith(' ') or line.strip() == u'...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex + i, t, v
                    curtb = ''
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item
    if curtb:
        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
            yield tbindex + i, t, v

def get_tokens_unprocessed(self, text):
    if self.python3:
        pylexer = Python3Lexer(**self.options)
        tblexer = Python3TracebackLexer(**self.options)
    else:
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    curtb = ''
    tbindex = 0
    tb = 0
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith(u'>>> ') or line.startswith(u'... '):
            tb = 0
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:4])]))
            curcode += line[4:]
        elif line.rstrip() == u'...' and not tb:
            # only a new >>> prompt can end an exception block
            # otherwise an ellipsis in place of the traceback frames
            # will be mishandled
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, u'...')]))
            curcode += line[3:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if (line.startswith(u'Traceback (most recent call last):') or
                    re.match(u'  File "[^"]+", line \\d+\\n$', line)):
                tb = 1
                curtb = line
                tbindex = match.start()
            elif line == 'KeyboardInterrupt\n':
                yield match.start(), Name.Class, line
            elif tb:
                curtb += line
                if not (line.startswith(' ') or line.strip() == u'...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex + i, t, v
                    curtb = ''
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item
    if curtb:
        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
            yield tbindex + i, t, v

def get_tokens_unprocessed(self, text):
    innerlexer = self._innerLexerCls(**self.options)

    pos = 0
    curcode = ''
    insertions = []
    backslash_continuation = False

    for match in line_re.finditer(text):
        line = match.group()

        if backslash_continuation:
            curcode += line
            backslash_continuation = curcode.endswith('\\\n')
            continue

        venv_match = self._venv.match(line)
        if venv_match:
            venv = venv_match.group(1)
            venv_whitespace = venv_match.group(2)
            insertions.append((len(curcode),
                               [(0, Generic.Prompt.VirtualEnv, venv)]))
            if venv_whitespace:
                insertions.append((len(curcode),
                                   [(0, Text, venv_whitespace)]))
            line = line[venv_match.end():]

        m = self._ps1rgx.match(line)
        if m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
            backslash_continuation = curcode.endswith('\\\n')
        elif line.startswith(self._ps2):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:len(self._ps2)])]))
            curcode += line[len(self._ps2):]
            backslash_continuation = curcode.endswith('\\\n')
        else:
            if insertions:
                toks = innerlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     innerlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def _handle_codeblock(self, match):
    """
    match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)
    yield match.start(3), Text, match.group(3)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(2).strip())
        except ClassNotFound:
            pass
    code = match.group(4)

    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield match.start(4), String, code
    else:
        for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
            yield item

    yield match.start(5), String, match.group(5)

def _handle_cssblock(self, match):
    """
    match args: 1:style tag 2:newline, 3:code, 4:closing style tag
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)

    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name('css')
        except ClassNotFound:
            pass
    code = match.group(3)

    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield match.start(3), String, code
        return

    yield from do_insertions([], lexer.get_tokens_unprocessed(code))

    yield match.start(4), String, match.group(4)

def get_tokens_unprocessed(self, data):
    sql = PsqlRegexLexer(**self.options)

    lines = lookahead(line_re.findall(data))

    # prompt-output cycle
    while 1:

        # consume the lines of the command: start with an optional prompt
        # and continue until the end of command is detected
        curcode = ''
        insertions = []
        for line in lines:
            # Identify a shell prompt in case of psql commandline example
            if line.startswith('$') and not curcode:
                lexer = get_lexer_by_name('console', **self.options)
                yield from lexer.get_tokens_unprocessed(line)
                break

            # Identify a psql prompt
            mprompt = re_prompt.match(line)
            if mprompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, mprompt.group())]))
                curcode += line[len(mprompt.group()):]
            else:
                curcode += line

            # Check if this is the end of the command
            # TODO: better handle multiline comments at the end with
            # a lexer with an external state?
            if re_psql_command.match(curcode) \
               or re_end_command.search(curcode):
                break

        # Emit the combined stream of command and prompt(s)
        yield from do_insertions(insertions,
                                 sql.get_tokens_unprocessed(curcode))

        # Emit the output lines
        out_token = Generic.Output
        for line in lines:
            mprompt = re_prompt.match(line)
            if mprompt is not None:
                # push the line back to have it processed by the prompt
                lines.send(line)
                break

            mmsg = re_message.match(line)
            if mmsg is not None:
                if mmsg.group(1).startswith("ERROR") \
                   or mmsg.group(1).startswith("FATAL"):
                    out_token = Generic.Error
                yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                yield (mmsg.start(2), out_token, mmsg.group(2))
            else:
                yield (0, out_token, line)
        else:
            return

def get_tokens_unprocessed(self, text):
    pylexer = PythonLexer(**self.options)
    tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        input_prompt = self.input_prompt.match(line)
        continue_prompt = self.continue_prompt.match(line.rstrip())
        output_prompt = self.output_prompt.match(line)
        if line.startswith("#"):
            insertions.append((len(curcode),
                               [(0, Comment, line)]))
        elif input_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Other, input_prompt.group())]))
            curcode += line[input_prompt.end():]
        elif continue_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Other, continue_prompt.group())]))
            curcode += line[continue_prompt.end():]
        elif output_prompt is not None:
            # Use the 'error' token for output.  We should probably make
            # our own token, but error is typically in a bright color like
            # red, so it works fine for our output prompts.
            insertions.append((len(curcode),
                               [(0, Other,  # Generic.Error,
                                 output_prompt.group())]))
            curcode += line[output_prompt.end():]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    pylexer = Python3Lexer(**self.options)
    tblexer = Python3TracebackLexer(**self.options)

    if ">>>" not in text:
        if (text.startswith('Traceback (most recent call last):') or
                re.match(r'^  File "[^"]+", line \d+\n$', text)):
            yield from tblexer.get_tokens_unprocessed(text)
        else:
            yield from pylexer.get_tokens_unprocessed(text)
        return

    curcode = ''
    insertions = []
    curtb = ''
    tbindex = 0
    tb = 0

    def do_current_code():
        nonlocal curcode
        nonlocal insertions
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
            curcode = ''
            insertions = []

    section = ""
    code = list(line_re.finditer(text))
    while code:
        match = code.pop(0)
        line = match.group()
        if line.startswith(">>>"):
            insertions = []
            insertions.append((0, [(0, Generic.Prompt, line[:4])]))
            section = line[4:]
            secindex = match.start()
            while code and code[0].group().startswith(" "):
                line = code.pop(0).group()
                if not line.strip():
                    break
                insertions.append((len(section),
                                   [(0, Generic.Prompt, line[:4])]))
                section += line[4:]
            for i, t, v in do_insertions(insertions,
                                         pylexer.get_tokens_unprocessed(section)):
                yield secindex + i, t, v
        elif (line.startswith('Traceback (most recent call last):') or
              re.match(r' *File "[^"]+", line \d+\n$', line)):
            tb = line
            tbindex = match.start()
            while code and not code[0].group().startswith(">>>"):
                tb += code.pop(0).group()
            for i, t, v in tblexer.get_tokens_unprocessed(tb):
                yield tbindex + i, t, v
        else:
            yield match.start(), Generic.Output, line

def do_current_code():
    nonlocal curcode
    nonlocal insertions
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item
        curcode = ''
        insertions = []

def get_tokens_unprocessed(self, text):
    pylexer = PythonLexer(**self.options)
    tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    curtb = ''
    tbindex = 0
    tb = 0
    for match in line_re.finditer(text):
        line = match.group()
        if line.startswith('>>> ') or line.startswith('... '):
            tb = 0
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:4])]))
            curcode += line[4:]
        elif line.rstrip() == '...':
            tb = 0
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, '...')]))
            curcode += line[3:]
        else:
            if curcode:
                for item in do_insertions(insertions,
                                          pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if line.startswith('Traceback (most recent call last):'):
                tb = 1
                curtb = line
                tbindex = match.start()
            elif tb:
                curtb += line
                if not (line.startswith(' ') or line.strip() == '...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex + i, t, v
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    pylexer = PythonLexer(**self.options)
    tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        input_prompt = self.input_prompt.match(line)
        continue_prompt = self.continue_prompt.match(line.rstrip())
        output_prompt = self.output_prompt.match(line)
        if line.startswith("#"):
            insertions.append((len(curcode),
                               [(0, Comment, line)]))
        elif line.startswith("<warning>"):
            insertions.append((len(curcode),
                               [(0, Generic.Error, line[9:])]))
        elif input_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, input_prompt.group())]))
            curcode += line[input_prompt.end():]
        elif continue_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, continue_prompt.group())]))
            curcode += line[continue_prompt.end():]
        elif output_prompt is not None:
            # Use the 'error' token for output.  We should probably make
            # our own token, but error is typically in a bright color like
            # red, so it works fine for our output prompts.
            insertions.append((len(curcode),
                               [(0, Generic.Error, output_prompt.group())]))
            curcode += line[output_prompt.end():]
        else:
            if curcode:
                for item in do_insertions(insertions,
                                          pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    jllexer = JuliaLexer(**self.options)

    start = 0
    curcode = ''
    insertions = []
    output = False
    error = False

    for line in text.splitlines(True):
        if line.startswith('julia>'):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:6])]))
            curcode += line[6:]
            output = False
            error = False
        elif line.startswith('help?>') or line.startswith('shell>'):
            yield start, Generic.Prompt, line[:6]
            yield start + 6, Text, line[6:]
            output = False
            error = False
        elif line.startswith('      ') and not output:
            insertions.append((len(curcode),
                               [(0, Text, line[:6])]))
            curcode += line[6:]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, jllexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            if line.startswith('ERROR: ') or error:
                yield start, Generic.Error, line
                error = True
            else:
                yield start, Generic.Output, line
                output = True
        start += len(line)

    if curcode:
        for item in do_insertions(insertions,
                                  jllexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    mlexer = MatlabLexer(**self.options)

    curcode = ''
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()

        if line.startswith('>> '):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:3])]))
            curcode += line[3:]

        elif line.startswith('>>'):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:2])]))
            curcode += line[2:]

        elif line.startswith('???'):
            idx = len(curcode)

            # without is showing error on same line as before...?
            # line = "\n" + line
            token = (0, Generic.Traceback, line)
            insertions.append((idx, [token]))

        else:
            if curcode:
                for item in do_insertions(
                        insertions, mlexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []

            yield match.start(), Generic.Output, line

    if curcode:  # or item:
        for item in do_insertions(insertions,
                                  mlexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    innerlexer = self._innerLexerCls(**self.options)

    pos = 0
    curcode = ''
    insertions = []
    backslash_continuation = False

    for match in line_re.finditer(text):
        line = match.group()
        m = re.match(self._ps1rgx, line)
        if backslash_continuation:
            curcode += line
            backslash_continuation = curcode.endswith('\\\n')
        elif m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
            backslash_continuation = curcode.endswith('\\\n')
        elif line.startswith(self._ps2):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:len(self._ps2)])]))
            curcode += line[len(self._ps2):]
            backslash_continuation = curcode.endswith('\\\n')
        else:
            if insertions:
                toks = innerlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     innerlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def get_tokens_unprocessed(self, text):
    pylexer = PythonLexer(**self.options)
    tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    for match in line_re.finditer(text):
        line = match.group()
        input_prompt = self.input_prompt.match(line)
        continue_prompt = self.continue_prompt.match(line.rstrip())
        output_prompt = self.output_prompt.match(line)
        if line.startswith("#"):
            insertions.append((len(curcode),
                               [(0, Comment, line)]))
        elif input_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, input_prompt.group())]))
            curcode += line[input_prompt.end():]
        elif continue_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, continue_prompt.group())]))
            curcode += line[continue_prompt.end():]
        elif output_prompt is not None:
            insertions.append((len(curcode),
                               [(0, Generic.Output, output_prompt.group())]))
            curcode += line[output_prompt.end():]
        else:
            if curcode:
                for item in do_insertions(
                        insertions, pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item

def get_tokens_unprocessed(self, text):
    bashlexer = shell.BashLexer(**self.options)

    pos = 0
    curcode = ''
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()
        m = re.match(r'^((?:[^@#$%\s]+@[^]#$%\s]+(?: ?:? ?\[?[^]#$%]+\]?)? *)?[$#%])(.*\n?)',
                     line)
        if line.startswith('#'):
            yield match.start(), Comment, line
            insertions = []
            curcode = ''
        elif m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
        elif line.startswith('>'):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:1])]))
            curcode += line[1:]
        else:
            if insertions:
                toks = bashlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     bashlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def get_tokens_unprocessed(self, text):
    hslexer = HaskellLexer(**self.options)

    style = self.options.get('litstyle')
    if style is None:
        style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

    code = ''
    insertions = []

    if style == 'bird':
        # bird-style
        for match in line_re.finditer(text):
            line = match.group()
            m = bird_re.match(line)
            if m:
                insertions.append((len(code),
                                   [(0, Comment.Special, m.group(1))]))
                code += m.group(2)
            else:
                insertions.append((len(code), [(0, Text, line)]))
    else:
        # latex-style
        from pygments.lexers.text import TexLexer
        lxlexer = TexLexer(**self.options)

        codelines = 0
        latex = ''
        for match in line_re.finditer(text):
            line = match.group()
            if codelines:
                if line.lstrip().startswith('\\end{code}'):
                    codelines = 0
                    latex += line
                else:
                    code += line
            elif line.lstrip().startswith('\\begin{code}'):
                codelines = 1
                latex += line
                insertions.append((len(code),
                                   list(lxlexer.get_tokens_unprocessed(latex))))
                latex = ''
            else:
                latex += line
        insertions.append((len(code),
                           list(lxlexer.get_tokens_unprocessed(latex))))
    for item in do_insertions(insertions,
                              hslexer.get_tokens_unprocessed(code)):
        yield item

def get_tokens_unprocessed(self, text):
    ins = []
    lines = text.splitlines(True)
    done = ''
    lex = BaseMakefileLexer(**self.options)
    backslashflag = False
    for line in lines:
        if self.r_special.match(line) or backslashflag:
            ins.append((len(done), [(0, Comment.Preproc, line)]))
            backslashflag = line.strip().endswith('\\')
        elif self.r_comment.match(line):
            ins.append((len(done), [(0, Comment, line)]))
        else:
            done += line
    yield from do_insertions(ins,
                             lex.get_tokens_unprocessed(done))

def get_tokens_unprocessed(self, text):
    innerlexer = self._innerLexerCls(**self.options)

    pos = 0
    curcode = ''
    insertions = []

    for match in line_re.finditer(text):
        line = match.group()
        m = re.match(self._ps1rgx, line)
        if m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
        elif line.startswith(self._ps2):
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:len(self._ps2)])]))
            curcode += line[len(self._ps2):]
        else:
            if insertions:
                toks = innerlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''
    if insertions:
        for i, t, v in do_insertions(insertions,
                                     innerlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v

def get_tokens_unprocessed(self, text):
    ins = []
    lines = text.splitlines(True)
    done = ''
    lex = BaseMakefileLexer(**self.options)
    backslashflag = False
    for line in lines:
        if self.r_special.match(line) or backslashflag:
            ins.append((len(done), [(0, Comment.Preproc, line)]))
            backslashflag = line.strip().endswith('\\')
        elif self.r_comment.match(line):
            ins.append((len(done), [(0, Comment, line)]))
        else:
            done += line
    for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
        yield item

def get_tokens_unprocessed(self, text):
    buffered = ''
    insertions = []
    lng_buffer = []
    for i, t, v in self.language_lexer.get_tokens_unprocessed(text):
        if t is Comment.Single and v.startswith(PY_EXEC_SYM) and i == 0:
            if lng_buffer:
                insertions.append((len(buffered), lng_buffer))
                lng_buffer = []
            buffered += v
        else:
            lng_buffer.append((i, t, v))
    if lng_buffer:
        insertions.append((len(buffered), lng_buffer))
    return do_insertions(insertions,
                         self.root_lexer.get_tokens_unprocessed(buffered))

def get_tokens_unprocessed(self, text):
    # find and remove all the escape tokens (replace with an empty string)
    # this is very similar to DelegatingLexer.get_tokens_unprocessed.
    buffered = ''
    insertions = []
    insertion_buf = []
    for i, t, v in self._find_safe_escape_tokens(text):
        if t is None:
            if insertion_buf:
                insertions.append((len(buffered), insertion_buf))
                insertion_buf = []
            buffered += v
        else:
            insertion_buf.append((i, t, v))
    if insertion_buf:
        insertions.append((len(buffered), insertion_buf))
    return do_insertions(insertions,
                         self.lang.get_tokens_unprocessed(buffered))

def _handle_sourcecode(self, match):
    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound

    # section header
    yield match.start(1), Punctuation, match.group(1)
    yield match.start(2), Text, match.group(2)
    yield match.start(3), Operator.Word, match.group(3)
    yield match.start(4), Punctuation, match.group(4)
    yield match.start(5), Text, match.group(5)
    yield match.start(6), Keyword, match.group(6)
    yield match.start(7), Text, match.group(7)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(6).strip())
        except ClassNotFound:
            pass
    indention = match.group(8)
    indention_size = len(indention)
    code = (indention + match.group(9) + match.group(10) + match.group(11))

    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield match.start(8), String, code
        return

    # highlight the lines with the lexer.
    ins = []
    codelines = code.splitlines(True)
    code = ''
    for line in codelines:
        if len(line) > indention_size:
            ins.append((len(code), [(0, Text, line[:indention_size])]))
            code += line[indention_size:]
        else:
            code += line

    for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
        yield item

def buffered_tokens(self):
    """
    Generator of unprocessed tokens after doing insertions and before
    changing to a new state.
    """
    if self.mode == 'output':
        tokens = [(0, Generic.Output, self.buffer)]
    elif self.mode == 'input':
        tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
    else:
        # traceback
        tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

    for i, t, v in do_insertions(self.insertions, tokens):
        # All token indexes are relative to the buffer.
        yield self.index + i, t, v

    # Clear it all
    self.index += len(self.buffer)
    self.buffer = u''
    self.insertions = []

def get_tokens_unprocessed(self, text):
    lualexer = LuaLexer(**self.options)
    ymllexer = YamlLexer(**self.options)
    shslexer = BashSessionLexer(**self.options)

    curcode = ''
    insertions = []
    code = False
    prompt_len = 0

    curyml = ''
    yml = False
    ymlcnt = 0

    curshs = ''
    shs = False

    for match in line_re.finditer(text):
        line = match.group()
        # print(line)
        # print(code, yml, shs)

        # First part - if output starts from '$ ' then it's a BASH session.
        # - We must only check that we're not inside of YAML;
        #   code can't start with '$ '
        # - if output (not inside YAML) starts with '$ ' - it's not our problem
        # Also, we can match multiline commands only if the line ends with '\'
        check_shs = (line.startswith('$ ') and not yml) or shs
        if check_shs:
            curshs += line
            if line.endswith('\\'):
                shs = True
                continue
            for item in shslexer.get_tokens_unprocessed(curshs):
                yield item
            curshs = ''
            shs = False
            continue

        # Second part - check for YAML
        # 1) It's the begin, means (yml == False) and line.strip() == '---'
        # 2) It's the middle. (yml == True) and line.strip() not in ('---', '...')
        # 3) It's the end - then (yml == False) and line.strip() == '...'
        check_yml_begin = (yml == False and line.strip() in (yml_beg, ))
        # check_yml_middle = (yml == True and line.strip() not in (yml_beg, yml_end))
        check_yml_end = (yml == True and line.strip() == yml_end and ymlcnt == 0)
        if (check_yml_begin or yml):
            # print(check_yml_begin, check_yml_middle, check_yml_end)
            # Flush previous code buffers
            if (yml is True and line.strip() == yml_beg):
                ymlcnt += 1
            if (not check_yml_end and line.strip() == yml_end):
                ymlcnt += 1
            if check_yml_begin and curcode:
                for item in do_insertions(insertions,
                                          lualexer.get_tokens_unprocessed(curcode)):
                    yield item
                code = False
                curcode = ''
                insertions = []
            curyml += line
            # We finished reading YAML output, so push it to the user
            if check_yml_end:
                for item in ymllexer.get_tokens_unprocessed(curyml):
                    yield item
                curyml = ''
            yml = False if check_yml_end else True
            # print('yaml gotcha %d' % yml)
            continue

        # Third part - check for Tarantool's Lua
        # It's a combination of:
        # prompt: tarantool> or localhost> or localhost:{port}>
        # lua: body after prompt + space
        prompt_pos_flexible = find_prompt(line)
        prompt_pos_strict = prompt_pos_flexible if not code else None
        if prompt_pos_strict:
            prompt_len = prompt_pos_strict + 2
        check_code_begin = bool(prompt_pos_strict)
        check_code_middle = code and line.startswith(' ' * (prompt_len - 2) + '> ')
        check_code_flexible = False
        # e.g. we have two 'tarantool> ' in a row - code is True and
        # check_code_middle is False, then we have to do something about it,
        # otherwise it will be treated like Generic.Output
        if code and check_code_middle is False and bool(prompt_pos_flexible):
            prompt_len = prompt_pos_flexible + 2
            check_code_flexible = True
        if (check_code_begin or check_code_middle or check_code_flexible):
            code = True
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, line[:prompt_len])]))
            curcode += line[prompt_len:]
            continue

        # If it's not something before - then we must check for code
        # and push that line as 'Generic.Output'
        if curcode:
            for item in do_insertions(insertions,
                                      lualexer.get_tokens_unprocessed(curcode)):
                yield item
            code = False
            curcode = ''
            insertions = []
        yield match.start(), Generic.Output, line

    if curcode:
        for item in do_insertions(insertions,
                                  lualexer.get_tokens_unprocessed(curcode)):
            yield item
    if curyml:
        for item in ymllexer.get_tokens_unprocessed(curyml):
            yield item
    if curshs:
        for item in shslexer.get_tokens_unprocessed(curshs):
            yield item

def get_tokens_unprocessed(self, text):
    cl_lexer = CommonLispLexer(**self.options)

    pos = 0
    curcode = ""
    insertions = []

    match = None  # guard against an immediately exhausted iterator
    iterator = line_re.finditer(text)
    while True:
        try:
            match = next(iterator)
            line = match.group()
            start = match.start()
        except StopIteration:
            # the line_re expression won't work in the case of the
            # string "a\nb" so make sure that the remainder of the
            # output is processed
            if match and match.end() < len(text):
                start = match.end()
                line = text[match.end():]
                match = None
            else:
                # PEP 479: returning ends the generator cleanly
                return

        m = re.match(r"^((?:[^\s*?>:]*[*?>:]) )(.*\n?)", line)
        if m:
            line = ""
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = start
            insertions.append((len(curcode),
                               [(0, Generic.Prompt, m.group(1))]))

            # skip parser when line is empty
            if re.match(r"^\s*$", m.group(2)):
                line = m.group(2)
            else:
                # read out the whole sexp after the prompt, if it's a
                # multiline expression then keep reading until there is
                # a complete expression
                string_stream = StringIO(m.group(2))
                reader = sexp_reader()
                while reader.read(string_stream):
                    try:
                        match = next(iterator)
                        string_stream = StringIO(match.group())
                    except StopIteration:
                        break
                curcode += reader.sexp()
                line = reader.rest()

        if insertions:
            if curcode:
                toks = cl_lexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            if line:
                yield start, Generic.Output, line
            insertions = []
            curcode = ""

def get_tokens_unprocessed(self, data):
    sql = PsqlRegexLexer(**self.options)

    lines = lookahead(line_re.findall(data))

    # prompt-output cycle
    while 1:

        # consume the lines of the command: start with an optional prompt
        # and continue until the end of command is detected
        curcode = ''
        insertions = []
        while 1:
            try:
                line = next(lines)
            except StopIteration:
                # allow the emission of partially collected items
                # the repl loop will be broken below
                break

            # Identify a shell prompt in case of psql commandline example
            if line.startswith('$') and not curcode:
                lexer = get_lexer_by_name('console', **self.options)
                for x in lexer.get_tokens_unprocessed(line):
                    yield x
                break

            # Identify a psql prompt
            mprompt = re_prompt.match(line)
            if mprompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, mprompt.group())]))
                curcode += line[len(mprompt.group()):]
            else:
                curcode += line

            # Check if this is the end of the command
            # TODO: better handle multiline comments at the end with
            # a lexer with an external state?
            if re_psql_command.match(curcode) \
               or re_end_command.search(curcode):
                break

        # Emit the combined stream of command and prompt(s)
        for item in do_insertions(insertions,
                                  sql.get_tokens_unprocessed(curcode)):
            yield item

        # Emit the output lines
        out_token = Generic.Output
        while 1:
            try:
                line = next(lines)
            except StopIteration:
                # input exhausted: end the generator instead of letting
                # StopIteration escape (PEP 479)
                return
            mprompt = re_prompt.match(line)
            if mprompt is not None:
                # push the line back to have it processed by the prompt
                lines.send(line)
                break

            mmsg = re_message.match(line)
            if mmsg is not None:
                if mmsg.group(1).startswith("ERROR") \
                   or mmsg.group(1).startswith("FATAL"):
                    out_token = Generic.Error
                yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                yield (mmsg.start(2), out_token, mmsg.group(2))
            else:
                yield (0, out_token, line)

            if (line.startswith(u'Traceback (most recent call last):') or
                    re.match(r'  File "[^"]+", line \d+\n$', line)):
                tb = 1
                curtb = line
                tbindex = match.start()
            elif line == 'KeyboardInterrupt\n':
                yield match.start(), Name.Class, line
            elif tb:
                curtb += line
                if not (line.startswith(' ') or line.strip() == u'...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex + i, t, v
            else:
                yield match.start(), Generic.Output, line
    if curcode:
        for item in do_insertions(insertions,
                                  pylexer.get_tokens_unprocessed(curcode)):
            yield item


class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

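# --- Added example (not from the original source): a hedged, end-to-end
# sketch of how console lexers like the ones above are typically driven,
# assuming only the public Pygments API (highlight plus a formatter); the
# sample session string is made up.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import PythonConsoleLexer

session = '>>> 1 + 1\n2\n'
print(highlight(session, PythonConsoleLexer(), TerminalFormatter()))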