def _highlight_code(self, match):
    """Tokenize the text captured by group 1 of *match* with a sub-lexer.

    The lexer named by ``self.body_lexer`` is used when that attribute is
    truthy; otherwise a lexer is guessed from the stripped text.  If either
    lookup raises ``ClassNotFound`` the ``'text'`` lexer is used instead.

    Yields whatever the chosen lexer's ``get_tokens_unprocessed`` yields
    for the (unstripped) captured text.
    """
    source = match.group(1)
    try:
        body_lexer = (
            get_lexer_by_name(self.body_lexer)
            if self.body_lexer
            else guess_lexer(source.strip())
        )
    except ClassNotFound:
        # Unknown name or nothing could be guessed: fall back to plain text.
        body_lexer = get_lexer_by_name('text')
    yield from body_lexer.get_tokens_unprocessed(source)
def _handle_codeblock(self, match):
    """
    match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

    Yield ``(index, token, value)`` tuples for a fenced code block.  The
    opening fence, language name and newline are emitted first, then the
    code, then the closing fence.

    When ``self.handlecodeblocks`` is truthy and a lexer can be found for
    the language name, the code is tokenized by that lexer; otherwise the
    whole code is emitted as a single ``String`` token.

    Every index is a position in the text the match was made against, so
    the tokens of the embedded lexer (which counts from the start of the
    code fragment) are shifted by ``match.start(4)``.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)
    yield match.start(3), Text, match.group(3)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(2).strip())
        except ClassNotFound:
            # unknown language: fall through to the plain String token
            pass

    code = match.group(4)
    code_start = match.start(4)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield code_start, String, code
    else:
        # The sub-lexer reports indices relative to `code`; rebase them so
        # they are consistent with the absolute indices yielded above.
        sub_tokens = do_insertions([], lexer.get_tokens_unprocessed(code))
        for index, token, value in sub_tokens:
            yield code_start + index, token, value

    yield match.start(5), String, match.group(5)
def _handle_cssblock(self, match):
    """
    match args: 1:style tag 2:newline, 3:code, 4:closing style tag

    Yield ``(index, token, value)`` tuples for an embedded style block.
    The opening tag and newline are emitted first, then the code, then the
    closing tag.

    When ``self.handlecodeblocks`` is truthy and the ``'css'`` lexer can be
    found, the code is tokenized by it; otherwise the whole code is emitted
    as a single ``String`` token.  The closing tag is emitted in both
    cases, so no matched text is lost from the token stream.

    Every index is a position in the text the match was made against, so
    the tokens of the embedded lexer (which counts from the start of the
    code fragment) are shifted by ``match.start(3)``.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)

    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name('css')
        except ClassNotFound:
            # no CSS lexer: fall through to the plain String token
            pass

    code = match.group(3)
    code_start = match.start(3)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield code_start, String, code
    else:
        # The sub-lexer reports indices relative to `code`; rebase them so
        # they are consistent with the absolute indices yielded above.
        sub_tokens = do_insertions([], lexer.get_tokens_unprocessed(code))
        for index, token, value in sub_tokens:
            yield code_start + index, token, value

    # Always emit the closing tag (previously skipped when lexer was None).
    yield match.start(4), String, match.group(4)
def get_tokens_unprocessed(self, data):
    """Tokenize a psql console transcript.

    The input is split into lines with ``line_re`` and consumed in
    repeated command/output cycles: the lines forming one command (with
    any psql prompts stripped and re-inserted as ``Generic.Prompt``) are
    lexed with ``PsqlRegexLexer``, then the following lines are emitted as
    output until the next prompt line is seen.  The generator finishes
    when an output phase runs out of lines.
    """
    sql_lexer = PsqlRegexLexer(**self.options)
    pending = lookahead(line_re.findall(data))

    # One pass per prompt-output cycle.
    while True:
        # --- command phase: collect lines until the command is complete
        command = ''
        prompts = []
        for line in pending:
            # A '$' at the very start of a command marks a shell prompt
            # (psql invoked from a command line): lex it as console text.
            if line.startswith('$') and not command:
                console = get_lexer_by_name('console', **self.options)
                yield from console.get_tokens_unprocessed(line)
                break

            # Strip a psql prompt, remembering where to re-insert it.
            prompt_match = re_prompt.match(line)
            if prompt_match is None:
                command += line
            else:
                prompt_text = prompt_match.group()
                prompts.append(
                    (len(command), [(0, Generic.Prompt, prompt_text)]))
                command += line[len(prompt_text):]

            # Stop once the command looks finished.
            # TODO: better handle multiline comments at the end with
            # a lexer with an external state?
            if (re_psql_command.match(command)
                    or re_end_command.search(command)):
                break

        # Emit the command tokens interleaved with the stripped prompts.
        yield from do_insertions(
            prompts, sql_lexer.get_tokens_unprocessed(command))

        # --- output phase: everything up to the next prompt line
        output_token = Generic.Output
        for line in pending:
            if re_prompt.match(line) is not None:
                # Hand the line back so the next command phase sees it.
                pending.send(line)
                break

            message = re_message.match(line)
            if message is None:
                yield (0, output_token, line)
                continue

            severity = message.group(1)
            if severity.startswith(("ERROR", "FATAL")):
                # From here on, this cycle's output is emitted as errors.
                output_token = Generic.Error
            yield (message.start(1), Generic.Strong, severity)
            yield (message.start(2), output_token, message.group(2))
        else:
            # Ran out of lines without meeting another prompt: done.
            return
def _get_lexer(self, lang):
    """Return a lexer for the language name *lang*, or ``None``.

    ``'sql'`` (in any letter case) maps to the ``'postgresql'`` lexer.
    Otherwise the name is tried as given, then without a leading ``pl``,
    then without a trailing ``u``, then without both; the first name for
    which ``get_lexer_by_name`` does not raise ``ClassNotFound`` wins.
    Lexers are created with ``self.options``.
    """
    if lang.lower() == 'sql':
        return get_lexer_by_name('postgresql', **self.options)

    has_pl_prefix = lang.startswith('pl')
    has_u_suffix = lang.endswith('u')

    candidates = [lang]
    if has_pl_prefix:
        candidates.append(lang[2:])
    if has_u_suffix:
        candidates.append(lang[:-1])
    if has_pl_prefix and has_u_suffix:
        candidates.append(lang[2:-1])

    for candidate in candidates:
        try:
            return get_lexer_by_name(candidate, **self.options)
        except ClassNotFound:
            continue

    # TODO: better logging (nothing is reported when no lexer matches)
    return None
def _print_help(what, name): try: if what == 'lexer': cls = get_lexer_by_name(name) print("Help on the %s lexer:" % cls.name) print(dedent(cls.__doc__)) elif what == 'formatter': cls = find_formatter_class(name) print("Help on the %s formatter:" % cls.name) print(dedent(cls.__doc__)) elif what == 'filter': cls = find_filter_class(name) print("Help on the %s filter:" % name) print(dedent(cls.__doc__)) return 0 except (AttributeError, ValueError): print("%s not found!" % what, file=sys.stderr) return 1
def _handle_sourcecode(self, match):
    """Yield ``(index, token, value)`` tuples for a source-code directive.

    Groups 1-7 of *match* form the directive header and are emitted with
    fixed token types; group 6 is the language name.  Group 8 is the
    indentation of the first code line and groups 9-11 are the rest of the
    code.

    When ``self.handlecodeblocks`` is truthy and a lexer can be found for
    the language name, the indentation is removed from each line, the
    remainder is tokenized by that lexer, and the removed indentation is
    re-inserted as ``Text`` tokens.  Otherwise the whole code is emitted
    as a single ``String`` token.

    Every index is a position in the text the match was made against, so
    the tokens of the embedded lexer (which counts from the start of the
    code fragment) are shifted by ``match.start(8)``.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), Punctuation, match.group(1)
    yield match.start(2), Text, match.group(2)
    yield match.start(3), Operator.Word, match.group(3)
    yield match.start(4), Punctuation, match.group(4)
    yield match.start(5), Text, match.group(5)
    yield match.start(6), Keyword, match.group(6)
    yield match.start(7), Text, match.group(7)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(6).strip())
        except ClassNotFound:
            # unknown language: fall through to the plain String token
            pass

    indention = match.group(8)
    indention_size = len(indention)
    code = (indention + match.group(9) + match.group(10) + match.group(11))
    code_start = match.start(8)

    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield code_start, String, code
        return

    # Strip the indentation from every line long enough to carry it and
    # record where each stripped prefix has to be re-inserted.
    insertions = []
    pieces = []
    offset = 0  # length of the dedented text collected so far
    for line in code.splitlines(True):
        if len(line) > indention_size:
            insertions.append(
                (offset, [(0, Text, line[:indention_size])]))
            line = line[indention_size:]
        pieces.append(line)
        offset += len(line)
    dedented = ''.join(pieces)

    # highlight the lines with the lexer; rebase the fragment-relative
    # indices so they agree with the absolute header indices above.
    merged = do_insertions(insertions, lexer.get_tokens_unprocessed(dedented))
    for index, token, value in merged:
        yield code_start + index, token, value
def main_inner(popts, args, usage):
    """Run one ``pygmentize`` invocation from already-parsed options.

    *popts* is an iterable of ``(option, argument)`` pairs, *args* the
    remaining positional arguments, and *usage* the usage text that is
    printed for ``-h`` (stdout) and for malformed invocations (stderr).

    The informational modes ``-h``, ``-V``, ``-L``, ``-H``, ``-N`` and
    ``-S`` are handled first and return early.  Otherwise a lexer and a
    formatter are selected, the input is read from a file or from stdin,
    and the highlighted result is written to the ``-o`` file or to stdout.

    Returns the exit status: 0 on success, 1 when something could not be
    found, read or opened, and 2 for usage errors.
    """
    opts = {}
    O_opts = []
    P_opts = []
    F_opts = []
    # -O, -P and -F keep every occurrence in their own list; for all other
    # options the last occurrence wins because opts[opt] is overwritten.
    for opt, arg in popts:
        if opt == '-O':
            O_opts.append(arg)
        elif opt == '-P':
            P_opts.append(arg)
        elif opt == '-F':
            F_opts.append(arg)
        opts[opt] = arg

    # Handled options are popped from opts as we go, so a non-empty opts
    # in the checks below means "some other option was given as well".
    if opts.pop('-h', None) is not None:
        print(usage)
        return 0

    if opts.pop('-V', None) is not None:
        print('Pygments version %s, (c) 2006-2019 by Georg Brandl.' % __version__)
        return 0

    # handle ``pygmentize -L``
    L_opt = opts.pop('-L', None)
    if L_opt is not None:
        # -L cannot be combined with any other option
        if opts:
            print(usage, file=sys.stderr)
            return 2

        # print version
        main(['', '-V'])
        # without arguments, list every category
        if not args:
            args = ['lexer', 'formatter', 'filter', 'style']
        for arg in args:
            # accept plural spellings by dropping trailing 's' characters
            _print_list(arg.rstrip('s'))
        return 0

    # handle ``pygmentize -H``
    H_opt = opts.pop('-H', None)
    if H_opt is not None:
        # -H needs exactly two arguments (category and name), nothing else
        if opts or len(args) != 2:
            print(usage, file=sys.stderr)
            return 2

        what, name = args  # pylint: disable=unbalanced-tuple-unpacking
        if what not in ('lexer', 'formatter', 'filter'):
            print(usage, file=sys.stderr)
            return 2

        return _print_help(what, name)

    # parse -O options
    parsed_opts = _parse_options(O_opts)
    opts.pop('-O', None)

    # parse -P options
    for p_opt in P_opts:
        try:
            name, value = p_opt.split('=', 1)
        except ValueError:
            # no '=' present: treat the bare name as a flag set to True
            parsed_opts[p_opt] = True
        else:
            parsed_opts[name] = value
    opts.pop('-P', None)

    # encodings: the specific option wins, 'encoding' is the fallback
    inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
    outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))

    # handle ``pygmentize -N``
    infn = opts.pop('-N', None)
    if infn is not None:
        lexer = find_lexer_class_for_filename(infn)
        if lexer is None:
            lexer = TextLexer

        # print the first alias of the lexer class chosen for the filename
        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -S``
    S_opt = opts.pop('-S', None)
    a_opt = opts.pop('-a', None)
    if S_opt is not None:
        # -S requires -f and allows no further options or arguments
        f_opt = opts.pop('-f', None)
        if not f_opt:
            print(usage, file=sys.stderr)
            return 2
        if opts or args:
            print(usage, file=sys.stderr)
            return 2

        try:
            parsed_opts['style'] = S_opt
            fmter = get_formatter_by_name(f_opt, **parsed_opts)
        except ClassNotFound as err:
            print(err, file=sys.stderr)
            return 1

        print(fmter.get_style_defs(a_opt or ''))
        return 0

    # if no -S is given, -a is not allowed
    if a_opt is not None:
        print(usage, file=sys.stderr)
        return 2

    # parse -F options
    F_opts = _parse_filters(F_opts)
    opts.pop('-F', None)

    allow_custom_lexer_formatter = False
    # -x: allow custom (eXternal) lexers and formatters
    if opts.pop('-x', None) is not None:
        allow_custom_lexer_formatter = True

    # select lexer
    lexer = None

    # given by name?
    lexername = opts.pop('-l', None)
    if lexername:
        # custom lexer, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in lexername:
            try:
                # "file.py:ClassName" selects a specific class in the file
                if ':' in lexername:
                    filename, name = lexername.rsplit(':', 1)
                    lexer = load_lexer_from_file(filename, name,
                                                 **parsed_opts)
                else:
                    lexer = load_lexer_from_file(lexername, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                lexer = get_lexer_by_name(lexername, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    # read input code
    code = None

    if args:
        # at most one input file is accepted
        if len(args) > 1:
            print(usage, file=sys.stderr)
            return 2

        if '-s' in opts:
            print('Error: -s option not usable when input file specified',
                  file=sys.stderr)
            return 2

        infn = args[0]
        try:
            with open(infn, 'rb') as infp:
                code = infp.read()
        except Exception as err:
            print('Error: cannot read infile:', err, file=sys.stderr)
            return 1
        # without an explicit input encoding, decode here and remember
        # the encoding that guess_decode reports
        if not inencoding:
            code, inencoding = guess_decode(code)

        # do we have to guess the lexer?
        if not lexer:
            try:
                lexer = get_lexer_for_filename(infn, code, **parsed_opts)
            except ClassNotFound as err:
                # -g: fall back to guessing from content, then plain text
                if '-g' in opts:
                    try:
                        lexer = guess_lexer(code, **parsed_opts)
                    except ClassNotFound:
                        lexer = TextLexer(**parsed_opts)
                else:
                    print('Error:', err, file=sys.stderr)
                    return 1
            except OptionError as err:
                print('Error:', err, file=sys.stderr)
                return 1

    elif '-s' not in opts:  # treat stdin as full file (-s support is later)
        # read code from terminal, always in binary mode since we want to
        # decode ourselves and be tolerant with it
        if sys.version_info > (3, ):
            # Python 3: we have to use .buffer to get a binary stream
            code = sys.stdin.buffer.read()
        else:
            code = sys.stdin.read()
        if not inencoding:
            code, inencoding = guess_decode_from_terminal(code, sys.stdin)
            # else the lexer will do the decoding
        if not lexer:
            try:
                lexer = guess_lexer(code, **parsed_opts)
            except ClassNotFound:
                lexer = TextLexer(**parsed_opts)

    else:  # -s option needs a lexer with -l
        if not lexer:
            print('Error: when using -s a lexer has to be selected with -l',
                  file=sys.stderr)
            return 2

    # process filters
    for fname, fopts in F_opts:
        try:
            lexer.add_filter(fname, **fopts)
        except ClassNotFound as err:
            print('Error:', err, file=sys.stderr)
            return 1

    # select formatter
    outfn = opts.pop('-o', None)
    fmter = opts.pop('-f', None)
    if fmter:
        # custom formatter, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in fmter:
            try:
                # "file.py:ClassName" selects a specific class in the file
                if ':' in fmter:
                    file, fmtername = fmter.rsplit(':', 1)
                    fmter = load_formatter_from_file(file, fmtername,
                                                     **parsed_opts)
                else:
                    fmter = load_formatter_from_file(fmter, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                fmter = get_formatter_by_name(fmter, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    if outfn:
        # no -f given: derive the formatter from the output filename
        if not fmter:
            try:
                fmter = get_formatter_for_filename(outfn, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1
        try:
            # NOTE(review): this handle is not closed anywhere in this
            # function -- confirm whether that is intentional.
            outfile = open(outfn, 'wb')
        except Exception as err:
            print('Error: cannot open outfile:', err, file=sys.stderr)
            return 1
    else:
        # writing to the terminal: choose a terminal formatter from $TERM
        if not fmter:
            if '256' in os.environ.get('TERM', ''):
                fmter = Terminal256Formatter(**parsed_opts)
            else:
                fmter = TerminalFormatter(**parsed_opts)
        if sys.version_info > (3, ):
            # Python 3: we have to use .buffer to get a binary stream
            outfile = sys.stdout.buffer
        else:
            outfile = sys.stdout

    # determine output encoding if not explicitly selected
    if not outencoding:
        if outfn:
            # output file? use lexer encoding for now (can still be None)
            fmter.encoding = inencoding
        else:
            # else use terminal encoding
            fmter.encoding = terminal_encoding(sys.stdout)

    # provide coloring under Windows, if possible
    if not outfn and sys.platform in ('win32', 'cygwin') and \
       fmter.name in ('Terminal', 'Terminal256'):  # pragma: no cover
        # unfortunately colorama doesn't support binary streams on Py3
        if sys.version_info > (3, ):
            from typecode._vendor.pygments.util import UnclosingTextIOWrapper
            outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
            # the wrapper encodes now, so the formatter must not
            fmter.encoding = None
        try:
            import colorama.initialise
        except ImportError:
            # colorama is optional; without it the stream is used as is
            pass
        else:
            outfile = colorama.initialise.wrap_stream(
                outfile, convert=None, strip=None, autoreset=False, wrap=True)

    # When using the LaTeX formatter and the option `escapeinside` is
    # specified, we need a special lexer which collects escaped text
    # before running the chosen language lexer.
    escapeinside = parsed_opts.get('escapeinside', '')
    # only a two-character value (left and right delimiter) is honoured
    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
        left = escapeinside[0]
        right = escapeinside[1]
        lexer = LatexEmbeddedLexer(left, right, lexer)

    # ... and do it!
    if '-s' not in opts:
        # process whole input as per normal...
        highlight(code, lexer, fmter, outfile)
        return 0
    else:
        # line by line processing of stdin (eg: for 'tail -f')...
        try:
            while 1:
                if sys.version_info > (3, ):
                    # Python 3: we have to use .buffer to get a binary stream
                    line = sys.stdin.buffer.readline()
                else:
                    line = sys.stdin.readline()
                # an empty read means end of input
                if not line:
                    break
                if not inencoding:
                    line = guess_decode_from_terminal(line, sys.stdin)[0]
                highlight(line, lexer, fmter, outfile)
                # flush after every line so output appears immediately
                if hasattr(outfile, 'flush'):
                    outfile.flush()
            return 0
        except KeyboardInterrupt:  # pragma: no cover
            return 0