def extract_code_strings(filepath): import pygments from pygments import lexers from pygments.token import Token comments = [] code_words = set() lex = lexers.find_lexer_class_for_filename(filepath) if lex is None: return comments, code_words slineno = 1 with open(filepath, encoding='utf-8') as fh: source = fh.read() for ty, ttext in lex.get_tokens(source): if ty in { Token.Literal.String, Token.Literal.String.Double, Token.Literal.String.Single }: comments.append(Comment(filepath, ttext, slineno, 'STRING')) else: for match in re_vars.finditer(ttext): code_words.add(match.group(0)) # Ugh - not nice or fast. slineno += ttext.count("\n") return comments, code_words
def configure_syntax_highlighting(self, extension=None): if self.highlighter: self.language.emit(self.DEFAULT_LANGUAGE) self.highlighter = None # Removing the highlighter does not refresh current text # Therefore, it is a necessity to call the set_text method self.modes.remove(modes.PygmentsSyntaxHighlighter) self.set_text(self.get_text()) if extension: lexer = find_lexer_class_for_filename(extension) if not lexer: return self.language.emit(lexer().name) self.highlighter = modes.PygmentsSyntaxHighlighter(self.document(), lexer=lexer()) self.highlighter.pygments_style = self.THEME self.modes.append(self.highlighter)
def pygments_lexer(self): language = self.data.get('pygments_language') lexer = None if language is not None: try: lexer = lexers.get_lexer_by_name(language) except ClassNotFound: lexer = None if lexer is None and self.gist_file_name is not None: try: lexer = lexers.find_lexer_class_for_filename( self.gist_file_name, ) except ClassNotFound: lexer = None else: lexer = lexer() if lexer is None: lexer = lexers.TextLexer() return lexer
def main(fn, lexer=None, options={}): if lexer is not None: lxcls = get_lexer_by_name(lexer).__class__ else: lxcls = find_lexer_class_for_filename(os.path.basename(fn)) if lxcls is None: name, rest = fn.split('_', 1) lxcls = find_lexer_class(name) if lxcls is None: raise AssertionError('no lexer found for file %r' % fn) print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__, lxcls.__name__)) debug_lexer = False # if profile: # # does not work for e.g. ExtendedRegexLexers # if lxcls.__bases__ == (RegexLexer,): # # yes we can! (change the metaclass) # lxcls.__class__ = ProfilingRegexLexerMeta # lxcls.__bases__ = (ProfilingRegexLexer,) # lxcls._prof_sort_index = profsort # else: # if lxcls.__bases__ == (RegexLexer,): # lxcls.__bases__ = (DebuggingRegexLexer,) # debug_lexer = True # elif lxcls.__bases__ == (DebuggingRegexLexer,): # # already debugged before # debug_lexer = True # else: # # HACK: ExtendedRegexLexer subclasses will only partially work here. # lxcls.__bases__ = (DebuggingRegexLexer,) # debug_lexer = True lx = lxcls(**options) lno = 1 if fn == '-': text = sys.stdin.read() else: with open(fn, 'rb') as fp: text = fp.read().decode('utf-8') text = text.strip('\n') + '\n' tokens = [] states = [] def show_token(tok, state): reprs = list(map(repr, tok)) print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ') if debug_lexer: print(' ' + ' ' * (29-len(reprs[0])) + ' : '.join(state) if state else '', end=' ') print() for type, val in lx.get_tokens(text): lno += val.count('\n') if type == Error and not ignerror: print('Error parsing', fn, 'on line', lno) if not showall: print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') for i in range(max(len(tokens) - num, 0), len(tokens)): if debug_lexer: show_token(tokens[i], states[i]) else: show_token(tokens[i], None) print('Error token:') l = len(repr(val)) print(' ' + repr(val), end=' ') if debug_lexer and hasattr(lx, 'ctx'): print(' ' * (60-l) + ' : '.join(lx.ctx.stack), end=' ') print() print() return 1 tokens.append((type, val)) if debug_lexer: if hasattr(lx, 'ctx'): states.append(lx.ctx.stack[:]) else: states.append(None) if showall: show_token((type, val), states[-1] if debug_lexer else None) return 0
def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): print(usage, file=sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: lexer = find_lexer_class_for_filename(infn) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) allow_custom_lexer_formatter = False # -x: allow custom (eXternal) lexers and formatters if opts.pop('-x', None) is not None: allow_custom_lexer_formatter = True # select lexer lexer = None # given by name? lexername = opts.pop('-l', None) if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: if ':' in lexername: filename, name = lexername.rsplit(':', 1) lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if '-s' in opts: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif '-s' not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = opts.pop('-o', None) fmter = opts.pop('-f', None) if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: if ':' in fmter: file, fmtername = fmter.rsplit(':', 1) fmter = load_formatter_from_file(file, fmtername, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3,): from pygments.util import UnclosingTextIOWrapper outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream( outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if '-s' not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0
def main(fn, lexer=None, options={}): if lexer is not None: lxcls = get_lexer_by_name(lexer).__class__ else: lxcls = find_lexer_class_for_filename(os.path.basename(fn)) if lxcls is None: name, rest = fn.split('_', 1) lxcls = find_lexer_class(name) if lxcls is None: raise AssertionError('no lexer found for file %r' % fn) print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__, lxcls.__name__)) debug_lexer = False # if profile: # # does not work for e.g. ExtendedRegexLexers # if lxcls.__bases__ == (RegexLexer,): # # yes we can! (change the metaclass) # lxcls.__class__ = ProfilingRegexLexerMeta # lxcls.__bases__ = (ProfilingRegexLexer,) # lxcls._prof_sort_index = profsort # else: # if lxcls.__bases__ == (RegexLexer,): # lxcls.__bases__ = (DebuggingRegexLexer,) # debug_lexer = True # elif lxcls.__bases__ == (DebuggingRegexLexer,): # # already debugged before # debug_lexer = True # else: # # HACK: ExtendedRegexLexer subclasses will only partially work here. # lxcls.__bases__ = (DebuggingRegexLexer,) # debug_lexer = True lx = lxcls(**options) lno = 1 if fn == '-': text = sys.stdin.read() else: with open(fn, 'rb') as fp: text = fp.read().decode('utf-8') text = text.strip('\n') + '\n' tokens = [] states = [] def show_token(tok, state): reprs = list(map(repr, tok)) print(' ' + reprs[1] + ' ' + ' ' * (29 - len(reprs[1])) + reprs[0], end=' ') if debug_lexer: print(' ' + ' ' * (29 - len(reprs[0])) + ' : '.join(state) if state else '', end=' ') print() for type, val in lx.get_tokens(text): lno += val.count('\n') if type == Error and not ignerror: print('Error parsing', fn, 'on line', lno) if not showall: print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') for i in range(max(len(tokens) - num, 0), len(tokens)): if debug_lexer: show_token(tokens[i], states[i]) else: show_token(tokens[i], None) print('Error token:') l = len(repr(val)) print(' ' + repr(val), end=' ') if debug_lexer and hasattr(lx, 'ctx'): print(' ' * (60 - l) + ' : '.join(lx.ctx.stack), end=' ') print() print() return 1 tokens.append((type, val)) if debug_lexer: if hasattr(lx, 'ctx'): states.append(lx.ctx.stack[:]) else: states.append(None) if showall: show_token((type, val), states[-1] if debug_lexer else None) return 0
def main_inner(parser, argns): if argns.help: parser.print_help() return 0 if argns.V: print('Pygments version %s, (c) 2006-2021 by Georg Brandl, Matthäus ' 'Chajdas and contributors.' % __version__) return 0 def is_only_option(opt): return not any(v for (k, v) in vars(argns).items() if k != opt) # handle ``pygmentize -L`` if argns.L is not None: if not is_only_option('L'): parser.print_help(sys.stderr) return 2 # print version main(['', '-V']) allowed_types = {'lexer', 'formatter', 'filter', 'style'} largs = [arg.rstrip('s') for arg in argns.L] if any(arg not in allowed_types for arg in largs): parser.print_help(sys.stderr) return 0 if not largs: largs = allowed_types for arg in largs: _print_list(arg) return 0 # handle ``pygmentize -H`` if argns.H: if not is_only_option('H'): parser.print_help(sys.stderr) return 2 what, name = argns.H if what not in ('lexer', 'formatter', 'filter'): parser.print_help(sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(argns.O or []) # parse -P options for p_opt in argns.P or []: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` if argns.N: lexer = find_lexer_class_for_filename(argns.N) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -C`` if argns.C: inp = sys.stdin.buffer.read() try: lexer = guess_lexer(inp, inencoding=inencoding) except ClassNotFound: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = argns.S a_opt = argns.a if S_opt is not None: f_opt = argns.f if not f_opt: parser.print_help(sys.stderr) return 2 if argns.l or argns.INPUTFILE: parser.print_help(sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if argns.a is not None: parser.print_help(sys.stderr) return 2 # parse -F options F_opts = _parse_filters(argns.F or []) # -x: allow custom (eXternal) lexers and formatters allow_custom_lexer_formatter = bool(argns.x) # select lexer lexer = None # given by name? lexername = argns.l if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: filename = None name = None if ':' in lexername: filename, name = lexername.rsplit(':', 1) if '.py' in name: # This can happen on Windows: If the lexername is # C:\lexer.py -- return to normal load path in that case name = None if filename and name: lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if argns.INPUTFILE: if argns.s: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = argns.INPUTFILE try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if argns.g: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif not argns.s: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it code = sys.stdin.buffer.read() # use .buffer to get a binary stream if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = argns.o fmter = argns.f if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: filename = None name = None if ':' in fmter: # Same logic as above for custom lexer filename, name = fmter.rsplit(':', 1) if '.py' in name: name = None if filename and name: fmter = load_formatter_from_file(filename, name, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: if '256' in os.environ.get('TERM', ''): fmter = Terminal256Formatter(**parsed_opts) else: fmter = TerminalFormatter(**parsed_opts) outfile = sys.stdout.buffer # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if not argns.s: # process whole input as per normal... try: highlight(code, lexer, fmter, outfile) finally: if outfn: outfile.close() return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: line = sys.stdin.buffer.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0 finally: if outfn: outfile.close()
def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): print(usage, file=sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: lexer = find_lexer_class_for_filename(infn) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) allow_custom_lexer_formatter = False # -x: allow custom (eXternal) lexers and formatters if opts.pop('-x', None) is not None: allow_custom_lexer_formatter = True # select lexer lexer = None # given by name? lexername = opts.pop('-l', None) if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: if ':' in lexername: filename, name = lexername.rsplit(':', 1) lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if '-s' in opts: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif '-s' not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = opts.pop('-o', None) fmter = opts.pop('-f', None) if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: if ':' in fmter: file, fmtername = fmter.rsplit(':', 1) fmter = load_formatter_from_file(file, fmtername, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: if '256' in os.environ.get('TERM', ''): fmter = Terminal256Formatter(**parsed_opts) else: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3, ): from pygments.util import UnclosingTextIOWrapper outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if '-s' not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0
def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2019 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print >> sys.stderr, usage return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print >> sys.stderr, usage return 2 what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): print >> sys.stderr, usage, return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: lexer = find_lexer_class_for_filename(infn) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print >> sys.stderr, usage return 2 if opts or args: print >> sys.stderr, usage return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound, err: print >> sys.stderr, err return 1 print(fmter.get_style_defs(a_opt or '')) return 0