def lexer(self) -> Optional[Lexer]:
    """The lexer for this syntax, or None if no lexer was found.

    Tries to find the lexer by name if a string was passed to the constructor.
    """
    candidate = self._lexer
    if isinstance(candidate, Lexer):
        # Constructor was given a ready-made Lexer instance; use it as-is.
        return candidate
    # Otherwise a lexer name (str) was supplied: resolve it via Pygments,
    # keeping trailing newlines intact and honoring this syntax's tab size.
    try:
        return get_lexer_by_name(
            candidate,
            stripnl=False,
            ensurenl=True,
            tabsize=self.tab_size,
        )
    except ClassNotFound:
        # No lexer registered under that name.
        return None
def _print_help(what, name): try: if what == 'lexer': cls = get_lexer_by_name(name) print("Help on the %s lexer:" % cls.name) print(dedent(cls.__doc__)) elif what == 'formatter': cls = find_formatter_class(name) print("Help on the %s formatter:" % cls.name) print(dedent(cls.__doc__)) elif what == 'filter': cls = find_filter_class(name) print("Help on the %s filter:" % name) print(dedent(cls.__doc__)) return 0 except (AttributeError, ValueError): print("%s not found!" % what, file=sys.stderr) return 1
def guess_lexer(cls, path: str, code: Optional[str] = None) -> str:
    """Guess the alias of the Pygments lexer to use based on a path and an
    optional string of code.

    If *code* is supplied, a combination of the code and the filename
    determines the best lexer.  For example, if the file is ``index.html``
    and contains Django templating syntax, "html+django" is returned; if
    no templating language is used, the plain "html" lexer is chosen.
    Without *code*, only the file extension is consulted.

    Args:
        path (AnyStr): The path to the file containing the code you wish
            to know the lexer for.
        code (str, optional): Optional string of code used to refine the
            guess when present.

    Returns:
        str: The name of the Pygments lexer that best matches the
        supplied path/code, or "default" when none matches.
    """
    found: Optional[Lexer] = None
    # First preference: let Pygments weigh filename + contents together.
    if code:
        try:
            found = guess_lexer_for_filename(path, code)
        except ClassNotFound:
            pass
    # Fallback: look the extension up as a lexer alias.
    if found is None:
        extension = os.path.splitext(path)[1]
        if extension:
            try:
                found = get_lexer_by_name(extension.lstrip(".").lower())
            except ClassNotFound:
                pass
    if found is None:
        return "default"
    # Prefer the first registered alias; fall back to the display name.
    return found.aliases[0] if found.aliases else found.name
def main_inner(parser, argns):
    """Dispatch the parsed ``pygmentize`` command line.

    Args:
        parser: the argparse parser; used only to print help/usage text.
        argns: the parsed argument namespace.

    Returns:
        int: process exit code (0 = success, 1 = runtime error,
        2 = usage error).
    """
    if argns.help:
        parser.print_help()
        return 0

    if argns.V:
        print('Pygments version %s, (c) 2006-2022 by Georg Brandl, Matthäus '
              'Chajdas and contributors.' % __version__)
        return 0

    def is_only_option(opt):
        # True when `opt` is the only option set in the namespace.
        return not any(v for (k, v) in vars(argns).items() if k != opt)

    # handle ``pygmentize -L``
    if argns.L is not None:
        arg_set = set()
        for k, v in vars(argns).items():
            if v:
                arg_set.add(k)

        # -L may only be combined with --json.
        arg_set.discard('L')
        arg_set.discard('json')

        if arg_set:
            parser.print_help(sys.stderr)
            return 2

        # print version
        if not argns.json:
            main(['', '-V'])
        allowed_types = {'lexer', 'formatter', 'filter', 'style'}
        # Accept plural forms ("lexers") by stripping a trailing 's'.
        largs = [arg.rstrip('s') for arg in argns.L]
        if any(arg not in allowed_types for arg in largs):
            parser.print_help(sys.stderr)
            return 0
        if not largs:
            # Bare -L lists everything.
            largs = allowed_types
        if not argns.json:
            for arg in largs:
                _print_list(arg)
        else:
            _print_list_as_json(largs)
        return 0

    # handle ``pygmentize -H``
    if argns.H:
        if not is_only_option('H'):
            parser.print_help(sys.stderr)
            return 2
        what, name = argns.H
        if what not in ('lexer', 'formatter', 'filter'):
            parser.print_help(sys.stderr)
            return 2
        return _print_help(what, name)

    # parse -O options
    parsed_opts = _parse_options(argns.O or [])

    # parse -P options
    for p_opt in argns.P or []:
        try:
            name, value = p_opt.split('=', 1)
        except ValueError:
            # A -P option without '=' is treated as a boolean flag.
            parsed_opts[p_opt] = True
        else:
            parsed_opts[name] = value

    # encodings
    inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
    outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))

    # handle ``pygmentize -N``
    if argns.N:
        lexer = find_lexer_class_for_filename(argns.N)
        if lexer is None:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -C``
    if argns.C:
        # Guess the lexer from stdin contents only.
        inp = sys.stdin.buffer.read()
        try:
            lexer = guess_lexer(inp, inencoding=inencoding)
        except ClassNotFound:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -S``
    S_opt = argns.S
    a_opt = argns.a
    if S_opt is not None:
        f_opt = argns.f
        if not f_opt:
            parser.print_help(sys.stderr)
            return 2
        # -S is incompatible with selecting a lexer or an input file.
        if argns.l or argns.INPUTFILE:
            parser.print_help(sys.stderr)
            return 2

        try:
            parsed_opts['style'] = S_opt
            fmter = get_formatter_by_name(f_opt, **parsed_opts)
        except ClassNotFound as err:
            print(err, file=sys.stderr)
            return 1

        print(fmter.get_style_defs(a_opt or ''))
        return 0

    # if no -S is given, -a is not allowed
    if argns.a is not None:
        parser.print_help(sys.stderr)
        return 2

    # parse -F options
    F_opts = _parse_filters(argns.F or [])

    # -x: allow custom (eXternal) lexers and formatters
    allow_custom_lexer_formatter = bool(argns.x)

    # select lexer
    lexer = None

    # given by name?
    lexername = argns.l
    if lexername:
        # custom lexer, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in lexername:
            try:
                filename = None
                name = None
                if ':' in lexername:
                    filename, name = lexername.rsplit(':', 1)

                    if '.py' in name:
                        # This can happen on Windows: If the lexername is
                        # C:\lexer.py -- return to normal load path in that case
                        name = None

                if filename and name:
                    lexer = load_lexer_from_file(filename, name, **parsed_opts)
                else:
                    lexer = load_lexer_from_file(lexername, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                lexer = get_lexer_by_name(lexername, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    # read input code
    code = None

    if argns.INPUTFILE:
        if argns.s:
            print('Error: -s option not usable when input file specified',
                  file=sys.stderr)
            return 2

        infn = argns.INPUTFILE
        try:
            with open(infn, 'rb') as infp:
                code = infp.read()
        except Exception as err:
            print('Error: cannot read infile:', err, file=sys.stderr)
            return 1
        if not inencoding:
            code, inencoding = guess_decode(code)

        # do we have to guess the lexer?
        if not lexer:
            try:
                lexer = get_lexer_for_filename(infn, code, **parsed_opts)
            except ClassNotFound as err:
                if argns.g:
                    # -g: fall back to content-based guessing.
                    try:
                        lexer = guess_lexer(code, **parsed_opts)
                    except ClassNotFound:
                        lexer = TextLexer(**parsed_opts)
                else:
                    print('Error:', err, file=sys.stderr)
                    return 1
            except OptionError as err:
                print('Error:', err, file=sys.stderr)
                return 1

    elif not argns.s:  # treat stdin as full file (-s support is later)
        # read code from terminal, always in binary mode since we want to
        # decode ourselves and be tolerant with it
        code = sys.stdin.buffer.read()  # use .buffer to get a binary stream
        if not inencoding:
            code, inencoding = guess_decode_from_terminal(code, sys.stdin)
            # else the lexer will do the decoding
        if not lexer:
            try:
                lexer = guess_lexer(code, **parsed_opts)
            except ClassNotFound:
                lexer = TextLexer(**parsed_opts)

    else:  # -s option needs a lexer with -l
        if not lexer:
            print('Error: when using -s a lexer has to be selected with -l',
                  file=sys.stderr)
            return 2

    # process filters
    for fname, fopts in F_opts:
        try:
            lexer.add_filter(fname, **fopts)
        except ClassNotFound as err:
            print('Error:', err, file=sys.stderr)
            return 1

    # select formatter
    outfn = argns.o
    fmter = argns.f
    if fmter:
        # custom formatter, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in fmter:
            try:
                filename = None
                name = None
                if ':' in fmter:
                    # Same logic as above for custom lexer
                    filename, name = fmter.rsplit(':', 1)
                    if '.py' in name:
                        name = None
                if filename and name:
                    fmter = load_formatter_from_file(filename, name,
                                                     **parsed_opts)
                else:
                    fmter = load_formatter_from_file(fmter, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                fmter = get_formatter_by_name(fmter, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    if outfn:
        if not fmter:
            # No -f given: derive the formatter from the output file name.
            try:
                fmter = get_formatter_for_filename(outfn, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1
        try:
            outfile = open(outfn, 'wb')
        except Exception as err:
            print('Error: cannot open outfile:', err, file=sys.stderr)
            return 1
    else:
        if not fmter:
            # Default to a terminal formatter; pick the 256-color variant
            # when $TERM advertises it.
            if '256' in os.environ.get('TERM', ''):
                fmter = Terminal256Formatter(**parsed_opts)
            else:
                fmter = TerminalFormatter(**parsed_opts)
        outfile = sys.stdout.buffer

    # determine output encoding if not explicitly selected
    if not outencoding:
        if outfn:
            # output file? use lexer encoding for now (can still be None)
            fmter.encoding = inencoding
        else:
            # else use terminal encoding
            fmter.encoding = terminal_encoding(sys.stdout)

    # provide coloring under Windows, if possible
    if not outfn and sys.platform in ('win32', 'cygwin') and \
            fmter.name in ('Terminal', 'Terminal256'):  # pragma: no cover
        # unfortunately colorama doesn't support binary streams on Py3
        outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
        fmter.encoding = None
        # NOTE(review): imports pip's vendored colorama — this file appears
        # to be the copy of Pygments bundled inside pip.
        try:
            import pip._vendor.colorama.initialise as colorama_initialise
        except ImportError:
            pass
        else:
            outfile = colorama_initialise.wrap_stream(
                outfile, convert=None, strip=None, autoreset=False, wrap=True)

    # When using the LaTeX formatter and the option `escapeinside` is
    # specified, we need a special lexer which collects escaped text
    # before running the chosen language lexer.
    escapeinside = parsed_opts.get('escapeinside', '')
    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
        left = escapeinside[0]
        right = escapeinside[1]
        lexer = LatexEmbeddedLexer(left, right, lexer)

    # ... and do it!
    if not argns.s:
        # process whole input as per normal...
        try:
            highlight(code, lexer, fmter, outfile)
        finally:
            # Only close files we opened ourselves, never stdout.
            if outfn:
                outfile.close()
        return 0
    else:
        # line by line processing of stdin (eg: for 'tail -f')...
        try:
            while 1:
                line = sys.stdin.buffer.readline()
                if not line:
                    break
                if not inencoding:
                    line = guess_decode_from_terminal(line, sys.stdin)[0]
                highlight(line, lexer, fmter, outfile)
                if hasattr(outfile, 'flush'):
                    outfile.flush()
            return 0
        except KeyboardInterrupt:  # pragma: no cover
            return 0
        finally:
            if outfn:
                outfile.close()
def from_path(
    cls,
    path: str,
    encoding: str = "utf-8",
    theme: Union[str, SyntaxTheme] = DEFAULT_THEME,
    dedent: bool = False,
    line_numbers: bool = False,
    line_range: Optional[Tuple[int, int]] = None,
    start_line: int = 1,
    highlight_lines: Optional[Set[int]] = None,
    code_width: Optional[int] = None,
    tab_size: int = 4,
    word_wrap: bool = False,
    background_color: Optional[str] = None,
    indent_guides: bool = False,
) -> "Syntax":
    """Construct a Syntax object from a file.

    Args:
        path (str): Path to file to highlight.
        encoding (str): Encoding of file. Defaults to "utf-8".
        theme (str, optional): Color theme, aka Pygments style (see
            https://pygments.org/docs/styles/#getting-a-list-of-available-styles).
            Defaults to the module default theme.
        dedent (bool, optional): Enable stripping of initial whitespace.
            Defaults to False.
        line_numbers (bool, optional): Enable rendering of line numbers.
            Defaults to False.
        line_range (Tuple[int, int], optional): If given should be a tuple
            of the start and end line to render.
        start_line (int, optional): Starting number for line numbers.
            Defaults to 1.
        highlight_lines (Set[int]): A set of line numbers to highlight.
        code_width: Width of code to render (not including line numbers),
            or ``None`` to use all available width.
        tab_size (int, optional): Size of tabs. Defaults to 4.
        word_wrap (bool, optional): Enable word wrapping of code.
        background_color (str, optional): Optional background color, or
            None to use theme color. Defaults to None.
        indent_guides (bool, optional): Show indent guides. Defaults to
            False.

    Returns:
        [Syntax]: A Syntax object that may be printed to the console
    """
    with open(path, "rt", encoding=encoding) as source_file:
        code = source_file.read()

    # Resolve a lexer name for the highlighter: extension first, then
    # fall back to Pygments' filename-based guessing.
    detected = None
    lexer_name = "default"
    extension = os.path.splitext(path)[1]
    if extension:
        try:
            detected = get_lexer_by_name(extension.lstrip(".").lower())
            lexer_name = detected.name
        except ClassNotFound:
            pass

    if detected is None:
        try:
            lexer_name = guess_lexer_for_filename(path, code).name
        except ClassNotFound:
            pass

    return cls(
        code,
        lexer_name,
        theme=theme,
        dedent=dedent,
        line_numbers=line_numbers,
        line_range=line_range,
        start_line=start_line,
        highlight_lines=highlight_lines,
        code_width=code_width,
        tab_size=tab_size,
        word_wrap=word_wrap,
        background_color=background_color,
        indent_guides=indent_guides,
    )
def highlight(self, code: str, line_range: Optional[Tuple[int, int]] = None) -> Text:
    """Highlight code and return a Text instance.

    Args:
        code (str): Code to highlight.
        line_range(Tuple[int, int], optional): Optional line range to highlight.

    Returns:
        Text: A text instance containing highlighted syntax.
    """
    base_style = self._get_base_style()
    justify: JustifyMethod = (
        "default" if base_style.transparent_background else "left")
    # Container for the styled output; wrapping disabled unless word_wrap
    # was requested on this Syntax instance.
    text = Text(
        justify=justify,
        style=base_style,
        tab_size=self.tab_size,
        no_wrap=not self.word_wrap,
    )
    # Hoist the theme lookup out of the token loop.
    _get_theme_style = self._theme.get_style_for_token
    try:
        lexer = get_lexer_by_name(
            self.lexer_name,
            stripnl=False,
            ensurenl=True,
            tabsize=self.tab_size,
        )
    except ClassNotFound:
        # Unknown lexer: emit the code unstyled.
        text.append(code)
    else:
        if line_range:
            # More complicated path to only stylize a portion of the code
            # This speeds up further operations as there are less spans to process
            line_start, line_end = line_range

            def line_tokenize() -> Iterable[Tuple[Any, str]]:
                """Split tokens to one per line."""
                for token_type, token in lexer.get_tokens(code):
                    # A token may span newlines; re-split so each yielded
                    # piece ends at (and includes) at most one "\n".
                    while token:
                        line_token, new_line, token = token.partition("\n")
                        yield token_type, line_token + new_line

            def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
                """Convert tokens to spans."""
                tokens = iter(line_tokenize())
                line_no = 0
                # line_range is 1-based; convert to a 0-based line count.
                _line_start = line_start - 1

                # Skip over tokens until line start
                while line_no < _line_start:
                    _token_type, token = next(tokens)
                    # Text before the range is emitted without a style.
                    yield (token, None)
                    if token.endswith("\n"):
                        line_no += 1
                # Generate spans until line end
                for token_type, token in tokens:
                    yield (token, _get_theme_style(token_type))
                    if token.endswith("\n"):
                        line_no += 1
                        if line_no >= line_end:
                            break

            text.append_tokens(tokens_to_spans())
        else:
            # Fast path: style every token in one pass.
            text.append_tokens(
                (token, _get_theme_style(token_type))
                for token_type, token in lexer.get_tokens(code))
        if self.background_color is not None:
            text.stylize(f"on {self.background_color}")
    return text