Example #1
    @property
    def lexer(self) -> Optional[Lexer]:
        """The lexer for this syntax, or None if no lexer was found.

        Tries to find the lexer by name if a string was passed to the constructor.
        """

        if isinstance(self._lexer, Lexer):
            return self._lexer
        try:
            return get_lexer_by_name(
                self._lexer,
                stripnl=False,
                ensurenl=True,
                tabsize=self.tab_size,
            )
        except ClassNotFound:
            return None
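A minimal standalone sketch of the lookup this property performs (illustrative only; the helper name ``resolve_lexer`` and the example lexer names are assumptions, not part of the code above):

from typing import Optional

from pygments.lexer import Lexer
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound


def resolve_lexer(name: str, tab_size: int = 4) -> Optional[Lexer]:
    """Resolve a lexer name string the same way the property above does."""
    try:
        return get_lexer_by_name(name, stripnl=False, ensurenl=True, tabsize=tab_size)
    except ClassNotFound:
        return None


lexer = resolve_lexer("python")           # a PythonLexer instance
missing = resolve_lexer("no-such-lexer")  # None
print(lexer, missing)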
Example #2
def _print_help(what, name):
    try:
        if what == 'lexer':
            cls = get_lexer_by_name(name)
            print("Help on the %s lexer:" % cls.name)
            print(dedent(cls.__doc__))
        elif what == 'formatter':
            cls = find_formatter_class(name)
            print("Help on the %s formatter:" % cls.name)
            print(dedent(cls.__doc__))
        elif what == 'filter':
            cls = find_filter_class(name)
            print("Help on the %s filter:" % name)
            print(dedent(cls.__doc__))
        return 0
    except (AttributeError, ValueError):
        print("%s not found!" % what, file=sys.stderr)
        return 1
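For context, a short sketch of what the ``lexer`` branch of ``_print_help`` does (assumes only that Pygments is installed; the printed output shown in the comment is approximate):

from textwrap import dedent

from pygments.lexers import get_lexer_by_name

lexer = get_lexer_by_name("python")
print("Help on the %s lexer:" % lexer.name)  # Help on the Python lexer:
print(dedent(lexer.__doc__))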
Example #3
    @classmethod
    def guess_lexer(cls, path: str, code: Optional[str] = None) -> str:
        """Guess the alias of the Pygments lexer to use based on a path and an optional string of code.
        If code is supplied, it will use a combination of the code and the filename to determine the
        best lexer to use. For example, if the file is ``index.html`` and the file contains Django
        templating syntax, then "html+django" will be returned. If the file is ``index.html``, and no
        templating language is used, the "html" lexer will be used. If no string of code
        is supplied, the lexer will be chosen based on the file extension.

        Args:
             path (AnyStr): The path to the file containing the code you wish to know the lexer for.
             code (str, optional): Optional string of code. If supplied, it is used together with the
                path to guess the lexer; otherwise the lexer is chosen from the file extension alone.

        Returns:
            str: The alias of the Pygments lexer that best matches the supplied path/code, or
                "default" if no lexer could be found.
        """
        lexer: Optional[Lexer] = None
        lexer_name = "default"
        if code:
            try:
                lexer = guess_lexer_for_filename(path, code)
            except ClassNotFound:
                pass

        if not lexer:
            try:
                _, ext = os.path.splitext(path)
                if ext:
                    extension = ext.lstrip(".").lower()
                    lexer = get_lexer_by_name(extension)
            except ClassNotFound:
                pass

        if lexer:
            if lexer.aliases:
                lexer_name = lexer.aliases[0]
            else:
                lexer_name = lexer.name

        return lexer_name
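A hypothetical call illustrating the behaviour the docstring describes (assumes this classmethod lives on the ``Syntax`` class from the rich library, as the other examples suggest; file names and the template snippet are invented, and the results in the comments follow the docstring):

from rich.syntax import Syntax

django_code = "{% extends 'base.html' %}\n{% block content %}Hello{% endblock %}\n"

print(Syntax.guess_lexer("index.html", code=django_code))  # "html+django" (code + filename)
print(Syntax.guess_lexer("index.html"))                    # "html" (extension only)
print(Syntax.guess_lexer("notes.nosuchext"))               # "default" (no lexer found)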
Example #4
def main_inner(parser, argns):
    if argns.help:
        parser.print_help()
        return 0

    if argns.V:
        print('Pygments version %s, (c) 2006-2022 by Georg Brandl, Matthäus '
              'Chajdas and contributors.' % __version__)
        return 0

    def is_only_option(opt):
        return not any(v for (k, v) in vars(argns).items() if k != opt)

    # handle ``pygmentize -L``
    if argns.L is not None:
        arg_set = set()
        for k, v in vars(argns).items():
            if v:
                arg_set.add(k)

        arg_set.discard('L')
        arg_set.discard('json')

        if arg_set:
            parser.print_help(sys.stderr)
            return 2

        # print version
        if not argns.json:
            main(['', '-V'])
        allowed_types = {'lexer', 'formatter', 'filter', 'style'}
        largs = [arg.rstrip('s') for arg in argns.L]
        if any(arg not in allowed_types for arg in largs):
            parser.print_help(sys.stderr)
            return 0
        if not largs:
            largs = allowed_types
        if not argns.json:
            for arg in largs:
                _print_list(arg)
        else:
            _print_list_as_json(largs)
        return 0

    # handle ``pygmentize -H``
    if argns.H:
        if not is_only_option('H'):
            parser.print_help(sys.stderr)
            return 2
        what, name = argns.H
        if what not in ('lexer', 'formatter', 'filter'):
            parser.print_help(sys.stderr)
            return 2
        return _print_help(what, name)

    # parse -O options
    parsed_opts = _parse_options(argns.O or [])

    # parse -P options
    for p_opt in argns.P or []:
        try:
            name, value = p_opt.split('=', 1)
        except ValueError:
            parsed_opts[p_opt] = True
        else:
            parsed_opts[name] = value

    # encodings
    inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
    outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))

    # handle ``pygmentize -N``
    if argns.N:
        lexer = find_lexer_class_for_filename(argns.N)
        if lexer is None:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -C``
    if argns.C:
        inp = sys.stdin.buffer.read()
        try:
            lexer = guess_lexer(inp, inencoding=inencoding)
        except ClassNotFound:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -S``
    S_opt = argns.S
    a_opt = argns.a
    if S_opt is not None:
        f_opt = argns.f
        if not f_opt:
            parser.print_help(sys.stderr)
            return 2
        if argns.l or argns.INPUTFILE:
            parser.print_help(sys.stderr)
            return 2

        try:
            parsed_opts['style'] = S_opt
            fmter = get_formatter_by_name(f_opt, **parsed_opts)
        except ClassNotFound as err:
            print(err, file=sys.stderr)
            return 1

        print(fmter.get_style_defs(a_opt or ''))
        return 0

    # if no -S is given, -a is not allowed
    if argns.a is not None:
        parser.print_help(sys.stderr)
        return 2

    # parse -F options
    F_opts = _parse_filters(argns.F or [])

    # -x: allow custom (eXternal) lexers and formatters
    allow_custom_lexer_formatter = bool(argns.x)

    # select lexer
    lexer = None

    # given by name?
    lexername = argns.l
    if lexername:
        # custom lexer, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in lexername:
            try:
                filename = None
                name = None
                if ':' in lexername:
                    filename, name = lexername.rsplit(':', 1)

                    if '.py' in name:
                        # This can happen on Windows: If the lexername is
                        # C:\lexer.py -- return to normal load path in that case
                        name = None

                if filename and name:
                    lexer = load_lexer_from_file(filename, name, **parsed_opts)
                else:
                    lexer = load_lexer_from_file(lexername, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                lexer = get_lexer_by_name(lexername, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    # read input code
    code = None

    if argns.INPUTFILE:
        if argns.s:
            print('Error: -s option not usable when input file specified',
                  file=sys.stderr)
            return 2

        infn = argns.INPUTFILE
        try:
            with open(infn, 'rb') as infp:
                code = infp.read()
        except Exception as err:
            print('Error: cannot read infile:', err, file=sys.stderr)
            return 1
        if not inencoding:
            code, inencoding = guess_decode(code)

        # do we have to guess the lexer?
        if not lexer:
            try:
                lexer = get_lexer_for_filename(infn, code, **parsed_opts)
            except ClassNotFound as err:
                if argns.g:
                    try:
                        lexer = guess_lexer(code, **parsed_opts)
                    except ClassNotFound:
                        lexer = TextLexer(**parsed_opts)
                else:
                    print('Error:', err, file=sys.stderr)
                    return 1
            except OptionError as err:
                print('Error:', err, file=sys.stderr)
                return 1

    elif not argns.s:  # treat stdin as full file (-s support is later)
        # read code from terminal, always in binary mode since we want to
        # decode ourselves and be tolerant with it
        code = sys.stdin.buffer.read()  # use .buffer to get a binary stream
        if not inencoding:
            code, inencoding = guess_decode_from_terminal(code, sys.stdin)
            # else the lexer will do the decoding
        if not lexer:
            try:
                lexer = guess_lexer(code, **parsed_opts)
            except ClassNotFound:
                lexer = TextLexer(**parsed_opts)

    else:  # -s option needs a lexer with -l
        if not lexer:
            print('Error: when using -s a lexer has to be selected with -l',
                  file=sys.stderr)
            return 2

    # process filters
    for fname, fopts in F_opts:
        try:
            lexer.add_filter(fname, **fopts)
        except ClassNotFound as err:
            print('Error:', err, file=sys.stderr)
            return 1

    # select formatter
    outfn = argns.o
    fmter = argns.f
    if fmter:
        # custom formatter, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in fmter:
            try:
                filename = None
                name = None
                if ':' in fmter:
                    # Same logic as above for custom lexer
                    filename, name = fmter.rsplit(':', 1)

                    if '.py' in name:
                        name = None

                if filename and name:
                    fmter = load_formatter_from_file(filename, name,
                                                     **parsed_opts)
                else:
                    fmter = load_formatter_from_file(fmter, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                fmter = get_formatter_by_name(fmter, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    if outfn:
        if not fmter:
            try:
                fmter = get_formatter_for_filename(outfn, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1
        try:
            outfile = open(outfn, 'wb')
        except Exception as err:
            print('Error: cannot open outfile:', err, file=sys.stderr)
            return 1
    else:
        if not fmter:
            if '256' in os.environ.get('TERM', ''):
                fmter = Terminal256Formatter(**parsed_opts)
            else:
                fmter = TerminalFormatter(**parsed_opts)
        outfile = sys.stdout.buffer

    # determine output encoding if not explicitly selected
    if not outencoding:
        if outfn:
            # output file? use lexer encoding for now (can still be None)
            fmter.encoding = inencoding
        else:
            # else use terminal encoding
            fmter.encoding = terminal_encoding(sys.stdout)

    # provide coloring under Windows, if possible
    if not outfn and sys.platform in ('win32', 'cygwin') and \
       fmter.name in ('Terminal', 'Terminal256'):  # pragma: no cover
        # unfortunately colorama doesn't support binary streams on Py3
        outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
        fmter.encoding = None
        try:
            import pip._vendor.colorama.initialise as colorama_initialise
        except ImportError:
            pass
        else:
            outfile = colorama_initialise.wrap_stream(outfile,
                                                      convert=None,
                                                      strip=None,
                                                      autoreset=False,
                                                      wrap=True)

    # When using the LaTeX formatter and the option `escapeinside` is
    # specified, we need a special lexer which collects escaped text
    # before running the chosen language lexer.
    escapeinside = parsed_opts.get('escapeinside', '')
    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
        left = escapeinside[0]
        right = escapeinside[1]
        lexer = LatexEmbeddedLexer(left, right, lexer)

    # ... and do it!
    if not argns.s:
        # process whole input as per normal...
        try:
            highlight(code, lexer, fmter, outfile)
        finally:
            if outfn:
                outfile.close()
        return 0
    else:
        # line by line processing of stdin (eg: for 'tail -f')...
        try:
            while 1:
                line = sys.stdin.buffer.readline()
                if not line:
                    break
                if not inencoding:
                    line = guess_decode_from_terminal(line, sys.stdin)[0]
                highlight(line, lexer, fmter, outfile)
                if hasattr(outfile, 'flush'):
                    outfile.flush()
            return 0
        except KeyboardInterrupt:  # pragma: no cover
            return 0
        finally:
            if outfn:
                outfile.close()
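The non-streaming path of ``main_inner`` ultimately boils down to a handful of library calls: pick a lexer (by name, by guessing, or falling back to plain text), pick a formatter, and call ``highlight``. A condensed sketch of that flow (illustrative only; it collapses the ``-l``/``-g`` option handling into one chain, uses a hard-coded code string, and omits the error reporting and output-file handling of the real function):

import sys

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import TextLexer, get_lexer_by_name, guess_lexer
from pygments.util import ClassNotFound

code = 'print("hello")\n'

try:
    lexer = get_lexer_by_name("python")   # roughly the -l path
except ClassNotFound:
    try:
        lexer = guess_lexer(code)         # roughly the -g path
    except ClassNotFound:
        lexer = TextLexer()               # last-resort fallback

# Equivalent of the "no outfile" branch: format straight to the terminal.
highlight(code, lexer, TerminalFormatter(), sys.stdout)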
Example #5
    @classmethod
    def from_path(
        cls,
        path: str,
        encoding: str = "utf-8",
        theme: Union[str, SyntaxTheme] = DEFAULT_THEME,
        dedent: bool = False,
        line_numbers: bool = False,
        line_range: Optional[Tuple[int, int]] = None,
        start_line: int = 1,
        highlight_lines: Optional[Set[int]] = None,
        code_width: Optional[int] = None,
        tab_size: int = 4,
        word_wrap: bool = False,
        background_color: Optional[str] = None,
        indent_guides: bool = False,
    ) -> "Syntax":
        """Construct a Syntax object from a file.

        Args:
            path (str): Path to file to highlight.
            encoding (str): Encoding of file.
            theme (str, optional): Color theme, aka Pygments style (see https://pygments.org/docs/styles/#getting-a-list-of-available-styles). Defaults to "emacs".
            dedent (bool, optional): Enable stripping of initial whitespace. Defaults to False.
            line_numbers (bool, optional): Enable rendering of line numbers. Defaults to False.
            start_line (int, optional): Starting number for line numbers. Defaults to 1.
            line_range (Tuple[int, int], optional): If given, should be a tuple of the start and end line to render.
            highlight_lines (Set[int]): A set of line numbers to highlight.
            code_width (int, optional): Width of code to render (not including line numbers), or ``None`` to use all available width.
            tab_size (int, optional): Size of tabs. Defaults to 4.
            word_wrap (bool, optional): Enable word wrapping of code.
            background_color (str, optional): Optional background color, or None to use theme color. Defaults to None.
            indent_guides (bool, optional): Show indent guides. Defaults to False.

        Returns:
            Syntax: A Syntax object that may be printed to the console.
        """
        with open(path, "rt", encoding=encoding) as code_file:
            code = code_file.read()

        lexer = None
        lexer_name = "default"
        try:
            _, ext = os.path.splitext(path)
            if ext:
                extension = ext.lstrip(".").lower()
                lexer = get_lexer_by_name(extension)
                lexer_name = lexer.name
        except ClassNotFound:
            pass

        if lexer is None:
            try:
                lexer_name = guess_lexer_for_filename(path, code).name
            except ClassNotFound:
                pass

        return cls(
            code,
            lexer_name,
            theme=theme,
            dedent=dedent,
            line_numbers=line_numbers,
            line_range=line_range,
            start_line=start_line,
            highlight_lines=highlight_lines,
            code_width=code_width,
            tab_size=tab_size,
            word_wrap=word_wrap,
            background_color=background_color,
            indent_guides=indent_guides,
        )
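A brief usage sketch (assumes these methods belong to the ``Syntax`` renderable of the rich library and that a file named ``example.py`` exists; both are assumptions for illustration):

from rich.console import Console
from rich.syntax import Syntax

syntax = Syntax.from_path("example.py", line_numbers=True, indent_guides=True)
Console().print(syntax)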
Example #6
    def highlight(self,
                  code: str,
                  line_range: Optional[Tuple[int, int]] = None) -> Text:
        """Highlight code and return a Text instance.

        Args:
            code (str): Code to highlight.
            line_range (Tuple[int, int], optional): Optional line range to highlight.

        Returns:
            Text: A text instance containing highlighted syntax.
        """

        base_style = self._get_base_style()
        justify: JustifyMethod = (
            "default" if base_style.transparent_background else "left")

        text = Text(
            justify=justify,
            style=base_style,
            tab_size=self.tab_size,
            no_wrap=not self.word_wrap,
        )
        _get_theme_style = self._theme.get_style_for_token
        try:
            lexer = get_lexer_by_name(
                self.lexer_name,
                stripnl=False,
                ensurenl=True,
                tabsize=self.tab_size,
            )
        except ClassNotFound:
            text.append(code)
        else:
            if line_range:
                # More complicated path to only stylize a portion of the code
                # This speeds up further operations as there are fewer spans to process.
                line_start, line_end = line_range

                def line_tokenize() -> Iterable[Tuple[Any, str]]:
                    """Split tokens to one per line."""
                    for token_type, token in lexer.get_tokens(code):
                        while token:
                            line_token, new_line, token = token.partition("\n")
                            yield token_type, line_token + new_line

                def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
                    """Convert tokens to spans."""
                    tokens = iter(line_tokenize())
                    line_no = 0
                    _line_start = line_start - 1

                    # Skip over tokens until line start
                    while line_no < _line_start:
                        _token_type, token = next(tokens)
                        yield (token, None)
                        if token.endswith("\n"):
                            line_no += 1
                    # Generate spans until line end
                    for token_type, token in tokens:
                        yield (token, _get_theme_style(token_type))
                        if token.endswith("\n"):
                            line_no += 1
                            if line_no >= line_end:
                                break

                text.append_tokens(tokens_to_spans())

            else:
                text.append_tokens(
                    (token, _get_theme_style(token_type))
                    for token_type, token in lexer.get_tokens(code))
            if self.background_color is not None:
                text.stylize(f"on {self.background_color}")
        return text
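And a sketch of calling ``highlight`` directly to obtain a styled Text instance (illustrative; the code string, lexer name, and theme are assumptions, and the ``Syntax`` import again assumes the rich library):

from rich.console import Console
from rich.syntax import Syntax

code = "def add(a, b):\n    return a + b\n"
syntax = Syntax(code, "python", theme="monokai")
text = syntax.highlight(code, line_range=(1, 2))  # only style the first two lines
Console().print(text)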