Example 1
    def _format_lines(self, tokensource):
        buf = []
        for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
            if ttype in Token.Sql:
                # flush buffered non-SQL tokens through the standard formatter
                for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                    yield t, v
                buf = []

                if ttype is Token.Sql:
                    yield 1, "<div class='show_sql'>%s</div>" % \
                        re.sub(
                            r'(?:{stop}|\n+)$', '', filters.html_escape(value))
                elif ttype is Token.Sql.Link:
                    yield 1, "<a href='#' class='sql_link'>sql</a>"
                elif ttype is Token.Sql.Popup:
                    yield 1, "<div class='popup_sql'>%s</div>" % \
                        re.sub(
                            r'(?:{stop}|\n+)$', '', filters.html_escape(value))
            else:
                buf.append((ttype, value))

        for t, v in _strip_trailing_whitespace(
                HtmlFormatter._format_lines(self, iter(buf))):
            yield t, v
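The `StripDocTestFilter` applied above is project-specific and not part of Pygments itself. A minimal sketch of a filter of that shape, built on the standard Pygments `Filter` API (the class body and the stripping rule are assumptions, not the project's actual code):

    import re

    from pygments.filter import Filter
    from pygments.token import Token

    class StripDocTestFilter(Filter):
        """Hypothetical sketch: drop doctest directive comments from the stream."""

        def filter(self, lexer, stream):
            for ttype, value in stream:
                # assumed rule: skip comments such as "# doctest: +SKIP"
                if ttype in Token.Comment and re.match(r'#\s*doctest:', value):
                    continue
                yield ttype, value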
Example 2
    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted, and apply registered filters.
        """
        if not isinstance(text, str):
            if self.encoding == 'guess':
                text, _ = guess_decode(text)
            elif self.encoding == 'chardet':
                try:
                    import chardet
                except ImportError as e:
                    raise ImportError(
                        'To enable chardet encoding guessing, '
                        'please install the chardet library '
                        'from http://chardet.feedparser.org/') from e
                # check for BOM first
                decoded = None
                for bom, encoding in _encoding_map:
                    if text.startswith(bom):
                        decoded = text[len(bom):].decode(encoding, 'replace')
                        break
                # no BOM found, so use chardet
                if decoded is None:
                    enc = chardet.detect(text[:1024])  # Guess using first 1KB
                    decoded = text.decode(
                        enc.get('encoding') or 'utf-8', 'replace')
                text = decoded
            else:
                text = text.decode(self.encoding)
                if text.startswith('\ufeff'):
                    text = text[len('\ufeff'):]
        else:
            if text.startswith('\ufeff'):
                text = text[len('\ufeff'):]

        # text now *is* a unicode string
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip('\n')
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith('\n'):
            text += '\n'

        def streamer():
            for _, t, v in self.get_tokens_unprocessed(text):
                yield t, v

        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream
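For reference, a small usage sketch showing how this method is typically driven from client code, using a stock lexer and a stock filter (the sample source string is illustrative):

    from pygments.lexers import PythonLexer
    from pygments.filters import KeywordCaseFilter

    lexer = PythonLexer()
    # registered filters run inside get_tokens() unless unfiltered=True
    lexer.add_filter(KeywordCaseFilter(case='upper'))

    for ttype, value in lexer.get_tokens('def f():\n    return 1\n'):
        print(ttype, repr(value))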
Example 3
    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted, and apply registered filters.
        """
        if not isinstance(text, unicode):
            if self.encoding == "guess":
                try:
                    text = text.decode("utf-8")
                    if text.startswith(u"\ufeff"):
                        text = text[len(u"\ufeff") :]
                except UnicodeDecodeError:
                    text = text.decode("latin1")
            elif self.encoding == "chardet":
                try:
                    import chardet
                except ImportError:
                    raise ImportError(
                        "To enable chardet encoding guessing, "
                        "please install the chardet library "
                        "from http://chardet.feedparser.org/"
                    )
                # check for BOM first
                decoded = None
                for bom, encoding in _encoding_map:
                    if text.startswith(bom):
                        decoded = unicode(text[len(bom) :], encoding, errors="replace")
                        break
                # no BOM found, so use chardet
                if decoded is None:
                    enc = chardet.detect(text[:1024])  # Guess using first 1KB
                    decoded = unicode(text, enc.get("encoding") or "utf-8", errors="replace")
                text = decoded
            else:
                text = text.decode(self.encoding)
        # text now *is* a unicode string
        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip("\n")
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith("\n"):
            text += "\n"

        def streamer():
            for i, t, v in self.get_tokens_unprocessed(text):
                yield t, v

        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream
Example 4
    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted, and apply registered filters.
        """
        if not isinstance(text, text_type):
            if self.encoding == 'guess':
                text, _ = guess_decode(text)
            elif self.encoding == 'chardet':
                try:
                    import chardet
                except ImportError:
                    raise ImportError('To enable chardet encoding guessing, '
                                      'please install the chardet library '
                                      'from http://chardet.feedparser.org/')
                # check for BOM first
                decoded = None
                for bom, encoding in _encoding_map:
                    if text.startswith(bom):
                        decoded = text[len(bom):].decode(encoding, 'replace')
                        break
                # no BOM found, so use chardet
                if decoded is None:
                    enc = chardet.detect(text[:1024])  # Guess using first 1KB
                    decoded = text.decode(enc.get('encoding') or 'utf-8',
                                          'replace')
                text = decoded
            else:
                text = text.decode(self.encoding)
                if text.startswith(u'\ufeff'):
                    text = text[len(u'\ufeff'):]
        else:
            if text.startswith(u'\ufeff'):
                text = text[len(u'\ufeff'):]

        # text now *is* a unicode string
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip('\n')
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith('\n'):
            text += '\n'

        def streamer():
            for _, t, v in self.get_tokens_unprocessed(text):
                yield t, v
        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream
Example 5
    def _filter_tokens(self, tokensource):
        for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
            if ttype in Token.Sql:
                if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open:
                    yield Token.Literal, re.sub(r"{stop}", "", value)
                else:
                    continue
            else:
                yield ttype, value
Example 6
    def _filter_tokens(self, tokensource):
        for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
            if ttype in Token.Sql:
                if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open:
                    yield Token.Literal, re.sub(r'{stop}', '', value)
                else:
                    continue
            else:
                yield ttype, value
Example 7
def _pygments_get_tokens_postprocess(self, text, unfiltered=False):
    def streamer():
        for _, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
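Both `_filter_tokens` variants and the postprocessing helper above lean on `pygments.filter.apply_filters`, which simply wraps a (tokentype, value) stream in a chain of filters. A minimal standalone sketch (the highlighted name is illustrative):

    from pygments.filter import apply_filters
    from pygments.filters import NameHighlightFilter
    from pygments.lexers import PythonLexer

    pairs = PythonLexer().get_tokens('total = compute()\n')
    # promote occurrences of "compute" to Name.Function in the output stream
    for ttype, value in apply_filters(pairs, [NameHighlightFilter(names=['compute'])]):
        print(ttype, repr(value))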
Example 8
    def _format_lines(self, tokensource):
        detect_annotations = DetectAnnotationsFilter()

        for ttype, value in super()._format_lines(
            apply_filters(tokensource, [detect_annotations])
        ):
            yield ttype, value

        # runs once the wrapped stream is exhausted, so the flag is final here
        self.annotated = detect_annotations.annotated
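`DetectAnnotationsFilter` is not a stock Pygments filter. A hedged sketch of a stateful pass-through filter of the shape this snippet expects (the detection rule is an assumption):

    from pygments.filter import Filter
    from pygments.token import Token

    class DetectAnnotationsFilter(Filter):
        """Hypothetical sketch: forward tokens unchanged while recording
        whether anything annotation-like was seen."""

        def __init__(self, **options):
            super().__init__(**options)
            self.annotated = False

        def filter(self, lexer, stream):
            for ttype, value in stream:
                # assumed rule: a "# type:" comment counts as an annotation
                if not self.annotated and ttype in Token.Comment and 'type:' in value:
                    self.annotated = True
                yield ttype, value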
Example 9
    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted, and apply registered filters.
        """
        if not isinstance(text, unicode):
            if self.encoding == 'guess':
                try:
                    text = text.decode('utf-8')
                    if text.startswith(u'\ufeff'):
                        text = text[len(u'\ufeff'):]
                except UnicodeDecodeError:
                    text = text.decode('latin1')
            elif self.encoding == 'chardet':
                try:
                    import chardet
                except ImportError:
                    raise ImportError('To enable chardet encoding guessing, '
                                      'please install the chardet library '
                                      'from http://chardet.feedparser.org/')
                enc = chardet.detect(text)
                text = text.decode(enc['encoding'])
            else:
                text = text.decode(self.encoding)
        # text now *is* a unicode string
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip('\n')
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith('\n'):
            text += '\n'

        def streamer():
            for i, t, v in self.get_tokens_unprocessed(text):
                yield t, v

        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream
Example 10
    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted, and apply registered filters.
        """
        if not isinstance(text, unicode):
            if self.encoding == 'guess':
                try:
                    text = text.decode('utf-8')
                    if text.startswith(u'\ufeff'):
                        text = text[len(u'\ufeff'):]
                except UnicodeDecodeError:
                    # fall back to latin1; retrying utf-8 would just re-raise
                    text = text.decode('latin1')
            elif self.encoding == 'chardet':
                try:
                    import chardet
                except ImportError:
                    raise ImportError('To enable chardet encoding guessing, '
                                      'please install the chardet library '
                                      'from http://chardet.feedparser.org/')
                enc = chardet.detect(text)
                text = text.decode(enc['encoding'])
            else:
                text = text.decode(self.encoding)
        # text now *is* a unicode string
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip('\n')
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith('\n'):
            text += '\n'

        def streamer():
            for i, t, v in self.get_tokens_unprocessed(text):
                yield t, v
        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream
Example 11
    def _format_lines(self, tokensource):
        buf = []
        for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
            if ttype in Token.Sql:
                for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                    yield t, v
                buf = []

                if ttype is Token.Sql:
                    yield 1, "<div class='show_sql'>%s</div>" % re.sub(r"(?:[{stop}|\n]*)$", "", value)
                elif ttype is Token.Sql.Link:
                    yield 1, "<a href='#' class='sql_link'>sql</a>"
                elif ttype is Token.Sql.Popup:
                    yield 1, "<div class='popup_sql'>%s</div>" % re.sub(r"(?:[{stop}|\n]*)$", "", value)
            else:
                buf.append((ttype, value))

        for t, v in _strip_trailing_whitespace(HtmlFormatter._format_lines(self, iter(buf))):
            yield t, v
Example 12
    def _format_lines(self, tokensource):
        sql_lexer = SqlLexer()

        formatter = HtmlFormatter(nowrap=True)
        buf = []
        for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
            if ttype in Token.Sql:
                for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                    yield t, v
                buf = []

                if ttype is Token.Sql:
                    yield (
                        1,
                        "<div class='show_sql'>%s</div>"
                        % pygments.highlight(
                            re.sub(r"(?:{stop}|\n+)\s*$", "", value),
                            sql_lexer,
                            formatter,
                        ),
                    )
                elif ttype is Token.Sql.Link:
                    yield 1, "<a href='#' class='sql_link'>sql</a>"
                elif ttype is Token.Sql.Popup:
                    yield (
                        1,
                        "<div class='popup_sql'>%s</div>"
                        % pygments.highlight(
                            re.sub(r"(?:{stop}|\n+)$", "", value),
                            sql_lexer,
                            formatter,
                        ),
                    )
            else:
                buf.append((ttype, value))

        for t, v in _strip_trailing_whitespace(
            HtmlFormatter._format_lines(self, iter(buf))
        ):
            yield t, v
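In isolation, the `pygments.highlight` call the last example delegates to looks like this (the sample statement is illustrative):

    import pygments
    from pygments.lexers import SqlLexer
    from pygments.formatters import HtmlFormatter

    # nowrap=True emits only the highlighted spans, without the outer <div>/<pre>
    html = pygments.highlight('SELECT user_id FROM users', SqlLexer(), HtmlFormatter(nowrap=True))
    print(html)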