Beispiel #1
0
    def text(self, clean=True):
        """
        Concatenate the strings yielded by iterating over this object and
        return the text of the PDF as one string.

        Options:

          :clean:
            When true (the default), the joined text is passed through
            ``utils.normalise_whitespace`` before being returned, which
            removes misc cruft such as lots of whitespace. When false,
            the joined text is returned untouched.
        """
        joined = ''.join(self)
        if not clean:
            return joined
        return utils.normalise_whitespace(joined)
Beispiel #2
0
    def text(self, clean=True, stringify=True):
        """
        Returns the text of the PDF as a single string.

        The byte chunks yielded by iterating over this object are joined
        together first; the options below control post-processing.

        Options:

          :clean:
            Removes misc cruft, like lots of whitespace, by passing the
            result through ``utils.normalise_whitespace``.

          :stringify:
            On Python 3 and later, guess the encoding of the joined bytes
            with ``chardet`` and decode them to ``str``. If no encoding
            can be guessed (for example when there is no text at all),
            the bytes are decoded as UTF-8 with undecodable bytes
            replaced. On Python 2, or when this option is false, the
            joined bytes are left undecoded.
        """
        as_text = b''.join(self)
        if stringify and sys.version_info.major > 2:
            # Modern Python; detect encoding and cast to string
            guessed_encoding = chardet.detect(as_text)['encoding']
            if guessed_encoding is None:
                # chardet could not guess (e.g. empty input); passing None
                # to decode() would raise TypeError, so fall back to UTF-8.
                as_text = as_text.decode('utf-8', errors='replace')
            else:
                as_text = as_text.decode(guessed_encoding)
        if clean:
            return utils.normalise_whitespace(as_text)
        else:
            return as_text