def text(self, clean=True):
    """
    Return the text of the PDF joined into one string.

    Options:

    :clean:
        When true (the default), the joined text is passed through
        ``utils.normalise_whitespace`` before being returned. When
        false, the joined text is returned untouched.
    """
    joined = ''.join(self)
    if not clean:
        return joined
    return utils.normalise_whitespace(joined)
def text(self, clean=True, stringify=True):
    """
    Returns the text of the PDF as a single string.

    Iterating over ``self`` must yield byte strings; they are joined
    with ``b''``.

    Options:

    :clean:
        Pass the result through ``utils.normalise_whitespace`` before
        returning it.
    :stringify:
        On Python 3, guess the encoding with ``chardet`` and decode the
        joined bytes to ``str``. When false (or on Python 2) the joined
        bytes are left undecoded.
    """
    as_text = b''.join(self)

    if stringify and sys.version_info.major > 2:
        # Modern Python; detect encoding and cast to string.
        # chardet reports an encoding of None for empty input and for
        # bytes it cannot classify; decoding with None raises TypeError,
        # so skip detection for empty input and guard the guess.
        guessed_encoding = chardet.detect(as_text) if as_text else {}
        encoding = guessed_encoding.get('encoding')
        if encoding:
            as_text = as_text.decode(encoding)
        else:
            # No usable guess: best-effort decode rather than crash.
            as_text = as_text.decode('utf-8', 'replace')

    if clean:
        return utils.normalise_whitespace(as_text)
    return as_text