def _update(self, data, mime_type=None):
    """Store new content and recompute the values derived from it.

    Nothing happens when the MD5 digest of ``data`` equals ``self.digest``
    (a ``mime_type`` passed along with unchanged content is ignored too).
    Otherwise ``digest``, ``data`` and ``size`` are replaced and ``pdf``,
    ``text``, ``extra_metadata``, ``language`` and ``page_num`` are
    refreshed through ``converter``.

    A ``ConversionError`` is never propagated: the traceback is printed and
    the affected attribute falls back to an empty value (``None`` for
    ``pdf``, ``u""`` for ``text``, ``{}`` for ``extra_metadata``).
    ``language`` is only reassigned when some text was extracted.

    :param data: the raw content; it is fed to ``hashlib.md5`` and ``len``.
    :param mime_type: MIME type of ``data``; when falsy the current
        ``self.mime_type`` is kept.
    """
    new_digest = hashlib.md5(data).hexdigest()
    if new_digest == self.digest:
        # Same content as what is already stored: nothing to recompute.
        return

    self.digest = new_digest
    self.data = data
    self.size = len(data)
    if mime_type:
        self.mime_type = mime_type
    # TODO Else: use a sniffer

    # TODO: This should be asynchronous
    if self.mime_type == "application/pdf":
        # Already a PDF: the content is its own rendering.
        self.pdf = self.data
    else:
        try:
            self.pdf = converter.to_pdf(self.digest, self.data, self.mime_type)
        except ConversionError:
            # Do not keep the rendering of the previous content around: it
            # would no longer match ``data``/``digest``. This mirrors the
            # fallbacks used for ``text`` and ``extra_metadata`` below.
            self.pdf = None
            traceback.print_exc()

    try:
        self.text = converter.to_text(self.digest, self.data, self.mime_type)
    except ConversionError:
        self.text = u""
        traceback.print_exc()

    try:
        self.extra_metadata = converter.get_metadata(
            self.digest, self.data, self.mime_type
        )
    except ConversionError:
        self.extra_metadata = {}
        traceback.print_exc()

    if self.text:
        self.language = guessLanguageName(self.text)
    # NOTE(review): with no extracted text ``language`` keeps whatever value
    # it had before -- confirm whether it should be cleared as well.

    # Fall back to a single page when the metadata has no "PDF:Pages" entry.
    self.page_num = self.extra_metadata.get("PDF:Pages", 1)
def test_image_to_pdf(self):
    # The content of "picture.jpg", converted as image/jpeg, must be
    # identified as a PDF by the MIME sniffer.
    source_mime = "image/jpeg"
    jpeg_bytes = self.read_file("picture.jpg")
    converted = converter.to_pdf("", jpeg_bytes, source_mime)
    detected_mime = mime_sniffer.from_buffer(converted)
    eq_("application/pdf", detected_mime)
def XXXtest_word_to_pdf(self):
    # NOTE(review): the "XXX" prefix presumably keeps test runners from
    # collecting this test -- confirm before renaming.
    #
    # The content of "test.doc", converted as application/msword, must be
    # identified as a PDF by the MIME sniffer.
    source_mime = "application/msword"
    doc_bytes = self.read_file("test.doc")
    converted = converter.to_pdf("", doc_bytes, source_mime)
    detected_mime = mime_sniffer.from_buffer(converted)
    eq_("application/pdf", detected_mime)
def XXXtest_odt_to_pdf(self):
    # NOTE(review): the "XXX" prefix presumably keeps test runners from
    # collecting this test -- confirm before renaming.
    #
    # The content of "test.odt", converted as an OpenDocument text file,
    # must be identified as a PDF by the MIME sniffer.
    source_mime = "application/vnd.oasis.opendocument.text"
    odt_bytes = self.read_file("test.odt")
    converted = converter.to_pdf("", odt_bytes, source_mime)
    detected_mime = mime_sniffer.from_buffer(converted)
    eq_("application/pdf", detected_mime)