Example #1
0
  def _update(self, data, mime_type=None):
    """Store new content and refresh every attribute derived from it.

    Nothing happens when the MD5 digest of `data` equals `self.digest`.
    Otherwise the digest, data and size are replaced, and the PDF rendition,
    extracted text, extra metadata, language and page count are recomputed.

    Args:
      data: Raw content. It is hashed with MD5 and handed to the converter.
      mime_type: Optional MIME type of `data`. When falsy, the current
        `self.mime_type` is left untouched.

    A ConversionError raised by the converter is never propagated: the
    traceback is printed and the affected attribute gets a fallback value
    (`None` for the PDF, `u""` for the text, `{}` for the metadata).
    """
    new_digest = hashlib.md5(data).hexdigest()
    # NOTE(review): this early return also skips a `mime_type` argument that
    # differs from the stored one -- confirm that this is intended.
    if new_digest == self.digest:
      return

    self.digest = new_digest
    self.data = data
    self.size = len(data)
    if mime_type:
      self.mime_type = mime_type
    # TODO Else: use a sniffer

    # TODO: This should be asynchronous
    if self.mime_type == "application/pdf":
      # The content already is a PDF; no conversion needed.
      self.pdf = self.data
    else:
      try:
        self.pdf = converter.to_pdf(self.digest, self.data, self.mime_type)
      except ConversionError:
        # Drop the previous rendition: keeping it would pair the new digest
        # and data with a PDF generated from the old content.
        self.pdf = None
        traceback.print_exc()

    try:
      self.text = converter.to_text(self.digest, self.data, self.mime_type)
    except ConversionError:
      self.text = u""
      traceback.print_exc()

    try:
      self.extra_metadata = converter.get_metadata(self.digest, self.data, self.mime_type)
    except ConversionError:
      self.extra_metadata = {}
      traceback.print_exc()

    # NOTE(review): when no text could be extracted, `self.language` keeps
    # whatever value it had before -- confirm whether it should be reset.
    if self.text:
      self.language = guessLanguageName(self.text)

    # Falls back to a single page when the metadata has no "PDF:Pages" entry.
    self.page_num = self.extra_metadata.get("PDF:Pages", 1)
Example #2
0
 def test_image_to_pdf(self):
     """Convert the JPEG fixture to PDF and check the sniffed MIME type."""
     jpeg_bytes = self.read_file("picture.jpg")
     converted = converter.to_pdf("", jpeg_bytes, "image/jpeg")
     sniffed_type = mime_sniffer.from_buffer(converted)
     eq_("application/pdf", sniffed_type)
Example #3
0
 def XXXtest_word_to_pdf(self):
     """Convert the Word fixture to PDF and check the sniffed MIME type.

     NOTE(review): the XXX prefix presumably keeps the test runner from
     collecting this test -- confirm before renaming.
     """
     doc_bytes = self.read_file("test.doc")
     converted = converter.to_pdf("", doc_bytes, "application/msword")
     sniffed_type = mime_sniffer.from_buffer(converted)
     eq_("application/pdf", sniffed_type)
Example #4
0
 def XXXtest_odt_to_pdf(self):
     """Convert the ODT fixture to PDF and check the sniffed MIME type.

     NOTE(review): the XXX prefix presumably keeps the test runner from
     collecting this test -- confirm before renaming.
     """
     odt_bytes = self.read_file("test.odt")
     source_type = "application/vnd.oasis.opendocument.text"
     converted = converter.to_pdf("", odt_bytes, source_type)
     sniffed_type = mime_sniffer.from_buffer(converted)
     eq_("application/pdf", sniffed_type)