Example #1
0
def extract_from_file(sender, instance, **kwargs):
    """Fill the instance's extracted text, HTML and metadata using Tika.

    The work is skipped entirely when ``Tika.ping()`` is falsy. Otherwise
    the file at ``instance.original_file.path`` is handed to
    ``Tika.from_file`` and the results are stored on ``instance`` as
    ``extracted_text``, ``extracted_html`` and ``metadata``.

    ``sender`` and ``**kwargs`` are accepted but unused here.
    NOTE(review): the signature looks like a signal receiver -- confirm
    against where this function is connected.
    """
    if not Tika.ping():
        return

    source_path = instance.original_file.path
    parsed = Tika.from_file(source_path)

    # Pass the text through UnicodeDammit and keep its unicode_markup result.
    raw_text = parsed.text()
    instance.extracted_text = UnicodeDammit(raw_text).unicode_markup

    instance.extracted_html = parsed.html()
    instance.metadata = parsed.meta()
Example #2
0
 def tika_handle(self):
     """Return a Tika handle for ``self.path``, or ``None`` if unavailable.

     ``None`` is returned when ``Tika.ping()`` is falsy or when
     ``self.path`` is falsy. Otherwise the result of
     ``Tika.from_file(self.path)`` is returned unchanged.
     """
     # Explicit guards replace the ``cond and value or None`` idiom, which
     # would silently turn a falsy-but-valid handle into None.
     if not Tika.ping():
         return None

     # Read the path once so the checked value is the one that gets used.
     path = self.path
     if not path:
         return None

     return Tika.from_file(path)