Exemple #1
0
 def parse_file(self, file: File, fallback_city: str):
     """Run OCR on a stored file and persist the extracted metadata.

     The file content is read from the ``minio_file_bucket`` bucket under
     the key ``str(file.id)`` and handed to ``get_ocr_text_from_pdf``. When
     text is recognized, the cleaned-up text, the mentioned persons and the
     locations are written to ``file`` and it is saved. Otherwise a warning
     is logged and ``file`` is not modified.

     :param file: The file record to parse and update.
     :param fallback_city: Passed through to ``extract_locations``.
     """
     logging.info("- Parsing: %s (%s)", file.id, file.name)
     with minio_client().get_object(minio_file_bucket,
                                    str(file.id)) as file_handle:
         recognized_text = get_ocr_text_from_pdf(file_handle.read())

     # Truthiness rather than len(): an OCR result of None is treated the
     # same as an empty string instead of raising a TypeError.
     if not recognized_text:
         logging.warning("Nothing recognized")
         return

     file.parsed_text = cleanup_extracted_text(recognized_text)
     # Persons are searched in the file name plus the raw (not cleaned) text.
     file.mentioned_persons = extract_persons(
         file.name + "\n" + recognized_text + "\n")
     # Locations are searched in the cleaned text.
     file.locations.set(extract_locations(file.parsed_text, fallback_city))
     file.save()
Exemple #2
0
 def parse_file(self, file: File):
     """Run OCR on a file below ``settings.MEDIA_ROOT`` and store the results.

     When text is recognized, the cleaned-up text, the mentioned persons and
     the locations are written to ``file`` and it is saved. Otherwise a
     warning is logged and ``file`` is not modified.

     :param file: The file record to parse and update; its
         ``storage_filename`` is the file name below ``settings.MEDIA_ROOT``.
     """
     logging.info("- Parsing: %s (%s)", file.id, file.name)
     # A relative MEDIA_ROOT is resolved against the current working
     # directory; an absolute MEDIA_ROOT overrides it (os.path.join drops
     # everything before an absolute component).
     file_path = os.path.join(os.getcwd(), settings.MEDIA_ROOT,
                              file.storage_filename)
     recognized_text = get_ocr_text_from_pdf(file_path)

     # Truthiness rather than len(): an OCR result of None is treated the
     # same as an empty string instead of raising a TypeError.
     if not recognized_text:
         logging.warning("Nothing recognized")
         return

     file.parsed_text = cleanup_extracted_text(recognized_text)
     # Persons are searched in the file name plus the raw (not cleaned) text.
     file.mentioned_persons = extract_persons(
         file.name + "\n" + recognized_text + "\n")
     # NOTE(review): if ``locations`` is a Django many-to-many field, direct
     # assignment is rejected by Django >= 2.0 and ``.set()`` is required.
     # Confirm against the model definition.
     file.locations = extract_locations(recognized_text)
     file.save()
Exemple #3
0
 def parse_file(self, file: File, fallback_city: str):
     """Recompute the locations of ``file`` from its already parsed text.

     Writes a progress line and the number of locations found to
     ``self.stdout``, replaces ``file.locations`` with the result of
     ``extract_locations`` and saves ``file``.

     :param file: The file record whose ``parsed_text`` is analysed.
     :param fallback_city: Passed through to ``extract_locations``.
     """
     headline = "".join(["Parsing: ", str(file.id), " (", file.name, ")"])
     self.stdout.write(headline)

     found = extract_locations(file.parsed_text, fallback_city)
     count = len(found)
     self.stdout.write("{} locations found".format(count))

     file.locations.set(found)
     file.save()
Exemple #4
0
 def parse_file(self, file: File):
     """Recompute the persons mentioned in ``file`` and save it.

     The text handed to ``extract_persons`` is the file name and the parsed
     text (empty when ``file.parsed_text`` is falsy), each followed by a
     newline.

     :param file: The file record to update.
     """
     banner = "".join(["- Parsing: ", str(file.id), " (", file.name, ")"])
     logging.info(banner)

     title = file.name
     body = file.parsed_text or ""
     # Joining with a trailing empty item yields "<title>\n<body>\n".
     file.mentioned_persons = extract_persons("\n".join([title, body, ""]))
     file.save()