def _consume(filepath):
    """Queue ``filepath`` for consumption by ``documents.tasks.consume_file``.

    Nothing is queued when the path is a directory, is no longer a regular
    file, or has an extension rejected by ``is_file_ext_supported``.

    When ``settings.CONSUMER_SUBDIRS_AS_TAGS`` is truthy, the tag ids returned
    by ``_tags_from_path`` are passed to the task as ``override_tag_ids``.
    A failure while deriving tags is logged and the file is still queued
    without tag overrides.
    """
    # Directories are skipped silently.
    if os.path.isdir(filepath):
        return

    # Not a directory and not a regular file: the path is gone by now.
    if not os.path.isfile(filepath):
        logger.debug(f"Not consuming file {filepath}: File has moved.")
        return

    _, extension = os.path.splitext(filepath)
    if not is_file_ext_supported(extension):
        logger.warning(
            f"Not consuming file {filepath}: Unknown file extension.")
        return

    tag_ids = None
    try:
        if settings.CONSUMER_SUBDIRS_AS_TAGS:
            tag_ids = _tags_from_path(filepath)
    except Exception as err:
        # Tagging is best-effort: log the problem and carry on untagged.
        logger.error(f"Error creating tags from path: {err}")

    try:
        async_task(
            "documents.tasks.consume_file",
            filepath,
            # An empty/falsy tag collection is normalised to None.
            override_tag_ids=tag_ids or None,
            # The task name is the file's base name, capped at 100 chars.
            task_name=os.path.basename(filepath)[:100],
        )
    except Exception as err:
        # Deliberately broad so a failure here cannot crash the consumer.
        # The test case also listens for this logged error to detect
        # failures.
        logger.error(f"Error while consuming document: {err}")
def test_file_extensions(self):
    """Check extension and MIME-type lookups of the parser registry helpers.

    Covers membership in the supported-extension list, the default extension
    chosen per MIME type, the parser class chosen per MIME type, and the
    ``is_file_ext_supported`` predicate.
    """
    # Each of these extensions must be reported as supported.
    for extension in (".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"):
        self.assertIn(extension, get_supported_file_extensions())

    # MIME type -> expected default extension; an unknown type yields "".
    expected_extensions = (
        ('application/pdf', ".pdf"),
        ('image/png', ".png"),
        ('image/jpeg', ".jpg"),
        ('text/plain', ".txt"),
        ('text/csv', ".csv"),
        ('application/zip', ".zip"),
        ('aasdasd/dgfgf', ""),
    )
    for mime_type, expected in expected_extensions:
        self.assertEqual(get_default_file_extension(mime_type), expected)

    # MIME type -> expected parser class; an unknown type yields None.
    expected_parsers = (
        ('application/pdf', RasterisedDocumentParser),
        ('text/plain', TextDocumentParser),
        ('text/sdgsdf', None),
    )
    for mime_type, parser_class in expected_parsers:
        self.assertEqual(
            get_parser_class_for_mime_type(mime_type), parser_class)

    self.assertTrue(is_file_ext_supported('.pdf'))
    self.assertFalse(is_file_ext_supported('.hsdfh'))