Example #1
0
def _consume(filepath):
    """Hand ``filepath`` over to the ``documents.tasks.consume_file`` task.

    Nothing is queued when the path is a directory, when it is not (or no
    longer) a regular file, or when its extension is rejected by
    ``is_file_ext_supported``. If ``settings.CONSUMER_SUBDIRS_AS_TAGS`` is
    truthy, the tag ids returned by ``_tags_from_path`` are forwarded as
    ``override_tag_ids``; a failure while computing them is logged and the
    file is queued without tag overrides.

    Exceptions raised while computing tags or queueing the task are logged
    and swallowed, so this function does not propagate them.
    """
    # Directories are silently ignored.
    if os.path.isdir(filepath):
        return

    # Not a directory and not a regular file: treat it as gone.
    if not os.path.isfile(filepath):
        logger.debug(f"Not consuming file {filepath}: File has moved.")
        return

    _, extension = os.path.splitext(filepath)
    if not is_file_ext_supported(extension):
        logger.warning(
            f"Not consuming file {filepath}: Unknown file extension.")
        return

    # Tag lookup is best-effort: on any error fall back to "no overrides".
    try:
        path_tag_ids = (
            _tags_from_path(filepath)
            if settings.CONSUMER_SUBDIRS_AS_TAGS
            else None
        )
    except Exception as exc:
        logger.error("Error creating tags from path: {}".format(exc))
        path_tag_ids = None

    try:
        async_task(
            "documents.tasks.consume_file",
            filepath,
            # An empty/falsy tag collection is passed on as None.
            override_tag_ids=path_tag_ids or None,
            # The task name is the file's base name, cut to 100 characters.
            task_name=os.path.basename(filepath)[:100],
        )
    except Exception as exc:
        # Deliberate catch-all: a failure to queue one file must not crash
        # the consumer. The test case also listens for this error log call
        # to detect failures, so keep it as logger.error.
        logger.error("Error while consuming document: {}".format(exc))
Example #2
0
    def test_file_extensions(self):
        """Check the extension and parser lookup helpers.

        Covers: membership of common extensions in the supported list, the
        default extension for several MIME types (empty string for an
        unknown one), the parser class chosen per MIME type (``None`` for an
        unknown one), and ``is_file_ext_supported`` for a known and an
        unknown extension.
        """
        expected_supported = (".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv")
        for extension in expected_supported:
            self.assertIn(extension, get_supported_file_extensions())

        # (MIME type, expected default extension); unknown types map to "".
        default_extension_cases = (
            ('application/pdf', ".pdf"),
            ('image/png', ".png"),
            ('image/jpeg', ".jpg"),
            ('text/plain', ".txt"),
            ('text/csv', ".csv"),
            ('application/zip', ".zip"),
            ('aasdasd/dgfgf', ""),
        )
        for mime_type, expected_extension in default_extension_cases:
            self.assertEqual(
                get_default_file_extension(mime_type), expected_extension)

        # (MIME type, expected parser class); unknown types yield None.
        parser_cases = (
            ('application/pdf', RasterisedDocumentParser),
            ('text/plain', TextDocumentParser),
            ('text/sdgsdf', None),
        )
        for mime_type, expected_parser in parser_cases:
            self.assertEqual(
                get_parser_class_for_mime_type(mime_type), expected_parser)

        self.assertTrue(is_file_ext_supported('.pdf'))
        self.assertFalse(is_file_ext_supported('.hsdfh'))