Example #1
0
    def import_file(filepath,
                    username=None,
                    file_title=None,
                    inbox_title="Inbox",
                    delete_after_import=False,
                    start_ocr_async=True,
                    upload=True):
        """
        Import a file from the local file system as a new document.

        Steps performed:
            1. creates a document instance (if there is available space).
            2. copies the file to doc_instance.url()
            3. (optionally) uploads the document to S3 storage.
            4. (optionally) starts the ocr_async task.

        Is used on customers instance by:
            * import_file command - to import files from SFTP directory
            * import_attachment command - to import attachments from mailbox

        Args:
            filepath: path of the file to import.
            username: owner of the new document; when None the user returned
                by get_root_user() is used. An unknown username is not
                handled here, so whatever User.objects.get raises propagates.
            file_title: document title and file name; when None it is
                derived from filepath via get_file_title().
            inbox_title: title of the top-level folder (parent=None) that
                receives the document; created for the user if missing.
            delete_after_import: remove filepath once the import is done.
            start_ocr_async: start OCR (only if settings.OCR is truthy).
            upload: upload to S3 (only if settings.S3 is truthy).

        Returns:
            True when the document was imported; False when the user has no
            storage left or the page count could not be determined.
        """
        logger.debug(f"Importing file {filepath}")

        if username is None:
            user = get_root_user()
        else:
            user = User.objects.get(username=username)

        if file_title is None:
            file_title = get_file_title(filepath)

        if not is_storage_left(filepath, user=user):
            # The original message lacked the braces and therefore logged
            # the literal text "user.username" instead of the actual name.
            logger.error(f"{user.username} reached his disk quota")
            return False

        lang = Document.get_default_language()
        # get_pagecount() might raise an exception in case
        # file is either wrong (not a PDF) or not yet
        # completed to upload
        try:
            page_count = get_pagecount(filepath)
        except Exception:
            # which means that document is not yet fully
            # uploaded by SFTP client.
            logger.error(f"File {filepath} not yet ready for importing.")
            return False

        inbox, _ = Folder.objects.get_or_create(title=inbox_title,
                                                parent=None,
                                                user=user)
        doc = Document.create_document(user=user,
                                       title=file_title,
                                       size=get_file_size(filepath),
                                       lang=lang,
                                       file_name=file_title,
                                       parent_id=inbox.id,
                                       page_count=page_count)
        logger.debug(f"Uploading file {filepath} to {doc.doc_ep.url()}")
        # Import file is executed as root (import-file.service)
        # (because import-file need to access/delete sftp files, folder
        # as of another system user)
        # Thus, after copying file into (newly created) folders,
        # it need to change permissions (of newly created files and folders)
        # to the app_user/app_group.
        copy2doc_url(src_file_path=filepath,
                     doc_url=doc.doc_ep.url(),
                     user=settings.APP_USER,
                     group=settings.APP_GROUP)

        if upload and settings.S3:
            upload_document_to_s3(doc.doc_ep)

        if start_ocr_async and settings.OCR:
            Document.ocr_async(document=doc,
                               page_count=page_count,
                               lang=lang,
                               s3_enabled=settings.S3)

        # Source file is removed only after copy/upload/OCR scheduling ran.
        if delete_after_import:
            os.remove(filepath)

        return True
Example #2
0
    def post(self, request):
        """
        Handle the upload of exactly one file and create a document from it.

        Reads the uploaded file from request.FILES['file'] and the optional
        'parent', 'language' and 'notes' fields from request.POST. A
        'parent' value containing "-1" (also the default when the field is
        absent) means the document has no parent.

        Returns an HttpResponse with a JSON body:
            * status 400 with an 'error' message when no file was sent or
              the storage is full;
            * status 400 with an empty object when more than one file was
              sent;
            * otherwise an object with 'title', 'doc_id', 'action_url' and
              'preview_url' describing the new document.
        """
        files = request.FILES.getlist('file')
        if not files:
            logger.warning("POST request.FILES is empty. Forgot adding file?")
            # Without this early return, files[0] below raises IndexError
            # and the client gets a server error instead of a 400.
            return HttpResponse(json.dumps({'error': "No file was uploaded."}),
                                status=400,
                                content_type="application/json")

        if len(files) > 1:
            logger.warning("More then one files per ajax? how come?")
            # HttpResponse takes `status`, not `status_code`; the latter
            # raised TypeError instead of producing the intended 400.
            return HttpResponse(json.dumps({}),
                                content_type="application/json",
                                status=400)

        f = files[0]

        logger.debug("upload for f=%s user=%s", f, request.user)

        # Path of the uploaded file on disk; used for the quota check,
        # size, page count and the final copy.
        tmp_path = f.temporary_file_path()

        if not is_storage_left(tmp_path):
            logger.warning("Storage is full for user=%s.", request.user)
            msg = "Cannot upload file {}. Storage is full.".format(f.name)

            return HttpResponse(json.dumps({'error': msg}),
                                status=400,
                                content_type="application/json")

        user = request.user
        size = os.path.getsize(tmp_path)
        parent_id = request.POST.get('parent', "-1")
        if parent_id and "-1" in parent_id:
            parent_id = None

        lang = request.POST.get('language')
        notes = request.POST.get('notes')
        page_count = get_pagecount(tmp_path)
        logger.info("creating document {}".format(f.name))

        doc = Document.create_document(user=user,
                                       title=f.name,
                                       size=size,
                                       lang=lang,
                                       file_name=f.name,
                                       parent_id=parent_id,
                                       notes=notes,
                                       page_count=page_count)
        logger.debug("uploading to {}".format(doc.doc_ep.url()))

        copy2doc_url(src_file_path=tmp_path,
                     doc_url=doc.doc_ep.url())

        if settings.S3:
            upload_document_to_s3(doc.doc_ep)

        if settings.OCR:
            Document.ocr_async(document=doc, page_count=page_count, lang=lang)

        # upload only one file at time.
        # after each upload return a json object with
        # following fields:
        #
        # - title
        # - preview_url
        # - doc_id
        # - action_url  -> needed for renaming/deleting selected item
        #
        # with that info a new thumbnail will be created.

        action_url = reverse('boss:core_basetreenode_change', args=(doc.id, ))

        preview_url = reverse('core:preview', args=(doc.id, 200, 1))

        result = {
            'title': doc.title,
            'doc_id': doc.id,
            'action_url': action_url,
            'preview_url': preview_url
        }
        logger.info("and response is!")
        return HttpResponse(json.dumps(result),
                            content_type="application/json")