예제 #1
0
    def handle_upload(self,
                      file_object,
                      description=None,
                      document_type=None,
                      expand=False,
                      label=None,
                      language=None,
                      user=None):
        """
        Handle an upload request from a file object which may be an individual
        document or a compressed file containing multiple documents.

        :param file_object: the uploaded content; handed to
            ``Archive.open`` when expanding, otherwise straight to
            ``self.upload_document``.
        :param description: forwarded unchanged to ``self.upload_document``.
        :param document_type: document type for the new documents; when falsy
            ``self.document_type`` is used instead.
        :param expand: when true, try to open ``file_object`` as an archive
            and upload every member as its own document, labelled with the
            member name. If ``NoMIMETypeMatch`` is raised, ``file_object`` is
            uploaded as a single document instead.
        :param label: label for a single-document upload.
        :param language: forwarded unchanged to ``self.upload_document``.
        :param user: forwarded unchanged to ``self.upload_document``.
        :return: list with the result of every ``self.upload_document`` call
            made, in call order.
        """
        documents = []
        if not document_type:
            document_type = self.document_type

        kwargs = {
            'description': description,
            'document_type': document_type,
            'label': label,
            'language': language,
            'user': user
        }

        if expand:
            try:
                compressed_file = Archive.open(file_object=file_object)
                for compressed_file_child in compressed_file.members():
                    # Bind the member handle to its own name. Reusing
                    # ``file_object`` here would shadow the parameter, and the
                    # fallback below would then upload a closed member handle
                    # instead of the original file.
                    with compressed_file.open_member(
                            filename=compressed_file_child
                    ) as child_file_object:
                        # Per-member copy: the shared ``kwargs`` must keep the
                        # caller's label for the fallback upload.
                        child_kwargs = dict(
                            kwargs, label=force_text(compressed_file_child))
                        documents.append(
                            self.upload_document(
                                file_object=child_file_object,
                                **child_kwargs))
            except NoMIMETypeMatch:
                # Not recognised as an archive: treat it as a single document.
                logger.debug(msg='Exception: NoMIMETypeMatch')
                documents.append(
                    self.upload_document(file_object=file_object, **kwargs))
        else:
            documents.append(
                self.upload_document(file_object=file_object, **kwargs))

        # Return a list of newly created documents. Used by the email source
        # to assign the from and subject metadata values.
        return documents
예제 #2
0
def task_source_handle_upload(self,
                              document_type_id,
                              shared_uploaded_file_id,
                              source_id,
                              description=None,
                              expand=False,
                              label=None,
                              language=None,
                              querystring=None,
                              skip_list=None,
                              user_id=None):
    """
    Queue document upload tasks for a shared uploaded file.

    The shared upload is either queued as a single document or, when
    ``expand`` is true and it can be opened as an archive, one upload task is
    queued per archive member.

    :param self: the task instance; only ``self.retry`` is used.
    :param document_type_id: primary key of the ``DocumentType`` to use.
    :param shared_uploaded_file_id: primary key of the ``SharedUploadedFile``
        holding the uploaded content.
    :param source_id: forwarded to ``task_upload_document``.
    :param description: forwarded to ``task_upload_document``.
    :param expand: when true, try to open the upload as an archive and queue
        each member separately. If ``NoMIMETypeMatch`` is raised, the upload
        is queued as a single document instead.
    :param label: label for a single-document upload; defaults to the shared
        upload's ``filename`` when falsy. Archive members are labelled with
        their own name.
    :param language: forwarded to ``task_upload_document``.
    :param querystring: forwarded to ``task_upload_document``.
    :param skip_list: names of archive members already handled by a previous
        run of this task; those members are not queued again.
    :param user_id: forwarded to ``task_upload_document``.
    :raises: whatever ``self.retry`` raises when the initial database lookup
        fails with ``OperationalError``.
    """
    SharedUploadedFile = apps.get_model(app_label='common',
                                        model_name='SharedUploadedFile')

    DocumentType = apps.get_model(app_label='documents',
                                  model_name='DocumentType')

    try:
        document_type = DocumentType.objects.get(pk=document_type_id)
        shared_upload = SharedUploadedFile.objects.get(
            pk=shared_uploaded_file_id)

        if not label:
            label = shared_upload.filename

    except OperationalError as exception:
        logger.warning(
            'Operational error during attempt to load data to handle source '
            'upload: %s. Retrying.', exception)
        raise self.retry(exc=exception)

    kwargs = {
        'description': description,
        'document_type_id': document_type.pk,
        'label': label,
        'language': language,
        'querystring': querystring,
        'source_id': source_id,
        'user_id': user_id
    }

    if not skip_list:
        skip_list = []

    with shared_upload.open() as file_object:
        if expand:
            try:
                compressed_file = Archive.open(file_object=file_object)
                for compressed_file_child in compressed_file.get_members():
                    # A single try/finally closes every member exactly once,
                    # whether it is skipped, queued or triggers a reschedule.
                    try:
                        # TODO: find way to uniquely identify child files
                        # Use filename in the meantime.
                        child_label = force_text(compressed_file_child)
                        if child_label in skip_list:
                            continue

                        # Per-member copy so the shared ``kwargs`` keeps the
                        # original label for the NoMIMETypeMatch fallback.
                        child_kwargs = dict(kwargs, label=child_label)

                        try:
                            child_shared_uploaded_file = (
                                SharedUploadedFile.objects.create(
                                    file=File(compressed_file_child)))
                        except OperationalError as exception:
                            logger.warning(
                                'Operational error while preparing to upload '
                                'child document: %s. Rescheduling.', exception)

                            # TODO: Don't call the task itself again
                            # Update to use celery's retry feature
                            # The accumulated skip_list lets the rescheduled
                            # run skip members that were already queued.
                            task_source_handle_upload.delay(
                                document_type_id=document_type_id,
                                shared_uploaded_file_id=shared_uploaded_file_id,
                                source_id=source_id,
                                description=description,
                                expand=expand,
                                label=label,
                                language=language,
                                skip_list=skip_list,
                                querystring=querystring,
                                user_id=user_id)
                            return

                        skip_list.append(child_label)
                        task_upload_document.delay(
                            shared_uploaded_file_id=
                            child_shared_uploaded_file.pk,
                            **child_kwargs)
                    finally:
                        compressed_file_child.close()

                try:
                    shared_upload.delete()
                except OperationalError as exception:
                    # NOTE(review): the message says "Retrying." but no retry
                    # is scheduled here; the failure is only logged.
                    logger.warning(
                        'Operational error during attempt to delete shared '
                        'upload file: %s; %s. Retrying.', shared_upload,
                        exception)
            except NoMIMETypeMatch:
                # Not recognised as an archive: queue it as a single document.
                logger.debug('Exception: NoMIMETypeMatch')
                task_upload_document.delay(
                    shared_uploaded_file_id=shared_upload.pk, **kwargs)
        else:
            task_upload_document.delay(
                shared_uploaded_file_id=shared_upload.pk, **kwargs)