def handle_upload(self, file_object, description=None, document_type=None, expand=False, label=None, language=None, user=None):
    """
    Handle an upload request from a file object which may be an individual
    document or a compressed file containing multiple documents.

    :param file_object: the uploaded file; passed to ``Archive.open`` when
        ``expand`` is true, otherwise passed straight to
        ``self.upload_document``.
    :param description: forwarded unchanged to ``self.upload_document``.
    :param document_type: forwarded to ``self.upload_document``; when falsy,
        ``self.document_type`` is used instead.
    :param expand: when true, try to treat ``file_object`` as a compressed
        file and upload every member as a separate document.
    :param label: forwarded to ``self.upload_document``. Archive members are
        uploaded with their member name as the label instead.
    :param language: forwarded unchanged to ``self.upload_document``.
    :param user: forwarded unchanged to ``self.upload_document``.
    :return: list with the value returned by every ``self.upload_document``
        call, in upload order.
    """
    if not document_type:
        document_type = self.document_type

    kwargs = {
        'description': description, 'document_type': document_type,
        'label': label, 'language': language, 'user': user
    }

    documents = []

    if expand:
        try:
            compressed_file = Archive.open(file_object=file_object)
            for compressed_file_child in compressed_file.members():
                # Use a distinct name for the member handle. Rebinding
                # ``file_object`` here would make the fallback below upload
                # an already closed archive member instead of the original
                # upload if NoMIMETypeMatch surfaces after iteration began.
                with compressed_file.open_member(
                    filename=compressed_file_child
                ) as child_file_object:
                    # Per-child copy so the member label never leaks into
                    # the arguments used by the fallback upload.
                    child_kwargs = dict(
                        kwargs, label=force_text(compressed_file_child)
                    )
                    documents.append(
                        self.upload_document(
                            file_object=child_file_object, **child_kwargs
                        )
                    )
        except NoMIMETypeMatch:
            # Presumably raised when the upload is not a recognized archive
            # format; fall back to storing it as a single document.
            logger.debug(msg='Exception: NoMIMETypeMatch')
            documents.append(
                self.upload_document(file_object=file_object, **kwargs)
            )
    else:
        documents.append(
            self.upload_document(file_object=file_object, **kwargs)
        )

    # Return a list of newly created documents. Used by the email source
    # to assign the from and subject metadata values.
    return documents
def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, source_id, description=None, expand=False, label=None, language=None, querystring=None, skip_list=None, user_id=None):
    """
    Schedule the document upload task(s) for a shared uploaded file.

    When ``expand`` is true the shared upload is opened as a compressed
    file and one ``task_upload_document`` is queued per member that is not
    already named in ``skip_list``; afterwards the shared upload itself is
    deleted. If the file is not recognized (``NoMIMETypeMatch``) or
    ``expand`` is false, a single ``task_upload_document`` is queued for the
    shared upload as a whole.

    :param document_type_id: primary key of the ``DocumentType`` to use.
    :param shared_uploaded_file_id: primary key of the
        ``SharedUploadedFile`` holding the uploaded content.
    :param source_id: forwarded unchanged to ``task_upload_document``.
    :param description: forwarded unchanged to ``task_upload_document``.
    :param expand: whether to try to process the upload as a compressed
        file.
    :param label: document label; defaults to the shared upload filename.
        Archive members use their member name as the label instead.
    :param language: forwarded unchanged to ``task_upload_document``.
    :param querystring: forwarded unchanged to ``task_upload_document``.
    :param skip_list: member names already processed by a previous run of
        this task. The passed list is not modified.
    :param user_id: forwarded unchanged to ``task_upload_document``.
    :raises: whatever ``self.retry`` raises when the initial database
        lookups fail with ``OperationalError``.
    """
    SharedUploadedFile = apps.get_model(
        app_label='common', model_name='SharedUploadedFile'
    )
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )

    try:
        document_type = DocumentType.objects.get(pk=document_type_id)
        shared_upload = SharedUploadedFile.objects.get(
            pk=shared_uploaded_file_id
        )

        if not label:
            label = shared_upload.filename
    except OperationalError as exception:
        logger.warning(
            'Operational error during attempt to load data to handle source '
            'upload: %s. Retrying.', exception
        )
        raise self.retry(exc=exception)

    kwargs = {
        'description': description, 'document_type_id': document_type.pk,
        'label': label, 'language': language, 'querystring': querystring,
        'source_id': source_id, 'user_id': user_id
    }

    # Work on a copy so a list supplied by a direct caller is never mutated.
    skip_list = list(skip_list) if skip_list else []

    with shared_upload.open() as file_object:
        if expand:
            try:
                compressed_file = Archive.open(file_object=file_object)
                for compressed_file_child in compressed_file.get_members():
                    # A single try/finally guarantees that every member,
                    # skipped or processed, is closed exactly once.
                    try:
                        # TODO: find way to uniquely identify child files
                        # Use filename in the meantime.
                        child_label = force_text(compressed_file_child)
                        if child_label in skip_list:
                            continue

                        try:
                            child_shared_uploaded_file = SharedUploadedFile.objects.create(
                                file=File(compressed_file_child)
                            )
                        except OperationalError as exception:
                            logger.warning(
                                'Operational error while preparing to upload '
                                'child document: %s. Rescheduling.', exception
                            )

                            # TODO: Don't call the task itself again
                            # Update to use celery's retry feature
                            # The accumulated skip list lets the rescheduled
                            # run resume after the members already queued.
                            task_source_handle_upload.delay(
                                document_type_id=document_type_id,
                                shared_uploaded_file_id=shared_uploaded_file_id,
                                source_id=source_id, description=description,
                                expand=expand, label=label,
                                language=language, skip_list=skip_list,
                                querystring=querystring, user_id=user_id
                            )
                            return

                        skip_list.append(child_label)
                        # Per-child copy: the member label must not leak
                        # into ``kwargs``, which the NoMIMETypeMatch
                        # fallback below reuses for the whole upload.
                        task_upload_document.delay(
                            shared_uploaded_file_id=child_shared_uploaded_file.pk,
                            **dict(kwargs, label=child_label)
                        )
                    finally:
                        compressed_file_child.close()

                try:
                    shared_upload.delete()
                except OperationalError as exception:
                    # NOTE(review): the message says "Retrying" but no retry
                    # is scheduled here; the failure is only logged.
                    logger.warning(
                        'Operational error during attempt to delete shared '
                        'upload file: %s; %s. Retrying.', shared_upload,
                        exception
                    )
            except NoMIMETypeMatch:
                # Presumably the upload is not a recognized archive format;
                # queue it as a single document instead.
                logger.debug('Exception: NoMIMETypeMatch')
                task_upload_document.delay(
                    shared_uploaded_file_id=shared_upload.pk, **kwargs
                )
        else:
            task_upload_document.delay(
                shared_uploaded_file_id=shared_upload.pk, **kwargs
            )