def replace(self, new_content, new_mime_type):
    """
    Swap this document's content and mime type for the supplied ones.

    A document that has already been replaced cannot be replaced again.
    For non-binary content the new content and mime type are stored and
    the derived fields (type, size, digest) are cleared. In every case the
    document is flagged as unprocessed and saved.

    Returns True after the save.
    Raises ValueError when ``self.replaced_by`` is set.
    """
    if self.replaced_by:
        raise ValueError(
            "cannot replace a document that is already replaced")

    from indivo.document_processing.document_processing import DocumentProcessing

    incoming = DocumentProcessing(new_content, new_mime_type)
    if incoming.is_binary:
        # Why aren't we doing anything for binaries?
        pass
    else:
        # set content and mime_type
        self.content = incoming.content
        self.mime_type = new_mime_type

        # empty out derived fields so that doc processing will repopulate them
        for derived_field in ('type', 'size', 'digest'):
            setattr(self, derived_field, None)

    # We have changed the content, which now needs processing
    self.processed = False
    self.save()
    return True
def _setupargs(self, attachment_num=1, message=None, content='<?xml version="1.0" ?><body></body>', size=None, type=None): self.message = message self.attachment_num = attachment_num self.content = content self.size = size or len(content) self.type = type or DocumentProcessing(content, 'application/xml').xml_type
def save(self, *args, **kwargs):
    """
    Process the document if needed, persist it, then attach its fact objects.

    When the document is not yet marked as processed, a DocumentProcessing
    is built from its content and mime type. Documents with content and no
    pha are processed, and any resulting fact objects are collected. Binary
    content is cleared, and type, size and digest are filled in when unset.
    The document is then marked as processed.

    After the parent class save, each collected fact object is pointed at
    this document and its record and saved. A document without an original
    becomes its own original and is saved once more.
    """
    pending_facts = []

    if not self.processed:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        processor = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        if not self.pha and self.content:
            processor.process()

        # Update fact docs as Necessary
        produced = processor.f_objs if hasattr(processor, 'f_objs') else []
        for fact in produced:
            # Delete fact objects from the document we are replacing
            if self.replaces:
                stale = fact.__class__.objects.filter(document=self.replaces)
                stale.delete()

            # we can't update here, since we don't have an id yet
            if fact:
                pending_facts.append(fact)

        # Update document info based on processing
        if processor.is_binary:
            self.content = None
        self.type = self.type or processor.get_document_schema()
        self.size = self.size or processor.get_document_size()
        self.digest = self.digest or processor.get_document_digest()

        # Mark document as processed
        self.processed = True

    super(Document, self).save(*args, **kwargs)

    # Update newly created Fact objs, if any
    for fact in pending_facts:
        fact.document = self
        fact.record = self.record
        fact.save()

    # A document with no original is its own original; persist that link.
    if not self.original:
        self.original = self
        self.save()
def replace(self, new_content, new_mime_type):
    """
    Replace the content of the current document with new content and mime_type.

    For non-binary content, the type, digest, size and content are taken
    from a DocumentProcessing built over the new content. Binary content is
    left untouched. The document is saved in either case.

    Returns True after the save.
    Raises ValueError when ``self.replaced_by`` is set. ValueError is a
    subclass of Exception, so callers catching Exception still catch it.
    """
    if self.replaced_by:
        raise ValueError(
            "cannot replace a document that is already replaced")

    from indivo.document_processing.document_processing import DocumentProcessing

    new_doc = DocumentProcessing(new_content, new_mime_type)
    if not new_doc.is_binary:
        self.type = new_doc.get_document_schema()
        self.digest = new_doc.get_document_digest()
        self.size = new_doc.get_document_size()
        self.content = new_doc.content
    else:
        # Why aren't we doing anything for binaries?
        pass

    self.save()
    return True
def add_attachment(self, attachment_num, content):
    """
    Create a MessageAttachment for this message and return it.

    attachment_num is 1-indexed. An Exception is raised when it is greater
    than ``self.num_attachments``. The content is treated as
    'application/xml'; the stored size and type come from a
    DocumentProcessing built over it.
    """
    requested = int(attachment_num)
    if requested > self.num_attachments:
        raise Exception("attachment num is too high")

    # Only handle XML attachments for now
    mime_type = 'application/xml'

    from indivo.document_processing.document_processing import DocumentProcessing
    processed = DocumentProcessing(content, mime_type)

    fields = dict(
        message=self,
        content=content,
        size=processed.size,
        type=processed.fqn,
        attachment_num=attachment_num,
    )
    return MessageAttachment.objects.create(**fields)
def save(self, *args, **kwargs):
    """
    Handle document processing whenever a new document is created.

    This method processes the document, updates fact objects, and then
    saves the document.

    When the document is not yet marked as processed it:
      * builds a DocumentProcessing and runs it for documents that have
        content and no pha,
      * deletes Fact objects attached to the document being replaced,
      * fills in type, size and digest when unset,
      * for binary content, wraps the content in a ContentFile and clears
        the content field.

    After the parent class save (which provides an id) it stores the
    content file, marks the replaced document as replaced by this one,
    swaps a temporary external id for the internal id, saves any fact
    objects, and defaults ``original`` to the document itself. If any of
    those steps changed the document, it is saved once more.
    """
    # Defined on every path so later checks never hit an unbound name.
    cf = None

    if self.processed:
        doc = None  # Nothing to do here
    else:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        doc = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        if not self.pha and self.content:
            doc.process()

        # Delete fact objects from the document we are replacing
        if self.replaces:
            from indivo.models import Fact
            Fact.objects.filter(document=self.replaces).delete()

        # Update document info based on processing
        self.type = self.type if self.type else doc.get_document_schema()
        self.size = self.size if self.size else doc.get_document_size()
        self.digest = self.digest if self.digest else doc.get_document_digest()

        # Create our content file if we are binary.
        # The payload must be wrapped BEFORE the field is cleared; clearing
        # first would hand None to ContentFile and lose the binary data.
        if doc.is_binary:
            cf = ContentFile(self.content)
            self.content = None

    # Oracle is incompatible with multi-column unique constraints where
    # one column might be null (i.e., UNIQUE(record, external_id)).
    # We therefore insure that all Documents have an external id,
    # mirroring the internal id if none was passed in.

    # Set the external_id to a random uuid so that we can save it to the
    # db before it has an internal id
    if not self.external_id:
        self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document, self).save(*args, **kwargs)

    # Will we need to rewrite this to the DB after changes?
    save_again = False

    # Now that we have an id, we can handle any document-processing stuff
    # that requires an id
    if not self.processed:

        # save our content file if we were binary, now that we have an id.
        if cf is not None:
            # Don't force a save now, as we will resave later
            self.content_file.save(self.id, cf, save=False)

        # We can also mark the document we are replacing as replaced by us
        if self.replaces:
            self.replaces.replaced_by = self
            self.replaces.save()

        # Mark document as processed
        self.processed = True
        save_again = True

    # If we set a temporary external_id, set it to mirror the internal id
    if self.external_id.startswith('TEMP-EXTID'):
        self.external_id = self.id
        save_again = True

    # Update newly created Fact objs, if we created any
    if doc and hasattr(doc, 'f_objs'):
        for fobj in doc.f_objs:
            if fobj:
                fobj.document = self
                fobj.record = self.record
                fobj.save()

    if not self.original:
        self.original = self
        save_again = True

    # The nested call sees processed=True and a settled external id, so it
    # does not schedule a further save for those reasons.
    if save_again:
        self.save()
def _document_create(creator, content, pha, record, replaces_document=None, external_id=None, mime_type=None, status=None): """ Create an Indivo Document This is called for both document creation within a record and document creation within a record for a specific application. The PHA argument, if non-null, indicates app-specificity only. By this point, the external_id should be fully formed. FIXME: figure out the transactional aspect here If status is specified, then it is used, otherwise it is not specified and the DB does its default thing. """ new_doc = None # Overwrite content if we are replacing an existing PHA doc if pha and replaces_document: replaces_document.replace(content, mime_type) # Create new document else: creator = creator.effective_principal doc_args = { PHA: pha, RECORD: record, CREATOR: creator, MIME_TYPE: mime_type, EXTERNAL_ID: external_id, REPLACES: replaces_document, CONTENT: content, ORIGINAL_ID: replaces_document.original_id if replaces_document else None } if status: create_args[STATUS] = status # create the document new_doc = Document.objects.create(**doc_args) # Save the binary file if DocumentProcessing(content, mime_type).is_binary: file = ContentFile(content) new_doc.content_file.save(new_doc.id, file) # Mark old doc as replaced if replaces_document: replaces_document.replaced_by = new_doc replaces_document.save() # return new doc if we have it, otherwise updated old doc return new_doc or replaces_document