def _process_doc(content, pha):
    """Process the document into medical facts.

    Exceptions raised during processing are propagated to the caller
    unhandled. Returns the ``DocumentProcessing`` instance on success,
    or ``False`` when *content* is empty.
    """
    if not content:
        return False
    processed = DocumentProcessing(content)
    processed.is_binary = DocumentUtils().is_binary(content)
    # PHA-owned documents skip the fact-extraction step
    if not pha:
        processed.process()
    # Called for its side effect; the schema itself is not used here
    processed.get_document_schema()
    return processed
def save(self, *args, **kwargs):
    """ Handle document processing whenever a new document is created.

    Processes the document content once (guarded by ``self.processed``),
    collects any extracted fact objects, saves the document row, then
    links the collected facts to the now-saved document.
    """
    # Facts extracted during processing; they can't be linked to this
    # document until after the first save gives it an id.
    fobjs_to_update = []
    if not self.processed:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        doc = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        # (PHA-specific documents and empty content skip the pipeline)
        if not self.pha and self.content:
            doc.process()

        # Update fact docs as Necessary
        if hasattr(doc, 'f_objs'):
            for fobj in doc.f_objs:
                # Delete fact objects from the document we are replacing
                if self.replaces:
                    fobj.__class__.objects.filter(document=self.replaces).delete()
                # we can't update here, since we don't have an id yet
                if fobj:
                    fobjs_to_update.append(fobj)

        # Update document info based on processing.
        # Binary payloads are not kept in the content column.
        if doc.is_binary:
            self.content = None
        # Only fill in metadata that wasn't supplied by the caller.
        self.type = self.type if self.type else doc.get_document_schema()
        self.size = self.size if self.size else doc.get_document_size()
        self.digest = self.digest if self.digest else doc.get_document_digest()

        # Mark document as processed
        self.processed = True

    super(Document, self).save(*args, **kwargs)

    # Update newly created Fact objs, if any — the document now has an id.
    for fobj in fobjs_to_update:
        fobj.document = self
        fobj.record = self.record
        fobj.save()

    # A document with no `original` is its own original; persist that link.
    if not self.original:
        self.original = self
        self.save()
def save(self, *args, **kwargs):
    """ Handle document processing whenever a new document is created.

    Runs the document-processing pipeline exactly once per document,
    collects extracted fact objects, persists the document row, and
    finally points the collected facts at the saved document.
    """
    pending_facts = []
    if not self.processed:
        # Imported lazily: DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        processor = DocumentProcessing(self.content, self.mime_type)

        # Only non-PHA documents with actual content go through the pipeline
        if not self.pha and self.content:
            processor.process()

        # Gather extracted facts; they can't be linked yet (no id assigned)
        if hasattr(processor, 'f_objs'):
            for fact in processor.f_objs:
                # Purge facts belonging to the document being replaced
                if self.replaces:
                    fact.__class__.objects.filter(document=self.replaces).delete()
                if fact:
                    pending_facts.append(fact)

        # Binary payloads are not stored inline in the content column
        if processor.is_binary:
            self.content = None
        # Fill in metadata only where the caller left it unset
        self.type = self.type or processor.get_document_schema()
        self.size = self.size or processor.get_document_size()
        self.digest = self.digest or processor.get_document_digest()

        self.processed = True

    super(Document, self).save(*args, **kwargs)

    # The document now has an id: attach the collected facts to it
    for fact in pending_facts:
        fact.document = self
        fact.record = self.record
        fact.save()

    # A document with no `original` is its own original; persist that link
    if not self.original:
        self.original = self
        self.save()
def replace(self, new_content, new_mime_type):
    """ Replace the content of the current document with new content and mime_type.

    Raises ``Exception`` if this document has already been superseded.
    Saves the document and returns ``True``.
    """
    if self.replaced_by:
        raise Exception("cannot replace a document that is already replaced")

    from indivo.document_processing.document_processing import DocumentProcessing
    replacement = DocumentProcessing(new_content, new_mime_type)
    if replacement.is_binary:
        # NOTE(review): binary replacements are currently a no-op here —
        # unclear whether that is intentional; confirm before relying on it.
        pass
    else:
        self.type = replacement.get_document_schema()
        self.digest = replacement.get_document_digest()
        self.size = replacement.get_document_size()
        self.content = replacement.content

    self.save()
    return True
def replace(self, new_content, new_mime_type):
    """ Replace the content of the current document with new content and mime_type.

    Raises ``Exception`` if this document has already been replaced.
    Saves the document and returns ``True``.
    """
    if self.replaced_by:
        raise Exception("cannot replace a document that is already replaced")

    # import here to avoid a circular import at module load time
    from indivo.document_processing.document_processing import DocumentProcessing
    new_doc = DocumentProcessing(new_content, new_mime_type)
    if not new_doc.is_binary:
        # Refresh all content-derived metadata from the new payload
        self.type = new_doc.get_document_schema()
        self.digest = new_doc.get_document_digest()
        self.size = new_doc.get_document_size()
        self.content = new_doc.content
    else:
        # Why aren't we doing anything for binaries?
        pass
    self.save()
    return True
def save(self, *args, **kwargs):
    """ Handle document processing whenever a new document is created.

    First save (``self.processed`` False): runs the processing pipeline,
    fills in metadata, moves binary payloads into a content file, cleans
    up the replaced document, and links extracted facts. Subsequent
    saves skip processing entirely.
    """
    if self.processed:
        doc = None  # Nothing to do here
    else:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        doc = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        if not self.pha and self.content:
            doc.process()

        # Delete fact objects from the document we are replacing
        if self.replaces:
            from indivo.models import Fact
            Fact.objects.filter(document=self.replaces).delete()

        # Update document info based on processing; caller-supplied
        # values take precedence over derived ones.
        self.type = self.type if self.type else doc.get_document_schema()
        self.size = self.size if self.size else doc.get_document_size()
        self.digest = self.digest if self.digest else doc.get_document_digest()

        # Create our content file if we are binary
        # (captured before clearing self.content below)
        cf = None
        if doc.is_binary:
            cf = ContentFile(self.content)
            self.content = None

    # Oracle is incompatible with multi-column unique constraints where
    # one column might be null (i.e., UNIQUE(record, external_id)).
    # We therefore insure that all Documents have an external id,
    # mirroring the internal id if none was passed in.
    # Set the external_id to a random uuid so that we can save it to the
    # db before it has an internal id
    if not self.external_id:
        self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document, self).save(*args, **kwargs)

    # Will we need to rewrite this to the DB after changes?
    save_again = False

    # Now that we have an id, we can handle any document-processing stuff that requires an id
    if not self.processed:
        # save our content file if we were binary, now that we have an id.
        if cf:
            # Don't force a save now, as we will resave later
            self.content_file.save(self.id, cf, save=False)

        # We can also mark the document we are replacing as replaced by us
        if self.replaces:
            self.replaces.replaced_by = self
            self.replaces.save()

        # Mark document as processed
        self.processed = True
        save_again = True

    # If we set a temporary external_id, set it to mirror the internal id
    if self.external_id.startswith('TEMP-EXTID'):
        self.external_id = self.id
        save_again = True

    # Update newly created Fact objs, if we created any
    if doc and hasattr(doc, 'f_objs'):
        for fobj in doc.f_objs:
            if fobj:
                fobj.document = self
                fobj.record = self.record
                fobj.save()

    # A document with no `original` is its own original
    if not self.original:
        self.original = self
        save_again = True

    if save_again:
        self.save()
def save(self, *args, **kwargs):
    """ Handle document processing whenever a new document is created.

    First save (``self.processed`` False): runs the processing pipeline,
    fills in metadata (type, size, digest), moves binary payloads out of
    the content column into a content file, cleans up the replaced
    document, and links extracted fact objects back to this document.
    Re-saves of an already-processed document go straight to the normal
    model save.
    """
    if self.processed:
        doc = None  # Nothing to do here
    else:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        doc = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        if not self.pha and self.content:
            doc.process()

        # Delete fact objects from the document we are replacing
        if self.replaces:
            from indivo.models import Fact
            Fact.objects.filter(document=self.replaces).delete()

        # Update document info based on processing; caller-supplied
        # values take precedence over derived ones.
        self.type = self.type if self.type else doc.get_document_schema()
        self.size = self.size if self.size else doc.get_document_size()
        self.digest = self.digest if self.digest else doc.get_document_digest()

        # Create our content file if we are binary.
        # BUGFIX: build the ContentFile *before* clearing self.content —
        # the previous order produced ContentFile(None), losing the payload.
        cf = None
        if doc.is_binary:
            cf = ContentFile(self.content)
            self.content = None

    # Oracle is incompatible with multi-column unique constraints where
    # one column might be null (i.e., UNIQUE(record, external_id)).
    # We therefore insure that all Documents have an external id,
    # mirroring the internal id if none was passed in.
    # Set the external_id to a random uuid so that we can save it to the
    # db before it has an internal id
    if not self.external_id:
        self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document, self).save(*args, **kwargs)

    # Will we need to rewrite this to the DB after changes?
    save_again = False

    # Now that we have an id, we can handle any document-processing stuff that requires an id
    if not self.processed:
        # save our content file if we were binary, now that we have an id.
        if cf:
            # Don't force a save now, as we will resave later
            self.content_file.save(self.id, cf, save=False)

        # We can also mark the document we are replacing as replaced by us
        if self.replaces:
            self.replaces.replaced_by = self
            self.replaces.save()

        # Mark document as processed
        self.processed = True
        save_again = True

    # If we set a temporary external_id, set it to mirror the internal id
    if self.external_id.startswith('TEMP-EXTID'):
        self.external_id = self.id
        save_again = True

    # Update newly created Fact objs, if we created any
    if doc and hasattr(doc, 'f_objs'):
        for fobj in doc.f_objs:
            if fobj:
                fobj.document = self
                fobj.record = self.record
                fobj.save()

    # A document with no `original` is its own original
    if not self.original:
        self.original = self
        save_again = True

    if save_again:
        self.save()
def save(self, *args, **kwargs):
    """ Handle document processing whenever a new document is created.

    Processes the content once (guarded by ``self.processed``), ensures
    every document has an external_id before the first DB write, saves
    the row, then links extracted facts and fixes up the temporary
    external_id with a second save if needed.
    """
    # Facts extracted during processing; linked after the first save
    # assigns this document an id.
    fobjs_to_update = []
    if not self.processed:
        # import dynamically because DocumentProcessing imports DocumentSchema from this file
        from indivo.document_processing.document_processing import DocumentProcessing
        doc = DocumentProcessing(self.content, self.mime_type)

        # Process the Doc, if necessary
        if not self.pha and self.content:
            doc.process()

        # Update fact docs as Necessary
        if hasattr(doc, 'f_objs'):
            for fobj in doc.f_objs:
                # Delete fact objects from the document we are replacing
                if self.replaces:
                    fobj.__class__.objects.filter(document=self.replaces).delete()
                # we can't update here, since we don't have an id yet
                if fobj:
                    fobjs_to_update.append(fobj)

        # Update document info based on processing.
        # Binary payloads are not kept in the content column.
        if doc.is_binary:
            self.content = None
        # Caller-supplied metadata takes precedence over derived values.
        self.type = self.type if self.type else doc.get_document_schema()
        self.size = self.size if self.size else doc.get_document_size()
        self.digest = self.digest if self.digest else doc.get_document_digest()

        # Mark document as processed
        self.processed = True

    # Oracle is incompatible with multi-column unique constraints where
    # one column might be null (i.e., UNIQUE(record, external_id)).
    # We therefore insure that all Documents have an external id,
    # mirroring the internal id if none was passed in.
    # Set the external_id to a random uuid so that we can save it to the
    # db before it has an internal id
    if not self.external_id:
        self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document,self).save(*args, **kwargs)

    # Do we need to rewrite this to the DB after changes?
    save_again = False

    # If we set a temporary external_id, set it to mirror the internal id
    if self.external_id.startswith('TEMP-EXTID'):
        self.external_id = self.id
        save_again = True

    # Update newly created Fact objs, if any — the document now has an id.
    for fobj in fobjs_to_update:
        fobj.document = self
        fobj.record = self.record
        fobj.save()

    # A document with no `original` is its own original
    if not self.original:
        self.original = self
        save_again = True

    if save_again:
        self.save()