Esempio n. 1
0
def _process_doc(content, pha):
    """process the document into medical facts. exceptions are passed up without processing."""
    if content:
        doc = DocumentProcessing(content)
        doc.is_binary = DocumentUtils().is_binary(content)
        if not pha:
            doc.process()
        doc.get_document_schema()
        return doc

    return False
Esempio n. 2
0
    def save(self, *args, **kwargs):
        """
        Process a not-yet-processed document, then persist it and its facts.

        On the first save the content is handed to ``DocumentProcessing``
        (``process()`` only runs when ``self.pha`` is unset and there is
        content), any missing ``type``, ``size`` and ``digest`` values are
        filled in, and binary content is cleared from the ``content`` field.
        Fact objects produced by processing are saved after the document
        itself, and a document without an ``original`` becomes its own
        original and is saved once more.
        """
        pending_facts = []

        if not self.processed:
            # Imported lazily because DocumentProcessing imports DocumentSchema from this file
            from indivo.document_processing.document_processing import DocumentProcessing
            processor = DocumentProcessing(self.content, self.mime_type)

            # Only documents with no pha and some content get processed
            if not self.pha and self.content:
                processor.process()

            if hasattr(processor, 'f_objs'):
                for fact in processor.f_objs:

                    # Drop same-class facts attached to the document we replace
                    if self.replaces:
                        stale_facts = fact.__class__.objects.filter(
                            document=self.replaces)
                        stale_facts.delete()

                    # Linking to this document must wait until it has an id
                    if fact:
                        pending_facts.append(fact)

            # Binary payloads are not kept in the content field
            if processor.is_binary:
                self.content = None

            # Fill in only the metadata that was not supplied already
            if not self.type:
                self.type = processor.get_document_schema()
            if not self.size:
                self.size = processor.get_document_size()
            if not self.digest:
                self.digest = processor.get_document_digest()

            self.processed = True

        super(Document, self).save(*args, **kwargs)

        # The document is saved now, so the new facts can point at it
        for fact in pending_facts:
            fact.document = self
            fact.record = self.record
            fact.save()

        # A document with no original is its own original
        if not self.original:
            self.original = self
            self.save()
  def save(self, *args, **kwargs):
    """
    Run first-time document processing, save the document, then save its facts.

    While ``self.processed`` is false the content is passed to
    ``DocumentProcessing``; ``process()`` is invoked only when ``self.pha`` is
    unset and the content is non-empty. Unset ``type``, ``size`` and ``digest``
    fields are derived from the processed document, binary content is removed
    from ``content``, and the document is flagged as processed. Facts created
    by processing are attached and saved after the document has been written.
    """
    new_facts = []

    if not self.processed:
      # Deferred import: DocumentProcessing imports DocumentSchema from this file
      from indivo.document_processing.document_processing import DocumentProcessing
      parsed = DocumentProcessing(self.content, self.mime_type)

      if not self.pha and self.content:
        parsed.process()

      if hasattr(parsed, 'f_objs'):
        for candidate in parsed.f_objs:
          # Facts of this class that hang off the replaced document go away
          if self.replaces:
            candidate.__class__.objects.filter(document=self.replaces).delete()

          # Keep truthy facts for later; they cannot reference us before we are saved
          if candidate:
            new_facts.append(candidate)

      if parsed.is_binary:
        self.content = None

      # Existing values win; otherwise fall back to what processing reports
      self.type = self.type or parsed.get_document_schema()
      self.size = self.size or parsed.get_document_size()
      self.digest = self.digest or parsed.get_document_digest()

      self.processed = True

    super(Document, self).save(*args, **kwargs)

    # Attach the freshly created facts to the now-saved document
    for candidate in new_facts:
      candidate.document = self
      candidate.record = self.record
      candidate.save()

    # Self-reference as original requires one more save
    if not self.original:
      self.original = self
      self.save()
  def replace(self, new_content, new_mime_type):
    """
    Overwrite this document in place with new_content / new_mime_type.

    Raises an Exception when the document has already been replaced. For
    non-binary content the type, digest, size and content fields are refreshed
    from the new data; for binary content those fields are left untouched.
    The document is saved in both cases and True is returned.
    """
    if self.replaced_by:
      raise Exception("cannot replace a document that is already replaced")

    from indivo.document_processing.document_processing import DocumentProcessing
    incoming = DocumentProcessing(new_content, new_mime_type)

    # Binary replacements change nothing here; the original author also
    # questioned why ("Why aren't we doing anything for binaries?")
    if not incoming.is_binary:
      self.type = incoming.get_document_schema()
      self.digest = incoming.get_document_digest()
      self.size = incoming.get_document_size()
      self.content = incoming.content

    self.save()
    return True
Esempio n. 5
0
    def replace(self, new_content, new_mime_type):
        """
        Swap this document's content for new_content with new_mime_type.

        A document that already has a ``replaced_by`` cannot be replaced and
        causes an Exception. When the new content is not binary, the type,
        digest, size and content are all taken from it; binary content leaves
        the existing values alone. The document is then saved and True is
        returned.
        """
        already_replaced = self.replaced_by
        if already_replaced:
            raise Exception(
                "cannot replace a document that is already replaced")

        from indivo.document_processing.document_processing import DocumentProcessing
        replacement = DocumentProcessing(new_content, new_mime_type)

        if not replacement.is_binary:
            self.type = replacement.get_document_schema()
            self.digest = replacement.get_document_digest()
            self.size = replacement.get_document_size()
            self.content = replacement.content
        # else: binaries are deliberately skipped; the original author's open
        # question was "Why aren't we doing anything for binaries?"

        self.save()
        return True
  def save(self, *args, **kwargs):
    """
    Handle document processing whenever a new document is created. This method
    processes the document, updates fact objects, and then saves the document.

    The work is split around the underlying model save:

    * Before it, an unprocessed document is run through DocumentProcessing,
      facts of the replaced document are deleted, unset type/size/digest
      values are filled in, binary content is moved into a ContentFile, and a
      temporary external_id is assigned if none was given.
    * After it, the content file is stored, the replaced document is pointed
      at this one, the temporary external_id is swapped for the internal id,
      new fact objects are linked and saved, and a missing ``original`` is set
      to the document itself.

    If any post-save step changed the document, save() is called once more.
    All positional and keyword arguments are forwarded to the parent save().
    """
    if self.processed:
      doc = None # Nothing to do here

    else:
      # import dynamically because DocumentProcessing imports DocumentSchema from this file
      from indivo.document_processing.document_processing import DocumentProcessing
      doc = DocumentProcessing(self.content, self.mime_type)

      # Process the Doc, if necessary: only when no pha is set and there is content
      if not self.pha and self.content:
        doc.process()

      # Delete fact objects from the document we are replacing
      if self.replaces:
        from indivo.models import Fact
        Fact.objects.filter(document = self.replaces).delete()

      # Update document info based on processing, keeping any values already set
      self.type = self.type if self.type else doc.get_document_schema()
      self.size = self.size if self.size else doc.get_document_size()
      self.digest = self.digest if self.digest else doc.get_document_digest()

      # Create our content file if we are binary. The content is wrapped
      # first and only then cleared from the content field.
      # NOTE: cf is bound only in this branch; the post-save code reads it
      # under the same "not self.processed" condition.
      cf = None
      if doc.is_binary:
        cf = ContentFile(self.content)
        self.content = None

    # Oracle is incompatible with multi-column unique constraints where
    # one column might be null (i.e., UNIQUE(record, external_id)).
    # We therefore ensure that all Documents have an external id,
    # mirroring the internal id if none was passed in.

    # Set the external_id to a random uuid so that we can save it to the
    # db before it has an internal id
    if not self.external_id:
      self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document,self).save(*args, **kwargs)

    # Will we need to rewrite this to the DB after changes?
    save_again = False

    # Now that we have an id, we can handle any document-processing stuff that requires an id
    if not self.processed:

      # save our content file if we were binary, now that we have an id.
      # The id is passed as the first argument (presumably the stored file name -- confirm).
      if cf:
        self.content_file.save(self.id, cf, save=False) # Don't force a save now, as we will resave later

      # We can also mark the document we are replacing as replaced by us
      if self.replaces:
        self.replaces.replaced_by = self
        self.replaces.save()

      # Mark document as processed; the flag itself still has to be written, hence the resave
      self.processed = True
      save_again = True

    # If we set a temporary external_id, set it to mirror the internal id
    if self.external_id.startswith('TEMP-EXTID'):
      self.external_id = self.id
      save_again = True

    # Update newly created Fact objs, if we created any (doc is None when
    # the document was already processed)
    if doc and hasattr(doc, 'f_objs'):
      for fobj in doc.f_objs:
        if fobj:
          fobj.document = self
          fobj.record = self.record
          fobj.save()

    # A document without an original is treated as its own original
    if not self.original:
      self.original = self
      save_again = True

    # Persist whatever the post-save steps changed
    if save_again:
      self.save()
    def save(self, *args, **kwargs):
        """
        Handle document processing whenever a new document is created. This
        method processes the document, updates fact objects, and then saves
        the document.

        Before the underlying model save, an unprocessed document is run
        through DocumentProcessing, facts of the replaced document are
        deleted, unset type/size/digest values are filled in, binary content
        is moved into a ContentFile, and a temporary external_id is assigned
        if none was given. After it, the content file is stored, the replaced
        document is pointed at this one, the temporary external_id is swapped
        for the internal id, new fact objects are linked and saved, and a
        missing ``original`` is set to the document itself. If any post-save
        step changed the document, save() is called once more.

        All positional and keyword arguments are forwarded to the parent
        save().
        """
        if self.processed:
            doc = None  # Nothing to do here

        else:
            # import dynamically because DocumentProcessing imports DocumentSchema from this file
            from indivo.document_processing.document_processing import DocumentProcessing
            doc = DocumentProcessing(self.content, self.mime_type)

            # Process the Doc, if necessary: only when no pha is set and there is content
            if not self.pha and self.content:
                doc.process()

            # Delete fact objects from the document we are replacing
            if self.replaces:
                from indivo.models import Fact
                Fact.objects.filter(document=self.replaces).delete()

            # Update document info based on processing, keeping any values already set
            self.type = self.type if self.type else doc.get_document_schema()
            self.size = self.size if self.size else doc.get_document_size()
            self.digest = self.digest if self.digest else doc.get_document_digest()

            # Create our content file if we are binary.
            # The content must be wrapped BEFORE the field is cleared;
            # doing it the other way round builds the file from None and
            # loses the binary payload.
            cf = None
            if doc.is_binary:
                cf = ContentFile(self.content)
                self.content = None

        # Oracle is incompatible with multi-column unique constraints where
        # one column might be null (i.e., UNIQUE(record, external_id)).
        # We therefore ensure that all Documents have an external id,
        # mirroring the internal id if none was passed in.

        # Set the external_id to a random uuid so that we can save it to the
        # db before it has an internal id
        if not self.external_id:
            self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

        super(Document, self).save(*args, **kwargs)

        # Will we need to rewrite this to the DB after changes?
        save_again = False

        # Now that we have an id, we can handle any document-processing stuff that requires an id
        if not self.processed:

            # save our content file if we were binary, now that we have an id.
            if cf:
                self.content_file.save(
                    self.id, cf, save=False
                )  # Don't force a save now, as we will resave later

            # We can also mark the document we are replacing as replaced by us
            if self.replaces:
                self.replaces.replaced_by = self
                self.replaces.save()

            # Mark document as processed; the flag still has to be written, hence the resave
            self.processed = True
            save_again = True

        # If we set a temporary external_id, set it to mirror the internal id
        if self.external_id.startswith('TEMP-EXTID'):
            self.external_id = self.id
            save_again = True

        # Update newly created Fact objs, if we created any (doc is None when
        # the document was already processed)
        if doc and hasattr(doc, 'f_objs'):
            for fobj in doc.f_objs:
                if fobj:
                    fobj.document = self
                    fobj.record = self.record
                    fobj.save()

        # A document without an original is treated as its own original
        if not self.original:
            self.original = self
            save_again = True

        # Persist whatever the post-save steps changed
        if save_again:
            self.save()
  def save(self, *args, **kwargs):
    """
    Run first-time document processing, then save the document and its facts.

    While ``self.processed`` is false the content is passed to
    ``DocumentProcessing`` (``process()`` runs only when ``self.pha`` is unset
    and there is content), unset ``type``, ``size`` and ``digest`` fields are
    filled in, binary content is cleared from ``content``, and the document is
    flagged as processed. A document without an external_id gets a temporary
    one for the first write, which is replaced by the internal id afterwards.
    Facts produced by processing are linked and saved after the document, and
    the document is saved a second time if its external_id or original changed.
    """
    facts_awaiting_id = []

    if not self.processed:
      # Deferred import: DocumentProcessing imports DocumentSchema from this file
      from indivo.document_processing.document_processing import DocumentProcessing
      parsed = DocumentProcessing(self.content, self.mime_type)

      if not self.pha and self.content:
        parsed.process()

      if hasattr(parsed, 'f_objs'):
        for fact in parsed.f_objs:
          # Facts of this class attached to the replaced document are removed
          if self.replaces:
            outdated = fact.__class__.objects.filter(document=self.replaces)
            outdated.delete()

          # Truthy facts are linked later, once this document has an id
          if fact:
            facts_awaiting_id.append(fact)

      if parsed.is_binary:
        self.content = None

      # Only fill in metadata the caller left empty
      if not self.type:
        self.type = parsed.get_document_schema()
      if not self.size:
        self.size = parsed.get_document_size()
      if not self.digest:
        self.digest = parsed.get_document_digest()

      self.processed = True

    # Oracle cannot enforce a multi-column unique constraint when one of the
    # columns may be null (i.e., UNIQUE(record, external_id)), so every
    # Document carries an external id. A random placeholder lets the row be
    # written before the internal id exists.
    if not self.external_id:
      self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

    super(Document, self).save(*args, **kwargs)

    # A placeholder external_id is swapped for the internal id, which needs a rewrite
    needs_resave = self.external_id.startswith('TEMP-EXTID')
    if needs_resave:
      self.external_id = self.id

    # Attach the freshly created facts to the now-saved document
    for fact in facts_awaiting_id:
      fact.document = self
      fact.record = self.record
      fact.save()

    # A document with no original is its own original
    if not self.original:
      self.original = self
      needs_resave = True

    if needs_resave:
      self.save()