Python DocumentProcessing.DocumentProcessing 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: indivo.document_processing.document_processing

클래스/타입: DocumentProcessing

메소드/함수: DocumentProcessing

hotexamples.com에서의 예제들: 7

Python DocumentProcessing.DocumentProcessing - 예제 7개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, indivo.document_processing.document_processing.DocumentProcessing.DocumentProcessing의 평점이 가장 높은 실제 Python 사용 예제입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

DocumentProcessing(7)

expand_schema(7)

get_document_size(4)

get_document_digest(3)

get_document_schema(3)

process(2)

get_type(1)

is_binary(1)

예제 #1

파일 보기

파일: records_and_documents.py 프로젝트: claimat/indivo_server

    def replace(self, new_content, new_mime_type):
        """
        Swap this document's content and mime type for new values, in place.

        For non-binary content the new content and mime type are stored and
        the derived fields (type, size, digest) are cleared so that the next
        round of document processing fills them in again. In every case the
        document is flagged as unprocessed and saved.

        Raises ValueError if this document has already been replaced.
        Returns True.
        """
        if self.replaced_by:
            raise ValueError(
                "cannot replace a document that is already replaced")

        from indivo.document_processing.document_processing import DocumentProcessing
        candidate = DocumentProcessing(new_content, new_mime_type)

        if candidate.is_binary:
            # Binary content: nothing is copied onto the document here
            # (the original code leaves this as an open question).
            pass
        else:
            # Take over the new content and mime type
            self.content = candidate.content
            self.mime_type = new_mime_type

            # Clear the derived fields; doc processing will repopulate them
            self.type = self.size = self.digest = None

        # The content has changed, so it must be processed again on save
        self.processed = False
        self.save()
        return True

예제 #2

파일 보기

파일: message.py 프로젝트: travisjgood/indivo_server

 def _setupargs(self,
                attachment_num=1,
                message=None,
                content='<?xml version="1.0" ?><body></body>',
                size=None,
                type=None):
     """
     Store the attachment fields on this instance.

     A falsy size falls back to len(content); a falsy type falls back to the
     xml_type reported by DocumentProcessing for the content, treated as
     'application/xml'.
     """
     self.message, self.attachment_num, self.content = message, attachment_num, content

     # Derive the size from the content unless a (truthy) size was given
     self.size = size if size else len(content)

     # Only run document processing when no (truthy) type was given
     if type:
         self.type = type
     else:
         self.type = DocumentProcessing(content, 'application/xml').xml_type

예제 #3

파일 보기

    def save(self, *args, **kwargs):
        """
        Handle document processing whenever a new document is created.

        If the document has not been processed yet, this runs
        DocumentProcessing over its content, removes the fact objects of the
        document being replaced (if any), fills in the derived fields
        (type, size, digest) that are still empty, and marks the document as
        processed. The document is then saved, after which the fact objects
        produced by processing are linked to it and saved too. Finally, a
        document without an ``original`` becomes its own original and is
        saved once more.

        ``*args`` and ``**kwargs`` are forwarded to the parent ``save()``.
        """
        fobjs_to_update = []
        if not self.processed:
            # import dynamically because DocumentProcessing imports DocumentSchema from this file
            from indivo.document_processing.document_processing import DocumentProcessing
            doc = DocumentProcessing(self.content, self.mime_type)

            # Process the Doc, if necessary
            if not self.pha and self.content:
                doc.process()

            # Update fact docs as Necessary
            if hasattr(doc, 'f_objs'):
                for fobj in doc.f_objs:

                    # Skip empty entries before touching them: the delete
                    # below dereferences fobj.__class__.objects, which does
                    # not exist for a missing (None) fact object.
                    if not fobj:
                        continue

                    # Delete fact objects from the document we are replacing
                    if self.replaces:
                        fobj.__class__.objects.filter(
                            document=self.replaces).delete()

                    # we can't update here, since we don't have an id yet
                    fobjs_to_update.append(fobj)

            # Update document info based on processing
            if doc.is_binary:
                self.content = None
            self.type = self.type if self.type else doc.get_document_schema()
            self.size = self.size if self.size else doc.get_document_size()
            self.digest = self.digest if self.digest else doc.get_document_digest(
            )

            # Mark document as processed
            self.processed = True

        super(Document, self).save(*args, **kwargs)

        # Update newly created Fact objs, if any
        for fobj in fobjs_to_update:
            fobj.document = self
            fobj.record = self.record
            fobj.save()

        # A document with no original is its own original; this needs a
        # second save because it can only be set once the document exists.
        if not self.original:
            self.original = self
            self.save()

예제 #4

파일 보기

    def replace(self, new_content, new_mime_type):
        """
        Replace the content of the current document with new content and mime_type.

        For non-binary content, the schema type, digest and size are
        recomputed from the new content and stored together with the new
        content and mime type. Binary content is currently left untouched.
        The document is saved in both cases.

        Raises Exception if this document has already been replaced.
        Returns True.
        """
        if self.replaced_by:
            raise Exception(
                "cannot replace a document that is already replaced")

        from indivo.document_processing.document_processing import DocumentProcessing
        new_doc = DocumentProcessing(new_content, new_mime_type)
        if not new_doc.is_binary:
            self.type = new_doc.get_document_schema()
            self.digest = new_doc.get_document_digest()
            self.size = new_doc.get_document_size()
            self.content = new_doc.content
            # Keep the stored mime type in step with the new content;
            # previously it was passed in but never recorded.
            self.mime_type = new_mime_type
        else:
            # Why aren't we doing anything for binaries?
            pass
        self.save()
        return True

예제 #5

파일 보기

파일: messaging.py 프로젝트: travisjgood/indivo_server

    def add_attachment(self, attachment_num, content):
        """
        Create and return a MessageAttachment for this message.

        attachment_num is 1-indexed and must not exceed self.num_attachments;
        otherwise an Exception is raised. The content is always treated as
        XML, and the attachment's size and type are taken from
        DocumentProcessing.
        """
        requested = int(attachment_num)
        if requested > self.num_attachments:
            raise Exception("attachment num is too high")

        from indivo.document_processing.document_processing import DocumentProcessing

        # Only handle XML attachments for now
        parsed = DocumentProcessing(content, 'application/xml')

        return MessageAttachment.objects.create(
            message=self,
            content=content,
            size=parsed.size,
            type=parsed.fqn,
            attachment_num=attachment_num)

예제 #6

파일 보기

파일: records_and_documents.py 프로젝트: claimat/indivo_server

    def save(self, *args, **kwargs):
        """
        Handle document processing whenever a new document is created.

        For an unprocessed document this method runs DocumentProcessing on
        the content, deletes the Fact objects of the document being replaced,
        fills in the derived fields (type, size, digest) that are still
        empty, and, for binary content, moves the content into a content
        file. The document is saved once to obtain an id, then everything
        that needs that id is completed (content file, ``replaced_by`` link,
        external id, fact objects, ``original``) and the document is saved
        again if anything changed.

        ``*args`` and ``**kwargs`` are forwarded to the parent ``save()``.
        """
        # Content file to write once we have an id (binary documents only)
        cf = None

        if self.processed:
            doc = None  # Nothing to do here

        else:
            # import dynamically because DocumentProcessing imports DocumentSchema from this file
            from indivo.document_processing.document_processing import DocumentProcessing
            doc = DocumentProcessing(self.content, self.mime_type)

            # Process the Doc, if necessary
            if not self.pha and self.content:
                doc.process()

            # Delete fact objects from the document we are replacing
            if self.replaces:
                from indivo.models import Fact
                Fact.objects.filter(document=self.replaces).delete()

            # Update document info based on processing
            self.type = self.type if self.type else doc.get_document_schema()
            self.size = self.size if self.size else doc.get_document_size()
            self.digest = self.digest if self.digest else doc.get_document_digest(
            )

            # Create our content file if we are binary. The file must be
            # built BEFORE the in-database content is cleared, otherwise it
            # would be created from None and the binary payload would be lost.
            if doc.is_binary:
                cf = ContentFile(self.content)
                self.content = None

        # Oracle is incompatible with multi-column unique constraints where
        # one column might be null (i.e., UNIQUE(record, external_id)).
        # We therefore insure that all Documents have an external id,
        # mirroring the internal id if none was passed in.

        # Set the external_id to a random uuid so that we can save it to the
        # db before it has an internal id
        if not self.external_id:
            self.external_id = 'TEMP-EXTID' + str(uuid.uuid4())

        super(Document, self).save(*args, **kwargs)

        # Will we need to rewrite this to the DB after changes?
        save_again = False

        # Now that we have an id, we can handle any document-processing stuff that requires an id
        if not self.processed:

            # save our content file if we were binary, now that we have an id.
            if cf:
                self.content_file.save(
                    self.id, cf, save=False
                )  # Don't force a save now, as we will resave later

            # We can also mark the document we are replacing as replaced by us
            if self.replaces:
                self.replaces.replaced_by = self
                self.replaces.save()

            # Mark document as processed
            self.processed = True
            save_again = True

        # If we set a temporary external_id, set it to mirror the internal id
        if self.external_id.startswith('TEMP-EXTID'):
            self.external_id = self.id
            save_again = True

        # Update newly created Fact objs, if we created any
        if doc and hasattr(doc, 'f_objs'):
            for fobj in doc.f_objs:
                if fobj:
                    fobj.document = self
                    fobj.record = self.record
                    fobj.save()

        # A document with no original is its own original
        if not self.original:
            self.original = self
            save_again = True

        # The recursive call sees processed=True and a real external_id, so
        # it only persists the changes made above.
        if save_again:
            self.save()

예제 #7

파일 보기

def _document_create(creator,
                     content,
                     pha,
                     record,
                     replaces_document=None,
                     external_id=None,
                     mime_type=None,
                     status=None):
    """ Create an Indivo Document

    This is called for both document creation within a record
    and document creation within a record for a specific application.

    The PHA argument, if non-null, indicates app-specificity only.
    By this point, the external_id should be fully formed.

    FIXME: figure out the transactional aspect here

    If status is specified, then it is used, otherwise it is not specified
    and the DB does its default thing.

    When both ``pha`` and ``replaces_document`` are given, the existing
    document is overwritten in place via its ``replace()`` method and
    returned. Otherwise a new Document is created (with its binary content
    file saved, if the content is binary), the replaced document, if any, is
    marked as replaced by it, and the new document is returned.
    """
    new_doc = None

    # Overwrite content if we are replacing an existing PHA doc
    if pha and replaces_document:
        replaces_document.replace(content, mime_type)

    # Create new document
    else:
        creator = creator.effective_principal
        doc_args = {
            PHA: pha,
            RECORD: record,
            CREATOR: creator,
            MIME_TYPE: mime_type,
            EXTERNAL_ID: external_id,
            REPLACES: replaces_document,
            CONTENT: content,
            ORIGINAL_ID:
            replaces_document.original_id if replaces_document else None,
        }
        # Only pass a status when one was given, so the DB default applies
        # otherwise. (This used to write to an undefined `create_args`.)
        if status:
            doc_args[STATUS] = status

        # create the document
        new_doc = Document.objects.create(**doc_args)

        # Save the binary file
        if DocumentProcessing(content, mime_type).is_binary:
            binary_file = ContentFile(content)
            new_doc.content_file.save(new_doc.id, binary_file)

        # Mark old doc as replaced
        if replaces_document:
            replaces_document.replaced_by = new_doc
            replaces_document.save()

    # return new doc if we have it, otherwise updated old doc
    return new_doc or replaces_document