Example #1
0
    def process_document_page(self, document_page):
        """Run OCR on a single document page and store the recognized text.

        The page image is generated through
        ``task_generate_document_page_image``; the resulting cache file is
        opened from ``storage_documentimagecache`` and handed to
        ``ocr_backend.execute`` together with the document's language. The
        recognized text is saved in the page's ``DocumentPageOCRContent``
        record.

        The OCR backend is executed before any database write so that a
        failing backend does not leave an empty content record behind.

        :param document_page: the page instance to process; its ``pk``,
            ``page_number``, ``document_version`` and ``document.language``
            attributes are read.
        """
        logger.info(
            'Processing page: %d of document version: %s',
            document_page.page_number, document_page.document_version
        )

        # Resolved at call time through the app registry instead of a
        # module-level import.
        DocumentPageOCRContent = apps.get_model(
            app_label='ocr', model_name='DocumentPageOCRContent'
        )

        task = task_generate_document_page_image.apply_async(
            kwargs=dict(
                document_page_id=document_page.pk
            )
        )

        # Blocks until the image task returns the cache filename.
        # NOTE(review): if this method itself runs inside a Celery task,
        # newer Celery releases may require ``disable_sync_subtasks=False``
        # here - confirm against the Celery version in use.
        cache_filename = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)

        with storage_documentimagecache.open(cache_filename) as file_object:
            # OCR first: if the backend raises, nothing has been written yet.
            ocr_content = ocr_backend.execute(
                file_object=file_object,
                language=document_page.document.language
            )
            # Create the record or overwrite its content in a single call.
            DocumentPageOCRContent.objects.update_or_create(
                document_page=document_page, defaults={
                    'content': ocr_content
                }
            )

        logger.info(
            'Finished processing page: %d of document version: %s',
            document_page.page_number, document_page.document_version
        )
Example #2
0
    def process_document_page(self, document_page):
        """OCR one document page and persist the recognized text.

        Steps, in order:

        1. Queue ``task_generate_document_page_image`` for the page and wait
           for its result (the cache filename), using
           ``DOCUMENT_IMAGE_TASK_TIMEOUT`` as the timeout.
        2. Open that file from the page's ``cache_partition``.
        3. Pass the open file and the document's language to
           ``ocr_backend.execute``.
        4. Store the returned text in the page's ``DocumentPageOCRContent``
           record via ``update_or_create``.

        :param document_page: the page instance to process.
        """
        logger.info(
            'Processing page: %d of document version: %s',
            document_page.page_number,
            document_page.document_version
        )

        # Looked up through the app registry at call time.
        DocumentPageOCRContent = apps.get_model(
            app_label='ocr', model_name='DocumentPageOCRContent'
        )

        image_task = task_generate_document_page_image.apply_async(
            kwargs={'document_page_id': document_page.pk}
        )

        # Synchronous wait on the image task; sync-subtask checking is
        # explicitly turned off for this call.
        image_filename = image_task.get(
            timeout=DOCUMENT_IMAGE_TASK_TIMEOUT, disable_sync_subtasks=False
        )

        cached_image = document_page.cache_partition.get_file(
            filename=image_filename
        )

        with cached_image.open() as image_file:
            recognized_text = ocr_backend.execute(
                file_object=image_file,
                language=document_page.document.language
            )
            # Single call that either creates the record or replaces its
            # content.
            DocumentPageOCRContent.objects.update_or_create(
                document_page=document_page,
                defaults={'content': recognized_text}
            )

        logger.info(
            'Finished processing page: %d of document version: %s',
            document_page.page_number,
            document_page.document_version
        )