コード例 #1
0
ファイル: views.py プロジェクト: mabroor/mayan
def queue_document_list(request, queue_name='default'):
    """
    Render the list of documents held in the OCR queue `queue_name`.

    PERMISSION_OCR_DOCUMENT is checked for the requesting user before the
    queue is looked up by name with get_object_or_404.
    """
    check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])

    document_queue = get_object_or_404(DocumentQueue, name=queue_name)
    queued_documents = document_queue.queuedocument_set.all()
    list_title = _(u'documents in queue: %s') % document_queue

    # One dictionary per column; computed values are lambdas wrapped with
    # encapsulate(), plain values are referenced by attribute name.
    columns = [
        {
            'name': 'document',
            # Entries for which hasattr(x, 'document') is false show a
            # placeholder text instead of a link.
            'attribute': encapsulate(
                lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')
            ),
        },
        {
            'name': _(u'thumbnail'),
            'attribute': encapsulate(lambda x: document_thumbnail(x.document)),
        },
        {
            'name': 'submitted',
            # Keep only the text before the first '.' of the timestamp.
            'attribute': encapsulate(
                lambda x: unicode(x.datetime_submitted).split('.')[0]
            ),
            'keep_together': True,
        },
        {'name': 'delay', 'attribute': 'delay'},
        {
            'name': 'state',
            'attribute': encapsulate(lambda x: x.get_state_display()),
        },
        {'name': 'node', 'attribute': 'node_name'},
        {'name': 'result', 'attribute': 'result'},
    ]

    # Sidebar box showing the current state of the queue itself.
    queue_properties = {
        'name': 'generic_subtemplate.html',
        'context': {
            'side_bar': True,
            'title': _(u'document queue properties'),
            'content': _(u'Current state: %s') % document_queue.get_state_display(),
        },
    }

    extra_context = {
        'title': list_title,
        'hide_object': True,
        'queue': document_queue,
        'object_name': _(u'document queue'),
        'navigation_object_name': 'queue',
        'list_object_variable_name': 'queue_document',
        'extra_columns': columns,
        'multi_select_as_buttons': True,
        'sidebar_subtemplates_list': [queue_properties],
    }

    return object_list(
        request,
        queryset=queued_documents,
        template_name='generic_list.html',
        extra_context=extra_context,
    )
コード例 #2
0

@receiver(post_version_upload, dispatch_uid='post_version_upload_ocr', sender=DocumentVersion)
def post_version_upload_ocr(sender, instance, **kwargs):
    """
    Submit a newly uploaded document version for OCR.

    Connected to post_version_upload for DocumentVersion senders. The
    version is only submitted when its document's type has `ocr` set.
    """
    logger.debug('received post_version_upload')
    logger.debug('instance pk: %s', instance.pk)

    document_type = instance.document.document_type
    if not document_type.ocr:
        # OCR is not enabled for this document type; nothing to do.
        return

    instance.submit_for_ocr()


# Expose OCR submission as a `submit_for_ocr` method on both models.
Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

# Columns registered for DocumentVersionOCRError entries.
register_model_list_columns(
    DocumentVersionOCRError,
    [
        {
            'name': _('Document'),
            # Link to the document that owns the failed version.
            'attribute': encapsulate(
                lambda entry: document_link(entry.document_version.document)
            ),
        },
        {'name': _('Added'), 'attribute': 'datetime_submitted'},
        {'name': _('Result'), 'attribute': 'result'},
    ]
)
コード例 #3
0
    def ready(self):
        """
        Wire the document parsing app into the project.

        Adds the parsing helpers to the document models, then registers
        the permission, list columns, Celery queue and route, search
        fields, menu links and the version upload signal handler.
        """
        super(DocumentParsingApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        # Models are looked up through the app registry at call time.
        Document = apps.get_model(app_label='documents', model_name='Document')
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )
        DocumentVersionParseError = self.get_model('DocumentVersionParseError')

        Document.add_to_class('submit_for_parsing', document_parsing_submit)
        for method_name, method in (
            ('content', get_document_content),
            ('submit_for_parsing', document_version_parsing_submit),
        ):
            DocumentVersion.add_to_class(method_name, method)

        ModelPermission.register(
            model=Document, permissions=(permission_content_view,)
        )

        # Columns for the parse error list.
        SourceColumn(
            source=DocumentVersionParseError,
            label=_('Document'),
            func=lambda context: document_link(
                context['object'].document_version.document
            )
        )
        SourceColumn(
            source=DocumentVersionParseError,
            label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionParseError,
            label=_('Result'),
            attribute='result'
        )

        # Dedicated 'parsing' queue and the route that sends the parse task
        # to it.
        parsing_queue = Queue(
            'parsing', Exchange('parsing'), routing_key='parsing'
        )
        app.conf.CELERY_QUEUES.append(parsing_queue)

        parsing_routes = {
            'document_parsing.tasks.task_parse_document_version': {
                'queue': 'parsing'
            },
        }
        app.conf.CELERY_ROUTES.update(parsing_routes)

        document_search.add_model_field(
            field='versions__pages__content__content', label=_('Content')
        )
        document_page_search.add_model_field(
            field='content__content', label=_('Content')
        )

        # Links bound to Document objects, in the original binding order.
        for menu, links in (
            (menu_facet, (link_document_content,)),
            (menu_multi_item, (link_document_submit_multiple,)),
            (menu_object, (link_document_submit,)),
        ):
            menu.bind_links(links=links, sources=(Document,))

        secondary_links = (
            link_document_content, link_document_parsing_errors_list,
            link_document_content_download
        )
        secondary_sources = (
            'document_parsing:document_content',
            'document_parsing:document_content_download',
            'document_parsing:document_parsing_error_list',
        )
        menu_secondary.bind_links(
            links=secondary_links, sources=secondary_sources
        )

        menu_tools.bind_links(
            links=(link_document_type_submit, link_error_list,)
        )

        post_version_upload.connect(
            receiver=handler_parse_document_version,
            sender=DocumentVersion,
            dispatch_uid='document_parsing_handler_parse_document_version'
        )
コード例 #4
0
    def ready(self):
        """
        Hook the OCR app into the rest of the project.

        Adds the OCR helpers to the document models and registers the
        model field, permissions, list columns, Celery queue and route,
        search fields, menu links and signal handlers.
        """
        super(OCRApp, self).ready()

        # Models are looked up through the app registry at call time.
        Document = apps.get_model(
            app_label='documents', model_name='Document'
        )
        DocumentPage = apps.get_model(
            app_label='documents', model_name='DocumentPage'
        )
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        DocumentTypeSettings = self.get_model(
            model_name='DocumentTypeSettings'
        )
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )

        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        for method_name, method in (
            ('ocr_content', get_document_ocr_content),
            ('submit_for_ocr', document_version_ocr_submit),
        ):
            DocumentVersion.add_to_class(method_name, method)

        ModelField(Document, name='versions__pages__ocr_content__content')

        ModelPermission.register(
            model=Document,
            permissions=(permission_ocr_document, permission_ocr_content_view)
        )
        ModelPermission.register(
            model=DocumentType,
            permissions=(permission_document_type_ocr_setup,)
        )
        ModelPermission.register_inheritance(
            model=DocumentTypeSettings, related='document_type',
        )

        # Columns for the OCR error list.
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=lambda context: document_link(
                context['object'].document_version.document
            )
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        # Dedicated 'ocr' queue and the route that sends the OCR task to it.
        ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
        app.conf.CELERY_QUEUES.append(ocr_queue)

        ocr_routes = {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
        app.conf.CELERY_ROUTES.update(ocr_routes)

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR')
        )
        document_page_search.add_model_field(
            field='ocr_content__content', label=_('OCR')
        )

        # (menu, links, sources) triples, bound in the original order.
        for menu, links, sources in (
            (menu_facet, (link_document_ocr_content,), (Document,)),
            (menu_facet, (link_document_page_ocr_content,), (DocumentPage,)),
            (menu_multi_item, (link_document_submit_multiple,), (Document,)),
            (menu_object, (link_document_submit,), (Document,)),
            (menu_object, (link_document_page_ocr_content,), (DocumentPage,)),
            (menu_object, (link_document_type_ocr_settings,), (DocumentType,)),
            (
                menu_secondary,
                (
                    link_document_ocr_content, link_document_ocr_errors_list,
                    link_document_ocr_download
                ),
                (
                    'ocr:document_content', 'ocr:document_ocr_error_list',
                    'ocr:document_ocr_download',
                )
            ),
            (
                menu_secondary,
                (link_entry_list,),
                (
                    'ocr:entry_list', 'ocr:entry_delete_multiple',
                    'ocr:entry_re_queue_multiple', DocumentVersionOCRError
                )
            ),
        ):
            menu.bind_links(links=links, sources=sources)

        menu_tools.bind_links(
            links=(link_document_type_submit, link_entry_list)
        )

        post_save.connect(
            receiver=handler_initialize_new_ocr_settings,
            sender=DocumentType,
            dispatch_uid='ocr_handler_initialize_new_ocr_settings'
        )
        post_version_upload.connect(
            receiver=handler_ocr_document_version,
            sender=DocumentVersion,
            dispatch_uid='ocr_handler_ocr_document_version'
        )
コード例 #5
0
ファイル: views.py プロジェクト: rkpob/mayan-edms
def queue_document_list(request, queue_name='default'):
    """
    Render 'generic_list.html' for the documents in queue `queue_name`.

    PERMISSION_OCR_DOCUMENT is checked for the requesting user before the
    queue is looked up by name with get_object_or_404.
    """
    Permission.objects.check_permissions(
        request.user, [PERMISSION_OCR_DOCUMENT]
    )

    document_queue = get_object_or_404(DocumentQueue, name=queue_name)

    # One dictionary per column; computed values are lambdas wrapped with
    # encapsulate(), plain values are referenced by attribute name.
    extra_columns = [
        {
            'name': 'document',
            # Entries for which hasattr(x, 'document') is false show a
            # placeholder text instead of a link.
            'attribute': encapsulate(
                lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')
            ),
        },
        {
            'name': _(u'thumbnail'),
            'attribute': encapsulate(lambda x: document_thumbnail(x.document)),
        },
        {
            'name': 'submitted',
            # Keep only the text before the first '.' of the timestamp.
            'attribute': encapsulate(
                lambda x: unicode(x.datetime_submitted).split('.')[0]
            ),
            'keep_together': True,
        },
        {'name': 'delay', 'attribute': 'delay'},
        {
            'name': 'state',
            'attribute': encapsulate(lambda x: x.get_state_display()),
        },
        {'name': 'node', 'attribute': 'node_name'},
        {'name': 'result', 'attribute': 'result'},
    ]

    # Sidebar box showing the current state of the queue itself.
    sidebar_subtemplates = [
        {
            'name': 'generic_subtemplate.html',
            'context': {
                'side_bar': True,
                'title': _(u'document queue properties'),
                'content': _(u'Current state: %s') % document_queue.get_state_display(),
            },
        },
    ]

    context = {
        'object_list': document_queue.queuedocument_set.all(),
        'title': _(u'documents in queue: %s') % document_queue,
        'hide_object': True,
        'queue': document_queue,
        'object_name': _(u'document queue'),
        'navigation_object_name': 'queue',
        'list_object_variable_name': 'queue_document',
        'extra_columns': extra_columns,
        'multi_select_as_buttons': True,
        'sidebar_subtemplates_list': sidebar_subtemplates,
    }

    return render_to_response(
        'generic_list.html', context,
        context_instance=RequestContext(request)
    )
コード例 #6
0
ファイル: apps.py プロジェクト: e6/mayan-edms
    def ready(self):
        """
        Register the OCR app's integrations.

        Adds the OCR submit helpers to the document models and registers
        permissions, list columns, the Celery queue and route, the search
        field, menu links and signal handlers. Finally records the
        pdftotext and tesseract versions as properties of the 'ocr'
        property namespace.
        """
        super(OCRApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelPermission.register(
            model=Document, permissions=(
                permission_ocr_document, permission_ocr_content_view
            )
        )

        # Columns for the OCR error list.
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=lambda context: document_link(context['object'].document_version.document)
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        # Dedicated 'ocr' queue and the route that sends the OCR task to it.
        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'),
        )

        app.conf.CELERY_ROUTES.update(
            {
                'ocr.tasks.task_do_ocr': {
                    'queue': 'ocr'
                },
            }
        )

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('Content')
        )

        menu_facet.bind_links(
            links=(link_document_content,), sources=(Document,)
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(link_entry_list,),
            sources=(
                'ocr:entry_list', 'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple', DocumentVersionOCRError
            )
        )
        menu_tools.bind_links(
            links=(
                link_document_submit_all, link_document_type_submit,
                link_entry_list
            )
        )

        post_save.connect(
            initialize_new_ocr_settings,
            dispatch_uid='initialize_new_ocr_settings', sender=DocumentType
        )
        post_version_upload.connect(
            post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
            sender=DocumentVersion
        )

        namespace = PropertyNamespace('ocr', _('OCR'))

        # Both binaries are probed the same way. The '-v' call is made
        # inside the try block so that a failure while running the binary
        # is reported as 'error getting version' instead of propagating out
        # of ready(); previously only the sh.Command() lookup was guarded.
        for name, label, path_setting in (
            ('pdftotext', _('pdftotext version'), setting_pdftotext_path),
            ('tesseract', _('tesseract version'), setting_tesseract_path),
        ):
            try:
                command = sh.Command(path_setting.value)
                # The version text is read from the command's stderr.
                version = command('-v').stderr
            except sh.CommandNotFound:
                version = _('not found')
            except Exception:
                # Deliberately broad: this is a best-effort report.
                version = _('error getting version')

            namespace.add_property(name, label, version, report=True)
コード例 #7
0
ファイル: apps.py プロジェクト: leolimma/maxacali
    def ready(self):
        """
        Register the OCR app's integrations.

        Adds the OCR submit helpers to the document models and registers
        permissions, list columns, the Celery queue and route, the search
        field, menu links and signal handlers. Finally records the
        pdftotext and tesseract versions as properties of the 'ocr'
        property namespace.
        """
        super(OCRApp, self).ready()

        # Models are looked up through the app registry at call time.
        Document = apps.get_model(app_label='documents', model_name='Document')

        DocumentType = apps.get_model(app_label='documents',
                                      model_name='DocumentType')

        DocumentVersion = apps.get_model(app_label='documents',
                                         model_name='DocumentVersion')

        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('submit_for_ocr',
                                     document_version_ocr_submit)

        ModelPermission.register(model=Document,
                                 permissions=(permission_ocr_document,
                                              permission_ocr_content_view))

        # Columns for the OCR error list.
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Document'),
                     func=lambda context: document_link(context[
                         'object'].document_version.document))
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Added'),
                     attribute='datetime_submitted')
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Result'),
                     attribute='result')

        # Dedicated 'ocr' queue and the route that sends the OCR task to it.
        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'), )

        app.conf.CELERY_ROUTES.update({
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        })

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR'))

        menu_facet.bind_links(links=(link_document_content, ),
                              sources=(Document, ))
        menu_multi_item.bind_links(links=(link_document_submit_multiple, ),
                                   sources=(Document, ))
        menu_object.bind_links(links=(link_document_submit, ),
                               sources=(Document, ))
        menu_object.bind_links(links=(link_document_type_ocr_settings, ),
                               sources=(DocumentType, ))
        menu_secondary.bind_links(
            links=(link_entry_list, ),
            sources=('ocr:entry_list', 'ocr:entry_delete_multiple',
                     'ocr:entry_re_queue_multiple', DocumentVersionOCRError))
        menu_tools.bind_links(links=(link_document_submit_all,
                                     link_document_type_submit,
                                     link_entry_list))

        post_save.connect(initialize_new_ocr_settings,
                          dispatch_uid='initialize_new_ocr_settings',
                          sender=DocumentType)
        post_version_upload.connect(post_version_upload_ocr,
                                    dispatch_uid='post_version_upload_ocr',
                                    sender=DocumentVersion)

        namespace = PropertyNamespace('ocr', _('OCR'))

        # Both binaries are probed the same way. The '-v' call is made
        # inside the try block so that a failure while running the binary
        # is reported as 'error getting version' instead of propagating out
        # of ready(); previously only the sh.Command() lookup was guarded.
        for name, label, path_setting in (
            ('pdftotext', _('pdftotext version'), setting_pdftotext_path),
            ('tesseract', _('tesseract version'), setting_tesseract_path),
        ):
            try:
                command = sh.Command(path_setting.value)
                # The version text is read from the command's stderr.
                version = command('-v').stderr
            except sh.CommandNotFound:
                version = _('not found')
            except Exception:
                # Deliberately broad: this is a best-effort report.
                version = _('error getting version')

            namespace.add_property(name, label, version, report=True)
コード例 #8
0
ファイル: apps.py プロジェクト: mayan-edms/mayan-edms
    def ready(self):
        """
        Hook the OCR app into the rest of the project.

        Adds the OCR helpers to the document models and registers the
        model field, permissions, list columns, Celery queue and route,
        search fields, menu links and signal handlers.
        """
        super(OCRApp, self).ready()

        # Models are looked up through the app registry at call time.
        Document = apps.get_model(app_label='documents', model_name='Document')
        DocumentPage = apps.get_model(
            app_label='documents', model_name='DocumentPage'
        )
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        DocumentTypeSettings = self.get_model(model_name='DocumentTypeSettings')
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )

        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelField(Document, name='versions__pages__ocr_content__content')

        document_permissions = (
            permission_ocr_document, permission_ocr_content_view
        )
        ModelPermission.register(
            model=Document, permissions=document_permissions
        )
        ModelPermission.register(
            model=DocumentType,
            permissions=(permission_document_type_ocr_setup,)
        )
        ModelPermission.register_inheritance(
            model=DocumentTypeSettings, related='document_type',
        )

        # Columns for the OCR error list.
        SourceColumn(
            source=DocumentVersionOCRError,
            label=_('Document'),
            func=lambda context: document_link(
                context['object'].document_version.document
            )
        )
        SourceColumn(
            source=DocumentVersionOCRError,
            label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError,
            label=_('Result'),
            attribute='result'
        )

        # Dedicated 'ocr' queue and the route that sends the OCR task to it.
        ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
        app.conf.CELERY_QUEUES.append(ocr_queue)
        app.conf.CELERY_ROUTES.update(
            {'ocr.tasks.task_do_ocr': {'queue': 'ocr'}}
        )

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR')
        )
        document_page_search.add_model_field(
            field='ocr_content__content', label=_('OCR')
        )

        # Single-link bindings as (menu, link, source model), applied in the
        # original order.
        for menu, link, source in (
            (menu_facet, link_document_ocr_content, Document),
            (menu_facet, link_document_page_ocr_content, DocumentPage),
            (menu_multi_item, link_document_submit_multiple, Document),
            (menu_object, link_document_submit, Document),
            (menu_object, link_document_page_ocr_content, DocumentPage),
            (menu_object, link_document_type_ocr_settings, DocumentType),
        ):
            menu.bind_links(links=(link,), sources=(source,))

        document_ocr_links = (
            link_document_ocr_content, link_document_ocr_errors_list,
            link_document_ocr_download
        )
        document_ocr_views = (
            'ocr:document_content', 'ocr:document_ocr_error_list',
            'ocr:document_ocr_download',
        )
        menu_secondary.bind_links(
            links=document_ocr_links, sources=document_ocr_views
        )

        entry_sources = (
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
        menu_secondary.bind_links(
            links=(link_entry_list,), sources=entry_sources
        )

        menu_tools.bind_links(
            links=(link_document_type_submit, link_entry_list)
        )

        # Signal hookups as (signal, dispatch uid, handler, sender),
        # connected in the original order.
        for signal, uid, handler, sender in (
            (
                post_document_version_ocr, 'ocr_handler_index_document',
                handler_index_document, DocumentVersion
            ),
            (
                post_save, 'ocr_handler_initialize_new_ocr_settings',
                handler_initialize_new_ocr_settings, DocumentType
            ),
            (
                post_version_upload, 'ocr_handler_ocr_document_version',
                handler_ocr_document_version, DocumentVersion
            ),
        ):
            signal.connect(dispatch_uid=uid, receiver=handler, sender=sender)
コード例 #9
0
ファイル: __init__.py プロジェクト: vinchu/mayan-edms
    if instance.document.document_type.ocr:
        instance.submit_for_ocr()


# Expose OCR submission as a `submit_for_ocr` method on both models.
Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

# Columns registered for DocumentVersionOCRError entries.
register_model_list_columns(
    DocumentVersionOCRError,
    [
        {
            'name': _('Document'),
            # Link to the document that owns the failed version.
            'attribute': encapsulate(
                lambda entry: document_link(entry.document_version.document)
            ),
        },
        {'name': _('Added'), 'attribute': 'datetime_submitted'},
        {'name': _('Result'), 'attribute': 'result'},
    ]
)