def queue_document_list(request, queue_name='default'):
    """
    Show the documents held in the OCR queue called ``queue_name``.

    ``check_permissions`` is called with PERMISSION_OCR_DOCUMENT for the
    requesting user, the queue is fetched with ``get_object_or_404`` and the
    rendering is delegated to the generic ``object_list`` view using the
    'generic_list.html' template.

    request -- the current HTTP request
    queue_name -- value matched against ``DocumentQueue.name``
    """
    check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])

    document_queue = get_object_or_404(DocumentQueue, name=queue_name)

    return object_list(
        request,
        queryset=document_queue.queuedocument_set.all(),
        template_name='generic_list.html',
        extra_context={
            'title': _(u'documents in queue: %s') % document_queue,
            'hide_object': True,
            'queue': document_queue,
            'object_name': _(u'document queue'),
            'navigation_object_name': 'queue',
            'list_object_variable_name': 'queue_document',
            'extra_columns': [
                {
                    'name': 'document',
                    'attribute': encapsulate(
                        lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')
                    )
                },
                {
                    # Guarded exactly like the 'document' column: an entry
                    # whose document cannot be read must not break the
                    # rendering of the whole list.
                    'name': _(u'thumbnail'),
                    'attribute': encapsulate(
                        lambda x: document_thumbnail(x.document) if hasattr(x, 'document') else _(u'Missing document.')
                    )
                },
                {
                    # Everything after the first '.' of the text form of
                    # the timestamp is dropped.
                    'name': 'submitted',
                    'attribute': encapsulate(
                        lambda x: unicode(x.datetime_submitted).split('.')[0]
                    ),
                    'keep_together': True
                },
                {'name': 'delay', 'attribute': 'delay'},
                {
                    'name': 'state',
                    'attribute': encapsulate(lambda x: x.get_state_display())
                },
                {'name': 'node', 'attribute': 'node_name'},
                {'name': 'result', 'attribute': 'result'},
            ],
            'multi_select_as_buttons': True,
            'sidebar_subtemplates_list': [
                {
                    'name': 'generic_subtemplate.html',
                    'context': {
                        'side_bar': True,
                        'title': _(u'document queue properties'),
                        'content': _(u'Current state: %s') % document_queue.get_state_display(),
                    }
                }
            ]
        },
    )
@receiver(post_version_upload, dispatch_uid='post_version_upload_ocr', sender=DocumentVersion)
def post_version_upload_ocr(sender, instance, **kwargs):
    """
    Submit an uploaded document version for OCR.

    The submission only happens when the ``ocr`` flag of the document type
    of the version's document is truthy.
    """
    logger.debug('received post_version_upload')
    logger.debug('instance pk: %s', instance.pk)

    document_type = instance.document.document_type
    if not document_type.ocr:
        return

    instance.submit_for_ocr()


# Expose the OCR submission helpers as methods of the document models.
Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

# Columns displayed when listing DocumentVersionOCRError entries.
register_model_list_columns(
    DocumentVersionOCRError,
    [
        dict(
            name=_('Document'),
            attribute=encapsulate(
                lambda entry: document_link(entry.document_version.document)
            )
        ),
        dict(name=_('Added'), attribute='datetime_submitted'),
        dict(name=_('Result'), attribute='result'),
    ]
)
def ready(self):
    """
    Perform the start-up registrations of the document parsing app.

    In order: API end point, methods added to the document models,
    permissions, error list columns, the 'parsing' Celery queue and route,
    search fields, menu links and the version upload signal handler.
    """
    super(DocumentParsingApp, self).ready()

    APIEndPoint(app=self, version_string='1')

    # Models are looked up by name instead of being imported.
    Document, DocumentVersion = [
        apps.get_model(app_label='documents', model_name=model_name)
        for model_name in ('Document', 'DocumentVersion')
    ]
    DocumentVersionParseError = self.get_model('DocumentVersionParseError')

    document_sources = (Document,)

    Document.add_to_class('submit_for_parsing', document_parsing_submit)
    for method_name, method in (
        ('content', get_document_content),
        ('submit_for_parsing', document_version_parsing_submit),
    ):
        DocumentVersion.add_to_class(method_name, method)

    ModelPermission.register(
        model=Document, permissions=(permission_content_view,)
    )

    # Columns of the parse error list. The first one is computed from the
    # context, the other two read an attribute of the error entry.
    SourceColumn(
        source=DocumentVersionParseError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    for column_label, column_attribute in (
        (_('Added'), 'datetime_submitted'),
        (_('Result'), 'result'),
    ):
        SourceColumn(
            source=DocumentVersionParseError, label=column_label,
            attribute=column_attribute
        )

    parsing_queue = Queue(
        'parsing', Exchange('parsing'), routing_key='parsing'
    )
    app.conf.CELERY_QUEUES.append(parsing_queue)

    task_routes = {
        'document_parsing.tasks.task_parse_document_version': {
            'queue': 'parsing'
        },
    }
    app.conf.CELERY_ROUTES.update(task_routes)

    document_search.add_model_field(
        field='versions__pages__content__content', label=_('Content')
    )
    document_page_search.add_model_field(
        field='content__content', label=_('Content')
    )

    menu_facet.bind_links(
        links=(link_document_content,), sources=document_sources
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=document_sources
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=document_sources
    )

    secondary_links = (
        link_document_content,
        link_document_parsing_errors_list,
        link_document_content_download,
    )
    secondary_views = (
        'document_parsing:document_content',
        'document_parsing:document_content_download',
        'document_parsing:document_parsing_error_list',
    )
    menu_secondary.bind_links(links=secondary_links, sources=secondary_views)

    menu_tools.bind_links(
        links=(link_document_type_submit, link_error_list)
    )

    post_version_upload.connect(
        dispatch_uid='document_parsing_handler_parse_document_version',
        receiver=handler_parse_document_version,
        sender=DocumentVersion
    )
def ready(self):
    """
    Perform the start-up registrations of the OCR app.

    In order: methods added to the document models, the OCR content model
    field, permissions, error list columns, the 'ocr' Celery queue and
    route, search fields, menu links and the signal handlers.
    """
    super(OCRApp, self).ready()

    def documents_model(model_name):
        # Look up a model of the 'documents' app by name.
        return apps.get_model(app_label='documents', model_name=model_name)

    Document = documents_model('Document')
    DocumentPage = documents_model('DocumentPage')
    DocumentType = documents_model('DocumentType')
    DocumentTypeSettings = self.get_model(model_name='DocumentTypeSettings')
    DocumentVersion = documents_model('DocumentVersion')
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    document_sources = (Document,)
    page_sources = (DocumentPage,)
    type_sources = (DocumentType,)

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelField(Document, name='versions__pages__ocr_content__content')

    document_permissions = (
        permission_ocr_document, permission_ocr_content_view
    )
    ModelPermission.register(
        model=Document, permissions=document_permissions
    )
    ModelPermission.register(
        model=DocumentType,
        permissions=(permission_document_type_ocr_setup,)
    )
    ModelPermission.register_inheritance(
        model=DocumentTypeSettings, related='document_type'
    )

    # Columns of the OCR error list.
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
    app.conf.CELERY_QUEUES.append(ocr_queue)

    task_routes = {'ocr.tasks.task_do_ocr': {'queue': 'ocr'}}
    app.conf.CELERY_ROUTES.update(task_routes)

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )
    document_page_search.add_model_field(
        field='ocr_content__content', label=_('OCR')
    )

    menu_facet.bind_links(
        links=(link_document_ocr_content,), sources=document_sources
    )
    menu_facet.bind_links(
        links=(link_document_page_ocr_content,), sources=page_sources
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=document_sources
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=document_sources
    )
    menu_object.bind_links(
        links=(link_document_page_ocr_content,), sources=page_sources
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,), sources=type_sources
    )

    content_links = (
        link_document_ocr_content,
        link_document_ocr_errors_list,
        link_document_ocr_download,
    )
    content_views = (
        'ocr:document_content',
        'ocr:document_ocr_error_list',
        'ocr:document_ocr_download',
    )
    menu_secondary.bind_links(links=content_links, sources=content_views)

    entry_sources = (
        'ocr:entry_list',
        'ocr:entry_delete_multiple',
        'ocr:entry_re_queue_multiple',
        DocumentVersionOCRError,
    )
    menu_secondary.bind_links(
        links=(link_entry_list,), sources=entry_sources
    )

    menu_tools.bind_links(
        links=(link_document_type_submit, link_entry_list)
    )

    post_save.connect(
        dispatch_uid='ocr_handler_initialize_new_ocr_settings',
        receiver=handler_initialize_new_ocr_settings,
        sender=DocumentType
    )
    post_version_upload.connect(
        dispatch_uid='ocr_handler_ocr_document_version',
        receiver=handler_ocr_document_version,
        sender=DocumentVersion
    )
def queue_document_list(request, queue_name='default'):
    """
    Show the documents held in the OCR queue called ``queue_name``.

    ``Permission.objects.check_permissions`` is called with
    PERMISSION_OCR_DOCUMENT for the requesting user, the queue is fetched
    with ``get_object_or_404`` and the 'generic_list.html' template is
    rendered with the list context built here.

    request -- the current HTTP request
    queue_name -- value matched against ``DocumentQueue.name``
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])

    document_queue = get_object_or_404(DocumentQueue, name=queue_name)

    context = {
        'object_list': document_queue.queuedocument_set.all(),
        'title': _(u'documents in queue: %s') % document_queue,
        'hide_object': True,
        'queue': document_queue,
        'object_name': _(u'document queue'),
        'navigation_object_name': 'queue',
        'list_object_variable_name': 'queue_document',
        'extra_columns': [
            {
                'name': 'document',
                'attribute': encapsulate(
                    lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')
                )
            },
            {
                # Guarded exactly like the 'document' column: an entry whose
                # document cannot be read must not break the rendering of
                # the whole list.
                'name': _(u'thumbnail'),
                'attribute': encapsulate(
                    lambda x: document_thumbnail(x.document) if hasattr(x, 'document') else _(u'Missing document.')
                )
            },
            {
                # Everything after the first '.' of the text form of the
                # timestamp is dropped.
                'name': 'submitted',
                'attribute': encapsulate(
                    lambda x: unicode(x.datetime_submitted).split('.')[0]
                ),
                'keep_together': True
            },
            {'name': 'delay', 'attribute': 'delay'},
            {
                'name': 'state',
                'attribute': encapsulate(lambda x: x.get_state_display())
            },
            {'name': 'node', 'attribute': 'node_name'},
            {'name': 'result', 'attribute': 'result'},
        ],
        'multi_select_as_buttons': True,
        'sidebar_subtemplates_list': [
            {
                'name': 'generic_subtemplate.html',
                'context': {
                    'side_bar': True,
                    'title': _(u'document queue properties'),
                    'content': _(u'Current state: %s') % document_queue.get_state_display(),
                }
            }
        ]
    }

    return render_to_response(
        'generic_list.html', context,
        context_instance=RequestContext(request)
    )
def ready(self):
    """
    Perform the start-up registrations of the OCR app.

    In order: API end point, methods added to the document models,
    permissions, error list columns, the 'ocr' Celery queue and route,
    search field, menu links, signal handlers and finally the reported
    versions of the pdftotext and tesseract binaries.
    """
    super(OCRApp, self).ready()

    APIEndPoint(app=self, version_string='1')

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelPermission.register(
        model=Document, permissions=(
            permission_ocr_document, permission_ocr_content_view
        )
    )

    # Columns of the OCR error list.
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr'),
    )

    app.conf.CELERY_ROUTES.update(
        {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
    )

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('Content')
    )

    menu_facet.bind_links(
        links=(link_document_content,), sources=(Document,)
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,), sources=(DocumentType,)
    )
    menu_secondary.bind_links(
        links=(link_entry_list,),
        sources=(
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
    )
    menu_tools.bind_links(
        links=(
            link_document_submit_all, link_document_type_submit,
            link_entry_list
        )
    )

    post_save.connect(
        initialize_new_ocr_settings,
        dispatch_uid='initialize_new_ocr_settings',
        sender=DocumentType
    )
    post_version_upload.connect(
        post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
        sender=DocumentVersion
    )

    namespace = PropertyNamespace('ocr', _('OCR'))

    def report_binary_version(name, label, path_setting):
        # Record what the binary prints on stderr when called with '-v'.
        # The binary is executed inside the try block so that a failure
        # while running it, and not only while locating it, is reported as
        # 'error getting version' instead of propagating out of ready().
        try:
            version = sh.Command(path_setting.value)('-v').stderr
        except sh.CommandNotFound:
            version = _('not found')
        except Exception:
            version = _('error getting version')

        namespace.add_property(name, label, version, report=True)

    report_binary_version(
        'pdftotext', _('pdftotext version'), setting_pdftotext_path
    )
    report_binary_version(
        'tesseract', _('tesseract version'), setting_tesseract_path
    )
def ready(self):
    """
    Perform the start-up registrations of the OCR app.

    In order: model lookups, API end point, methods added to the document
    models, permissions, error list columns, the 'ocr' Celery queue and
    route, search field, menu links, signal handlers and finally the
    reported versions of the pdftotext and tesseract binaries.
    """
    super(OCRApp, self).ready()

    # Models are looked up by name instead of being imported.
    Document = apps.get_model(app_label='documents', model_name='Document')
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    APIEndPoint(app=self, version_string='1')

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelPermission.register(
        model=Document, permissions=(
            permission_ocr_document, permission_ocr_content_view
        )
    )

    # Columns of the OCR error list.
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr'),
    )

    app.conf.CELERY_ROUTES.update({
        'ocr.tasks.task_do_ocr': {
            'queue': 'ocr'
        },
    })

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )

    menu_facet.bind_links(
        links=(link_document_content,), sources=(Document,)
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,), sources=(DocumentType,)
    )
    menu_secondary.bind_links(
        links=(link_entry_list,),
        sources=(
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
    )
    menu_tools.bind_links(
        links=(
            link_document_submit_all, link_document_type_submit,
            link_entry_list
        )
    )

    post_save.connect(
        initialize_new_ocr_settings,
        dispatch_uid='initialize_new_ocr_settings',
        sender=DocumentType
    )
    post_version_upload.connect(
        post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
        sender=DocumentVersion
    )

    namespace = PropertyNamespace('ocr', _('OCR'))

    def report_binary_version(name, label, path_setting):
        # Record what the binary prints on stderr when called with '-v'.
        # The binary is executed inside the try block so that a failure
        # while running it, and not only while locating it, is reported as
        # 'error getting version' instead of propagating out of ready().
        try:
            version = sh.Command(path_setting.value)('-v').stderr
        except sh.CommandNotFound:
            version = _('not found')
        except Exception:
            version = _('error getting version')

        namespace.add_property(name, label, version, report=True)

    report_binary_version(
        'pdftotext', _('pdftotext version'), setting_pdftotext_path
    )
    report_binary_version(
        'tesseract', _('tesseract version'), setting_tesseract_path
    )
def ready(self):
    """
    Perform the start-up registrations of the OCR app.

    In order: methods added to the document models, the OCR content model
    field, permissions, error list columns, the 'ocr' Celery queue and
    route, search fields, menu links and the signal handlers.
    """
    super(OCRApp, self).ready()

    # Models are looked up by name instead of being imported.
    documents_app = 'documents'
    Document = apps.get_model(
        app_label=documents_app, model_name='Document'
    )
    DocumentPage = apps.get_model(
        app_label=documents_app, model_name='DocumentPage'
    )
    DocumentType = apps.get_model(
        app_label=documents_app, model_name='DocumentType'
    )
    DocumentTypeSettings = self.get_model(model_name='DocumentTypeSettings')
    DocumentVersion = apps.get_model(
        app_label=documents_app, model_name='DocumentVersion'
    )
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    for method_name, method in (
        ('ocr_content', get_document_ocr_content),
        ('submit_for_ocr', document_version_ocr_submit),
    ):
        DocumentVersion.add_to_class(method_name, method)

    ModelField(Document, name='versions__pages__ocr_content__content')

    for model, permissions in (
        (Document, (permission_ocr_document, permission_ocr_content_view)),
        (DocumentType, (permission_document_type_ocr_setup,)),
    ):
        ModelPermission.register(model=model, permissions=permissions)

    ModelPermission.register_inheritance(
        model=DocumentTypeSettings, related='document_type'
    )

    # Columns of the OCR error list. The first one is computed from the
    # context, the other two read an attribute of the error entry.
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    for column_label, column_attribute in (
        (_('Added'), 'datetime_submitted'),
        (_('Result'), 'result'),
    ):
        SourceColumn(
            source=DocumentVersionOCRError, label=column_label,
            attribute=column_attribute
        )

    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr')
    )
    app.conf.CELERY_ROUTES.update(
        {'ocr.tasks.task_do_ocr': {'queue': 'ocr'}}
    )

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )
    document_page_search.add_model_field(
        field='ocr_content__content', label=_('OCR')
    )

    # One link bound to one model per row; rows are applied top to bottom.
    for menu, link, model in (
        (menu_facet, link_document_ocr_content, Document),
        (menu_facet, link_document_page_ocr_content, DocumentPage),
        (menu_multi_item, link_document_submit_multiple, Document),
        (menu_object, link_document_submit, Document),
        (menu_object, link_document_page_ocr_content, DocumentPage),
        (menu_object, link_document_type_ocr_settings, DocumentType),
    ):
        menu.bind_links(links=(link,), sources=(model,))

    for links, sources in (
        (
            (
                link_document_ocr_content,
                link_document_ocr_errors_list,
                link_document_ocr_download,
            ),
            (
                'ocr:document_content',
                'ocr:document_ocr_error_list',
                'ocr:document_ocr_download',
            ),
        ),
        (
            (link_entry_list,),
            (
                'ocr:entry_list',
                'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple',
                DocumentVersionOCRError,
            ),
        ),
    ):
        menu_secondary.bind_links(links=links, sources=sources)

    menu_tools.bind_links(
        links=(link_document_type_submit, link_entry_list)
    )

    for signal, dispatch_uid, receiver, sender in (
        (
            post_document_version_ocr, 'ocr_handler_index_document',
            handler_index_document, DocumentVersion
        ),
        (
            post_save, 'ocr_handler_initialize_new_ocr_settings',
            handler_initialize_new_ocr_settings, DocumentType
        ),
        (
            post_version_upload, 'ocr_handler_ocr_document_version',
            handler_ocr_document_version, DocumentVersion
        ),
    ):
        signal.connect(
            dispatch_uid=dispatch_uid, receiver=receiver, sender=sender
        )
# NOTE(review): this `if` looks like the tail of a handler whose `def` line
# is outside this view (it reads `instance`, which is not defined here) --
# confirm against the full file before moving it.
# Submits the instance for OCR when the `ocr` flag of its document's type is
# truthy.
if instance.document.document_type.ocr:
    instance.submit_for_ocr()

# Expose the OCR submission helpers as `submit_for_ocr` on both models.
Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

# Columns registered for DocumentVersionOCRError: a link built from the
# document of the entry's document version, then the `datetime_submitted`
# and `result` attributes.
register_model_list_columns(DocumentVersionOCRError, [
    {
        'name': _('Document'), 'attribute': encapsulate(
            lambda entry: document_link(entry.document_version.document))
    },
    {
        'name': _('Added'), 'attribute': 'datetime_submitted'
    },
    {
        'name': _('Result'), 'attribute': 'result'
    },
])