Example #1
0
    def ready(self):
        """
        Set up the sources app.

        Looks up this app's models and then registers, in order: the API end
        point, the missing item check, the staging file and source log
        columns, the Celery queues and task routes, the menu links and the
        signal handlers.
        """
        super(SourcesApp, self).ready()

        # Models owned by this app are resolved through the app config.
        POP3Email = self.get_model('POP3Email')
        IMAPEmail = self.get_model('IMAPEmail')
        Source = self.get_model('Source')
        SourceLog = self.get_model('SourceLog')
        SaneScanner = self.get_model('SaneScanner')
        StagingFolderSource = self.get_model('StagingFolderSource')
        WatchFolderSource = self.get_model('WatchFolderSource')
        WebFormSource = self.get_model('WebFormSource')

        APIEndPoint(app=self, version_string='1')

        def no_source_exists():
            # True while Source.objects.exists() reports nothing.
            return not Source.objects.exists()

        MissingItem(
            label=_('Create a document source'),
            description=_(
                'Document sources are the way in which new documents are '
                'feed to Mayan EDMS, create at least a web form source to '
                'be able to upload documents from a browser.'
            ),
            condition=no_source_exists,
            view='sources:setup_source_list'
        )

        # Staging file columns.
        def staging_file_created(context):
            return context['object'].get_date_time_created()

        SourceColumn(
            source=StagingFile, label=_('Created'), func=staging_file_created
        )

        thumbnail_widget = StagingFileThumbnailWidget()

        def staging_file_thumbnail_html(context):
            # A single widget instance is shared by every rendered row.
            return thumbnail_widget.render(instance=context['object'])

        SourceColumn(
            source=StagingFile, label=_('Thumbnail'),
            func=staging_file_thumbnail_html
        )

        # Source log columns.
        def log_datetime(context):
            return context['object'].datetime

        def log_message(context):
            return context['object'].message

        SourceColumn(
            source=SourceLog, label=_('Date time'), func=log_datetime
        )
        SourceColumn(
            source=SourceLog, label=_('Message'), func=log_message
        )

        # Celery queues and the task routes that feed them.
        sources_queue = Queue(
            'sources', Exchange('sources'), routing_key='sources'
        )
        periodic_queue = Queue(
            'sources_periodic', Exchange('sources_periodic'),
            routing_key='sources_periodic', delivery_mode=1
        )
        app.conf.CELERY_QUEUES.extend((sources_queue, periodic_queue))

        task_routes = {
            'sources.tasks.task_check_interval_source': {
                'queue': 'sources_periodic'
            },
            'sources.tasks.task_source_handle_upload': {
                'queue': 'sources'
            },
            'sources.tasks.task_upload_document': {
                'queue': 'sources'
            },
        }
        app.conf.CELERY_ROUTES.update(task_routes)

        # Menu links.
        menu_documents.bind_links(links=(link_document_create_multiple,))

        menu_object.bind_links(
            links=(
                link_setup_source_edit, link_setup_source_delete,
                link_transformation_list, link_setup_source_logs
            ),
            sources=(
                POP3Email, IMAPEmail, SaneScanner, StagingFolderSource,
                WatchFolderSource, WebFormSource
            )
        )
        menu_object.bind_links(
            links=(link_staging_file_delete,), sources=(StagingFile,)
        )
        menu_object.bind_links(
            links=(link_setup_source_check_now,),
            sources=(IMAPEmail, POP3Email, WatchFolderSource)
        )
        menu_secondary.bind_links(
            links=(
                link_setup_sources, link_setup_source_create_webform,
                link_setup_source_create_sane_scanner,
                link_setup_source_create_staging_folder,
                link_setup_source_create_pop3_email,
                link_setup_source_create_imap_email,
                link_setup_source_create_watch_folder
            ),
            sources=(
                POP3Email, IMAPEmail, StagingFolderSource, WatchFolderSource,
                WebFormSource, 'sources:setup_source_list',
                'sources:setup_source_create'
            )
        )
        menu_setup.bind_links(links=(link_setup_sources,))
        menu_sidebar.bind_links(
            links=(link_upload_version,),
            sources=(
                'documents:document_version_list',
                'documents:upload_version',
                'documents:document_version_revert'
            )
        )

        # Signal handlers; each dispatch_uid mirrors its handler name.
        post_upgrade.connect(
            initialize_periodic_tasks,
            dispatch_uid='initialize_periodic_tasks'
        )
        post_initial_setup.connect(
            create_default_document_source,
            dispatch_uid='create_default_document_source'
        )
        post_version_upload.connect(
            copy_transformations_to_version,
            dispatch_uid='copy_transformations_to_version'
        )
Example #2
0
    def ready(self):
        """
        Set up the sources app.

        Registers, in order: the API end point, the missing item check, the
        staging file and source log columns, the Celery queues and task
        routes, the menu links and the signal handlers.
        """
        super(SourcesApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        def no_source_exists():
            # True while Source.objects.exists() reports nothing.
            return not Source.objects.exists()

        MissingItem(
            label=_('Create a document source'),
            description=_(
                'Document sources are the way in which new documents are '
                'feed to Mayan EDMS, create at least a web form source to '
                'be able to upload documents from a browser.'
            ),
            condition=no_source_exists, view='sources:setup_source_list'
        )

        # Staging file columns.
        def staging_file_created(context):
            return context['object'].get_date_time_created()

        def staging_file_thumbnail_html(context):
            return staging_file_thumbnail(
                context['object'], gallery_name='sources:staging_list',
                title=context['object'].filename, size='100'
            )

        SourceColumn(
            source=StagingFile, label=_('Created'), func=staging_file_created
        )
        SourceColumn(
            source=StagingFile, label=_('Thumbnail'),
            func=staging_file_thumbnail_html
        )

        # Source log columns.
        def log_datetime(context):
            return context['object'].datetime

        def log_message(context):
            return context['object'].message

        SourceColumn(source=SourceLog, label=_('Date time'), func=log_datetime)
        SourceColumn(source=SourceLog, label=_('Message'), func=log_message)

        # Celery queues and the task routes that feed them.
        sources_queue = Queue(
            'sources', Exchange('sources'), routing_key='sources'
        )
        periodic_queue = Queue(
            'sources_periodic', Exchange('sources_periodic'),
            routing_key='sources_periodic', delivery_mode=1
        )
        app.conf.CELERY_QUEUES.extend((sources_queue, periodic_queue))

        task_routes = {
            'sources.tasks.task_check_interval_source': {
                'queue': 'sources_periodic'
            },
            'sources.tasks.task_source_handle_upload': {
                'queue': 'sources'
            },
            'sources.tasks.task_upload_document': {
                'queue': 'sources'
            },
        }
        app.conf.CELERY_ROUTES.update(task_routes)

        # Source models that share the same object and secondary menu links.
        source_models = (
            POP3Email, IMAPEmail, StagingFolderSource, WatchFolderSource,
            WebFormSource
        )
        setup_views = (
            'sources:setup_source_list', 'sources:setup_source_create'
        )

        menu_front_page.bind_links(links=(link_document_create_multiple,))
        menu_object.bind_links(
            links=(link_document_create_siblings,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(
                link_setup_source_edit, link_setup_source_delete,
                link_transformation_list, link_setup_source_logs
            ),
            sources=source_models
        )
        menu_object.bind_links(
            links=(link_staging_file_delete,), sources=(StagingFile,)
        )
        menu_secondary.bind_links(
            links=(
                link_setup_sources, link_setup_source_create_webform,
                link_setup_source_create_staging_folder,
                link_setup_source_create_pop3_email,
                link_setup_source_create_imap_email,
                link_setup_source_create_watch_folder
            ),
            sources=source_models + setup_views
        )
        menu_setup.bind_links(links=(link_setup_sources,))
        menu_sidebar.bind_links(
            links=(link_upload_version,),
            sources=(
                'documents:document_version_list',
                'documents:upload_version',
                'documents:document_version_revert'
            )
        )

        # Signal handlers; each dispatch_uid mirrors its handler name.
        post_upgrade.connect(
            initialize_periodic_tasks,
            dispatch_uid='initialize_periodic_tasks'
        )
        post_initial_setup.connect(
            create_default_document_source,
            dispatch_uid='create_default_document_source'
        )
        post_version_upload.connect(
            copy_transformations_to_version,
            dispatch_uid='copy_transformations_to_version'
        )
Example #3
0
    def ready(self):
        """
        Set up the document parsing app.

        Registers the API end point, attaches the parsing helpers to the
        Document and DocumentVersion models, then registers the permission,
        the parse error columns, the Celery queue and task route, the search
        fields, the menu links and the version upload signal handler.
        """
        super(DocumentParsingApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        # The document models belong to another app, so they are resolved by
        # app label; the parse error model is local to this app.
        Document = apps.get_model(app_label='documents', model_name='Document')
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )
        DocumentVersionParseError = self.get_model('DocumentVersionParseError')

        # Attach the parsing helpers as attributes of the document models.
        Document.add_to_class('submit_for_parsing', document_parsing_submit)
        DocumentVersion.add_to_class('content', get_document_content)
        DocumentVersion.add_to_class(
            'submit_for_parsing', document_version_parsing_submit
        )

        ModelPermission.register(
            model=Document, permissions=(permission_content_view,)
        )

        # Parse error list columns.
        def parse_error_document_link(context):
            parse_error = context['object']
            return document_link(parse_error.document_version.document)

        SourceColumn(
            source=DocumentVersionParseError, label=_('Document'),
            func=parse_error_document_link
        )
        SourceColumn(
            source=DocumentVersionParseError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionParseError, label=_('Result'),
            attribute='result'
        )

        # Celery queue and the task route that feeds it.
        parsing_queue = Queue(
            'parsing', Exchange('parsing'), routing_key='parsing'
        )
        app.conf.CELERY_QUEUES.append(parsing_queue)

        task_routes = {
            'document_parsing.tasks.task_parse_document_version': {
                'queue': 'parsing'
            },
        }
        app.conf.CELERY_ROUTES.update(task_routes)

        # Search fields.
        document_search.add_model_field(
            field='versions__pages__content__content', label=_('Content')
        )
        document_page_search.add_model_field(
            field='content__content', label=_('Content')
        )

        # Menu links.
        document_sources = (Document,)

        menu_facet.bind_links(
            links=(link_document_content,), sources=document_sources
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=document_sources
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=document_sources
        )
        menu_secondary.bind_links(
            links=(
                link_document_content, link_document_parsing_errors_list,
                link_document_content_download
            ),
            sources=(
                'document_parsing:document_content',
                'document_parsing:document_content_download',
                'document_parsing:document_parsing_error_list',
            )
        )
        menu_tools.bind_links(
            links=(link_document_type_submit, link_error_list)
        )

        # Signal handler for uploaded document versions.
        post_version_upload.connect(
            receiver=handler_parse_document_version,
            sender=DocumentVersion,
            dispatch_uid='document_parsing_handler_parse_document_version'
        )
Example #4
0
    def ready(self):
        """
        Set up the OCR app.

        Resolves the models it works with, attaches the OCR helpers to the
        Document and DocumentVersion models, then registers the model field,
        the permissions, the OCR error columns, the Celery queue and task
        route, the search fields, the menu links and the signal handlers.
        """
        super(OCRApp, self).ready()

        # Models from the documents app are resolved by app label; the
        # settings and error models are local to this app.
        Document = apps.get_model(
            app_label='documents', model_name='Document'
        )
        DocumentPage = apps.get_model(
            app_label='documents', model_name='DocumentPage'
        )
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        DocumentTypeSettings = self.get_model(
            model_name='DocumentTypeSettings'
        )
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )
        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        # Attach the OCR helpers as attributes of the document models.
        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelField(Document, name='versions__pages__ocr_content__content')

        # Permissions.
        ModelPermission.register(
            model=Document,
            permissions=(permission_ocr_document, permission_ocr_content_view)
        )
        ModelPermission.register(
            model=DocumentType,
            permissions=(permission_document_type_ocr_setup,)
        )
        ModelPermission.register_inheritance(
            model=DocumentTypeSettings, related='document_type'
        )

        # OCR error list columns.
        def ocr_error_document_link(context):
            ocr_error = context['object']
            return document_link(ocr_error.document_version.document)

        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=ocr_error_document_link
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        # Celery queue and the task route that feeds it.
        ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
        app.conf.CELERY_QUEUES.append(ocr_queue)

        task_routes = {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
        app.conf.CELERY_ROUTES.update(task_routes)

        # Search fields.
        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR')
        )
        document_page_search.add_model_field(
            field='ocr_content__content', label=_('OCR')
        )

        # Menu links.
        document_sources = (Document,)
        document_page_sources = (DocumentPage,)

        menu_facet.bind_links(
            links=(link_document_ocr_content,), sources=document_sources
        )
        menu_facet.bind_links(
            links=(link_document_page_ocr_content,),
            sources=document_page_sources
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=document_sources
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=document_sources
        )
        menu_object.bind_links(
            links=(link_document_page_ocr_content,),
            sources=document_page_sources
        )
        menu_object.bind_links(
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(
                link_document_ocr_content, link_document_ocr_errors_list,
                link_document_ocr_download
            ),
            sources=(
                'ocr:document_content', 'ocr:document_ocr_error_list',
                'ocr:document_ocr_download',
            )
        )
        menu_secondary.bind_links(
            links=(link_entry_list,),
            sources=(
                'ocr:entry_list', 'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple', DocumentVersionOCRError
            )
        )
        menu_tools.bind_links(
            links=(link_document_type_submit, link_entry_list)
        )

        # Signal handlers.
        post_save.connect(
            receiver=handler_initialize_new_ocr_settings,
            sender=DocumentType,
            dispatch_uid='ocr_handler_initialize_new_ocr_settings'
        )
        post_version_upload.connect(
            receiver=handler_ocr_document_version,
            sender=DocumentVersion,
            dispatch_uid='ocr_handler_ocr_document_version'
        )
Example #5
0
    def ready(self):
        """
        Set up the OCR app.

        Registers the API end point, attaches the OCR submit helpers to the
        Document and DocumentVersion models, then registers the permissions,
        the OCR error columns, the Celery queue and task route, the search
        field, the menu links and the signal handlers.

        Finally the versions of the external ``pdftotext`` and ``tesseract``
        binaries are published in the 'ocr' property namespace. A binary that
        is missing or that fails while being asked for its version is
        reported as a placeholder text instead of raising out of this method.
        """
        super(OCRApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelPermission.register(
            model=Document, permissions=(
                permission_ocr_document, permission_ocr_content_view
            )
        )

        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=lambda context: document_link(
                context['object'].document_version.document
            )
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'),
        )

        app.conf.CELERY_ROUTES.update(
            {
                'ocr.tasks.task_do_ocr': {
                    'queue': 'ocr'
                },
            }
        )

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('Content')
        )

        menu_facet.bind_links(
            links=(link_document_content,), sources=(Document,)
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(link_entry_list,),
            sources=(
                'ocr:entry_list', 'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple', DocumentVersionOCRError
            )
        )
        menu_tools.bind_links(
            links=(
                link_document_submit_all, link_document_type_submit,
                link_entry_list
            )
        )

        post_save.connect(
            initialize_new_ocr_settings,
            dispatch_uid='initialize_new_ocr_settings', sender=DocumentType
        )
        post_version_upload.connect(
            post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
            sender=DocumentVersion
        )

        namespace = PropertyNamespace('ocr', _('OCR'))

        def add_binary_version(name, label, path_setting):
            # Publish what the binary prints to stderr when called with
            # '-v', or a placeholder text when that cannot be obtained.
            try:
                # The invocation itself is inside the try block: a binary
                # that is found but fails to run must end up as 'error
                # getting version' rather than abort the app start up.
                version = sh.Command(path_setting.value)('-v').stderr
            except sh.CommandNotFound:
                version = _('not found')
            except Exception:
                # Deliberate catch-all: version reporting is best effort.
                version = _('error getting version')

            namespace.add_property(name, label, version, report=True)

        add_binary_version(
            'pdftotext', _('pdftotext version'), setting_pdftotext_path
        )
        add_binary_version(
            'tesseract', _('tesseract version'), setting_tesseract_path
        )
Example #6
0
    def ready(self):
        """
        Set up the OCR app.

        Resolves the models it works with, registers the API end point,
        attaches the OCR submit helpers to the Document and DocumentVersion
        models, then registers the permissions, the OCR error columns, the
        Celery queue and task route, the search field, the menu links and the
        signal handlers.

        Finally the versions of the external ``pdftotext`` and ``tesseract``
        binaries are published in the 'ocr' property namespace. A binary that
        is missing or that fails while being asked for its version is
        reported as a placeholder text instead of raising out of this method.
        """
        super(OCRApp, self).ready()

        Document = apps.get_model(app_label='documents', model_name='Document')

        DocumentType = apps.get_model(app_label='documents',
                                      model_name='DocumentType')

        DocumentVersion = apps.get_model(app_label='documents',
                                         model_name='DocumentVersion')

        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('submit_for_ocr',
                                     document_version_ocr_submit)

        ModelPermission.register(model=Document,
                                 permissions=(permission_ocr_document,
                                              permission_ocr_content_view))

        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Document'),
                     func=lambda context: document_link(context[
                         'object'].document_version.document))
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Added'),
                     attribute='datetime_submitted')
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Result'),
                     attribute='result')

        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'), )

        app.conf.CELERY_ROUTES.update({
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        })

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR'))

        menu_facet.bind_links(links=(link_document_content, ),
                              sources=(Document, ))
        menu_multi_item.bind_links(links=(link_document_submit_multiple, ),
                                   sources=(Document, ))
        menu_object.bind_links(links=(link_document_submit, ),
                               sources=(Document, ))
        menu_object.bind_links(links=(link_document_type_ocr_settings, ),
                               sources=(DocumentType, ))
        menu_secondary.bind_links(
            links=(link_entry_list, ),
            sources=('ocr:entry_list', 'ocr:entry_delete_multiple',
                     'ocr:entry_re_queue_multiple', DocumentVersionOCRError))
        menu_tools.bind_links(links=(link_document_submit_all,
                                     link_document_type_submit,
                                     link_entry_list))

        post_save.connect(initialize_new_ocr_settings,
                          dispatch_uid='initialize_new_ocr_settings',
                          sender=DocumentType)
        post_version_upload.connect(post_version_upload_ocr,
                                    dispatch_uid='post_version_upload_ocr',
                                    sender=DocumentVersion)

        namespace = PropertyNamespace('ocr', _('OCR'))

        def add_binary_version(name, label, path_setting):
            # Publish what the binary prints to stderr when called with
            # '-v', or a placeholder text when that cannot be obtained.
            try:
                # The invocation itself is inside the try block: a binary
                # that is found but fails to run must end up as 'error
                # getting version' rather than abort the app start up.
                version = sh.Command(path_setting.value)('-v').stderr
            except sh.CommandNotFound:
                version = _('not found')
            except Exception:
                # Deliberate catch-all: version reporting is best effort.
                version = _('error getting version')

            namespace.add_property(name, label, version, report=True)

        add_binary_version('pdftotext',
                           _('pdftotext version'),
                           setting_pdftotext_path)
        add_binary_version('tesseract',
                           _('tesseract version'),
                           setting_tesseract_path)
Example #7
0
    def ready(self):
        """
        Set up the OCR app.

        Resolves the models it works with, attaches the OCR helpers to the
        Document and DocumentVersion models, then registers the model field,
        the permissions, the OCR error columns, the Celery queue and task
        route, the search fields, the menu links and the signal handlers.
        """
        super(OCRApp, self).ready()

        # Models from the documents app are resolved by app label; the
        # settings and error models are local to this app.
        Document = apps.get_model(app_label='documents', model_name='Document')
        DocumentPage = apps.get_model(
            app_label='documents', model_name='DocumentPage'
        )
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        DocumentTypeSettings = self.get_model(model_name='DocumentTypeSettings')
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )
        DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

        # Attach the OCR helpers as attributes of the document models.
        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelField(Document, name='versions__pages__ocr_content__content')

        # Permissions.
        ModelPermission.register(
            model=Document,
            permissions=(permission_ocr_document, permission_ocr_content_view)
        )
        ModelPermission.register(
            model=DocumentType,
            permissions=(permission_document_type_ocr_setup,)
        )
        ModelPermission.register_inheritance(
            model=DocumentTypeSettings, related='document_type'
        )

        # OCR error list columns.
        def ocr_error_document_link(context):
            ocr_error = context['object']
            return document_link(ocr_error.document_version.document)

        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=ocr_error_document_link
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        # Celery queue and the task route that feeds it.
        ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
        app.conf.CELERY_QUEUES.append(ocr_queue)

        task_routes = {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
        app.conf.CELERY_ROUTES.update(task_routes)

        # Search fields.
        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('OCR')
        )
        document_page_search.add_model_field(
            field='ocr_content__content', label=_('OCR')
        )

        # Menu links.
        document_sources = (Document,)
        document_page_sources = (DocumentPage,)

        menu_facet.bind_links(
            links=(link_document_ocr_content,), sources=document_sources
        )
        menu_facet.bind_links(
            links=(link_document_page_ocr_content,),
            sources=document_page_sources
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=document_sources
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=document_sources
        )
        menu_object.bind_links(
            links=(link_document_page_ocr_content,),
            sources=document_page_sources
        )
        menu_object.bind_links(
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(
                link_document_ocr_content,
                link_document_ocr_errors_list,
                link_document_ocr_download
            ),
            sources=(
                'ocr:document_content',
                'ocr:document_ocr_error_list',
                'ocr:document_ocr_download',
            )
        )
        menu_secondary.bind_links(
            links=(link_entry_list,),
            sources=(
                'ocr:entry_list',
                'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple',
                DocumentVersionOCRError
            )
        )
        menu_tools.bind_links(
            links=(link_document_type_submit, link_entry_list)
        )

        # Signal handlers.
        post_document_version_ocr.connect(
            receiver=handler_index_document,
            sender=DocumentVersion,
            dispatch_uid='ocr_handler_index_document'
        )
        post_save.connect(
            receiver=handler_initialize_new_ocr_settings,
            sender=DocumentType,
            dispatch_uid='ocr_handler_initialize_new_ocr_settings'
        )
        post_version_upload.connect(
            receiver=handler_ocr_document_version,
            sender=DocumentVersion,
            dispatch_uid='ocr_handler_ocr_document_version'
        )