def ready(self):
    """
    Set up the sources app after the parent ``ready()`` has run.

    Looks up the app's models, then registers the API endpoint, the
    "Create a document source" missing-item entry, the staging file and
    source log columns, the Celery queues and task routes, the menu links
    and the signal handlers.
    """
    super(SourcesApp, self).ready()

    POP3Email = self.get_model('POP3Email')
    IMAPEmail = self.get_model('IMAPEmail')
    Source = self.get_model('Source')
    SourceLog = self.get_model('SourceLog')
    SaneScanner = self.get_model('SaneScanner')
    StagingFolderSource = self.get_model('StagingFolderSource')
    WatchFolderSource = self.get_model('WatchFolderSource')
    WebFormSource = self.get_model('WebFormSource')

    APIEndPoint(app=self, version_string='1')

    def no_source_defined():
        # True when no Source object exists.
        return not Source.objects.exists()

    MissingItem(
        label=_('Create a document source'),
        description=_(
            'Document sources are the way in which new documents are '
            'feed to Mayan EDMS, create at least a web form source to '
            'be able to upload documents from a browser.'
        ),
        condition=no_source_defined,
        view='sources:setup_source_list'
    )

    # --- List columns -----------------------------------------------------
    # The column callables keep ``context`` as their parameter name, same
    # as the lambdas they replace.
    def created_column(context):
        return context['object'].get_date_time_created()

    SourceColumn(
        source=StagingFile, label=_('Created'), func=created_column
    )

    html_widget = StagingFileThumbnailWidget()

    def thumbnail_column(context):
        return html_widget.render(instance=context['object'])

    SourceColumn(
        source=StagingFile, label=_('Thumbnail'), func=thumbnail_column
    )

    def log_datetime_column(context):
        return context['object'].datetime

    SourceColumn(
        source=SourceLog, label=_('Date time'), func=log_datetime_column
    )

    def log_message_column(context):
        return context['object'].message

    SourceColumn(
        source=SourceLog, label=_('Message'), func=log_message_column
    )

    # --- Celery queues and routes -----------------------------------------
    upload_queue = Queue(
        'sources', Exchange('sources'), routing_key='sources'
    )
    periodic_queue = Queue(
        'sources_periodic', Exchange('sources_periodic'),
        routing_key='sources_periodic', delivery_mode=1
    )
    app.conf.CELERY_QUEUES.extend((upload_queue, periodic_queue))

    task_queues = (
        ('sources.tasks.task_check_interval_source', 'sources_periodic'),
        ('sources.tasks.task_source_handle_upload', 'sources'),
        ('sources.tasks.task_upload_document', 'sources'),
    )
    app.conf.CELERY_ROUTES.update(
        dict((task, {'queue': queue}) for task, queue in task_queues)
    )

    # --- Menu links -------------------------------------------------------
    menu_documents.bind_links(links=(link_document_create_multiple,))

    source_object_links = (
        link_setup_source_edit, link_setup_source_delete,
        link_transformation_list, link_setup_source_logs
    )
    menu_object.bind_links(
        links=source_object_links,
        sources=(
            POP3Email, IMAPEmail, SaneScanner, StagingFolderSource,
            WatchFolderSource, WebFormSource
        )
    )
    menu_object.bind_links(
        links=(link_staging_file_delete,), sources=(StagingFile,)
    )
    menu_object.bind_links(
        links=(link_setup_source_check_now,),
        sources=(IMAPEmail, POP3Email, WatchFolderSource)
    )

    setup_links = (
        link_setup_sources, link_setup_source_create_webform,
        link_setup_source_create_sane_scanner,
        link_setup_source_create_staging_folder,
        link_setup_source_create_pop3_email,
        link_setup_source_create_imap_email,
        link_setup_source_create_watch_folder
    )
    setup_link_sources = (
        POP3Email, IMAPEmail, StagingFolderSource, WatchFolderSource,
        WebFormSource, 'sources:setup_source_list',
        'sources:setup_source_create'
    )
    menu_secondary.bind_links(links=setup_links, sources=setup_link_sources)

    menu_setup.bind_links(links=(link_setup_sources,))

    menu_sidebar.bind_links(
        links=(link_upload_version,),
        sources=(
            'documents:document_version_list', 'documents:upload_version',
            'documents:document_version_revert'
        )
    )

    # --- Signal handlers --------------------------------------------------
    post_upgrade.connect(
        initialize_periodic_tasks,
        dispatch_uid='initialize_periodic_tasks'
    )
    post_initial_setup.connect(
        create_default_document_source,
        dispatch_uid='create_default_document_source'
    )
    post_version_upload.connect(
        copy_transformations_to_version,
        dispatch_uid='copy_transformations_to_version'
    )
def ready(self):
    """
    Set up the sources app after the parent ``ready()`` has run.

    Registers the API endpoint, the "Create a document source"
    missing-item entry, the staging file and source log columns, the
    Celery queues and task routes, the menu links and the signal handlers.
    The model classes used here are not looked up in this method; they are
    expected to already be in scope.
    """
    super(SourcesApp, self).ready()

    APIEndPoint(app=self, version_string='1')

    def no_source_defined():
        # True when no Source object exists.
        return not Source.objects.exists()

    MissingItem(
        label=_('Create a document source'),
        description=_(
            'Document sources are the way in which new documents are '
            'feed to Mayan EDMS, create at least a web form source to '
            'be able to upload documents from a browser.'
        ),
        condition=no_source_defined,
        view='sources:setup_source_list'
    )

    # --- List columns -----------------------------------------------------
    # The column callables keep ``context`` as their parameter name, same
    # as the lambdas they replace.
    def created_column(context):
        return context['object'].get_date_time_created()

    def thumbnail_column(context):
        staging_file = context['object']
        return staging_file_thumbnail(
            staging_file, gallery_name='sources:staging_list',
            title=staging_file.filename, size='100'
        )

    def log_datetime_column(context):
        return context['object'].datetime

    def log_message_column(context):
        return context['object'].message

    SourceColumn(
        source=StagingFile, label=_('Created'), func=created_column
    )
    SourceColumn(
        source=StagingFile, label=_('Thumbnail'), func=thumbnail_column
    )
    SourceColumn(
        source=SourceLog, label=_('Date time'), func=log_datetime_column
    )
    SourceColumn(
        source=SourceLog, label=_('Message'), func=log_message_column
    )

    # --- Celery queues and routes -----------------------------------------
    upload_queue = Queue(
        'sources', Exchange('sources'), routing_key='sources'
    )
    periodic_queue = Queue(
        'sources_periodic', Exchange('sources_periodic'),
        routing_key='sources_periodic', delivery_mode=1
    )
    app.conf.CELERY_QUEUES.extend((upload_queue, periodic_queue))

    task_queues = (
        ('sources.tasks.task_check_interval_source', 'sources_periodic'),
        ('sources.tasks.task_source_handle_upload', 'sources'),
        ('sources.tasks.task_upload_document', 'sources'),
    )
    app.conf.CELERY_ROUTES.update(
        dict((task, {'queue': queue}) for task, queue in task_queues)
    )

    # --- Menu links -------------------------------------------------------
    # The same five source models are the targets of both the per-object
    # links and the setup links.
    source_models = (
        POP3Email, IMAPEmail, StagingFolderSource, WatchFolderSource,
        WebFormSource
    )

    menu_front_page.bind_links(links=(link_document_create_multiple,))
    menu_object.bind_links(
        links=(link_document_create_siblings,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(
            link_setup_source_edit, link_setup_source_delete,
            link_transformation_list, link_setup_source_logs
        ),
        sources=source_models
    )
    menu_object.bind_links(
        links=(link_staging_file_delete,), sources=(StagingFile,)
    )
    menu_secondary.bind_links(
        links=(
            link_setup_sources, link_setup_source_create_webform,
            link_setup_source_create_staging_folder,
            link_setup_source_create_pop3_email,
            link_setup_source_create_imap_email,
            link_setup_source_create_watch_folder
        ),
        sources=source_models + (
            'sources:setup_source_list', 'sources:setup_source_create'
        )
    )
    menu_setup.bind_links(links=(link_setup_sources,))
    menu_sidebar.bind_links(
        links=(link_upload_version,),
        sources=(
            'documents:document_version_list', 'documents:upload_version',
            'documents:document_version_revert'
        )
    )

    # --- Signal handlers --------------------------------------------------
    post_upgrade.connect(
        initialize_periodic_tasks,
        dispatch_uid='initialize_periodic_tasks'
    )
    post_initial_setup.connect(
        create_default_document_source,
        dispatch_uid='create_default_document_source'
    )
    post_version_upload.connect(
        copy_transformations_to_version,
        dispatch_uid='copy_transformations_to_version'
    )
def ready(self):
    """
    Set up the document parsing app after the parent ``ready()`` has run.

    Registers the API endpoint, attaches the parsing helpers to the
    ``Document`` and ``DocumentVersion`` models, and registers the content
    view permission, the parse error columns, the ``parsing`` Celery queue
    and route, the search fields, the menu links and the version upload
    handler.
    """
    super(DocumentParsingApp, self).ready()

    APIEndPoint(app=self, version_string='1')

    def documents_model(model_name):
        # Models owned by the ``documents`` app come from the app registry.
        return apps.get_model(app_label='documents', model_name=model_name)

    Document = documents_model('Document')
    DocumentVersion = documents_model('DocumentVersion')
    DocumentVersionParseError = self.get_model('DocumentVersionParseError')

    # Attach the parsing helpers as attributes of the document models.
    Document.add_to_class('submit_for_parsing', document_parsing_submit)
    DocumentVersion.add_to_class('content', get_document_content)
    DocumentVersion.add_to_class(
        'submit_for_parsing', document_version_parsing_submit
    )

    ModelPermission.register(
        model=Document, permissions=(permission_content_view,)
    )

    # --- Parse error list columns -----------------------------------------
    def parse_error_document(context):
        # ``context`` keeps the parameter name of the lambda it replaces.
        return document_link(context['object'].document_version.document)

    SourceColumn(
        source=DocumentVersionParseError, label=_('Document'),
        func=parse_error_document
    )
    attribute_columns = (
        (_('Added'), 'datetime_submitted'),
        (_('Result'), 'result'),
    )
    for column_label, column_attribute in attribute_columns:
        SourceColumn(
            source=DocumentVersionParseError, label=column_label,
            attribute=column_attribute
        )

    # --- Celery queue and route -------------------------------------------
    parsing_queue = Queue(
        'parsing', Exchange('parsing'), routing_key='parsing'
    )
    app.conf.CELERY_QUEUES.append(parsing_queue)
    app.conf.CELERY_ROUTES.update(
        {
            'document_parsing.tasks.task_parse_document_version': {
                'queue': 'parsing'
            },
        }
    )

    # --- Search fields ----------------------------------------------------
    document_search.add_model_field(
        field='versions__pages__content__content', label=_('Content')
    )
    document_page_search.add_model_field(
        field='content__content', label=_('Content')
    )

    # --- Menu links -------------------------------------------------------
    # Three menus each get a single link bound to the Document model.
    document_link_bindings = (
        (menu_facet, link_document_content),
        (menu_multi_item, link_document_submit_multiple),
        (menu_object, link_document_submit),
    )
    for menu, link in document_link_bindings:
        menu.bind_links(links=(link,), sources=(Document,))

    menu_secondary.bind_links(
        links=(
            link_document_content, link_document_parsing_errors_list,
            link_document_content_download
        ),
        sources=(
            'document_parsing:document_content',
            'document_parsing:document_content_download',
            'document_parsing:document_parsing_error_list',
        )
    )
    menu_tools.bind_links(
        links=(link_document_type_submit, link_error_list)
    )

    # --- Signal handlers --------------------------------------------------
    post_version_upload.connect(
        receiver=handler_parse_document_version,
        sender=DocumentVersion,
        dispatch_uid='document_parsing_handler_parse_document_version'
    )
def ready(self):
    """
    Set up the OCR app after the parent ``ready()`` has run.

    Looks up the models it needs, attaches the OCR helpers to the
    ``Document`` and ``DocumentVersion`` models, and registers the model
    field, permissions, OCR error columns, the ``ocr`` Celery queue and
    route, the search fields, the menu links and the signal handlers.
    """
    super(OCRApp, self).ready()

    def documents_model(model_name):
        # Models owned by the ``documents`` app come from the app registry.
        return apps.get_model(app_label='documents', model_name=model_name)

    Document = documents_model('Document')
    DocumentPage = documents_model('DocumentPage')
    DocumentType = documents_model('DocumentType')
    DocumentTypeSettings = self.get_model(
        model_name='DocumentTypeSettings'
    )
    DocumentVersion = documents_model('DocumentVersion')
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    # Attach the OCR helpers as attributes of the document models.
    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelField(Document, name='versions__pages__ocr_content__content')

    # --- Permissions ------------------------------------------------------
    ModelPermission.register(
        model=Document,
        permissions=(permission_ocr_document, permission_ocr_content_view)
    )
    ModelPermission.register(
        model=DocumentType,
        permissions=(permission_document_type_ocr_setup,)
    )
    ModelPermission.register_inheritance(
        model=DocumentTypeSettings, related='document_type'
    )

    # --- OCR error list columns -------------------------------------------
    def ocr_error_document(context):
        # ``context`` keeps the parameter name of the lambda it replaces.
        return document_link(context['object'].document_version.document)

    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=ocr_error_document
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    # --- Celery queue and route -------------------------------------------
    ocr_queue = Queue('ocr', Exchange('ocr'), routing_key='ocr')
    app.conf.CELERY_QUEUES.append(ocr_queue)
    app.conf.CELERY_ROUTES.update(
        {'ocr.tasks.task_do_ocr': {'queue': 'ocr'}}
    )

    # --- Search fields ----------------------------------------------------
    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )
    document_page_search.add_model_field(
        field='ocr_content__content', label=_('OCR')
    )

    # --- Menu links -------------------------------------------------------
    # Bindings of one link to one model, applied in the listed order.
    single_link_bindings = (
        (menu_facet, link_document_ocr_content, Document),
        (menu_facet, link_document_page_ocr_content, DocumentPage),
        (menu_multi_item, link_document_submit_multiple, Document),
        (menu_object, link_document_submit, Document),
        (menu_object, link_document_page_ocr_content, DocumentPage),
        (menu_object, link_document_type_ocr_settings, DocumentType),
    )
    for menu, link, model in single_link_bindings:
        menu.bind_links(links=(link,), sources=(model,))

    menu_secondary.bind_links(
        links=(
            link_document_ocr_content, link_document_ocr_errors_list,
            link_document_ocr_download
        ),
        sources=(
            'ocr:document_content', 'ocr:document_ocr_error_list',
            'ocr:document_ocr_download',
        )
    )
    menu_secondary.bind_links(
        links=(link_entry_list,),
        sources=(
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
    )
    menu_tools.bind_links(
        links=(link_document_type_submit, link_entry_list)
    )

    # --- Signal handlers --------------------------------------------------
    post_save.connect(
        receiver=handler_initialize_new_ocr_settings,
        sender=DocumentType,
        dispatch_uid='ocr_handler_initialize_new_ocr_settings'
    )
    post_version_upload.connect(
        receiver=handler_ocr_document_version,
        sender=DocumentVersion,
        dispatch_uid='ocr_handler_ocr_document_version'
    )
def ready(self):
    """
    Set up the OCR app after the parent ``ready()`` has run.

    Registers the API endpoint, attaches the OCR submit helpers to the
    ``Document`` and ``DocumentVersion`` models, and registers the
    permissions, OCR error columns, the ``ocr`` Celery queue and route,
    the search field, the menu links and the signal handlers. Finally it
    records the ``pdftotext`` and ``tesseract`` versions as properties of
    the ``ocr`` namespace.

    The version lookup never raises: a missing binary is recorded as
    'not found' and any other failure, including a failure while running
    the binary, is recorded as 'error getting version'.
    """
    super(OCRApp, self).ready()

    APIEndPoint(app=self, version_string='1')

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelPermission.register(
        model=Document, permissions=(
            permission_ocr_document, permission_ocr_content_view
        )
    )

    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr'),
    )

    app.conf.CELERY_ROUTES.update(
        {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
    )

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('Content')
    )

    menu_facet.bind_links(
        links=(link_document_content,), sources=(Document,)
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,), sources=(DocumentType,)
    )
    menu_secondary.bind_links(
        links=(link_entry_list,),
        sources=(
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
    )
    menu_tools.bind_links(
        links=(
            link_document_submit_all, link_document_type_submit,
            link_entry_list
        )
    )

    post_save.connect(
        initialize_new_ocr_settings,
        dispatch_uid='initialize_new_ocr_settings',
        sender=DocumentType
    )
    post_version_upload.connect(
        post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
        sender=DocumentVersion
    )

    namespace = PropertyNamespace('ocr', _('OCR'))

    # Running the binary with ``-v`` happens inside the ``try`` on purpose:
    # previously it ran in the ``else`` clause, so a binary that was found
    # but failed to execute raised out of ready() instead of being reported
    # as 'error getting version'.
    binaries = (
        ('pdftotext', _('pdftotext version'), setting_pdftotext_path),
        ('tesseract', _('tesseract version'), setting_tesseract_path),
    )
    for name, label, path_setting in binaries:
        try:
            version = sh.Command(path_setting.value)('-v').stderr
        except sh.CommandNotFound:
            version = _('not found')
        except Exception:
            # Deliberately broad: startup must not fail because a version
            # string could not be obtained.
            version = _('error getting version')

        namespace.add_property(name, label, version, report=True)
def ready(self):
    """
    Set up the OCR app after the parent ``ready()`` has run.

    Looks up the models it needs, registers the API endpoint, attaches the
    OCR submit helpers to the ``Document`` and ``DocumentVersion`` models,
    and registers the permissions, OCR error columns, the ``ocr`` Celery
    queue and route, the search field, the menu links and the signal
    handlers. Finally it records the ``pdftotext`` and ``tesseract``
    versions as properties of the ``ocr`` namespace.

    The version lookup never raises: a missing binary is recorded as
    'not found' and any other failure, including a failure while running
    the binary, is recorded as 'error getting version'.
    """
    super(OCRApp, self).ready()

    Document = apps.get_model(
        app_label='documents', model_name='Document'
    )
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    APIEndPoint(app=self, version_string='1')

    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelPermission.register(
        model=Document, permissions=(
            permission_ocr_document, permission_ocr_content_view
        )
    )

    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=lambda context: document_link(
            context['object'].document_version.document
        )
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr'),
    )

    app.conf.CELERY_ROUTES.update(
        {
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        }
    )

    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )

    menu_facet.bind_links(
        links=(link_document_content,), sources=(Document,)
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=(Document,)
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,), sources=(DocumentType,)
    )
    menu_secondary.bind_links(
        links=(link_entry_list,),
        sources=(
            'ocr:entry_list', 'ocr:entry_delete_multiple',
            'ocr:entry_re_queue_multiple', DocumentVersionOCRError
        )
    )
    menu_tools.bind_links(
        links=(
            link_document_submit_all, link_document_type_submit,
            link_entry_list
        )
    )

    post_save.connect(
        initialize_new_ocr_settings,
        dispatch_uid='initialize_new_ocr_settings',
        sender=DocumentType
    )
    post_version_upload.connect(
        post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
        sender=DocumentVersion
    )

    namespace = PropertyNamespace('ocr', _('OCR'))

    # Running the binary with ``-v`` happens inside the ``try`` on purpose:
    # previously it ran in the ``else`` clause, so a binary that was found
    # but failed to execute raised out of ready() instead of being reported
    # as 'error getting version'.
    binaries = (
        ('pdftotext', _('pdftotext version'), setting_pdftotext_path),
        ('tesseract', _('tesseract version'), setting_tesseract_path),
    )
    for name, label, path_setting in binaries:
        try:
            version = sh.Command(path_setting.value)('-v').stderr
        except sh.CommandNotFound:
            version = _('not found')
        except Exception:
            # Deliberately broad: startup must not fail because a version
            # string could not be obtained.
            version = _('error getting version')

        namespace.add_property(name, label, version, report=True)
def ready(self):
    """
    Set up the OCR app after the parent ``ready()`` has run.

    Looks up the models it needs, attaches the OCR helpers to the
    ``Document`` and ``DocumentVersion`` models, and registers the model
    field, permissions, OCR error columns, the ``ocr`` Celery queue and
    route, the search fields, the menu links and three signal handlers
    (post OCR, document type save and version upload).
    """
    super(OCRApp, self).ready()

    def documents_model(model_name):
        # Models owned by the ``documents`` app come from the app registry.
        return apps.get_model(app_label='documents', model_name=model_name)

    Document = documents_model('Document')
    DocumentPage = documents_model('DocumentPage')
    DocumentType = documents_model('DocumentType')
    DocumentTypeSettings = self.get_model(
        model_name='DocumentTypeSettings'
    )
    DocumentVersion = documents_model('DocumentVersion')
    DocumentVersionOCRError = self.get_model('DocumentVersionOCRError')

    # Source tuples reused by several menu bindings further down.
    document_sources = (Document,)
    document_page_sources = (DocumentPage,)
    document_type_sources = (DocumentType,)

    # Attach the OCR helpers as attributes of the document models.
    Document.add_to_class('submit_for_ocr', document_ocr_submit)
    DocumentVersion.add_to_class('ocr_content', get_document_ocr_content)
    DocumentVersion.add_to_class(
        'submit_for_ocr', document_version_ocr_submit
    )

    ModelField(Document, name='versions__pages__ocr_content__content')

    # --- Permissions ------------------------------------------------------
    document_permissions = (
        permission_ocr_document, permission_ocr_content_view
    )
    ModelPermission.register(
        model=Document, permissions=document_permissions
    )
    document_type_permissions = (permission_document_type_ocr_setup,)
    ModelPermission.register(
        model=DocumentType, permissions=document_type_permissions
    )
    ModelPermission.register_inheritance(
        model=DocumentTypeSettings, related='document_type'
    )

    # --- OCR error list columns -------------------------------------------
    def ocr_error_document(context):
        # ``context`` keeps the parameter name of the lambda it replaces.
        ocr_error = context['object']
        return document_link(ocr_error.document_version.document)

    SourceColumn(
        source=DocumentVersionOCRError, label=_('Document'),
        func=ocr_error_document
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Added'),
        attribute='datetime_submitted'
    )
    SourceColumn(
        source=DocumentVersionOCRError, label=_('Result'),
        attribute='result'
    )

    # --- Celery queue and route -------------------------------------------
    app.conf.CELERY_QUEUES.append(
        Queue('ocr', Exchange('ocr'), routing_key='ocr')
    )
    ocr_routes = {'ocr.tasks.task_do_ocr': {'queue': 'ocr'}}
    app.conf.CELERY_ROUTES.update(ocr_routes)

    # --- Search fields ----------------------------------------------------
    document_search.add_model_field(
        field='versions__pages__ocr_content__content', label=_('OCR')
    )
    document_page_search.add_model_field(
        field='ocr_content__content', label=_('OCR')
    )

    # --- Menu links -------------------------------------------------------
    menu_facet.bind_links(
        links=(link_document_ocr_content,), sources=document_sources
    )
    menu_facet.bind_links(
        links=(link_document_page_ocr_content,),
        sources=document_page_sources
    )
    menu_multi_item.bind_links(
        links=(link_document_submit_multiple,), sources=document_sources
    )
    menu_object.bind_links(
        links=(link_document_submit,), sources=document_sources
    )
    menu_object.bind_links(
        links=(link_document_page_ocr_content,),
        sources=document_page_sources
    )
    menu_object.bind_links(
        links=(link_document_type_ocr_settings,),
        sources=document_type_sources
    )

    content_links = (
        link_document_ocr_content, link_document_ocr_errors_list,
        link_document_ocr_download
    )
    content_views = (
        'ocr:document_content', 'ocr:document_ocr_error_list',
        'ocr:document_ocr_download',
    )
    menu_secondary.bind_links(links=content_links, sources=content_views)

    entry_sources = (
        'ocr:entry_list', 'ocr:entry_delete_multiple',
        'ocr:entry_re_queue_multiple', DocumentVersionOCRError
    )
    menu_secondary.bind_links(
        links=(link_entry_list,), sources=entry_sources
    )

    menu_tools.bind_links(
        links=(link_document_type_submit, link_entry_list)
    )

    # --- Signal handlers --------------------------------------------------
    post_document_version_ocr.connect(
        receiver=handler_index_document,
        sender=DocumentVersion,
        dispatch_uid='ocr_handler_index_document'
    )
    post_save.connect(
        receiver=handler_initialize_new_ocr_settings,
        sender=DocumentType,
        dispatch_uid='ocr_handler_initialize_new_ocr_settings'
    )
    post_version_upload.connect(
        receiver=handler_ocr_document_version,
        sender=DocumentVersion,
        dispatch_uid='ocr_handler_ocr_document_version'
    )