Beispiel #1
0
 def test_get_mime_types(self):
     """Check that ``mime_types`` equals the three expected MIME types."""
     expected_mime_types = {
         'application/alto+xml',
         'application/vnd.prima.page+xml',
         'image/tiff',
     }
     document = Document.load(self.path)
     self.assertEqual(expected_mime_types, document.mime_types)
Beispiel #2
0
 def test_path_with_spaces(self):
     """Load a METS file whose directory name contains a space and fetch a page image."""
     mets_path = TEST_BASE_PATH / 'example/workspaces/heavy quoting/mets.xml'
     document = Document.load(mets_path.as_uri())
     page = document.page_for_id('PHYS_0017', 'OCR-D-GT-PAGE')
     result = document.workspace.image_from_page(page.page, 'PHYS_0017')
     # Getting here means nothing raised; the first element of the result
     # must additionally be an image of plausible height
     self.assertGreater(result[0].height, 100)
Beispiel #3
0
 def test_missing_image(self):
     """get_image() returns ``None`` as image for the missing-image workspace instead of raising."""
     mets_path = TEST_BASE_PATH / 'example/workspaces/kant_aufklaerung_1784_missing_image/mets.xml'
     document = Document.load(mets_path.as_uri())
     page = document.page_for_id('PHYS_0017', 'OCR-D-GT-PAGE')
     # Only the image part of the returned triple is checked
     image, _info, _exif = page.get_image(feature_selector='', feature_filter='binarized')
     self.assertIsNone(image)
Beispiel #4
0
 def test_path_ocrd_file(self):
     """Document.path() maps a file found in the METS to its location below ASSETS_PATH."""
     document = Document.load(self.path)
     matches = document.workspace.mets.find_files(pageId='PHYS_0017', fileGrp='OCR-D-IMG')
     first_image_file = list(matches)[0]
     expected_path = ASSETS_PATH / 'kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif'
     self.assertEqual(expected_path, document.path(first_image_file))
Beispiel #5
0
    def _open(self, uri: str) -> None:
        """
        Load the document at ``uri`` and make it the current one.

        The loaded document is passed to the page list and the view manager,
        the UI is updated, and the first page id (if there is one) is activated.
        """
        # noinspection PyTypeChecker
        self.document = Document.load(uri, emitter=self.emit)
        self.page_list.set_document(self.document)

        self.view_manager.set_document(self.document)
        self.update_ui()

        page_ids = self.document.page_ids
        if not page_ids:
            # Nothing to activate in a document without pages
            return
        self.on_page_activated(None, page_ids[0])
Beispiel #6
0
 def test_page_for_id_with_multiple_images_for_page_and_fileGrp(self):
     """
     Returns the first image (and is expected to warn)
     """
     mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-complex/data/mets.xml'
     document = Document.load(mets_path)
     # NOTE: the warning itself is not asserted in this test, only the
     # types of the returned page and of its ``pc_gts`` attribute are.
     page = document.page_for_id('PHYS_0017', 'OCR-D-IMG-CLIP')
     self.assertIsInstance(page, Page)
     self.assertIsInstance(page.pc_gts, PcGtsType)
Beispiel #7
0
 def test_can_call_get_image_if_supported(self):
     """
     get_image(filename=...) works when supported and raises RuntimeError otherwise.

     With IMAGE_FROM_PAGE_FILENAME_SUPPORT set, selecting the image by feature
     and selecting it by filename must give equal images. Without it, passing
     ``filename`` must raise a RuntimeError whose message starts with
     'Parameter filename not supported in '.
     """
     mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-binarized/data/mets.xml'
     page: LazyPage = Document.load(mets_path).page_for_id('P_0017', 'OCR-D-GT-WORD')
     if IMAGE_FROM_PAGE_FILENAME_SUPPORT:
         image_by_feature, _, _ = page.get_image(feature_selector={'binarized'}, feature_filter={'cropped'})
         image_by_filename, _, _ = page.get_image(filename='OCR-D-IMG-BIN/BIN_0017.png', feature_filter={'cropped'})
         self.assertEqual(image_by_feature, image_by_filename)
         return

     # assertRaises replaces the manual try / self.fail / except construct;
     # ``msg`` keeps the original hint in the failure output when nothing is raised.
     with self.assertRaises(RuntimeError, msg='IMAGE_FROM_PAGE_FILENAME_SUPPORT detected wrong') as raised:
         page.get_image(filename='OCR-D-GT-IMG-BIN/PAGE_2019.tif')
     self.assertTrue(str(raised.exception).startswith('Parameter filename not supported in '))
Beispiel #8
0
 def test_get_page_index(self):
     """The file index holds three files for page PHYS_0017 and two ALTO files."""
     file_index = Document.load(self.path).get_file_index()
     page17_count = sum(
         1 for entry in file_index.values()
         if entry.static_page_id == 'PHYS_0017'
     )
     alto_count = sum(
         1 for entry in file_index.values()
         if entry.mimetype == 'application/alto+xml'
     )
     self.assertEqual(3, page17_count)
     self.assertEqual(2, alto_count)
Beispiel #9
0
 def test_page_for_id_with_multiple_images_for_page_and_fileGrp(self):
     """
     Returns the first image and logs exactly one warning
     """
     mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-complex/data/mets.xml'
     document = Document.load(mets_path)
     with self.assertLogs('ocrd_browser.model.document', level='WARNING') as captured:
         page = document.page_for_id('PHYS_0017', 'OCR-D-IMG-CLIP')
     self.assertIsInstance(page, Page)
     records = captured.records
     self.assertEqual(1, len(records))
     expected_message = "No PAGE-XML but 2 images for page 'PHYS_0017' in fileGrp 'OCR-D-IMG-CLIP'"
     self.assertEqual(expected_message, records[0].msg)
Beispiel #10
0
    def test_page_for_id_with_no_images_for_page_and_fileGrp(self):
        """
        Regression test for issue #4 (list index out of range on non-XML fileGrp)

        page_for_id() must return ``None`` and log a single warning.

        https://github.com/hnesk/browse-ocrd/issues/4
        """
        mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-complex/data/mets.xml'
        document = Document.load(mets_path)
        with self.assertLogs('ocrd_browser.model.document', level='WARNING') as captured:
            page = document.page_for_id('PHYS_0020', 'OCR-D-IMG-CLIP')
        self.assertIsNone(page)
        records = captured.records
        self.assertEqual(1, len(records))
        expected_message = "No PAGE-XML and no image for page 'PHYS_0020' in fileGrp 'OCR-D-IMG-CLIP'"
        self.assertEqual(expected_message, records[0].msg)
Beispiel #11
0
    def test_page_for_id_with_nothing_for_page_and_fileGrp(self):
        """
        Regression test for issue #4, this time for missing PAGE-XMLs

        page_for_id() must return ``None`` and log a single warning.

        https://github.com/hnesk/browse-ocrd/issues/4
        """
        mets_path = ASSETS_PATH / '../example/workspaces/kant_aufklaerung_1784_missing_xml/mets.xml'
        document = Document.load(mets_path)
        with self.assertLogs('ocrd_browser.model.document', level='WARNING') as captured:
            page = document.page_for_id('PHYS_0020', 'OCR-D-GT-PAGE')
        self.assertIsNone(page)
        records = captured.records
        self.assertEqual(1, len(records))
        expected_message = "No PAGE-XML and no image for page 'PHYS_0020' in fileGrp 'OCR-D-GT-PAGE'"
        self.assertEqual(expected_message, records[0].msg)
Beispiel #12
0
    def test_save(self):
        """
        A cloned document saved into a temporary directory loads back unchanged.

        Compared are the file groups, the page ids, the METS unique identifier
        and, per page and (file group, mimetype) pair, the first matching file.
        """
        original = Document.clone(self.path)
        with TemporaryDirectory(prefix='browse-ocrd-tests') as tmp_dir:
            mets_copy = f'{tmp_dir}/mets.xml'
            original.save_as(mets_copy)
            reloaded = Document.load(mets_copy)

            self.assertEqual(original.file_groups, reloaded.file_groups)
            self.assertEqual(original.page_ids, reloaded.page_ids)
            self.assertEqual(
                original.workspace.mets.unique_identifier,
                reloaded.workspace.mets.unique_identifier,
            )

            for page_id in original.page_ids:
                for file_group, mime in original.file_groups_and_mimetypes:
                    lookup = (page_id, file_group, mime)
                    file_before = original.files_for_page_id(*lookup)[0]
                    file_after = reloaded.files_for_page_id(*lookup)[0]
                    self.assertEqual(file_before, file_after)
Beispiel #13
0
 def test_modify_when_not_editable(self):
     """reorder() on a document obtained via Document.load() raises PermissionError."""
     document = Document.load(self.path)
     new_order = ['PHYS_0020', 'PHYS_0017']
     self.assertRaises(PermissionError, document.reorder, new_order)
Beispiel #14
0
 def test_path_path(self):
     """Document.path() called with a relative ``Path`` returns the expected path below ASSETS_PATH."""
     document = Document.load(self.path)
     relative = Path('OCR-D-DIR/lala.xml')
     expected_path = ASSETS_PATH / 'kant_aufklaerung_1784/data/OCR-D-DIR/lala.xml'
     self.assertEqual(expected_path, document.path(relative))
Beispiel #15
0
 def test_path_string(self):
     """Document.path() called with a plain file name string returns the expected path below ASSETS_PATH."""
     document = Document.load(self.path)
     expected_path = ASSETS_PATH / 'kant_aufklaerung_1784/data/lala.xml'
     self.assertEqual(expected_path, document.path('lala.xml'))
Beispiel #16
0
 def test_get_default_image_group_with_missing_ocr_d_img(self):
     """For the ``no_ocrd_d_img_group`` workspace the default image group is 'OCR-D-IMG-PNG'."""
     mets_path = ASSETS_PATH / '../example/workspaces/no_ocrd_d_img_group/mets.xml'
     default_group = Document.load(mets_path).get_default_image_group()
     self.assertEqual('OCR-D-IMG-PNG', default_group)
Beispiel #17
0
 def test_get_default_image_group_no_preference(self):
     """Called without arguments, get_default_image_group() yields 'OCR-D-IMG' for the complex workspace."""
     mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-complex/data/mets.xml'
     default_group = Document.load(mets_path).get_default_image_group()
     self.assertEqual('OCR-D-IMG', default_group)
Beispiel #18
0
 def test_get_default_image_group(self):
     """Given the list ['OCR-D-IMG-BIN', 'OCR-D-IMG.*'], the result is 'OCR-D-IMG-BIN'."""
     mets_path = ASSETS_PATH / 'kant_aufklaerung_1784-complex/data/mets.xml'
     document = Document.load(mets_path)
     preferences = ['OCR-D-IMG-BIN', 'OCR-D-IMG.*']
     self.assertEqual('OCR-D-IMG-BIN', document.get_default_image_group(preferences))
Beispiel #19
0
 def test_get_image_paths(self):
     """get_image_paths('OCR-D-IMG') maps both page ids to their image file."""
     image_paths = Document.load(self.path).get_image_paths('OCR-D-IMG')
     self.assertEqual(2, len(image_paths))
     expected_names = (
         ('PHYS_0017', 'INPUT_0017.tif'),
         ('PHYS_0020', 'INPUT_0020.tif'),
     )
     for page_id, file_name in expected_names:
         self.assertEqual(file_name, image_paths[page_id].name)
Beispiel #20
0
 def test_get_file_groups_and_mimetypes(self):
     """``file_groups_and_mimetypes`` lists the (file group, mimetype) pairs in the expected order."""
     expected_pairs = [
         ('OCR-D-IMG', 'image/tiff'),
         ('OCR-D-GT-PAGE', 'application/vnd.prima.page+xml'),
         ('OCR-D-GT-ALTO', 'application/alto+xml'),
     ]
     document = Document.load(self.path)
     self.assertEqual(expected_pairs, document.file_groups_and_mimetypes)
Beispiel #21
0
 def test_xpath_works_with_different_namespaces(self):
     """
     The ``page:``-prefixed XPath finds an imageFilename on each of the three pages.

     NOTE(review): presumably the PAGE files of this workspace use different
     namespace versions, as the test name suggests -- confirm against the fixture.
     """
     mets_path = TEST_BASE_PATH / 'example/workspaces/aletheiaexamplepage/mets.xml'
     document = Document.load(mets_path)
     image_filename_query = '/page:PcGts/page:Page/@imageFilename'
     for page_id in ('PAGE_2017', 'PAGE_2018', 'PAGE_2019'):
         page = document.page_for_id(page_id, 'OCR-D-GT-PAGE')
         matches = page.xpath(image_filename_query)
         self.assertGreater(len(matches), 0)
Beispiel #22
0
 def setUp(self) -> None:
     """Create a launcher, load the document below BASE_PATH and keep the first file of page PHYS_0017 in OCR-D-GT-PAGE."""
     self.launcher = Launcher()
     self.doc = Document.load(BASE_PATH / 'mets.xml')
     page_files = self.doc.files_for_page_id('PHYS_0017', 'OCR-D-GT-PAGE')
     self.file = page_files[0]
Beispiel #23
0
 def test_get_page_ids(self):
     """``page_ids`` lists both page ids in the expected order."""
     expected_page_ids = ['PHYS_0017', 'PHYS_0020']
     document = Document.load(self.path)
     self.assertEqual(expected_page_ids, document.page_ids)