def test_validate_multi_textequiv(self):
    """Exercise set_text/get_text with the 'index1' strategy on one word.

    Parses the PAGE fixture, asserts that validation reports 25 errors,
    then empties the TextEquiv list of the second word in the first line
    of the first text region and checks that text written with
    ``set_text(..., 'index1')`` is what ``get_text(..., 'index1')`` returns,
    both before and after an extra TextEquiv without an index is present.
    """
    # NOTE(review): the sibling test test_validate_multi_textequiv_first
    # loads 'PAGE_0020_PAGE.xml' (with extension); confirm which file name
    # the assets actually provide.
    ocrd_page = parse(
        assets.path_to('kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0020_PAGE'),
        silence=True,
    )
    report = PageValidator.validate(ocrd_page=ocrd_page)
    self.assertEqual(len(report.errors), 25, '25 errors - strict')

    first_line = ocrd_page.get_Page().get_TextRegion()[0].get_TextLine()[0]
    word = first_line.get_Word()[1]

    # Delete all textequivs. Replacing the list (rather than deleting only
    # element 0) really removes every entry and does not raise when the
    # word has no TextEquiv to begin with.
    word.set_TextEquiv([])

    # Add textequiv: one through set_text, then a second one without index.
    set_text(word, 'FOO', 'index1')
    word.add_TextEquiv(TextEquivType(Unicode='BAR', conf=.7))
    self.assertEqual(get_text(word, 'index1'), 'FOO')

    # Overwriting through the same strategy must be read back unchanged.
    set_text(word, 'BAR', 'index1')
    self.assertEqual(get_text(word, 'index1'), 'BAR')
def test_validate_multi_textequiv_first(self):
    """Exercise set_text/get_text with the 'first' strategy on one word.

    Parses the PAGE fixture and asserts that validation yields 25 errors
    of type ConsistencyError. It then clears the TextEquiv list of the
    second word in the first line of the first text region, writes 'FOO'
    via ``set_text``, appends two more TextEquivs ('BAR' without index,
    'BAZ' with ``index=0``) and expects ``get_text(..., 'first')`` to
    return 'BAZ'; after ``set_text(..., 'XYZ', 'first')`` it expects 'XYZ'.
    """
    fixture = assets.path_to('kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0020_PAGE.xml')
    pcgts = parse(fixture, silence=True)
    validation = PageValidator.validate(ocrd_page=pcgts)
    consistency_errors = [
        err for err in validation.errors if isinstance(err, ConsistencyError)
    ]
    self.assertEqual(
        len(consistency_errors),
        25,
        '25 textequiv consistency errors - strict',
    )

    region = pcgts.get_Page().get_TextRegion()[0]
    line = region.get_TextLine()[0]
    target = line.get_Word()[1]

    # Start from a word without any TextEquiv.
    target.set_TextEquiv([])

    # Populate: one entry through set_text, then two explicit ones.
    set_text(target, 'FOO', 'first')
    bar_equiv = TextEquivType(Unicode='BAR', conf=.7)
    target.add_TextEquiv(bar_equiv)
    baz_equiv = TextEquivType(Unicode='BAZ', conf=.5, index=0)
    target.add_TextEquiv(baz_equiv)
    self.assertEqual(get_text(target, 'first'), 'BAZ')

    # Writing with the same strategy must be visible when reading back.
    set_text(target, 'XYZ', 'first')
    self.assertEqual(get_text(target, 'first'), 'XYZ')