예제 #1
0
    def test_validate_multi_textequiv(self):
        """Check 'index1'-strategy text access on a word with two TextEquivs.

        The page is validated first and 25 errors are expected in the report.
        The second word of the first line of the first text region then has
        its TextEquivs cleared, receives 'FOO' through ``set_text`` plus an
        additional 'BAR' TextEquiv, and is read back and overwritten using
        the 'index1' strategy.
        """
        # NOTE(review): this path has no '.xml' suffix, unlike the one used in
        # test_validate_multi_textequiv_first -- confirm the asset file name.
        ocrd_page = parse(assets.path_to('kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0020_PAGE'), silence=True)
        report = PageValidator.validate(ocrd_page=ocrd_page)
        self.assertEqual(len(report.errors), 25, '25 errors - strict')

        word = ocrd_page.get_Page().get_TextRegion()[0].get_TextLine()[0].get_Word()[1]

        # Delete all textequivs: reset the list so every existing entry is
        # removed (not just the first one) and an empty list cannot raise.
        word.set_TextEquiv([])

        # Add textequivs: one through set_text, one appended directly.
        set_text(word, 'FOO', 'index1')
        word.add_TextEquiv(TextEquivType(Unicode='BAR', conf=.7))

        # With two TextEquivs present, 'index1' still reads and writes 'FOO'.
        self.assertEqual(get_text(word, 'index1'), 'FOO')
        set_text(word, 'BAR', 'index1')
        self.assertEqual(get_text(word, 'index1'), 'BAR')
    def test_validate_multi_textequiv_first(self):
        """Check 'first'-strategy text access on a word with three TextEquivs.

        The validation report is expected to contain 25 ``ConsistencyError``
        entries. The second word of the first line of the first text region
        is then emptied of TextEquivs and given 'FOO' (via ``set_text``),
        'BAR' (no index) and 'BAZ' (index=0); the 'first' strategy must read
        'BAZ' and, after ``set_text``, 'XYZ'.
        """
        page_path = assets.path_to('kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0020_PAGE.xml')
        ocrd_page = parse(page_path, silence=True)
        report = PageValidator.validate(ocrd_page=ocrd_page)
        consistency_errors = [err for err in report.errors if isinstance(err, ConsistencyError)]
        self.assertEqual(len(consistency_errors), 25, '25 textequiv consistency errors - strict')

        first_line = ocrd_page.get_Page().get_TextRegion()[0].get_TextLine()[0]
        word = first_line.get_Word()[1]

        # Start from a word without any TextEquiv.
        word.set_TextEquiv([])

        # Populate it: one entry through set_text, two appended directly.
        set_text(word, 'FOO', 'first')
        for extra in (
            TextEquivType(Unicode='BAR', conf=0.7),
            TextEquivType(Unicode='BAZ', conf=0.5, index=0),
        ):
            word.add_TextEquiv(extra)

        # Read with the 'first' strategy, overwrite, and read again.
        self.assertEqual(get_text(word, 'first'), 'BAZ')
        set_text(word, 'XYZ', 'first')
        self.assertEqual(get_text(word, 'first'), 'XYZ')