def test_titles_in_post_process_sentence1(self):
    """A title followed by a sentence is returned as two separate sub-spans.

    The fragment is embedded in a longer text and its (start, end) span is
    handed to ``post_process_sentence``; each returned sub-span is sliced
    out of the text and stripped before comparison.
    """
    # NOTE(review): the literals appear on a single line here; any line
    # breaks they originally contained may have been collapsed -- confirm.
    fragment = '''This is a title And this is the next sentence. '''
    document = '''Something. ''' + fragment + ''' That's it.'''
    offset = document.index(fragment)

    pieces = []
    for lo, hi in post_process_sentence(document, (offset, offset + len(fragment))):
        pieces.append(document[lo:hi].strip())

    self.assertEqual(
        ['This is a title', 'And this is the next sentence.'],
        pieces,
    )
def test_ocr_artifacts_in_post_process_sentence2(self):
    """A leading artifact is dropped while the rest of the span is kept.

    The fragment's (start, end) span inside a longer text is passed to
    ``post_process_sentence``; the returned sub-spans are sliced out of the
    text unmodified and compared with the single expected string.
    """
    # NOTE(review): the expected string contains a newline, but the literal
    # as it appears here contains none, so no slice of `document` can equal
    # it. The literal's line breaks were presumably lost -- restore and
    # confirm against the original test.
    fragment = '''\\ ______f hello hello '''
    document = '''Something. ''' + fragment + '''That's it.'''
    offset = document.index(fragment)
    window = (offset, offset + len(fragment))

    pieces = []
    for lo, hi in post_process_sentence(document, window):
        pieces.append(document[lo:hi])

    wanted = ['______f\n hello hello']
    self.assertEqual(wanted, pieces)
def test_ocr_artifacts_in_post_process_sentence1(self):
    """A span made up only of tildes, backticks, angle brackets and quotes
    yields no sub-spans.

    The fragment is located inside a longer text and its (start, end) span
    is passed to ``post_process_sentence``; the result is expected to be
    empty.
    """
    fragment = '''~~``~~~~```~~ >> << ""'''
    document = '''Something. ''' + fragment + '''That's it.'''
    offset = document.index(fragment)
    window = (offset, offset + len(fragment))

    pieces = [
        document[lo:hi]
        for lo, hi in post_process_sentence(document, window)
    ]

    self.assertEqual([], pieces)
def test_ocr_artifacts_in_post_process_sentence3():
    """A span of short, isolated tokens after a backslash yields no sub-spans.

    The fragment is located inside a longer text and its (start, end) span
    is passed to ``post_process_sentence``; the result is expected to be
    empty.
    """
    # NOTE(review): the literal appears on a single line here; any line
    # breaks it originally contained may have been collapsed -- confirm.
    fragment = '''\\ ba Ba ba Q F '''
    document = '''Something. ''' + fragment + '''That's it.'''
    offset = document.index(fragment)

    pieces = []
    for lo, hi in post_process_sentence(document, (offset, offset + len(fragment))):
        pieces.append(document[lo:hi])

    assert_equal([], pieces)