def test_titles_in_post_process_sentence1(self):
        """A title line and the sentence after it come back as two pieces.

        The fixture is embedded between two filler sentences. Every
        ``(start, end)`` pair yielded by ``post_process_sentence`` is sliced
        out of the full text and stripped before being compared.
        """
        # The fixture's whitespace is spelled out with escapes so that the
        # whitespace-only lines are visible and survive editor trimming.
        sentence = (
            'This is a title\n'
            '    \n'
            '    And this is the next sentence.\n'
            '    '
        )
        lead = 'Something. '
        tail = " That's it."
        text = lead + sentence + tail

        # Character offsets of the fixture inside the surrounding text.
        offset = text.index(sentence)
        sentence_span = (offset, offset + len(sentence))

        wanted = ['This is a title', 'And this is the next sentence.']
        found = []
        for begin, stop in post_process_sentence(text, sentence_span):
            found.append(text[begin:stop].strip())

        self.assertEqual(wanted, found)
    def test_ocr_artifacts_in_post_process_sentence2(self):
        """Only the ``______f`` and ``hello hello`` lines remain, as one piece.

        The fixture opens with a line holding a single backslash and also
        contains whitespace-only lines. The expected slice is compared
        without stripping, so its inner newline and indentation matter.
        """
        # Escapes are used instead of a triple-quoted block so the exact
        # amount of whitespace on every fixture line is explicit.
        sentence = (
            '\\\n'
            '        \n'
            '        ______f\n'
            '        hello hello\n'
            '          \n'
            '        '
        )
        lead = 'Something. '
        tail = "That's it."
        text = lead + sentence + tail

        # Character offsets of the fixture inside the surrounding text.
        offset = text.index(sentence)
        sentence_span = (offset, offset + len(sentence))

        wanted = ['______f\n        hello hello']
        found = [
            text[begin:stop]
            for begin, stop in post_process_sentence(text, sentence_span)
        ]

        self.assertEqual(wanted, found)
    def test_ocr_artifacts_in_post_process_sentence1(self):
        """A fixture made only of punctuation runs produces no pieces.

        The lines consist of tildes and backticks, ``>>``, ``<<`` and a pair
        of double quotes, separated by whitespace-only lines. The list of
        slices built from ``post_process_sentence`` must be empty.
        """
        # Escapes are used instead of a triple-quoted block so the exact
        # amount of whitespace on every fixture line is explicit.
        sentence = (
            '~~``~~~~```~~\n'
            '        \n'
            '        >>\n'
            '        \n'
            '        <<\n'
            '        \n'
            '        ""'
        )
        lead = 'Something. '
        tail = "That's it."
        text = lead + sentence + tail

        # Character offsets of the fixture inside the surrounding text.
        offset = text.index(sentence)
        sentence_span = (offset, offset + len(sentence))

        wanted = []
        found = []
        for begin, stop in post_process_sentence(text, sentence_span):
            found.append(text[begin:stop])

        self.assertEqual(wanted, found)
def test_ocr_artifacts_in_post_process_sentence3():
    """A fixture of very short fragments produces no pieces.

    After a line holding a single backslash, the fixture contains only
    ``ba``, ``Ba ba``, ``Q`` and ``F`` separated by blank or whitespace-only
    lines. The list of slices built from ``post_process_sentence`` must be
    empty.
    """
    # Escapes are used instead of a triple-quoted block so the exact amount
    # of whitespace on every fixture line is explicit.
    sentence = (
        '\\\n'
        '    \n'
        '    ba\n'
        '     \n'
        '    Ba ba\n'
        '    \n'
        '    Q\n'
        '    \n'
        '    F\n'
        '\n'
        '    '
    )
    lead = 'Something. '
    tail = "That's it."
    text = lead + sentence + tail

    # Character offsets of the fixture inside the surrounding text.
    offset = text.index(sentence)
    sentence_span = (offset, offset + len(sentence))

    wanted = []
    found = []
    for begin, stop in post_process_sentence(text, sentence_span):
        found.append(text[begin:stop])

    assert_equal(wanted, found)