def test_richstringtokenizer_sentences(self):
    """Check the spans returned by ``find_sentences`` on a tokenizer instance.

    A ``RichStringTokenizer`` is built over the sample text with
    ``token_min_size=1`` and ``token_max_size=4``; slicing the text with each
    returned ``start``/``end`` pair must reproduce the expected sentences, in
    order.
    """
    text = "Hello everyone, this is me speaking. And me ! Why not me ? Blup"
    expected_sentences = (
        "Hello everyone, this is me speaking.",
        "And me !",
        "Why not me ?",
        "Blup",
    )

    tokenizer = RichStringTokenizer(text, token_min_size=1, token_max_size=4)
    found = tokenizer.find_sentences(text)

    # Check the count first so a wrong split is reported as a clear failure
    # rather than as an IndexError from the lookups below.
    self.assertEqual(len(found), len(expected_sentences))

    for position, expected in enumerate(expected_sentences):
        span = found[position]
        self.assertEqual(text[span.start : span.end], expected)
def test_find_sentences(self):
    """Check the ``Sentence`` records produced for a two-sentence text.

    ``find_sentences`` is called directly on the ``RichStringTokenizer``
    class, and the first two results are compared with explicit
    ``Sentence(indice, start, end)`` values.
    """
    text = "Hello everyone, this is me speaking. And me."
    found = RichStringTokenizer.find_sentences(text)

    # NOTE(review): the literal above is 44 characters long as written, while
    # the expected spans below end at 38 and 46 -- confirm the fixture text
    # (whitespace may have been lost) or the offset convention used by
    # find_sentences.
    expected = (
        Sentence(indice=0, start=0, end=38),
        Sentence(indice=1, start=39, end=46),
    )

    for position, wanted in enumerate(expected):
        self.assertEqual(found[position], wanted)