def test_richstringtokenizer_loadtext(self):
     # Iterating the tokenizer is checked twice: once for the text handed to
     # the constructor, and once more after load_text() is called with a
     # shorter text on the same instance.
     tokenizer = RichStringTokenizer(
         "Hello everyone, this is   me speaking. And me.",
         token_min_size=1,
         token_max_size=3,
     )
     self.assertEqual(len(list(tokenizer)), 18)

     tokenizer.load_text("Hello everyone")
     self.assertEqual(len(list(tokenizer)), 3)
 def test_richstringtokenizer_sentences(self):
     # find_sentences() is called through a tokenizer instance; each returned
     # item's start/end offsets are used to slice the input text, and the
     # slice is compared with the expected sentence.
     text = "Hello everyone, this is   me speaking. And me ! Why not me ? Blup"
     expected_sentences = (
         "Hello everyone, this is   me speaking.",
         "And me !",
         "Why not me ?",
         "Blup",
     )
     tokenizer = RichStringTokenizer(text, token_min_size=1, token_max_size=4)
     found = tokenizer.find_sentences(text)
     self.assertEqual(len(found), 4)
     # Index explicitly (rather than zip) so the result is accessed exactly
     # as a subscriptable sequence, one position at a time, in order.
     for position, expected in enumerate(expected_sentences):
         span = found[position]
         self.assertEqual(text[span.start : span.end], expected)
 def test_find_sentences(self):
     # find_sentences() is called directly on the class here, and the first
     # two results are compared against fully specified Sentence values.
     text = "Hello everyone, this is   me speaking. And me."
     found = RichStringTokenizer.find_sentences(text)
     # (start, end) offsets expected for each sentence, in order.
     expected_bounds = ((0, 38), (39, 46))
     for position, (begin, finish) in enumerate(expected_bounds):
         self.assertEqual(
             found[position],
             Sentence(indice=position, start=begin, end=finish),
         )