def test_sentence_tokenizer_utils_with_punctuation(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin', punctuation=['.', '?', '!']) self.assertIsInstance( trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)
def test_sentence_tokenizer_utils_with_strict(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin', strict=True, punctuation=['.', '?', '!'], strict_punctuation=[',']) self.assertIsInstance(trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)
def test_sentence_tokenizer_trainer_pickle(self): with patch.object(BaseSentenceTokenizerTrainer, 'pickle_sentence_tokenizer') as mock: trainer = BaseSentenceTokenizerTrainer('latin') trainer.pickle_sentence_tokenizer('mock.p', trainer) mock.assert_called_once_with('mock.p', trainer)
def test_sentence_tokenizer_utils_with_abbreviations(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin', abbreviations=['Kal.']) self.assertIsInstance( trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)
def test_sentence_tokenizer_utils(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin') self.assertIsInstance( trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)
def test_sentence_tokenizer_utils_with_abbreviations(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin', abbreviations=['Kal.']) self.assertIsInstance(trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)
def test_sentence_tokenizer_utils(self): """Test sentence tokenization trainer""" trainer = BaseSentenceTokenizerTrainer('latin') self.assertIsInstance(trainer.train_sentence_tokenizer(self.latin_text), PunktSentenceTokenizer)