Beispiel #1
0
 def test_sentence_tokenizer_utils_with_punctuation(self):
     """Check training with an explicit punctuation list.

     A Latin trainer is created with '.', '?' and '!' passed as
     ``punctuation``; training on ``self.latin_text`` must return a
     ``PunktSentenceTokenizer``.
     """
     punctuation_marks = ['.', '?', '!']
     trainer = BaseSentenceTokenizerTrainer(
         'latin', punctuation=punctuation_marks)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Beispiel #2
0
 def test_sentence_tokenizer_utils_with_strict(self):
     """Check training in strict mode with separate strict punctuation.

     The trainer is created with ``strict=True``, the punctuation marks
     '.', '?' and '!', and ',' as the only strict punctuation mark;
     training on ``self.latin_text`` must return a
     ``PunktSentenceTokenizer``.
     """
     trainer_options = {
         'strict': True,
         'punctuation': ['.', '?', '!'],
         'strict_punctuation': [','],
     }
     trainer = BaseSentenceTokenizerTrainer('latin', **trainer_options)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Beispiel #3
0
 def test_sentence_tokenizer_trainer_pickle(self):
     """Check that the pickling method is called once with its arguments.

     ``pickle_sentence_tokenizer`` is replaced on the class by a mock for
     the duration of the call, so the real implementation is not executed.
     """
     # NOTE(review): the patched method is the same one being invoked, so
     # this only verifies that the mock recorded the call.
     target_file = 'mock.p'
     patcher = patch.object(BaseSentenceTokenizerTrainer,
                            'pickle_sentence_tokenizer')
     with patcher as pickle_mock:
         trainer = BaseSentenceTokenizerTrainer('latin')
         trainer.pickle_sentence_tokenizer(target_file, trainer)
     pickle_mock.assert_called_once_with(target_file, trainer)
Beispiel #4
0
 def test_sentence_tokenizer_utils_with_abbreviations(self):
     """Check training with a user-supplied abbreviation list.

     A Latin trainer is created with ``abbreviations=['Kal.']``; training
     on ``self.latin_text`` must return a ``PunktSentenceTokenizer``.
     """
     known_abbreviations = ['Kal.']
     trainer = BaseSentenceTokenizerTrainer(
         'latin', abbreviations=known_abbreviations)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Beispiel #5
0
 def test_sentence_tokenizer_utils(self):
     """Check training with only the language argument.

     A trainer built with just ``'latin'`` is trained on
     ``self.latin_text``; the result must be a ``PunktSentenceTokenizer``.
     """
     trained = BaseSentenceTokenizerTrainer(
         'latin').train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Beispiel #6
0
 def test_sentence_tokenizer_trainer_pickle(self):
     """Check that the pickling method is called once with its arguments.

     ``pickle_sentence_tokenizer`` is swapped for a mock on the class while
     the call is made, so the real implementation does not run.
     """
     # NOTE(review): the method under test is itself the patched one, so
     # the assertion only confirms the mock saw the call.
     pickle_path = 'mock.p'
     with patch.object(
             BaseSentenceTokenizerTrainer,
             'pickle_sentence_tokenizer') as patched_pickle:
         trainer = BaseSentenceTokenizerTrainer('latin')
         trainer.pickle_sentence_tokenizer(pickle_path, trainer)
     patched_pickle.assert_called_once_with(pickle_path, trainer)
Beispiel #7
0
 def test_sentence_tokenizer_utils_with_abbreviations(self):
     """Check that training with abbreviations yields a Punkt tokenizer.

     The trainer receives ``abbreviations=['Kal.']`` and is trained on
     ``self.latin_text``; the result must be a ``PunktSentenceTokenizer``.
     """
     trainer_options = {'abbreviations': ['Kal.']}
     trainer = BaseSentenceTokenizerTrainer('latin', **trainer_options)
     result = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(result, PunktSentenceTokenizer)
Beispiel #8
0
 def test_sentence_tokenizer_utils(self):
     """Check that a trainer built with only 'latin' yields a Punkt tokenizer.

     The object returned by training on ``self.latin_text`` must be a
     ``PunktSentenceTokenizer``.
     """
     trainer = BaseSentenceTokenizerTrainer('latin')
     result = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(result, PunktSentenceTokenizer)