def test_unigram_train_smoothed(self):
    """Check the default-smoothed log10 probability stored for 'z'.

    The expected value is log10((1 + s) / (N + s * V)), where s is
    ``UnigramModel.SMOOTHING_DEFAULT``, N is the length of the training
    string and V is the number of entries in the model's ``char_dict``.
    The comparison is made to two decimal places.
    """
    model = UnigramModel(UnigramTests.train_str)
    model._calc_probs()

    # Spot-check a single character rather than the whole table.
    # NOTE(review): the 1.0 in the numerator presumes 'z' occurs exactly
    # once in train_str -- confirm against the fixture.
    smoothing = UnigramModel.SMOOTHING_DEFAULT
    numerator = 1.0 + smoothing
    denominator = (
        len(UnigramTests.train_str) + smoothing * len(model.char_dict)
    )
    expected = log10(numerator / denominator)

    self.assertAlmostEqual(model.probs_dict['z'], expected, places=2)
def test_prob(self):
    """Check the log10 probability stored for 'z' with smoothing=0.0.

    First verifies that the model's ``char_dict`` has as many entries as
    ``UnigramTests.str_unique_chars`` returns for the training string,
    then compares ``probs_dict['z']`` with log10(1 / N), where N is the
    length of the training string, to two decimal places.
    """
    distinct_chars = UnigramTests.str_unique_chars(UnigramTests.train_str)
    model = UnigramModel(UnigramTests.train_str, smoothing=0.0)

    self.assertEqual(len(model.char_dict), len(distinct_chars))

    model._calc_probs()

    # Spot-check a single character rather than the whole table.
    # NOTE(review): the 1.0 numerator presumes 'z' occurs exactly once in
    # train_str -- confirm against the fixture.
    expected = log10(1.0 / len(UnigramTests.train_str))

    self.assertAlmostEqual(model.probs_dict['z'], expected, places=2)