Beispiel #1
0
    def test_unigram_train_smoothed(self):
        """Check the smoothed log10 probability the model stores for 'z'.

        The model is built without an explicit smoothing argument, and the
        expected value is recomputed here from
        ``UnigramModel.SMOOTHING_DEFAULT`` and compared to
        ``probs_dict['z']``.
        """
        model = UnigramModel(UnigramTests.train_str)
        model._calc_probs()

        # Expected value for 'z': (1 + k) / (N + k * V), where k is
        # SMOOTHING_DEFAULT, N is the length of the training string and V is
        # the number of entries in char_dict. The 1.0 presumably reflects 'z'
        # occurring exactly once in train_str -- TODO confirm against fixture.
        k = UnigramModel.SMOOTHING_DEFAULT
        numerator = 1.0 + k
        denominator = len(UnigramTests.train_str) + k * len(model.char_dict)
        expected = log10(numerator / denominator)

        # Only two decimal places are compared.
        self.assertAlmostEqual(model.probs_dict['z'], expected, places=2)
Beispiel #2
0
    def test_prob(self):
        """Check char_dict size and the log10 probability of 'z' at smoothing=0.0."""
        unique_chars = UnigramTests.str_unique_chars(UnigramTests.train_str)
        model = UnigramModel(UnigramTests.train_str, smoothing=0.0)

        # char_dict must have as many entries as str_unique_chars returns for
        # the same training string.
        self.assertEqual(len(model.char_dict), len(unique_chars))

        model._calc_probs()

        # Expected value for 'z' with smoothing=0.0: log10(1 / N), N being the
        # length of the training string. The 1.0 presumably reflects 'z'
        # occurring exactly once in train_str -- TODO confirm against fixture.
        total_chars = len(UnigramTests.train_str)
        expected = log10(1.0 / total_chars)

        # Only two decimal places are compared.
        self.assertAlmostEqual(model.probs_dict['z'], expected, places=2)