Beispiel #1
0
    def test_unigram_test_str_smooth(self):
        """Check get_string_prob against hand-computed smoothed values.

        The model is built without an explicit smoothing argument; the
        expected per-character values are recomputed here through
        UnigramModel.calc_prob with UnigramModel.SMOOTHING_DEFAULT and a
        vocabulary made of the model's characters plus those of the
        input string. The result of get_string_prob is compared by index:
        [0] the running sum, [1] the per-character mapping, [2] the list
        of (character, running sum) pairs.
        """
        model = UnigramModel(UnigramTests.train_str)

        # Query the model first; the vocabulary below is read afterwards.
        actual = model.get_string_prob(UnigramTests.input_str)

        # Only the size of this mapping matters: model characters plus
        # every character of the input string (values are irrelevant).
        vocabulary = dict(model.char_dict)
        vocabulary.update(dict.fromkeys(UnigramTests.input_str, 0))
        vocab_size = len(vocabulary)

        running_sum = 0.0
        expected_cumulative = []
        expected_single = {}
        for symbol in UnigramTests.input_str:
            # Characters missing from the model are scored with count 0.
            seen_count = (model.char_dict[symbol]
                          if symbol in model.char_dict else 0)
            symbol_prob = UnigramModel.calc_prob(
                char_count=seen_count,
                total_count=model.training_size,
                smoothing=UnigramModel.SMOOTHING_DEFAULT,
                vocab_size=vocab_size)
            expected_single[symbol] = symbol_prob
            running_sum += symbol_prob
            expected_cumulative.append((symbol, running_sum))

        # Equal sizes mean the model's char_dict already holds every
        # character of the input string at this point.
        self.assertEqual(vocab_size, len(model.char_dict))
        self.assertEqual(len(model.probs_dict), len(model.char_dict))
        self.assertAlmostEqual(actual[0], running_sum, places=2)
        self.assertEqual(expected_single, actual[1])
        self.assertEqual(expected_cumulative, actual[2])
Beispiel #2
0
    def test_unigram_test_str(self):
        """Check get_string_prob against hand-computed unsmoothed values.

        The model is built with smoothing=0.0. Expected values are
        recomputed through UnigramModel.calc_prob (count and training
        size only) for each input character present in the model's
        probs_dict; other characters are skipped. The result of
        get_string_prob is compared by index: [0] the running sum,
        [1] the per-character mapping, [2] the list of
        (character, running sum) pairs.
        """
        model = UnigramModel(UnigramTests.train_str, smoothing=0.0)

        actual = model.get_string_prob(UnigramTests.input_str)
        self.assertEqual(len(model.probs_dict), len(model.char_dict))

        running_sum = 0.0
        expected_cumulative = []
        expected_single = {}
        for symbol in UnigramTests.input_str:
            # Characters the model has no entry for contribute nothing.
            if symbol not in model.probs_dict:
                continue
            symbol_prob = UnigramModel.calc_prob(
                char_count=model.char_dict[symbol],
                total_count=model.training_size)
            running_sum += symbol_prob
            expected_cumulative.append((symbol, running_sum))
            expected_single[symbol] = symbol_prob

        self.assertAlmostEqual(actual[0], running_sum, places=2)
        self.assertEqual(expected_single, actual[1])
        self.assertEqual(expected_cumulative, actual[2])