def test_unigram_test_str_smooth(self):
    """Verify ``get_string_prob`` on the input string with default smoothing.

    Expected values are rebuilt character by character through
    ``UnigramModel.calc_prob`` and compared with the three items returned
    by ``get_string_prob``: the total (index 0, to two decimal places),
    the per-character mapping (index 1) and the list of
    ``(char, running total)`` pairs (index 2).
    """
    model = UnigramModel(UnigramTests.train_str)
    actual = model.get_string_prob(UnigramTests.input_str)

    # Expected vocabulary: every key the model holds plus every input
    # character. Only the number of keys is used below, so the placeholder
    # value 0 is irrelevant.
    vocab = dict(model.char_dict)
    vocab.update(dict.fromkeys(UnigramTests.input_str, 0))
    vocab_size = len(vocab)

    expected_total = 0.0
    expected_cumulative = []
    expected_single = {}
    for symbol in UnigramTests.input_str:
        counts = model.char_dict
        # A character without a recorded count is scored with a count of 0.
        occurrences = counts[symbol] if symbol in counts else 0
        symbol_prob = UnigramModel.calc_prob(
            char_count=occurrences,
            total_count=model.training_size,
            smoothing=UnigramModel.SMOOTHING_DEFAULT,
            vocab_size=vocab_size,
        )
        expected_single[symbol] = symbol_prob
        expected_total += symbol_prob
        expected_cumulative.append((symbol, expected_total))

    # The model's vocabulary must match the expected one in size, and it
    # must hold one probability entry per vocabulary entry.
    self.assertEqual(vocab_size, len(model.char_dict))
    self.assertEqual(len(model.probs_dict), len(model.char_dict))

    self.assertAlmostEqual(actual[0], expected_total, places=2)
    self.assertEqual(expected_single, actual[1])
    self.assertEqual(expected_cumulative, actual[2])
def test_unigram_test_str(self):
    """Verify ``get_string_prob`` on the input string with smoothing 0.0.

    Expected values are rebuilt through ``UnigramModel.calc_prob`` for every
    input character present in the model's ``probs_dict``; other characters
    are skipped. The results are compared with the three items returned by
    ``get_string_prob``: the total (index 0, to two decimal places), the
    per-character mapping (index 1) and the list of
    ``(char, running total)`` pairs (index 2).
    """
    model = UnigramModel(UnigramTests.train_str, smoothing=0.0)
    actual = model.get_string_prob(UnigramTests.input_str)

    # One probability entry is expected per counted character.
    self.assertEqual(len(model.probs_dict), len(model.char_dict))

    expected_total = 0.0
    expected_cumulative = []
    expected_single = {}
    for symbol in UnigramTests.input_str:
        # Characters the model holds no probability for add nothing to the
        # expected values.
        if symbol not in model.probs_dict:
            continue
        symbol_prob = UnigramModel.calc_prob(
            char_count=model.char_dict[symbol],
            total_count=model.training_size,
        )
        expected_total += symbol_prob
        expected_cumulative.append((symbol, expected_total))
        expected_single[symbol] = symbol_prob

    self.assertAlmostEqual(actual[0], expected_total, places=2)
    self.assertEqual(expected_single, actual[1])
    self.assertEqual(expected_cumulative, actual[2])