class LanguageModelTests(unittest.TestCase):
    """Unit tests for the n-gram LanguageModel: ngram extraction, training
    counts, probability normalization, next-word prediction, and sampling."""

    @classmethod
    def setUpClass(cls):
        # BUG FIX: original printed "\LanguageModelTests..." — "\L" is an
        # invalid escape sequence; a leading newline ("\n") was intended.
        print("\nLanguageModelTests starts")
        print("==========")

    @classmethod
    def tearDownClass(cls):
        print("==========")
        print("LanguageModelTests has ended")

    def setUp(self):
        # A fresh trigram model trained on two tiny sequences before each test,
        # so tests cannot leak state into each other.
        self.lm = LanguageModel(3)
        self.token_sequences = [['the', 'cat', 'runs'],
                                ['the', 'dog', 'runs']]
        self.lm.train(self.token_sequences)

    def test_get_ngrams(self):
        """get_ngrams pads both ends with None and emits every 4-gram window."""
        print("id: " + self.id())
        self.lm.n = 4  # widen the model to 4-grams for this test only
        input_tokens = ['the', 'cat', 'in', 'the', 'hat']
        result_ngrams = [
            (None, None, None, 'the'),
            (None, None, 'the', 'cat'),
            (None, 'the', 'cat', 'in'),
            ('the', 'cat', 'in', 'the'),
            ('cat', 'in', 'the', 'hat'),
            ('in', 'the', 'hat', None),
            ('the', 'hat', None, None),
            ('hat', None, None, None)
        ]
        self.assertEqual(self.lm.get_ngrams(input_tokens), result_ngrams)

    def test_train_vocabulary_and_counts(self):
        """train() must record the padded vocabulary and per-context counts."""
        print("id: " + self.id())
        self.assertEqual(self.lm.vocabulary,
                         {None, 'the', 'cat', 'runs', 'dog'})
        result_counts = {
            (None, None): {'the': 2},
            (None, 'the'): {'cat': 1, 'dog': 1},
            ('the', 'cat'): {'runs': 1},
            ('cat', 'runs'): {None: 1},
            ('runs', None): {None: 2},
            ('the', 'dog'): {'runs': 1},
            ('dog', 'runs'): {None: 1}
        }
        self.assertEqual(self.lm.counts, result_counts)

    def test_normalize(self):
        """Equal counts normalize to equal probabilities."""
        print("id: " + self.id())
        input_words = {'cat': 1, 'dog': 1}
        result_probabilities = {'cat': 0.5, 'dog': 0.5}
        self.assertEqual(self.lm.normalize(input_words), result_probabilities)

    def test_normalize_sum_probabilies(self):
        """Normalized probabilities must sum to 1."""
        print("id: " + self.id())
        input_words = {'cat': 1, 'dog': 1}
        probabilities = self.lm.normalize(input_words)
        prob_sum = 0
        for key in probabilities:
            prob_sum += probabilities[key]
        # FIX: exact float equality is fragile for accumulated sums;
        # assertAlmostEqual is the unittest idiom for float comparison.
        self.assertAlmostEqual(prob_sum, 1)

    def test_predict_next(self):
        """p_next only uses the last n-1 tokens of the history as context."""
        print("id: " + self.id())
        input_tokens = [None, "zero", None, 'the', 'dog']
        result_probabilities = {'runs': 1}
        self.assertEqual(self.lm.p_next(input_tokens), result_probabilities)

    def test_sample(self):
        """sample() must draw a word from the given distribution's support."""
        print("id: " + self.id())
        input_probability_distribution = {'heads': 0.5,
                                          'tails': 0.5}
        predicted_word = self.lm.sample(input_probability_distribution)[0]
        self.assertIn(predicted_word, input_probability_distribution)
def test_p_next_sums_to_one():
    """p_next must return a proper probability distribution (sums to 1)."""
    from math import isclose

    lm = LanguageModel(3)
    # NOTE(review): assumes open_file returns token sequences suitable for
    # train() and that 'kn_test.txt' exists beside the test — verify fixture.
    data = open_file('kn_test.txt')
    lm.train(data)
    total = sum(lm.p_next(['this', 'text']).values())
    # FIX: exact '== 1' on a float sum is fragile once smoothing / many
    # outcomes are involved; compare with a tolerance instead.
    assert isclose(total, 1.0, rel_tol=1e-9, abs_tol=1e-9)