Example #1
import unittest

# The LanguageModel class under test is assumed to be importable from the project.


class LanguageModelTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print("\LanguageModelTests starts")
        print("==========")

    @classmethod
    def tearDownClass(cls):
        print("==========")
        print("LanguageModelTests has ended")

    def setUp(self):
        self.lm = LanguageModel(3)
        self.token_sequences = [['the', 'cat', 'runs'], ['the', 'dog', 'runs']]
        self.lm.train(self.token_sequences)

    def test_get_ngrams(self):
        print("id: " + self.id())
        self.lm.n = 4
        input_tokens = ['the', 'cat', 'in', 'the', 'hat']
        result_ngrams = [
            (None, None, None, 'the'), (None, None, 'the', 'cat'),
            (None, 'the', 'cat', 'in'), ('the', 'cat', 'in', 'the'),
            ('cat', 'in', 'the', 'hat'), ('in', 'the', 'hat', None),
            ('the', 'hat', None, None), ('hat', None, None, None)
        ]
        self.assertEqual(self.lm.get_ngrams(input_tokens), result_ngrams)

    def test_train_vocabulary_and_counts(self):
        print("id: " + self.id())
        self.assertEqual(self.lm.vocabulary,
                         {None, 'the', 'cat', 'runs', 'dog'})

        result_counts = {
            (None, None): {
                'the': 2
            },
            (None, 'the'): {
                'cat': 1,
                'dog': 1
            },
            ('the', 'cat'): {
                'runs': 1
            },
            ('cat', 'runs'): {
                None: 1
            },
            ('runs', None): {
                None: 2
            },
            ('the', 'dog'): {
                'runs': 1
            },
            ('dog', 'runs'): {
                None: 1
            }
        }
        self.assertEqual(self.lm.counts, result_counts)

    def test_normalize(self):
        print("id: " + self.id())
        input_words = {'cat': 1, 'dog': 1}
        result_probabilities = {'cat': 0.5, 'dog': 0.5}
        self.assertEqual(self.lm.normalize(input_words), result_probabilities)

    def test_normalize_sum_probabilities(self):
        print("id: " + self.id())
        input_words = {'cat': 1, 'dog': 1}
        probabilities = self.lm.normalize(input_words)

        # Probabilities returned by normalize should sum to 1.
        prob_sum = sum(probabilities.values())
        self.assertAlmostEqual(prob_sum, 1)

    def test_predict_next(self):
        print("id: " + self.id())
        input_tokens = [None, 'zero', None, 'the', 'dog']
        result_probabilities = {'runs': 1}
        self.assertEqual(self.lm.p_next(input_tokens), result_probabilities)

    def test_sample(self):
        print("id: " + self.id())
        input_probability_distribution = {'heads': 0.5, 'tails': 0.5}
        predicted_word = self.lm.sample(input_probability_distribution)[0]
        self.assertIn(predicted_word, input_probability_distribution)
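
For reference, below is a minimal sketch of a LanguageModel that would satisfy the behaviour exercised by these tests (None padding on both sides, nested count dictionaries keyed by context, sampling via random.choices). It is an illustrative assumption, not the implementation actually under test, which may differ in details such as smoothing or data structures.

import random
from collections import defaultdict


class LanguageModel:
    """Simple n-gram language model consistent with the tests above."""

    def __init__(self, n):
        self.n = n
        self.vocabulary = set()
        self.counts = {}

    def get_ngrams(self, tokens):
        # Pad with n-1 None markers on each side, then slide a window of width n.
        padded = [None] * (self.n - 1) + list(tokens) + [None] * (self.n - 1)
        return [tuple(padded[i:i + self.n])
                for i in range(len(padded) - self.n + 1)]

    def train(self, token_sequences):
        # The vocabulary holds every observed token plus the None padding marker.
        self.vocabulary = {None}
        counts = defaultdict(lambda: defaultdict(int))
        for sequence in token_sequences:
            self.vocabulary.update(sequence)
            for ngram in self.get_ngrams(sequence):
                context, word = ngram[:-1], ngram[-1]
                counts[context][word] += 1
        self.counts = {context: dict(words) for context, words in counts.items()}

    def normalize(self, word_counts):
        # Convert raw counts into a probability distribution.
        total = sum(word_counts.values())
        return {word: count / total for word, count in word_counts.items()}

    def p_next(self, tokens):
        # Condition on the last n-1 tokens of the input.
        context = tuple(tokens[-(self.n - 1):])
        return self.normalize(self.counts[context])

    def sample(self, probabilities):
        # random.choices returns a list, hence the [0] indexing in test_sample.
        words = list(probabilities)
        weights = [probabilities[word] for word in words]
        return random.choices(words, weights=weights, k=1)
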
Example #2
import pytest

# LanguageModel and the open_file helper are assumed to be importable from the project.

def test_p_next_sums_to_one():
    lm = LanguageModel(3)
    data = open_file('kn_test.txt')
    lm.train(data)
    # Floating-point probabilities may not sum to exactly 1, so compare approximately.
    assert sum(lm.p_next(['this', 'text']).values()) == pytest.approx(1.0)
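
The open_file helper used here belongs to the project under test and its behaviour is not shown. A plausible sketch, assuming it returns one whitespace-tokenized sequence per non-empty line, might look like this:

def open_file(path):
    # Hypothetical helper: the real one may tokenize differently
    # (sentence splitting, lowercasing, punctuation handling, ...).
    with open(path, encoding='utf-8') as handle:
        return [line.split() for line in handle if line.strip()]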