def test_entries(self):
    """Check the entries of a Dictogram built from ``self.fish_words``.

    The histogram is inspected twice, once directly and once through
    ``items()``. Each view must hold five entries and match the
    ``self.fish_dict`` / ``self.fish_list`` fixtures regardless of order.
    The fixtures are defined outside this method (presumably in ``setUp``).
    """
    dictogram = Dictogram(self.fish_words)

    # Verify histogram as dictionary of entries like {word: count}
    assert len(dictogram) == 5
    # NOTE(review): assertCountEqual iterates both arguments, so if Dictogram
    # iterates like a dict this line compares the words only; the counts are
    # covered by the items() comparison below.
    self.assertCountEqual(dictogram, self.fish_dict)  # Ignore item order

    # Verify histogram as list of entries like [(word, count)]
    listogram = dictogram.items()
    assert len(listogram) == 5
    self.assertCountEqual(listogram, self.fish_list)  # Ignore item order
def test_entries():
    """Check the entries of a Dictogram built from a fixed word list.

    Standalone (function-style) test: the fixtures are local and a bare
    ``unittest.TestCase`` instance is created only to borrow its
    order-insensitive ``assertCountEqual`` helper.
    """
    fish_words = ['one', 'fish', 'two', 'fish', 'red', 'fish', 'blue', 'fish']
    fish_list = [('one', 1), ('fish', 4), ('two', 1), ('red', 1), ('blue', 1)]
    fish_dict = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    case = unittest.TestCase()

    dictogram = Dictogram(fish_words)

    # Verify histogram as dictionary of entries like {word: count}
    assert len(dictogram) == 5
    # NOTE(review): assertCountEqual iterates both arguments, so if Dictogram
    # iterates like a dict this line compares the words only; the counts are
    # covered by the items() comparison below.
    case.assertCountEqual(dictogram, fish_dict)  # Ignore item order

    # Verify histogram as list of entries like [(word, count)]
    listogram = dictogram.items()
    assert len(listogram) == 5
    case.assertCountEqual(listogram, fish_list)  # Ignore item order
def test_sample(self):
    """Check that ``sample()`` draws words in proportion to their counts.

    Draws 10,000 samples from a Dictogram built from ``self.fish_words``,
    counts them in a second Dictogram, and requires each word's sampled
    frequency to lie within 10% (relative) of its observed frequency.
    The test is statistical, so it depends on random sampling.
    """
    histogram = Dictogram(self.fish_words)

    # Create a list of 10,000 word samples from histogram
    samples_list = [histogram.sample() for _ in range(10000)]
    # Create a histogram to count frequency of each word
    samples_hist = Dictogram(samples_list)

    # Check each word in original histogram
    for word, count in histogram.items():
        # Calculate word's observed frequency
        observed_freq = count / histogram.tokens
        # Calculate word's sampled frequency
        samples = samples_hist.frequency(word)
        sampled_freq = samples / samples_hist.tokens
        # Verify word's sampled frequency is close to observed frequency
        lower_bound = observed_freq * 0.9  # 10% below = 90% = 0.9
        upper_bound = observed_freq * 1.1  # 10% above = 110% = 1.1
        assert lower_bound <= sampled_freq <= upper_bound