def test_frequency():
    """Check the count Dictogram reports for each word in ``fish_words``.

    ``fish_words`` is a module-level fixture defined outside this function.
    """
    word_counts = Dictogram(fish_words)
    # (word, expected frequency) pairs, checked in this order.
    expected_pairs = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
    )
    for word, expected in expected_pairs:
        assert word_counts.frequency(word) == expected
def test_frequency(self):
    """Check frequency counts for words seen and unseen in ``self.fish_words``."""
    counts = Dictogram(self.fish_words)
    # Words expected to be present, paired with their expected frequency.
    seen_pairs = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
    )
    for word, expected in seen_pairs:
        assert counts.frequency(word) == expected
    # A word that was never added must report a frequency of zero.
    assert counts.frequency('food') == 0
def test_frequency():
    """Check frequency counts for a fixed word list, including an unseen word."""
    fish_words = [
        'one', 'fish', 'two', 'fish',
        'red', 'fish', 'blue', 'fish',
    ]
    counts = Dictogram(fish_words)
    # (word, expected frequency) pairs; 'food' is absent from the list above.
    expected_pairs = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
        ('food', 0),
    )
    for word, expected in expected_pairs:
        assert counts.frequency(word) == expected
def test_add_count(self):
    """Check that ``add_count`` updates frequencies, ``types`` and ``tokens``."""
    counts = Dictogram(self.fish_words)

    # Apply the increments in a fixed order; 'food' is not checked beforehand
    # and is expected to end up with exactly the count added here.
    increments = (('two', 2), ('blue', 3), ('fish', 4), ('food', 5))
    for word, amount in increments:
        counts.add_count(word, amount)

    # Expected frequency of every word after the increments above.
    expected_totals = (
        ('one', 1),
        ('two', 3),
        ('red', 1),
        ('blue', 4),
        ('fish', 8),
        ('food', 5),
    )
    for word, total in expected_totals:
        assert counts.frequency(word) == total

    # Number of distinct word types.
    assert counts.types == 6
    # Total tokens: 14 is the sum of the increments (2 + 3 + 4 + 5);
    # presumably 8 is the token count of self.fish_words -- confirm against
    # the fixture.
    assert counts.tokens == 8 + 14
def test_sample():
    """Check that sampled word frequencies stay within 10% of observed ones.

    Draws 10,000 samples from a Dictogram built on the module-level
    ``fish_words`` fixture, counts them in a second Dictogram, and compares
    each word's share of the samples with its share of the original tokens.
    """
    # NOTE(review): the check is statistical and no seed is set here, so an
    # unlucky run could fall outside the tolerance -- confirm how sample()
    # obtains its randomness.
    source = Dictogram(fish_words)
    draws = [source.sample() for _ in range(10000)]
    drawn = Dictogram(draws)

    for word, count in source.dictionary_histogram.items():
        # Share of this word among the original tokens.
        observed_share = count / source.tokens
        # Share of this word among the sampled tokens.
        sampled_share = drawn.frequency(word) / drawn.tokens
        # Accept anything from 90% to 110% of the observed share.
        assert observed_share * 0.9 <= sampled_share <= observed_share * 1.1