Beispiel #1
0
def test_frequency():
    """Check the count Dictogram.frequency reports for each expected word.

    The histogram is built from ``fish_words``, which is defined outside
    this function.
    """
    dictogram = Dictogram(fish_words)
    # (word, expected count) pairs, checked in this order
    expected_counts = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
    )
    for word, expected in expected_counts:
        assert dictogram.frequency(word) == expected
 def test_frequency(self):
     """Check Dictogram.frequency for listed words and for an unseen word.

     The histogram is built from ``self.fish_words``, which is set up
     outside this method.
     """
     histogram = Dictogram(self.fish_words)
     # (word, expected count) pairs, checked in this order
     seen_counts = (
         ('one', 1),
         ('two', 1),
         ('red', 1),
         ('blue', 1),
         ('fish', 4),
     )
     for word, expected in seen_counts:
         assert histogram.frequency(word) == expected
     # An unseen word is expected to report zero
     assert histogram.frequency('food') == 0
Beispiel #3
0
def test_frequency():
    """Check Dictogram.frequency against a fixed eight-word list.

    Each distinct word in the list is checked against its number of
    occurrences, and a word absent from the list must report zero.
    """
    fish_words = 'one fish two fish red fish blue fish'.split()

    histogram = Dictogram(fish_words)
    # Expected count per word; 'food' is not in the list, so it expects 0
    expected_counts = {
        'one': 1,
        'two': 1,
        'red': 1,
        'blue': 1,
        'fish': 4,
        'food': 0,
    }
    for word, expected in expected_counts.items():
        assert histogram.frequency(word) == expected
 def test_add_count(self):
     """Check that add_count updates frequencies, types and tokens.

     The histogram is built from ``self.fish_words``, which is set up
     outside this method.
     """
     histogram = Dictogram(self.fish_words)
     # Apply the extra counts, one add_count call per (word, amount) pair
     additions = (('two', 2), ('blue', 3), ('fish', 4), ('food', 5))
     for word, amount in additions:
         histogram.add_count(word, amount)
     # Frequencies expected once the additions above have been applied
     updated_counts = (
         ('one', 1),
         ('two', 3),
         ('red', 1),
         ('blue', 4),
         ('fish', 8),
         ('food', 5),
     )
     for word, expected in updated_counts:
         assert histogram.frequency(word) == expected
     # Number of distinct word types
     assert histogram.types == 6
     # Total token count; 14 is the sum of the amounts added (2 + 3 + 4 + 5)
     assert histogram.tokens == 8 + 14
Beispiel #5
0
def test_sample():
    """Check that sampled word frequencies stay within 10% of observed ones.

    Draws 10,000 samples from a Dictogram built from ``fish_words``
    (defined outside this function), tallies them in a second Dictogram,
    and compares relative frequencies word by word.
    """
    dictogram = Dictogram(fish_words)
    # Collect 10,000 draws from the histogram
    draws = []
    for _ in range(10000):
        draws.append(dictogram.sample())
    # Tally the draws so each word's sampled count can be looked up
    samples_hist = Dictogram(draws)
    for word, count in dictogram.dictionary_histogram.items():
        # Relative frequency in the original histogram
        observed_freq = count / dictogram.tokens
        # Relative frequency among the draws
        sampled_freq = samples_hist.frequency(word) / samples_hist.tokens
        # Accept anything from 90% to 110% of the observed frequency
        lower_bound = observed_freq * 0.9
        upper_bound = observed_freq * 1.1
        assert lower_bound <= sampled_freq
        assert sampled_freq <= upper_bound