def test_frequency():
    """Check that frequency() reports the expected count for each fish word."""
    histogram = EasyDictogram(fish_words)
    # Expected count per word, checked in the same order as listed here.
    expected_counts = {
        'one': 1,
        'two': 1,
        'red': 1,
        'blue': 1,
        'fish': 4,
    }
    for word, expected in expected_counts.items():
        assert histogram.frequency(word) == expected
def test_contains():
    """Check membership of words in the histogram's underlying dictionary."""
    word_counts = EasyDictogram(fish_words).dictionary_histogram
    # Every input word must be present as a key.
    missing = [word for word in fish_words if word not in word_counts]
    assert not missing
    # Words that never appeared in the input must be absent.
    unexpected = [word for word in ('fishy', 'food') if word in word_counts]
    assert not unexpected
def build_markov(self, word_list):
    """Build a first-order Markov chain from a sequence of words.

    Each word that has a successor in ``word_list`` becomes a key whose value
    is an ``EasyDictogram`` counting the words that immediately follow it.

    Args:
        self: Unused by this function; kept for interface compatibility.
        word_list: Ordered list of words to scan as adjacent pairs.

    Returns:
        dict mapping each word to the ``EasyDictogram`` of its successors.
        Empty when ``word_list`` has fewer than two items, since there are
        no adjacent pairs to record.
    """
    markov_chain = {}
    # Walk adjacent (word, following word) pairs; the last word has no
    # successor and therefore never becomes a key on its own account.
    for current_word, next_word in zip(word_list, word_list[1:]):
        if current_word in markov_chain:
            # Word already seen: bump the successor's count in its histogram.
            # NOTE(review): this writes to dictionary_histogram directly, so
            # any separately stored totals on EasyDictogram (e.g. tokens /
            # types) are presumably not refreshed here -- confirm against the
            # class before relying on them for chain entries.
            counts = markov_chain[current_word].dictionary_histogram
            counts[next_word] = counts.get(next_word, 0) + 1
        else:
            # First occurrence: start a histogram seeded with this successor.
            markov_chain[current_word] = EasyDictogram([next_word])
    return markov_chain
def test_sample():
    """Check that sample() draws words roughly in proportion to their counts.

    Draws 10,000 samples, tallies them in a second EasyDictogram, and requires
    each word's sampled share to lie between 90% and 110% of its share in the
    source histogram.
    """
    source_hist = EasyDictogram(fish_words)
    # Tally a large batch of draws in a histogram of their own.
    drawn_words = [source_hist.sample() for _ in range(10000)]
    drawn_hist = EasyDictogram(drawn_words)
    for word, count in source_hist.dictionary_histogram.items():
        # Share of the word in the source data versus in the drawn samples.
        expected_share = count / source_hist.tokens
        sampled_share = drawn_hist.frequency(word) / drawn_hist.tokens
        # Accept anything from 10% below to 10% above the expected share.
        assert expected_share * 0.9 <= sampled_share <= expected_share * 1.1
def test_entries():
    """Check the number of {word: count} entries in the histogram dictionary."""
    word_counts = EasyDictogram(fish_words).dictionary_histogram
    entry_count = len(word_counts)
    # Five distinct words are expected, matching the reference dictionary.
    assert entry_count == 5
    assert entry_count == len(fish_dict)
def test_types():
    """Check that the histogram reports the number of distinct words."""
    histogram = EasyDictogram(fish_words)
    # Sanity-check the fixture itself before checking the histogram.
    distinct_words = set(fish_words)
    assert len(distinct_words) == 5
    assert histogram.types == 5
def test_tokens():
    """Check that the histogram reports the total number of word tokens."""
    histogram = EasyDictogram(fish_words)
    # Sanity-check the fixture itself before checking the histogram.
    total_words = len(fish_words)
    assert total_words == 8
    assert histogram.tokens == 8