def test_most_frequent_content_words_overlap(self):
    """Content words overlap at least 60% with the example solution.

    The first element of every item returned by
    ``most_frequent_content_words(self.emma)`` is matched against the 25
    words of the example solution; at least 15 of them (60%) must be
    present for the test to pass.
    """
    example_content_words = {
        'Elton', 'Emma', 'Harriet', 'Jane', 'Knightley', 'Miss', 'Mr.',
        'Mrs.', 'Weston', 'Woodhouse', 'could', 'every', 'good', 'know',
        'little', 'might', 'much', 'must', 'never', 'one', 'said', 'say',
        'thing', 'think', 'would',
    }
    # 60% of the 25 example words.
    required_overlap = 15

    # Only the first element of each returned item is compared
    # (presumably the word of a (word, frequency) pair).
    found_words = {entry[0] for entry in most_frequent_content_words(self.emma)}
    overlap = len(example_content_words & found_words)

    # Inclusive comparison: exactly 60% satisfies the documented contract.
    # assertGreaterEqual also reports both numbers when the test fails.
    self.assertGreaterEqual(overlap, required_overlap)
def test_most_frequent_content_words_minimal_frequency(self):
    """Least frequent content word occurs at least 100 times.

    The second element of every item returned by
    ``most_frequent_content_words(self.emma)`` is treated as that item's
    frequency; the smallest of them must be 100 or more.
    """
    frequencies = [entry[1] for entry in most_frequent_content_words(self.emma)]

    # Fail with a readable message instead of the ValueError that
    # min() raises on an empty sequence.
    self.assertTrue(frequencies, "most_frequent_content_words returned no words")

    # Inclusive comparison: "at least 100" accepts exactly 100.
    # assertGreaterEqual also reports the offending minimum on failure.
    self.assertGreaterEqual(min(frequencies), 100)