예제 #1
0
 def test_most_frequent_content_words_overlap(self):
     """Check overlap between computed and reference content words.

     More than 15 of the 25 reference words below (i.e. over 60%) must
     appear among the words returned by ``most_frequent_content_words``
     for ``self.emma``.
     """
     example_content_words = {
         'Elton', 'Emma', 'Harriet', 'Jane', 'Knightley', 'Miss', 'Mr.',
         'Mrs.', 'Weston', 'Woodhouse', 'could', 'every', 'good', 'know',
         'little', 'might', 'much', 'must', 'never', 'one', 'said', 'say',
         'thing', 'think', 'would'}
     # Only the first element of each returned item (the word) is compared.
     content_words = {w[0] for w in most_frequent_content_words(self.emma)}
     overlap = len(example_content_words & content_words)
     # assertGreater reports both operands on failure, unlike assertTrue.
     self.assertGreater(overlap, 15)
예제 #2
0
 def test_most_frequent_content_words_minimal_frequency(self):
     """Check that the least frequent content word occurs over 100 times.

     The frequency is read from the second element of each item returned
     by ``most_frequent_content_words`` for ``self.emma``. The bound is
     strict: a minimum of exactly 100 fails the test.
     """
     frequencies = [w[1] for w in most_frequent_content_words(self.emma)]
     # An empty result makes min() raise ValueError, so the test errors
     # out rather than passing vacuously.
     # assertGreater reports both operands on failure, unlike assertTrue.
     self.assertGreater(min(frequencies), 100)