def test_frequency_for_specific_word(self, sample: tuple[str, dict[str, int]]):
    """calculate_frequency_for_word should return the actual count.

    ``sample`` pairs a text with the expected per-word counts for that
    text. One entry of the counts is used as the word to look up; when
    there are no counts, the placeholder ``"word"`` with an expected
    frequency of ``0`` is used instead.
    """
    sample_text, word_counts = sample
    # Peek at an entry rather than calling ``popitem()`` so the mapping
    # that was passed in is left unmodified.
    target_word, frequency = next(iter(word_counts.items()), ("word", 0))
    analyzer = WordFrequencyAnalyzer(sample_text)
    self.assertEqual(frequency,
                     analyzer.calculate_frequency_for_word(target_word))
Exemple #2
0
    def test_calculate_most_frequent_n_words(self):
        """The three most frequent words of ``self.TEXT`` should match the expected list."""
        analyzer = WordFrequencyAnalyzer()
        limit = 3

        actual = analyzer.calculate_most_frequent_n_words(self.TEXT, limit)
        expected = [
            WordFrequency('the', 3),
            WordFrequency('a', 2),
            WordFrequency('favorite', 1),
        ]

        self.assertEqual(actual, expected)
    def test_n_most_common_words(self, sample: tuple[str, dict[str, int]],
                                 n: int):
        """calculate_most_frequent_n_words should return the most frequent words, sorted.

        The expectation is derived from the sample's word counts: entries
        are ordered by descending frequency, ties by ascending word,
        entries whose count is not positive are dropped, and the result
        is cut with the slice ``[:n]``.
        """
        sample_text, word_counts = sample

        def ranking(entry):
            # Negated frequency first, so higher counts sort earlier.
            word, frequency = entry
            return -frequency, word

        ranked = sorted(word_counts.items(), key=ranking)
        expected = [
            WordFrequency(word.lower(), frequency)
            for word, frequency in ranked if frequency > 0
        ]
        expected = expected[:n]

        analyzer = WordFrequencyAnalyzer(sample_text)
        actual = analyzer.calculate_most_frequent_n_words(n)
        self.assertEqual(expected, actual)
 def test_word_frequencies_returns_correct_counts(
         self, sample: tuple[str, dict[str, int]]):
     """word_frequencies should equal the expected counts of the sample text."""
     text, expected_counts = sample
     actual_counts = WordFrequencyAnalyzer(text).word_frequencies
     self.assertDictEqual(expected_counts, actual_counts)
Exemple #5
0
    def test_word_frequencies_gets_cached_for_subsequent_calls(self):
        """Repeated reads of `word_frequencies` should not recompute the counts."""
        text_double = Mock()
        text_double.lower.return_value = "text"
        analyzer = WordFrequencyAnalyzer(text_double)
        expected_counts = {"text": 1}

        # Read the attribute ten times in a row; every read has to give
        # the same, correct counts.
        for _attempt in range(10):
            self.assertEqual(expected_counts, analyzer.word_frequencies)

        # The mocked text's `lower` must have been called exactly once,
        # which this test takes as proof that the counts were computed
        # only on the first access.
        text_double.lower.assert_called_once()
Exemple #6
0
 def test_analyzer_has_insightful_repr(self):
     """A WordFrequencyAnalyzer should have an insightful repr.

     Each case pairs an input text with the repr expected for an
     analyzer built from it. The first three cases expect the text
     quoted in full; the last case expects a shortened text that ends
     in ``...``.
     """
     test_cases = (
         ("", "<WordFrequencyAnalyzer text=''>"),
         ("one two three", "<WordFrequencyAnalyzer text='one two three'>"),
         ("01234567890123456789012345678901",
          "<WordFrequencyAnalyzer text='01234567890123456789012345678901'>"
          ),
         ("012345678901234567890123456789012345678901234567890123456789",
          "<WordFrequencyAnalyzer text='01234567890123456789012345678...'>"
          ),
     )
     for text, expected_representation in test_cases:
         with self.subTest(text=text,
                           expected_representation=expected_representation):
             # Build the instance inside the sub-test so a failing
             # constructor is reported against this case and does not
             # stop the remaining cases from running.
             instance = WordFrequencyAnalyzer(text)
             self.assertEqual(expected_representation, repr(instance))
Exemple #7
0
    def test_word_frequencies_returns_the_correct_counts(self):
        """word_frequencies should hold the expected count for every case.

        Each case pairs an input text with the counts expected for it;
        the expected keys are always lower-case.
        """
        # Expected counts for a text that has every character of
        # [a-zA-Z] as its own "word": each lower-case letter twice.
        every_letter_twice = dict.fromkeys(string.ascii_lowercase, 2)
        test_cases = (
            ("", {}),
            (string.punctuation + string.whitespace, {}),
            ("word", {"word": 1}),
            ("Word", {"word": 1}),
            ("WORD", {"word": 1}),
            ("word WORD", {"word": 2}),
            ("A B C D E", {"a": 1, "b": 1, "c": 1, "d": 1, "e": 1}),
            ("a a a b b c", {"a": 3, "b": 2, "c": 1}),
            ("aaa aa a", {"aaa": 1, "aa": 1, "a": 1}),
            (" ".join(string.ascii_letters), every_letter_twice),
        )

        for text, word_counts in test_cases:
            with self.subTest(text=text, word_counts=word_counts):
                actual_counts = WordFrequencyAnalyzer(text).word_frequencies
                self.assertEqual(word_counts, actual_counts)
Exemple #8
0
    def test_calculate_frequency_for_word(self):
        """Looking up "The" in ``self.TEXT`` should give a frequency of 3."""
        analyzer = WordFrequencyAnalyzer()
        looked_up = "The"

        actual = analyzer.calculate_frequency_for_word(self.TEXT, looked_up)

        self.assertEqual(actual, 3)
Exemple #9
0
    def test_calculate_highest_frequency(self):
        """The highest frequency reported for ``self.TEXT`` should be 3."""
        analyzer = WordFrequencyAnalyzer()

        highest = analyzer.calculate_highest_frequency(self.TEXT)

        self.assertEqual(highest, 3)
Exemple #10
0
class WordFrequencyTestCases(unittest.TestCase):
    """
    Manual test cases for the WordFrequencyAnalyzer.

    These tests supplement the test scenarios that are automatically
    generated by `hypothesis` in `test_word_frequency_invariants.py`.
    """
    def setUp(self):
        """Set up values that are commonly used within the test methods."""
        self.analyzer = WordFrequencyAnalyzer(Mock())

    @staticmethod
    def _patch_word_frequencies(word_counts):
        """Return a patcher that makes `word_frequencies` yield fixed counts.

        The `word_frequencies` property of `WordFrequencyAnalyzer` is
        replaced by a property whose getter is a `Mock` returning a
        `collections.Counter` built from ``word_counts``. Use the result
        as a context manager; this isolates a test from the actual
        implementation of the property.
        """
        mocked_property = property(
            Mock(return_value=collections.Counter(word_counts)))
        return patch.object(WordFrequencyAnalyzer,
                            "word_frequencies",
                            new=mocked_property)

    def test_analyzer_has_insightful_repr(self):
        """A WordFrequencyAnalyzer should have an insightful repr."""
        test_cases = (
            ("", "<WordFrequencyAnalyzer text=''>"),
            ("one two three", "<WordFrequencyAnalyzer text='one two three'>"),
            ("01234567890123456789012345678901",
             "<WordFrequencyAnalyzer text='01234567890123456789012345678901'>"
             ),
            ("012345678901234567890123456789012345678901234567890123456789",
             "<WordFrequencyAnalyzer text='01234567890123456789012345678...'>"
             ),
        )
        for text, expected_representation in test_cases:
            with self.subTest(text=text,
                              expected_representation=expected_representation):
                # Build the instance inside the sub-test so a failing
                # constructor is reported against this case and does not
                # stop the remaining cases from running.
                instance = WordFrequencyAnalyzer(text)
                self.assertEqual(expected_representation, repr(instance))

    def test_word_frequencies_returns_the_correct_counts(self):
        """The word_frequencies property should return proper word counts."""
        test_cases = (
            ("", {}),
            (string.punctuation + string.whitespace, {}),
            ("word", {
                "word": 1
            }),
            ("Word", {
                "word": 1
            }),
            ("WORD", {
                "word": 1
            }),
            ("word WORD", {
                "word": 2
            }),
            ("A B C D E", {
                "a": 1,
                "b": 1,
                "c": 1,
                "d": 1,
                "e": 1
            }),
            ("a a a b b c", {
                "a": 3,
                "b": 2,
                "c": 1
            }),
            ("aaa aa a", {
                "aaa": 1,
                "aa": 1,
                "a": 1
            }),
            # Test case that includes all characters in [a-zA-Z] as "words"
            (" ".join(string.ascii_letters),
             dict(zip(string.ascii_lowercase, itertools.repeat(2)))))

        for text, word_counts in test_cases:
            with self.subTest(text=text, word_counts=word_counts):
                analyzer = WordFrequencyAnalyzer(text)
                self.assertEqual(word_counts, analyzer.word_frequencies)

    def test_word_frequencies_gets_cached_for_subsequent_calls(self):
        """Accessing `word_frequencies` is costly and should be cached."""
        mock_text = Mock()
        mock_text.lower.return_value = "text"
        analyzer = WordFrequencyAnalyzer(mock_text)

        # Access the `word_frequencies` attribute 10 times and assert
        # the return value is correct each time.
        for _ in range(10):
            self.assertEqual({"text": 1}, analyzer.word_frequencies)

        # Assert that the function body only ran once by inspecting the
        # number of calls of the patched `self.text.lower`.
        mock_text.lower.assert_called_once()

    def test_calculate_highest_frequency_returns_zero_without_words(self):
        """calculate_highest_frequency returns `0` for an empty Counter."""
        # Isolate this test from the `word_frequencies` property by
        # mocking the property.
        with self._patch_word_frequencies({}):
            self.assertEqual(0, self.analyzer.calculate_highest_frequency())

    def test_calculate_highest_frequency_works_with_two_most_common_words(
            self):
        """Return the highest frequency regardless of how many words have it."""
        # Mock the `word_frequencies` property to get a return value
        # regardless of whether the actual implementation works.
        high = 10
        word_counts = dict(zip(string.ascii_lowercase, itertools.repeat(high)))

        with self._patch_word_frequencies(word_counts):
            self.assertEqual(high, self.analyzer.calculate_highest_frequency())

    def test_calculate_frequency_for_word_returns_zero_for_non_existing_word(
            self):
        """Words not in the text have a frequency of `0`."""
        word_counts = {"one": 10, "two": 20, "three": 30}
        with self._patch_word_frequencies(word_counts):
            self.assertEqual(
                0, self.analyzer.calculate_frequency_for_word("unknownword"))

    def test_calculate_frequency_for_word_ignores_case(self):
        """The frequency look-up for a word should be case-insensitive."""
        word_counts = {"one": 10, "two": 20, "three": 30}
        with self._patch_word_frequencies(word_counts):
            for word, count in word_counts.items():
                self.assertEqual(
                    count,
                    self.analyzer.calculate_frequency_for_word(word.upper()))

    def test_calculate_most_frequent_n_words_raises_ValueError_for_negative_n(
            self):
        """A negative number of words is not a valid number."""
        with self.assertRaises(ValueError):
            self.analyzer.calculate_most_frequent_n_words(-1)

    def test_calculate_most_frequent_n_words_returns_list_of_WordFrequency_instances(
            self):
        """This method should return WordFrequency instances in the right order."""
        # The dict is written in descending frequency order, so its
        # insertion order is also the expected result order.
        word_counts = {"one": 30, "two": 20, "three": 10}
        with self._patch_word_frequencies(word_counts):
            most_common = self.analyzer.calculate_most_frequent_n_words(3)

        # `zip` stops at the shorter input, so without this check an
        # empty or truncated result would pass the loop below unnoticed.
        self.assertEqual(len(word_counts), len(most_common))

        for word_frequency, (word, frequency) in zip(most_common,
                                                     word_counts.items()):
            self.assertIsInstance(word_frequency, WordFrequency)
            self.assertEqual(word, word_frequency.word)
            self.assertEqual(frequency, word_frequency.frequency)

    def test_calculate_most_frequent_n_words_alphabetic_order_for_equal_frequencies(
            self):
        """Words with equal frequencies should be returned in alphabetic order."""
        words = ["ddd", "aaa", "ccc", "eee", "xxx", "yyy", "zzz", "bbb"]
        word_counts = dict(zip(words, itertools.repeat(30)))
        with self._patch_word_frequencies(word_counts):
            most_common = self.analyzer.calculate_most_frequent_n_words(3)

        self.assertEqual(["aaa", "bbb", "ccc"], [e.word for e in most_common])
Exemple #11
0
 def setUp(self):
     """Create the analyzer shared by the test methods, built around a ``Mock``."""
     placeholder_text = Mock()
     self.analyzer = WordFrequencyAnalyzer(placeholder_text)
 def test_most_common_word_frequency(self, sample: tuple[str, int]):
     """calculate_highest_frequency should return the expected highest frequency.

     ``sample`` pairs a text with the frequency expected for its most
     common word.
     """
     text, expected_highest = sample
     actual_highest = WordFrequencyAnalyzer(text).calculate_highest_frequency()
     self.assertEqual(expected_highest, actual_highest)
 def test_word_frequencies_accepts_valid_utf8(self, sample_text: str):
     """word_frequencies should be a Counter for valid utf-8 text."""
     counts = WordFrequencyAnalyzer(sample_text).word_frequencies
     self.assertIsInstance(counts, collections.Counter)