def test_init(self):
        # A freshly constructed Histogram holds no words, has a zero total,
        # and keeps its counts in a defaultdict.
        hist = file_indexer.Histogram()

        self.assertIsInstance(hist._words, collections.defaultdict)
        self.assertEqual(hist._words, {})
        # An unseen word must fall back to a count of 0.  The lookup itself
        # inserts the key into the defaultdict, so it has to run after the
        # emptiness check above.
        self.assertEqual(hist._words['spam'], 0)
        self.assertEqual(hist._total, 0)
    def test_dunder_add_base(self):
        # ``a + b`` must produce a merged histogram (per-word counts and
        # totals summed) while leaving both operands untouched.
        left_counts = {
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        }
        right_counts = {
            'word2': 20,
            'word4': 50,
            'word6': 80,
        }
        left = file_indexer.Histogram()
        left._words.update(left_counts)
        left._total = 150
        right = file_indexer.Histogram()
        right._words.update(right_counts)
        right._total = 150

        merged = left + right

        # Words present in both operands have their counts added together.
        expected_merged = {
            'word1': 50,
            'word2': 60,
            'word3': 30,
            'word4': 70,
            'word5': 10,
            'word6': 80,
        }
        self.assertEqual(merged._words, expected_merged)
        self.assertEqual(merged._total, 300)
        # Neither operand may have been modified by the addition.
        self.assertEqual(left._words, left_counts)
        self.assertEqual(left._total, 150)
        self.assertEqual(right._words, right_counts)
        self.assertEqual(right._total, 150)
    def test_distinct_words(self):
        # ``distinct_words`` reports how many different words are recorded,
        # independent of how often each one occurred.
        counts = {
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        }
        hist = file_indexer.Histogram()
        hist._words.update(counts)
        hist._total = 150

        self.assertEqual(hist.distinct_words, 5)
    def make_results(self, file_cnt=5):
        """Build ``file_cnt`` identical, pre-populated histograms.

        Each histogram carries the same five words (counts 50 down to 10)
        and a total of 150.  The list is returned so tests can use it as a
        canned result.
        """
        counts = {
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        }
        histograms = [file_indexer.Histogram() for _ in range(file_cnt)]
        for histogram in histograms:
            histogram._words.update(counts)
            histogram._total = 150

        return histograms
    def test_add(self):
        # ``add`` must count words case-insensitively: the three spellings
        # of "spam" collapse into one lower-cased entry.
        hist = file_indexer.Histogram()

        for spelling in ('SpAm', 'sPaM', 'spam', 'spammER'):
            hist.add(spelling)

        expected_counts = {
            'spam': 3,
            'spammer': 1,
        }
        self.assertEqual(hist._words, expected_counts)
        self.assertEqual(hist._total, 4)
    def test_negative_workers(self, mock_from_file, mock_Pool, mock_cpu_count):
        # A negative worker count must be rejected with ValueError before
        # any of the mocked collaborators is touched.
        files = ['file%d' % i for i in range(5)]
        out = six.StringIO()
        mock_Pool.return_value.map.return_value = self.make_results()
        mock_from_file.side_effect = lambda x: file_indexer.Histogram()

        with self.assertRaises(ValueError):
            file_indexer.main(files, out, workers=-1)

        self.assertFalse(mock_cpu_count.called)
        self.assertFalse(mock_Pool.called)
        self.assertFalse(mock_from_file.called)
    def test_top_words_small_count(self):
        # Asking for fewer words than are stored returns only the most
        # frequent ones, highest count first.
        counts = {
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        }
        hist = file_indexer.Histogram()
        hist._words.update(counts)
        hist._total = 150

        top = hist.top_words(4)

        expected_top = [
            ('word1', 50),
            ('word2', 40),
            ('word3', 30),
            ('word4', 20),
        ]
        self.assertEqual(top, expected_top)
    def test_top_words_too_few(self):
        # When fewer words are stored than requested, every stored word is
        # returned, highest count first.
        counts = {
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        }
        hist = file_indexer.Histogram()
        hist._words.update(counts)
        hist._total = 150

        # No argument: rely on the default word count of 10.
        top = hist.top_words()

        expected_top = [
            ('word1', 50),
            ('word2', 40),
            ('word3', 30),
            ('word4', 20),
            ('word5', 10),
        ]
        self.assertEqual(top, expected_top)
    def test_top_words_small_count_with_tie(self):
        # A tie at the cut-off extends the result: three words are
        # requested, but all words sharing the last qualifying count come
        # back, so four entries are expected.
        counts = {
            'word1': 50,
            'word2': 40,
            'word3': 40,
            'word4': 40,
            'word5': 10,
        }
        hist = file_indexer.Histogram()
        hist._words.update(counts)
        hist._total = 150

        top = hist.top_words(3)

        # Tied words are expected in descending word order
        # (word4, word3, word2).
        expected_top = [
            ('word1', 50),
            ('word4', 40),
            ('word3', 40),
            ('word2', 40),
        ]
        self.assertEqual(top, expected_top)
    def test_workers_specified(self, mock_from_file, mock_Pool,
                               mock_cpu_count):
        # With an explicit worker count, main() must size the pool from
        # that count (never consulting the CPU count), map the file list
        # through the pool, and write the merged report to ``out``.
        files = ['file%d' % i for i in range(5)]
        out = six.StringIO()
        mock_Pool.return_value.map.return_value = self.make_results()
        mock_from_file.side_effect = lambda x: file_indexer.Histogram()

        file_indexer.main(files, out, workers=7)

        # Five canned histograms of 150 words each are merged into one.
        expected_report = ('Total number of words: 750\n'
                           'Total distinct words: 5\n'
                           '\n'
                           'Top 5 word(s):\n'
                           '    word1: 250\n'
                           '    word2: 200\n'
                           '    word3: 150\n'
                           '    word4: 100\n'
                           '    word5: 50\n')
        self.assertEqual(out.getvalue(), expected_report)
        self.assertFalse(mock_cpu_count.called)
        mock_Pool.assert_called_once_with(7)
        pool_map = mock_Pool.return_value.map
        pool_map.assert_called_once_with(
            file_indexer.histogram_from_file, files)
        # The mocked from-file hook itself must not be invoked in this
        # process.
        self.assertFalse(mock_from_file.called)
    def test_iadd_bad_other(self):
        # In-place addition with a non-histogram operand must signal
        # "unsupported" by returning NotImplemented.
        hist = file_indexer.Histogram()

        outcome = hist.__iadd__('other')

        self.assertEqual(outcome, NotImplemented)