def test_init(self):
    # A freshly constructed Histogram has an empty defaultdict of word
    # counts and a zero running total.
    hist = file_indexer.Histogram()
    words = hist._words

    self.assertIsInstance(words, collections.defaultdict)
    # Must be checked before the lookup below, which inserts a key.
    self.assertEqual(words, {})
    # Reading a missing key exercises the default factory (the lambda).
    self.assertEqual(words['spam'], 0)
    self.assertEqual(hist._total, 0)
def test_dunder_add_base(self):
    # ``a + b`` must produce a merged histogram (per-word counts and
    # totals summed) while leaving both operands unchanged.
    left_words = {
        'word1': 50,
        'word2': 40,
        'word3': 30,
        'word4': 20,
        'word5': 10,
    }
    right_words = {
        'word2': 20,
        'word4': 50,
        'word6': 80,
    }

    left = file_indexer.Histogram()
    left._words.update(left_words)
    left._total = 150

    right = file_indexer.Histogram()
    right._words.update(right_words)
    right._total = 150

    combined = left + right

    merged_words = {
        'word1': 50,
        'word2': 60,
        'word3': 30,
        'word4': 70,
        'word5': 10,
        'word6': 80,
    }
    self.assertEqual(combined._words, merged_words)
    self.assertEqual(combined._total, 300)

    # Neither operand may have been modified by the addition.
    for operand, original_words in ((left, left_words),
                                    (right, right_words)):
        self.assertEqual(operand._words, original_words)
        self.assertEqual(operand._total, 150)
def test_distinct_words(self):
    # ``distinct_words`` reports how many different words are stored,
    # independent of their individual counts.
    counts = {
        'word1': 50,
        'word2': 40,
        'word3': 30,
        'word4': 20,
        'word5': 10,
    }
    hist = file_indexer.Histogram()
    hist._words.update(counts)
    hist._total = 150

    self.assertEqual(hist.distinct_words, 5)
def make_results(self, file_cnt=5):
    # Helper: build ``file_cnt`` identical, pre-populated histograms
    # (five words, total of 150 each) and return them as a list.
    histograms = []
    for _ in range(file_cnt):
        hist = file_indexer.Histogram()
        hist._words.update({
            'word1': 50,
            'word2': 40,
            'word3': 30,
            'word4': 20,
            'word5': 10,
        })
        hist._total = 150
        histograms.append(hist)
    return histograms
def test_add(self):
    # ``add`` folds case: differently-cased spellings of a word are
    # counted under a single lower-case key.
    hist = file_indexer.Histogram()
    for word in ('SpAm', 'sPaM', 'spam', 'spammER'):
        hist.add(word)

    expected_words = {
        'spam': 3,
        'spammer': 1,
    }
    self.assertEqual(hist._words, expected_words)
    self.assertEqual(hist._total, 4)
def test_negative_workers(self, mock_from_file, mock_Pool, mock_cpu_count):
    # A negative worker count must be rejected with ValueError, and none
    # of the patched collaborators may have been called.
    mock_Pool.return_value.map.return_value = self.make_results()
    mock_from_file.side_effect = lambda x: file_indexer.Histogram()
    files = ['file%d' % idx for idx in range(5)]
    out = six.StringIO()

    with self.assertRaises(ValueError):
        file_indexer.main(files, out, workers=-1)

    for patched in (mock_cpu_count, mock_Pool, mock_from_file):
        self.assertFalse(patched.called)
def test_top_words_small_count(self):
    # Asking for fewer words than are stored returns only that many
    # entries, ordered from highest count to lowest.
    ranked = [
        ('word1', 50),
        ('word2', 40),
        ('word3', 30),
        ('word4', 20),
        ('word5', 10),
    ]
    hist = file_indexer.Histogram()
    hist._words.update(dict(ranked))
    hist._total = 150

    top_four = hist.top_words(4)

    self.assertEqual(top_four, ranked[:4])
def test_top_words_too_few(self):
    # When the histogram holds fewer words than requested, every stored
    # word is returned, ordered from highest count to lowest.
    ranked = [
        ('word1', 50),
        ('word2', 40),
        ('word3', 30),
        ('word4', 20),
        ('word5', 10),
    ]
    hist = file_indexer.Histogram()
    hist._words.update(dict(ranked))
    hist._total = 150

    # No argument: the default word count is 10, more than the 5 stored.
    everything = hist.top_words()

    self.assertEqual(everything, ranked)
def test_top_words_small_count_with_tie(self):
    # Three words tie on 40, so asking for the top 3 yields four entries:
    # the leader plus every word in the tied group.
    counts = {
        'word1': 50,
        'word2': 40,
        'word3': 40,
        'word4': 40,
        'word5': 10,
    }
    hist = file_indexer.Histogram()
    hist._words.update(counts)
    hist._total = 150

    top_three = hist.top_words(3)

    expected = [
        ('word1', 50),
        # Tied words are expected in descending lexicographic order.
        ('word4', 40),
        ('word3', 40),
        ('word2', 40),
    ]
    self.assertEqual(top_three, expected)
def test_workers_specified(self, mock_from_file, mock_Pool, mock_cpu_count):
    # With an explicit worker count the pool is created with exactly that
    # size, the CPU count is not consulted, and the files are handed to
    # Pool.map in one call.
    pool = mock_Pool.return_value
    pool.map.return_value = self.make_results()
    mock_from_file.side_effect = lambda x: file_indexer.Histogram()
    files = ['file%d' % idx for idx in range(5)]
    out = six.StringIO()

    file_indexer.main(files, out, workers=7)

    # Five identical histograms from make_results() merged together.
    expected_report = (
        'Total number of words: 750\n'
        'Total distinct words: 5\n'
        '\n'
        'Top 5 word(s):\n'
        ' word1: 250\n'
        ' word2: 200\n'
        ' word3: 150\n'
        ' word4: 100\n'
        ' word5: 50\n'
    )
    self.assertEqual(out.getvalue(), expected_report)

    self.assertFalse(mock_cpu_count.called)
    mock_Pool.assert_called_once_with(7)
    pool.map.assert_called_once_with(
        file_indexer.histogram_from_file, files)
    self.assertFalse(mock_from_file.called)
def test_iadd_bad_other(self):
    # In-place addition with an unsupported operand type must return the
    # NotImplemented sentinel.  ``__iadd__`` is invoked directly because
    # the ``+=`` statement never exposes that return value to the caller.
    obj = file_indexer.Histogram()

    result = obj.__iadd__('other')

    # NotImplemented is a singleton, so check identity rather than
    # equality; an object with a permissive ``__eq__`` cannot slip through.
    self.assertIs(result, NotImplemented)