def test_group_processing(self):
    """Run word_count over three texts as a group and check the top ten.

    Each file listed below is read in full and submitted as its own
    ``word_count`` signature inside a ``group``. The collected results
    are passed through ``reduce_word_count`` and ``top_ten`` and compared
    with the expected value stored in ``test_data/combined.json``.
    """
    FILES = [
        'test_data/a_modest_proposal.txt',
        'test_data/metamorphosis.txt',
        'test_data/leviathan.txt',
    ]
    texts = []
    for name in FILES:
        # Use a context manager so each handle is closed as soon as the
        # text has been read, instead of relying on garbage collection.
        with open(name, 'r') as source:
            texts.append(source.read())
    with open('test_data/combined.json', 'r') as fixture:
        top10 = json.load(fixture)
    result = group(word_count.s(text) for text in texts).apply_async()
    self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
def test_group_processing(self):
    """Check the combined top-ten result for three texts run as a group.

    The three input files are read, one ``word_count`` signature per
    text is dispatched through ``group(...).apply_async()``, and the
    gathered results go through ``reduce_word_count`` and ``top_ten``.
    The outcome must equal the JSON fixture ``test_data/combined.json``.
    """
    FILES = [
        'test_data/a_modest_proposal.txt',
        'test_data/metamorphosis.txt',
        'test_data/leviathan.txt',
    ]

    texts = []
    for name in FILES:
        # Close every input file deterministically; the previous
        # open(...).read() form left the handles open.
        with open(name, 'r') as text_file:
            texts.append(text_file.read())

    with open('test_data/combined.json', 'r') as expected_file:
        top10 = json.load(expected_file)

    result = group(word_count.s(text) for text in texts).apply_async()
    self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten

# With no command-line arguments, print the usage text. Otherwise read
# every named file, dispatch one word_count signature per file through a
# group, and print top_ten of the reduced results.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ...")
else:
    texts = []
    for name in sys.argv[1:]:
        # Context manager closes each input file once its text is read.
        with open(name, 'r') as source:
            texts.append(source.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # Single-argument print(...) behaves the same as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    print(top_ten(reduce_word_count(result.get())))
def test_AModestProposal(self):
    """Compare top_ten of a stored word map with its stored expected result.

    The JSON mapping in ``test_data/a_modest_proposal.json`` is loaded
    into a ``Counter`` and handed to ``top_ten``; the result must equal
    the contents of ``test_data/a_modest_proposal_top_10.json``.
    """
    # Open both fixtures with context managers so the handles are closed
    # rather than left to the garbage collector.
    with open('test_data/a_modest_proposal.json', 'r') as counts_file:
        word_map = Counter(json.load(counts_file))
    with open('test_data/a_modest_proposal_top_10.json', 'r') as top_file:
        top10 = json.load(top_file)
    self.assertEqual(top_ten(word_map), top10)
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten

# No arguments: show usage. Otherwise each argument is treated as a file
# path whose contents become one word_count signature in a group; the
# gathered results are reduced and the top_ten value is printed.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ..."
    )
else:
    texts = []
    for name in sys.argv[1:]:
        # Read inside a with-block so the file is closed immediately.
        with open(name, "r") as text_file:
            texts.append(text_file.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # Parenthesised single-argument print works on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(top_ten(reduce_word_count(result.get())))
def test_single_dict(self):
    """top_ten of a Counter holding one entry must equal that one entry."""
    single_entry = Counter({'word': 1})
    expected = {'word': 1}
    actual = top_ten(single_entry)
    self.assertEqual(actual, expected)
def test_empty(self):
    """top_ten of an empty Counter must equal an empty dict."""
    no_words = Counter({})
    outcome = top_ten(no_words)
    self.assertEqual(outcome, {})
def test_AModestProposal(self):
    """Check top_ten on the stored 'a_modest_proposal' word map.

    Loads ``test_data/a_modest_proposal.json`` into a ``Counter``, runs
    ``top_ten`` on it, and asserts equality with the JSON stored in
    ``test_data/a_modest_proposal_top_10.json``.
    """
    # The previous json.load(open(...)) form never closed the files;
    # with-blocks release each handle as soon as it has been parsed.
    with open('test_data/a_modest_proposal.json', 'r') as source:
        word_map = Counter(json.load(source))

    with open('test_data/a_modest_proposal_top_10.json', 'r') as expected:
        top10 = json.load(expected)

    self.assertEqual(top_ten(word_map), top10)