Ejemplo n.º 1
0
    def test_group_processing(self):
        """Check the combined top-ten words computed through a task group.

        Reads three sample texts, submits one ``word_count`` signature per
        text as a single ``group``, reduces the collected results with
        ``reduce_word_count`` and compares ``top_ten`` of that against the
        expected data stored in ``test_data/combined.json``.

        NOTE(review): presumably needs a reachable task worker/broker for
        ``apply_async``/``get`` to complete -- confirm against test setup.
        """
        FILES = ['test_data/a_modest_proposal.txt',
                 'test_data/metamorphosis.txt',
                 'test_data/leviathan.txt']

        # Read every fixture under a context manager so the handles are
        # closed deterministically instead of being left to the GC.
        texts = []
        for name in FILES:
            with open(name, 'r') as source:
                texts.append(source.read())
        with open('test_data/combined.json', 'r') as expected_file:
            top10 = json.load(expected_file)

        result = group(word_count.s(text) for text in texts).apply_async()
        self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
Ejemplo n.º 2
0
    def test_group_processing(self):
        """Verify the top-ten result for three texts processed as one group.

        Each sample text is turned into a ``word_count`` signature, the
        signatures are dispatched together via ``group(...).apply_async()``,
        and the gathered results are merged with ``reduce_word_count``.
        ``top_ten`` of the merged counts must equal the contents of
        ``test_data/combined.json``.

        NOTE(review): presumably needs a reachable task worker/broker for
        ``apply_async``/``get`` to complete -- confirm against test setup.
        """
        FILES = [
            'test_data/a_modest_proposal.txt', 'test_data/metamorphosis.txt',
            'test_data/leviathan.txt'
        ]

        # Context managers guarantee each fixture file is closed right
        # after it has been read (the original left the handles open).
        texts = []
        for name in FILES:
            with open(name, 'r') as source:
                texts.append(source.read())
        with open('test_data/combined.json', 'r') as expected_file:
            top10 = json.load(expected_file)

        result = group(word_count.s(text) for text in texts).apply_async()
        self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
Ejemplo n.º 3
0
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import

from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten

# Script entry: with no file arguments print the usage text; otherwise read
# every file named on the command line, dispatch one word_count task per file
# as a group, and print the top ten words of the reduced result.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ...")
else:
    # Read each input under a context manager so its handle is closed
    # before the tasks are dispatched (the original never closed them).
    texts = []
    for name in sys.argv[1:]:
        with open(name, 'r') as source:
            texts.append(source.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # Parenthesised single-argument form: prints the same value under the
    # Python 2 print statement and is valid syntax on Python 3 as well.
    print(top_ten(reduce_word_count(result.get())))
Ejemplo n.º 4
0
    def test_AModestProposal(self):
        """Check ``top_ten`` against the stored top-ten for A Modest Proposal.

        Loads the word counts from ``test_data/a_modest_proposal.json``
        into a ``Counter`` and expects ``top_ten`` of it to equal the data in
        ``test_data/a_modest_proposal_top_10.json``.
        """
        # Load both fixtures inside ``with`` blocks so the files are closed
        # as soon as they are parsed rather than leaked.
        with open('test_data/a_modest_proposal.json', 'r') as counts_file:
            word_map = Counter(json.load(counts_file))
        with open('test_data/a_modest_proposal_top_10.json', 'r') as top_file:
            top10 = json.load(top_file)

        self.assertEqual(top_ten(word_map), top10)
Ejemplo n.º 5
0
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import

from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten


# Script entry: without arguments show the usage message; with arguments,
# treat each one as a file path, count words in every file through a task
# group, and print the top ten words of the combined counts.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ..."
    )
else:
    # Close every input file as soon as it has been read; the original
    # list comprehension left all handles open.
    texts = []
    for name in sys.argv[1:]:
        with open(name, "r") as source:
            texts.append(source.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # A single parenthesised argument prints identically with the Python 2
    # print statement and also parses on Python 3.
    print(top_ten(reduce_word_count(result.get())))
Ejemplo n.º 6
0
 def test_single_dict(self):
     """``top_ten`` of a one-entry counter must equal ``{'word': 1}``."""
     single_entry = Counter({'word': 1})
     actual = top_ten(single_entry)
     self.assertEqual(actual, {'word': 1})
Ejemplo n.º 7
0
 def test_empty(self):
     """``top_ten`` of an empty counter must equal an empty dict."""
     no_words = Counter({})
     actual = top_ten(no_words)
     self.assertEqual(actual, {})
Ejemplo n.º 8
0
    def test_AModestProposal(self):
        """Compare ``top_ten`` output with the stored expectation.

        The counts in ``test_data/a_modest_proposal.json`` are wrapped in a
        ``Counter`` and passed to ``top_ten``; the result must equal the
        contents of ``test_data/a_modest_proposal_top_10.json``.
        """
        # ``with`` closes each fixture file once parsed; the original
        # passed bare ``open(...)`` results to ``json.load`` and leaked them.
        with open('test_data/a_modest_proposal.json', 'r') as counts_file:
            word_map = Counter(json.load(counts_file))
        with open('test_data/a_modest_proposal_top_10.json', 'r') as top_file:
            top10 = json.load(top_file)

        self.assertEqual(top_ten(word_map), top10)
Ejemplo n.º 9
0
 def test_single_dict(self):
     """A counter with only ``'word': 1`` should yield ``{'word': 1}``."""
     lone_word = Counter({'word': 1})
     outcome = top_ten(lone_word)
     self.assertEqual(outcome, {'word': 1})
Ejemplo n.º 10
0
 def test_empty(self):
     """An empty counter should yield an empty result from ``top_ten``."""
     empty_counts = Counter({})
     outcome = top_ten(empty_counts)
     self.assertEqual(outcome, {})