def test_empty(self):
    """A job built with no args and sandboxed without stdin yields no output lines."""
    wordcount = MRSparkWordcount([])
    wordcount.sandbox()

    with wordcount.make_runner() as active_runner:
        active_runner.run()

        # Collect and order the output while still inside the runner context.
        produced = list(to_lines(active_runner.cat_output()))
        produced.sort()

        self.assertEqual(produced, [])
def test_spark_mrjob(self):
    """Count words in four short lines using ``-r inline`` and check the totals.

    NOTE(review): a second ``test_spark_mrjob`` with the same logic is
    defined right after this one in the file; if both belong to the same
    class, this definition is shadowed and never runs -- confirm.
    """
    stdin = BytesIO(b'one fish\ntwo fish\nred fish\nblue fish\n')

    wordcount = MRSparkWordcount(['-r', 'inline'])
    wordcount.sandbox(stdin=stdin)

    word_totals = {}

    with wordcount.make_runner() as active_runner:
        active_runner.run()

        # Each output line is evaluated and unpacked as a (word, total) pair.
        for raw_line in to_lines(active_runner.cat_output()):
            word, total = safeeval(raw_line)
            word_totals[word] = total

    expected = {'blue': 1, 'fish': 4, 'one': 1, 'red': 1, 'two': 1}
    self.assertEqual(word_totals, expected)
def test_spark_mrjob(self):
    """Feed four "<color/number> fish" lines to the inline runner and verify counts."""
    payload = b'one fish\ntwo fish\nred fish\nblue fish\n'

    mr_job = MRSparkWordcount(['-r', 'inline'])
    mr_job.sandbox(stdin=BytesIO(payload))

    with mr_job.make_runner() as rnr:
        rnr.run()

        # Build the mapping in one pass; every evaluated line must unpack
        # into exactly two items (key, value), as in a plain tuple assignment.
        tally = {
            key: value
            for key, value in map(safeeval, to_lines(rnr.cat_output()))
        }

    self.assertEqual(
        tally,
        {'blue': 1, 'fish': 4, 'one': 1, 'red': 1, 'two': 1},
    )
def test_count_words(self):
    """Run the job on a three-line rhyme and compare the sorted (word, count) pairs."""
    rhyme = b'Mary had a little lamb\nlittle lamb\nlittle lamb'

    # The input contains "Mary" but the expected key is "mary": the test
    # expects the job to report words in lower case.
    expected_pairs = [
        ('a', 1),
        ('had', 1),
        ('lamb', 3),
        ('little', 3),
        ('mary', 1),
    ]

    wordcount = MRSparkWordcount([])
    wordcount.sandbox(stdin=BytesIO(rhyme))

    with wordcount.make_runner() as active_runner:
        active_runner.run()

        pairs = [
            safeeval(raw_line)
            for raw_line in to_lines(active_runner.cat_output())
        ]
        pairs.sort()

        self.assertEqual(pairs, expected_pairs)