def test_empty(self):
        """A job sandboxed without explicit stdin yields no output lines."""
        wordcount_job = MRSparkWordcount([])
        wordcount_job.sandbox()

        with wordcount_job.make_runner() as job_runner:
            job_runner.run()

            # Sort before comparing, mirroring how the other tests treat
            # output order as insignificant.
            output_lines = sorted(to_lines(job_runner.cat_output()))
            self.assertEqual(output_lines, [])
Beispiel #2
0
    def test_spark_mrjob(self):
        """Word counts from a job invoked with ``-r inline`` match the input.

        Feeds four short lines through the job's sandboxed stdin and checks
        the decoded (word, count) pairs against the expected tallies.
        """
        stdin_bytes = b'one fish\ntwo fish\nred fish\nblue fish\n'

        wordcount_job = MRSparkWordcount(['-r', 'inline'])
        wordcount_job.sandbox(stdin=BytesIO(stdin_bytes))

        with wordcount_job.make_runner() as job_runner:
            job_runner.run()

            # Each output line is evaluated into a (word, count) pair.
            parsed_pairs = map(safeeval, to_lines(job_runner.cat_output()))
            word_counts = {word: count for word, count in parsed_pairs}

        expected_counts = {'blue': 1, 'fish': 4, 'one': 1, 'red': 1, 'two': 1}
        self.assertEqual(word_counts, expected_counts)
Beispiel #3
0
    def test_spark_mrjob(self):
        """Check per-word totals produced by the job run with ``-r inline``."""
        raw_input = b'one fish\ntwo fish\nred fish\nblue fish\n'

        mr_job = MRSparkWordcount(['-r', 'inline'])
        mr_job.sandbox(stdin=BytesIO(raw_input))

        tallies = {}

        with mr_job.make_runner() as active_runner:
            active_runner.run()

            # Every output line evaluates to a two-item (word, count) pair.
            output_lines = to_lines(active_runner.cat_output())
            for word, count in map(safeeval, output_lines):
                tallies[word] = count

        expected = {'blue': 1, 'fish': 4, 'one': 1, 'red': 1, 'two': 1}
        self.assertEqual(tallies, expected)
    def test_count_words(self):
        """Counting a short rhyme yields sorted, lowercased (word, count) pairs.

        The input has no trailing newline and a capitalized first word; the
        expected output lists 'mary' in lowercase.
        """
        rhyme = BytesIO(b'Mary had a little lamb\nlittle lamb\nlittle lamb')

        wordcount_job = MRSparkWordcount([])
        wordcount_job.sandbox(stdin=rhyme)

        expected_pairs = [
            ('a', 1),
            ('had', 1),
            ('lamb', 3),
            ('little', 3),
            ('mary', 1),
        ]

        with wordcount_job.make_runner() as job_runner:
            job_runner.run()

            # Decode each output line, then order the pairs so the
            # comparison does not depend on emission order.
            actual_pairs = [
                safeeval(raw_line)
                for raw_line in to_lines(job_runner.cat_output())
            ]
            actual_pairs.sort()

            self.assertEqual(actual_pairs, expected_pairs)