Python MRGroup Examples, tests.mr_group.MRGroup Python Examples

Example #1

0

Show file

File: test_sim.py Project: okomestudio/mrjob

    def test_no_sort_values(self):
        # Regression test for #660: values must not be sorted unless that
        # was requested. The expected value lists below are deliberately
        # not in alphabetical order.
        expected = [
            ('a', ['alligator', 'actuary', 'artichoke']),
            ('b', ['bowling', 'balloon', 'baby']),
        ]

        mr_job = MRGroup(['-r', self.RUNNER])
        mr_job.sandbox(stdin=BytesIO(self._INPUT))

        with mr_job.make_runner() as runner:
            runner.run()

            # only the (key, values) pairs are sorted, so the comparison
            # doesn't depend on the order in which keys are emitted
            pairs = sorted(mr_job.parse_output(runner.cat_output()))

            self.assertEqual(pairs, expected)

Example #2

0

Show file

File: test_local.py Project: Affirm/mrjob

    def test_sort_in_memory_on_windows(self):
        # make platform.system() report 'Windows' (self.start() presumably
        # activates the patch and undoes it on teardown -- defined elsewhere)
        windows_patch = patch('platform.system', return_value='Windows')
        self.start(windows_patch)

        mr_job = MRGroup(['-r', 'local'])
        stdin = BytesIO(
            b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                grouped,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # the job still produced correctly grouped output, but check_call
        # was never invoked
        self.assertFalse(self.check_call.called)

Example #3

0

Show file

File: test_local.py Project: Affirm/mrjob

    def test_missing_sort_bin(self):
        # patching check_call to raise an exception causes pickling issues in
        # multiprocessing, so just use a binary that doesn't exist
        job_args = ['-r', 'local', '--sort-bin', 'bort-xslkjfsasdf']
        mr_job = MRGroup(job_args)
        mr_job.sandbox(stdin=BytesIO(
            b'apples\nbuffaloes\nbears'))

        expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(grouped, expected)

        # check_call was still invoked, and _sort_lines_in_memory was called
        # as well (presumably as the fallback once the binary fails)
        self.assertTrue(self.check_call.called)
        self.assertTrue(self._sort_lines_in_memory.called)

Example #4

0

Show file

    def _test_environment_variables(self, *args):
        # Checks the ``env`` keyword argument passed to the most recent
        # check_call() invocation. Extra positional args are accepted and
        # ignored.
        mr_job = MRGroup(['-r', 'local'])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            # don't bother with output; already tested this above

            self.assertTrue(self.check_call.called)
            call_kwargs = self.check_call.call_args[1]
            env = call_kwargs['env']

            self.assertEqual(env['LC_ALL'], 'C')

            # both temp-dir variables must match the runner's local tmp dir
            for var_name in ('TMP', 'TMPDIR'):
                self.assertEqual(env[var_name], runner._get_local_tmp_dir())

            self.assertNotIn('TEMP', env)  # this was for Windows sort

Example #5

0

Show file

File: test_local.py Project: Affirm/mrjob

    def test_custom_sort_bin(self):
        # --sort-bin may include arguments; 'sort -r' means the values for
        # 'b' are expected in reverse alphabetical order
        mr_job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
        stdin = BytesIO(
            b'apples\nbabies\nbuffaloes\nbears\nbicycles')
        mr_job.sandbox(stdin=stdin)

        expected = [
            ('a', ['apples']),
            ('b', ['buffaloes', 'bicycles', 'bears', 'babies']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(grouped, expected)

        self.assertTrue(self.check_call.called)

        # first positional argument of the last check_call() is the command
        call_positional = self.check_call.call_args[0]
        sort_args = call_positional[0]

        self.assertEqual(sort_args[:2], ['sort', '-r'])

Example #6

0

Show file

File: test_local.py Project: Affirm/mrjob

    def _test_environment_variables(self, *args):
        # Runs a small local job, then inspects the environment that was
        # handed to check_call() via its ``env`` keyword argument. Any
        # extra positional args are ignored.
        mr_job = MRGroup(['-r', 'local'])
        mr_job.sandbox(stdin=BytesIO(
            b'apples\nbuffaloes\nbears'))

        with mr_job.make_runner() as runner:
            runner.run()

            # don't bother with output; already tested this above

            self.assertTrue(self.check_call.called)
            sort_env = self.check_call.call_args[1]['env']

            self.assertEqual(sort_env['LC_ALL'], 'C')

            # TMP, then TMPDIR: each should point at the runner's tmp dir
            for tmp_var in ('TMP', 'TMPDIR'):
                self.assertEqual(
                    sort_env[tmp_var], runner._get_local_tmp_dir())

            self.assertNotIn('TEMP', sort_env)  # this was for Windows sort

Example #7

0

Show file

File: test_local.py Project: Affirm/mrjob

    def test_empty_sort_bin_means_default(self):
        # an empty --sort-bin should result in the same sort command prefix
        # that is asserted at the bottom of this test
        mr_job = MRGroup(['-r', 'local', '--sort-bin', ''])
        stdin = BytesIO(
            b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                grouped,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # check_call was used and _sort_lines_in_memory was not
        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        call_positional = self.check_call.call_args[0]
        sort_args = call_positional[0]

        # tab-delimited, keyed on the first field only, stable sort
        expected_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']
        self.assertEqual(sort_args[:6], expected_prefix)

Example #8

0

Show file

File: test_sim.py Project: Streaky75/mrjob

    def test_no_sort_values(self):
        # don't sort values if not requested (#660); note that the expected
        # value lists are intentionally not alphabetical
        mr_job = MRGroup(['-r', self.RUNNER])
        stdin = BytesIO(self._INPUT)
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            # sort just the (key, values) pairs so key order doesn't matter
            pairs = sorted(mr_job.parse_output(runner.cat_output()))

            expected = [
                ('a', ['alligator', 'actuary', 'artichoke']),
                ('b', ['bowling', 'balloon', 'baby']),
            ]
            self.assertEqual(pairs, expected)

Example #9

0

Show file

    def test_sort_in_memory_on_windows(self):
        # have platform.system() return 'Windows' while this test runs
        self.start(patch('platform.system', return_value='Windows'))

        mr_job = MRGroup(['-r', 'local'])
        mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

        expected = [
            ('a', ['apples']),
            ('b', ['buffaloes', 'bears']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(grouped, expected)

        # output is still grouped correctly, without check_call ever
        # having been invoked
        self.assertFalse(self.check_call.called)

Example #10

0

Show file

    def test_missing_sort_bin(self):
        # patching check_call to raise an exception causes pickling issues in
        # multiprocessing, so just use a binary that doesn't exist
        mr_job = MRGroup(['-r', 'local', '--sort-bin', 'bort-xslkjfsasdf'])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                grouped,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # both the check_call attempt and the _sort_lines_in_memory call
        # must have happened
        for mocked in (self.check_call, self._sort_lines_in_memory):
            self.assertTrue(mocked.called)

Example #11

0

Show file

    def test_custom_sort_bin(self):
        # a custom sort command with an argument: 'sort -r' should yield
        # each key's values in reverse alphabetical order
        custom_sort = ['sort', '-r']

        mr_job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
        mr_job.sandbox(
            stdin=BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles'))

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                grouped,
                [('a', ['apples']),
                 ('b', ['buffaloes', 'bicycles', 'bears', 'babies'])])

        self.assertTrue(self.check_call.called)

        # the command is the first positional argument of the last call
        sort_args = self.check_call.call_args[0][0]
        self.assertEqual(sort_args[:2], custom_sort)

Example #12

0

Show file

    def test_default_sort_bin(self):
        # with no --sort-bin given, sorting should go through check_call
        # using the command prefix asserted below
        mr_job = MRGroup(['-r', 'local'])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        expected = [
            ('a', ['apples']),
            ('b', ['buffaloes', 'bears']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            grouped = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(grouped, expected)

        # check_call was used; _sort_lines_in_memory was not
        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        call_positional = self.check_call.call_args[0]
        sort_args = call_positional[0]

        # tab as field separator, key on field 1 only, stable sort
        self.assertEqual(sort_args[:6],
                         ['sort', '-t', '\t', '-k', '1,1', '-s'])