Example #1
0
    def test_no_sort_values(self):
        # values should not be sorted unless that was requested (#660)
        expected = [('a', ['alligator', 'actuary', 'artichoke']),
                    ('b', ['bowling', 'balloon', 'baby'])]

        mr_job = MRGroup(['-r', self.RUNNER])
        mr_job.sandbox(stdin=BytesIO(self._INPUT))

        with mr_job.make_runner() as runner:
            runner.run()

            # sort the (key, values) pairs so the comparison doesn't
            # depend on the order in which the runner emits them
            results = list(mr_job.parse_output(runner.cat_output()))
            results.sort()

            self.assertEqual(results, expected)
Example #2
0
    def test_sort_in_memory_on_windows(self):
        # make platform.system() report Windows for the rest of the test
        self.start(patch('platform.system', return_value='Windows'))

        mr_job = MRGroup(['-r', 'local'])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            results = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                results,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # the mocked check_call must not have been invoked at all
        self.assertFalse(self.check_call.called)
Example #3
0
    def test_missing_sort_bin(self):
        # making a patched check_call raise an exception leads to pickling
        # problems in multiprocessing, so point --sort-bin at a binary
        # that doesn't exist instead
        job_args = ['-r', 'local', '--sort-bin', 'bort-xslkjfsasdf']
        mr_job = MRGroup(job_args)
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            results = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                results,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # both check_call and the in-memory sort should have been invoked
        self.assertTrue(self.check_call.called)
        self.assertTrue(self._sort_lines_in_memory.called)
Example #4
0
    def _test_environment_variables(self, *args):
        mr_job = MRGroup(['-r', 'local'])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            # job output is covered by other tests; only the environment
            # handed to check_call is inspected here

            self.assertTrue(self.check_call.called)
            call_kwargs = self.check_call.call_args[1]
            env = call_kwargs['env']

            self.assertEqual(env['LC_ALL'], 'C')

            # both temp-dir variables should match the runner's local
            # tmp dir
            for var_name in ('TMP', 'TMPDIR'):
                self.assertEqual(env[var_name], runner._get_local_tmp_dir())

            self.assertNotIn('TEMP', env)  # this was for Windows sort
Example #5
0
    def test_custom_sort_bin(self):
        # run the job with 'sort -r' as the sort binary
        mr_job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
        stdin = BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles')
        mr_job.sandbox(stdin=stdin)

        expected = [
            ('a', ['apples']),
            ('b', ['buffaloes', 'bicycles', 'bears', 'babies']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            results = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(results, expected)

        self.assertTrue(self.check_call.called)

        # first positional argument of the most recent check_call() call
        positional_args = self.check_call.call_args[0]
        sort_cmd = positional_args[0]

        self.assertEqual(sort_cmd[:2], ['sort', '-r'])
Example #6
0
    def _test_environment_variables(self, *args):
        input_stream = BytesIO(b'apples\nbuffaloes\nbears')

        mr_job = MRGroup(['-r', 'local'])
        mr_job.sandbox(stdin=input_stream)

        with mr_job.make_runner() as runner:
            runner.run()

            # job output is covered by other tests; only the environment
            # handed to check_call is inspected here

            self.assertTrue(self.check_call.called)
            call_kwargs = self.check_call.call_args[1]
            env = call_kwargs['env']

            self.assertEqual(env['LC_ALL'], 'C')

            # both temp-dir variables should match the runner's local
            # tmp dir
            for var_name in ('TMP', 'TMPDIR'):
                self.assertEqual(env[var_name], runner._get_local_tmp_dir())

            self.assertNotIn('TEMP', env)  # this was for Windows sort
Example #7
0
    def test_empty_sort_bin_means_default(self):
        # pass an empty string for --sort-bin
        mr_job = MRGroup(['-r', 'local', '--sort-bin', ''])
        stdin = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            results = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                results,
                [('a', ['apples']), ('b', ['buffaloes', 'bears'])])

        # check_call should have been used, not the in-memory sort
        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        # first positional argument of the most recent check_call() call
        positional_args = self.check_call.call_args[0]
        sort_cmd = positional_args[0]

        expected_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']
        self.assertEqual(sort_cmd[:6], expected_prefix)
Example #8
0
    def test_no_sort_values(self):
        # values should not be sorted unless that was requested (#660)
        expected = [
            ('a', ['alligator', 'actuary', 'artichoke']),
            ('b', ['bowling', 'balloon', 'baby']),
        ]

        mr_job = MRGroup(['-r', self.RUNNER])
        mr_job.sandbox(stdin=BytesIO(self._INPUT))

        with mr_job.make_runner() as runner:
            runner.run()

            # sort the (key, values) pairs so the comparison doesn't
            # depend on the order in which the runner emits them
            pairs = list(mr_job.parse_output(runner.cat_output()))
            pairs.sort()

            self.assertEqual(pairs, expected)
Example #9
0
    def test_sort_in_memory_on_windows(self):
        # make platform.system() report Windows for the rest of the test
        self.start(patch('platform.system', return_value='Windows'))

        mr_job = MRGroup(['-r', 'local'])
        input_stream = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=input_stream)

        expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

        with mr_job.make_runner() as runner:
            runner.run()

            pairs = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(pairs, expected)

        # the mocked check_call must not have been invoked at all
        self.assertFalse(self.check_call.called)
Example #10
0
    def test_missing_sort_bin(self):
        # making a patched check_call raise an exception leads to pickling
        # problems in multiprocessing, so point --sort-bin at a binary
        # that doesn't exist instead
        job_args = ['-r', 'local', '--sort-bin', 'bort-xslkjfsasdf']
        mr_job = MRGroup(job_args)
        input_stream = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=input_stream)

        expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

        with mr_job.make_runner() as runner:
            runner.run()

            pairs = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(pairs, expected)

        # both check_call and the in-memory sort should have been invoked
        self.assertTrue(self.check_call.called)
        self.assertTrue(self._sort_lines_in_memory.called)
Example #11
0
    def test_custom_sort_bin(self):
        # run the job with 'sort -r' as the sort binary
        mr_job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
        input_stream = BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles')
        mr_job.sandbox(stdin=input_stream)

        with mr_job.make_runner() as runner:
            runner.run()

            pairs = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(
                pairs,
                [('a', ['apples']),
                 ('b', ['buffaloes', 'bicycles', 'bears', 'babies'])])

        self.assertTrue(self.check_call.called)

        # first positional argument of the most recent check_call() call
        call_positional = self.check_call.call_args[0]
        command = call_positional[0]

        self.assertEqual(command[:2], ['sort', '-r'])
Example #12
0
    def test_default_sort_bin(self):
        # no --sort-bin given
        mr_job = MRGroup(['-r', 'local'])
        input_stream = BytesIO(b'apples\nbuffaloes\nbears')
        mr_job.sandbox(stdin=input_stream)

        expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

        with mr_job.make_runner() as runner:
            runner.run()

            pairs = sorted(mr_job.parse_output(runner.cat_output()))
            self.assertEqual(pairs, expected)

        # check_call should have been used, not the in-memory sort
        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        # first positional argument of the most recent check_call() call
        call_positional = self.check_call.call_args[0]
        command = call_positional[0]

        expected_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']
        self.assertEqual(command[:6], expected_prefix)