def test_no_sort_values(self):
    """Values must not be sorted unless sorting was requested (#660).

    Runs MRGroup on ``self._INPUT`` with the runner named by
    ``self.RUNNER`` and checks that, per key, the values come back in the
    (non-alphabetical) order listed below.
    """
    expected = [
        ('a', ['alligator', 'actuary', 'artichoke']),
        ('b', ['bowling', 'balloon', 'baby']),
    ]

    mr_job = MRGroup(['-r', self.RUNNER])
    mr_job.sandbox(stdin=BytesIO(self._INPUT))

    with mr_job.make_runner() as runner:
        runner.run()
        # materialize the output while the runner is still alive
        output = list(mr_job.parse_output(runner.cat_output()))

        # only the order of the keys is normalized; each key's values
        # are compared in the order the job emitted them
        self.assertEqual(sorted(output), expected)
def test_sort_in_memory_on_windows(self):
    """With ``platform.system()`` reporting Windows, ``check_call`` is
    never invoked by a local run, yet the output is still grouped by key.

    NOTE(review): ``self.check_call`` is presumably a mock installed by
    the test case's setup -- confirm against the enclosing class.
    """
    self.start(patch('platform.system', return_value='Windows'))

    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

    mr_job = MRGroup(['-r', 'local'])
    mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with mr_job.make_runner() as runner:
        runner.run()

        grouped = sorted(mr_job.parse_output(runner.cat_output()))
        self.assertEqual(grouped, expected)

        self.assertFalse(self.check_call.called)
def test_no_sort_values(self):
    """Don't sort values if not requested (#660).

    The expected per-key value lists are deliberately not in
    alphabetical order.
    """
    grouper = MRGroup(['-r', self.RUNNER])
    stdin = BytesIO(self._INPUT)
    grouper.sandbox(stdin=stdin)

    with grouper.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        output = list(grouper.parse_output(raw_output))

        # sorting the (key, values) pairs only fixes the key order
        self.assertEqual(
            sorted(output),
            [('a', ['alligator', 'actuary', 'artichoke']),
             ('b', ['bowling', 'balloon', 'baby'])])
def test_missing_sort_bin(self):
    """A nonexistent ``--sort-bin`` is attempted via ``check_call`` and
    then the in-memory sort is used; output is still grouped by key.

    NOTE(review): ``self.check_call`` and ``self._sort_lines_in_memory``
    are presumably mocks/spies from the test case's setup -- confirm.
    """
    # patching check_call to raise an exception causes pickling issues in
    # multiprocessing, so just use a binary that doesn't exist
    bogus_sort_bin = 'bort-xslkjfsasdf'
    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

    mr_job = MRGroup(['-r', 'local', '--sort-bin', bogus_sort_bin])
    mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with mr_job.make_runner() as runner:
        runner.run()

        grouped = sorted(mr_job.parse_output(runner.cat_output()))
        self.assertEqual(grouped, expected)

        self.assertTrue(self.check_call.called)
        self.assertTrue(self._sort_lines_in_memory.called)
def test_sort_in_memory_on_windows(self):
    """Local runs must not call ``check_call`` when ``platform.system()``
    returns ``'Windows'``; the grouped output is unaffected.
    """
    # make the code under test believe it is running on Windows
    windows_patch = patch('platform.system', return_value='Windows')
    self.start(windows_patch)

    grouper = MRGroup(['-r', 'local'])
    stdin = BytesIO(b'apples\nbuffaloes\nbears')
    grouper.sandbox(stdin=stdin)

    with grouper.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        self.assertEqual(
            sorted(grouper.parse_output(raw_output)),
            [('a', ['apples']),
             ('b', ['buffaloes', 'bears'])])

        self.assertFalse(self.check_call.called)
def test_missing_sort_bin(self):
    """When the configured sort binary does not exist, ``check_call`` is
    still invoked and ``_sort_lines_in_memory`` is called as well.
    """
    # patching check_call to raise an exception causes pickling issues in
    # multiprocessing, so just use a binary that doesn't exist
    job_args = ['-r', 'local', '--sort-bin', 'bort-xslkjfsasdf']
    grouper = MRGroup(job_args)

    stdin = BytesIO(b'apples\nbuffaloes\nbears')
    grouper.sandbox(stdin=stdin)

    with grouper.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        self.assertEqual(
            sorted(grouper.parse_output(raw_output)),
            [('a', ['apples']),
             ('b', ['buffaloes', 'bears'])])

        self.assertTrue(self.check_call.called)
        self.assertTrue(self._sort_lines_in_memory.called)
def _test_environment_variables(self, *args):
    """Check the ``env`` keyword passed to ``check_call`` by a local run.

    ``LC_ALL`` must be ``'C'``, ``TMP`` and ``TMPDIR`` must equal the
    runner's local tmp dir, and ``TEMP`` must be absent.

    ``*args`` is accepted but unused.
    """
    mr_job = MRGroup(['-r', 'local'])
    mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with mr_job.make_runner() as runner:
        runner.run()

        # don't bother with output; already tested this above
        self.assertTrue(self.check_call.called)

        # call_args is (positional args, keyword args) of the last call
        call_kwargs = self.check_call.call_args[1]
        env = call_kwargs['env']

        self.assertEqual(env['LC_ALL'], 'C')
        for tmp_var in ('TMP', 'TMPDIR'):
            self.assertEqual(env[tmp_var], runner._get_local_tmp_dir())
        self.assertNotIn('TEMP', env)  # this was for Windows sort
def test_custom_sort_bin(self):
    """``--sort-bin 'sort -r'`` is split into ``['sort', '-r']`` as the
    start of the command handed to ``check_call``, and the values for
    key ``'b'`` come out in the order asserted below.
    """
    expected = [
        ('a', ['apples']),
        ('b', ['buffaloes', 'bicycles', 'bears', 'babies']),
    ]

    mr_job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
    mr_job.sandbox(
        stdin=BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles'))

    with mr_job.make_runner() as runner:
        runner.run()

        grouped = sorted(mr_job.parse_output(runner.cat_output()))
        self.assertEqual(grouped, expected)

        self.assertTrue(self.check_call.called)

        # first positional argument of the last check_call() invocation
        positional = self.check_call.call_args[0]
        sort_args = positional[0]
        self.assertEqual(sort_args[:2], ['sort', '-r'])
def test_default_sort_bin(self):
    """Without ``--sort-bin``, a local run calls ``check_call`` with the
    default sort command prefix and does not sort in memory.

    NOTE(review): ``self.check_call`` and ``self._sort_lines_in_memory``
    are presumably mocks/spies from the test case's setup -- confirm.
    """
    default_sort_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']
    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

    mr_job = MRGroup(['-r', 'local'])
    mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with mr_job.make_runner() as runner:
        runner.run()

        grouped = sorted(mr_job.parse_output(runner.cat_output()))
        self.assertEqual(grouped, expected)

        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        # first positional argument of the last check_call() invocation
        positional = self.check_call.call_args[0]
        sort_args = positional[0]
        self.assertEqual(sort_args[:6], default_sort_prefix)
def test_custom_sort_bin(self):
    """A custom ``--sort-bin`` value is used as the start of the sort
    command passed to ``check_call``.
    """
    job_args = ['-r', 'local', '--sort-bin', 'sort -r']
    grouper = MRGroup(job_args)

    stdin = BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles')
    grouper.sandbox(stdin=stdin)

    with grouper.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        self.assertEqual(
            sorted(grouper.parse_output(raw_output)),
            [('a', ['apples']),
             ('b', ['buffaloes', 'bicycles', 'bears', 'babies'])])

        self.assertTrue(self.check_call.called)

        # call_args[0] holds the positional args of the last call;
        # its first element is the command list
        command = self.check_call.call_args[0][0]
        self.assertEqual(command[:2], ['sort', '-r'])
def _test_environment_variables(self, *args):
    """Verify the environment handed to ``check_call`` via ``env=``.

    Asserts ``LC_ALL == 'C'``, that ``TMP`` and ``TMPDIR`` both match
    ``runner._get_local_tmp_dir()``, and that ``TEMP`` is not set.
    Extra positional ``args`` are ignored.
    """
    grouper = MRGroup(['-r', 'local'])
    stdin = BytesIO(b'apples\nbuffaloes\nbears')
    grouper.sandbox(stdin=stdin)

    with grouper.make_runner() as runner:
        runner.run()

        # don't bother with output; already tested this above
        self.assertTrue(self.check_call.called)

        # keyword arguments of the most recent check_call() invocation
        sort_env = self.check_call.call_args[1]['env']

        self.assertEqual(sort_env['LC_ALL'], 'C')
        self.assertEqual(sort_env['TMP'], runner._get_local_tmp_dir())
        self.assertEqual(sort_env['TMPDIR'], runner._get_local_tmp_dir())
        # this was for Windows sort
        self.assertNotIn('TEMP', sort_env)
def test_empty_sort_bin_means_default(self):
    """An empty ``--sort-bin`` value behaves like the default: the
    default sort command prefix is passed to ``check_call`` and no
    in-memory sort happens.
    """
    default_sort_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']
    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

    mr_job = MRGroup(['-r', 'local', '--sort-bin', ''])
    mr_job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with mr_job.make_runner() as runner:
        runner.run()

        grouped = sorted(mr_job.parse_output(runner.cat_output()))
        self.assertEqual(grouped, expected)

        self.assertTrue(self.check_call.called)
        self.assertFalse(self._sort_lines_in_memory.called)

        # first positional argument of the last check_call() invocation
        positional = self.check_call.call_args[0]
        sort_args = positional[0]
        self.assertEqual(sort_args[:6], default_sort_prefix)