def test_bad_setup_command(self):
    """Expect a failing ``--setup`` command to stop later setup commands.

    Three setup commands are given: one touching ``bar``, then ``false``,
    then one touching ``baz`` (both files live under ``self.tmp_dir``).
    Running the job must raise, ``bar`` must exist afterwards and ``baz``
    must not.
    """
    touched_first = os.path.join(self.tmp_dir, "bar")
    touched_last = os.path.join(self.tmp_dir, "baz")

    args = ["-r", "local"]
    args += ["--setup", "touch %s" % touched_first]
    args += ["--setup", "false"]  # always "fails"
    args += ["--setup", "touch %s" % touched_last]
    args.append("--cleanup-on-failure=ALL")

    job = MROSWalkJob(args)
    job.sandbox()

    with job.make_runner() as runner:
        with self.assertRaises(Exception):
            runner.run()

        # the first command ran, the third one never did
        self.assertTrue(os.path.exists(touched_first))
        self.assertFalse(os.path.exists(touched_last))
def test_stdin_bypasses_wrapper_script(self):
    """Run a setup command that reads stdin and check it gets no input.

    The job is sandboxed with ``b'some input\\n'`` on stdin and uses
    ``cat > stdin.txt`` as its setup command. The test expects the job's
    output to report ``./stdin.txt`` with size 0 and to carry
    ``'some input'`` under the key ``None``.

    A 10-second ``SIGALRM`` guards against the setup command blocking on
    stdin; the previous handler is always restored afterwards.
    """
    job = MROSWalkJob([
        '-r', 'local',
        '--setup', 'cat > stdin.txt',
    ])
    job.sandbox(stdin=BytesIO(b'some input\n'))

    # local mode doesn't currently pipe input into stdin
    # (see issue #567), so this test would hang if it failed
    def alarm_handler(*args, **kwargs):
        raise Exception('Setup script stalled on stdin')

    # Install the handler *before* entering the try block: if installing
    # it fails there is nothing to restore, and the finally clause must
    # not mask that error by reading an unset self._old_alarm_handler.
    self._old_alarm_handler = signal.signal(signal.SIGALRM, alarm_handler)
    try:
        signal.alarm(10)

        with job.make_runner() as r:
            r.run()

            path_to_size = dict(job.parse_output_line(line)
                                for line in r.stream_output())

        self.assertEqual(path_to_size.get('./stdin.txt'), 0)
        # input gets passed through by identity mapper
        self.assertEqual(path_to_size.get(None), 'some input')
    finally:
        # cancel any pending alarm, then put the previous handler back
        signal.alarm(0)
        signal.signal(signal.SIGALRM, self._old_alarm_handler)
def test_setup_command(self):
    """Run with ``--setup 'touch bar'`` and expect ``./bar`` in the output.

    The job's output lines are parsed into a path -> size mapping, and
    ``./bar`` must be one of its keys.
    """
    args = ["-r", "local", "--setup", "touch bar"]
    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()

        # collect the parsed (path, size) pairs while the runner is open
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    self.assertIn("./bar", path_to_size)
def test_setup_script(self):
    """Use ``self.foo_sh`` followed by ``#`` as the ``--setup`` command.

    Expects the output to list ``./foo.sh`` with size
    ``self.foo_sh_size`` and to contain ``./foo.sh-made-this``.
    """
    setup_cmd = self.foo_sh + "#"
    job = MROSWalkJob(["-r", "local", "--setup", setup_cmd])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()
        parsed = map(job.parse_output_line, runner.stream_output())
        path_to_size = dict(parsed)

    script_size = path_to_size.get("./foo.sh")
    self.assertEqual(script_size, self.foo_sh_size)
    self.assertIn("./foo.sh-made-this", path_to_size)
def test_file_upload(self):
    """Pass ``self.foo_sh`` via ``--file`` twice, once renamed to bar.sh.

    Expects both ``./foo.sh`` and ``./bar.sh`` to appear in the output
    with size ``self.foo_sh_size``.
    """
    renamed = self.foo_sh + "#bar.sh"
    args = ["-r", "local", "--file", self.foo_sh, "--file", renamed]

    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    # same file under both names, so both sizes must match
    for name in ("./foo.sh", "./bar.sh"):
        self.assertEqual(path_to_size.get(name), self.foo_sh_size)
def test_archive_upload(self):
    """Pass ``self.foo_tar_gz`` via ``--archive`` twice, once as ``#foo``.

    Expects ``foo.py`` to be reported under both ``./foo.tar.gz/`` and
    ``./foo/`` with size ``self.foo_py_size``.
    """
    archive = self.foo_tar_gz
    job = MROSWalkJob(
        ["-r", "local", "--archive", archive, "--archive", archive + "#foo"]
    )
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()
        pairs = [
            job.parse_output_line(line) for line in runner.stream_output()
        ]
        path_to_size = dict(pairs)

    default_name_size = path_to_size.get("./foo.tar.gz/foo.py")
    self.assertEqual(default_name_size, self.foo_py_size)

    renamed_size = path_to_size.get("./foo/foo.py")
    self.assertEqual(renamed_size, self.foo_py_size)
def test_python_archive(self):
    """Put ``self.foo_tar_gz`` on ``PYTHONPATH`` through a setup command.

    Expects ``./foo.tar.gz/foo.py`` to be reported with twice
    ``self.foo_py_size``.
    """
    export_cmd = "export PYTHONPATH=%s#/:$PYTHONPATH" % self.foo_tar_gz

    job = MROSWalkJob(["-r", "local", "--setup", export_cmd])
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    # foo.py should be there, and getsize() should be patched to return
    # double the number of bytes
    expected_size = self.foo_py_size * 2
    self.assertEqual(path_to_size.get("./foo.tar.gz/foo.py"), expected_size)
def test_setup_command(self):
    """Run on the spark runner with ``--setup 'touch bar'``.

    The spark master is ``_LOCAL_CLUSTER_MASTER``. Expects ``./bar`` to
    be a key of the parsed job output.
    """
    args = ['-r', 'spark']
    args += ['--spark-master', _LOCAL_CLUSTER_MASTER]
    args += ['--setup', 'touch bar']

    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()

        # parse the concatenated output into path -> size
        for path, size in job.parse_output(runner.cat_output()):
            path_to_size[path] = size

    self.assertIn('./bar', path_to_size)
def test_setup_command(self):
    """Check that ``--setup 'touch bar'`` makes ``./bar`` show up.

    Runs the job on the local runner and looks for ``./bar`` among the
    keys parsed from the job's output lines.
    """
    job = MROSWalkJob(['-r', 'local', '--setup', 'touch bar'])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()
        parsed_lines = map(job.parse_output_line, runner.stream_output())
        path_to_size = dict(parsed_lines)

    self.assertIn('./bar', path_to_size)
def test_setup_script(self):
    """Run ``self.foo_sh`` (with a trailing ``#``) as the setup command.

    Expects ``./foo.sh`` in the output with size ``self.foo_sh_size``,
    plus an entry for ``./foo.sh-made-this``.
    """
    args = ['-r', 'local']
    args.extend(['--setup', self.foo_sh + '#'])

    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    self.assertEqual(path_to_size.get('./foo.sh'), self.foo_sh_size)
    self.assertIn('./foo.sh-made-this', path_to_size)
def test_file_upload(self):
    """Upload ``self.foo_sh`` with ``--file``, plain and as ``bar.sh``.

    Expects ``./foo.sh`` and ``./bar.sh`` in the output, each with size
    ``self.foo_sh_size``.
    """
    file_args = [
        '--file', self.foo_sh,
        '--file', self.foo_sh + '#bar.sh',
    ]
    job = MROSWalkJob(['-r', 'local'] + file_args)
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()
        path_to_size = dict(
            map(job.parse_output_line, runner.stream_output()))

    foo_size = path_to_size.get('./foo.sh')
    bar_size = path_to_size.get('./bar.sh')

    self.assertEqual(foo_size, self.foo_sh_size)
    self.assertEqual(bar_size, self.foo_sh_size)
def test_deprecated_python_archive_option(self):
    """Pass ``self.foo_tar_gz`` through the ``--python-archive`` switch.

    Expects ``./foo.tar.gz/foo.py`` to be reported with twice
    ``self.foo_py_size``.
    """
    args = ['-r', 'local', '--python-archive', self.foo_tar_gz]
    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    # foo.py should be there, and getsize() should be patched to return
    # double the number of bytes
    doubled = self.foo_py_size * 2
    self.assertEqual(path_to_size.get('./foo.tar.gz/foo.py'), doubled)
def test_wrapper_script_only_writes_to_stderr(self):
    """Expect output echoed by a setup command to stay out of job output.

    The setup command is ``echo stray output``. The ``mrjob.local``
    logger is redirected (with ``debug=True``) to an in-memory stream;
    the text must appear there and must not appear in the job's output.
    """
    args = ["-r", "local", "--setup", "echo stray output"]
    job = MROSWalkJob(args)
    job.sandbox()

    with no_handlers_for_logger("mrjob.local"):
        captured_log = StringIO()
        log_to_stream("mrjob.local", captured_log, debug=True)

        with job.make_runner() as runner:
            runner.run()
            job_output = b"".join(runner.stream_output())

            # stray output should be in stderr, not the job's output
            logged_text = captured_log.getvalue()
            self.assertIn("stray output", logged_text)
            self.assertNotIn(b"stray output", job_output)
def test_archive_upload(self):
    """Upload ``self.foo_tar_gz`` with ``--archive``, plain and as ``foo``.

    Runs with ``--runner=local`` and ``--no-bootstrap-mrjob``. Expects
    ``foo.py`` to be reported under both ``./foo.tar.gz/`` and ``./foo/``
    with size ``self.foo_py_size``.
    """
    args = ['--runner=local', '--no-bootstrap-mrjob']
    args += ['--archive', self.foo_tar_gz]
    args += ['--archive', self.foo_tar_gz + '#foo']

    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    self.assertEqual(
        path_to_size.get('./foo.tar.gz/foo.py'), self.foo_py_size)
    self.assertEqual(
        path_to_size.get('./foo/foo.py'), self.foo_py_size)
def test_bad_setup_command(self):
    """Expect a failing ``--setup`` command to stop later setup commands.

    The setup commands touch ``bar``, run ``false``, then touch ``baz``
    (both paths are under ``self.tmp_dir``). Running must raise, and
    afterwards ``bar`` must exist while ``baz`` must not.
    """
    tmp = self.tmp_dir
    created = os.path.join(tmp, 'bar')
    never_created = os.path.join(tmp, 'baz')

    setup_cmds = [
        'touch %s' % created,
        'false',  # always "fails"
        'touch %s' % never_created,
    ]

    args = ['-r', 'local']
    for cmd in setup_cmds:
        args.extend(['--setup', cmd])

    job = MROSWalkJob(args)
    job.sandbox()

    with job.make_runner() as runner:
        self.assertRaises(Exception, runner.run)

        # first command got run but not third one
        self.assertTrue(os.path.exists(created))
        self.assertFalse(os.path.exists(never_created))
def test_wrapper_script_only_writes_to_stderr(self):
    """Check ``echo stray output`` in setup lands in the log, not output.

    ``mrjob.local`` logging is captured (with ``debug=True``) in a
    ``StringIO``; the echoed text must be found there and must be absent
    from the bytes the job streams out.
    """
    logger_name = 'mrjob.local'

    job = MROSWalkJob(['-r', 'local', '--setup', 'echo stray output'])
    job.sandbox()

    with no_handlers_for_logger(logger_name):
        log_buffer = StringIO()
        log_to_stream(logger_name, log_buffer, debug=True)

        with job.make_runner() as runner:
            runner.run()

            streamed = list(runner.stream_output())
            job_output = b''.join(streamed)

            # stray output should be in stderr, not the job's output
            self.assertIn('stray output', log_buffer.getvalue())
            self.assertNotIn(b'stray output', job_output)
def test_wrapper_script_only_writes_to_stderr(self):
    """Check ``echo stray output`` in setup lands in the log, not output.

    ``mrjob.local`` logging is captured in a ``StringIO`` (no debug flag
    is passed to ``log_to_stream``); the echoed text must be found there
    and must be absent from the bytes the job streams out.
    """
    args = ['-r', 'local']
    args += ['--setup', 'echo stray output']

    job = MROSWalkJob(args)
    job.sandbox()

    with no_handlers_for_logger('mrjob.local'):
        log_sink = StringIO()
        log_to_stream('mrjob.local', log_sink)

        with job.make_runner() as runner:
            runner.run()
            job_output = b''.join(runner.stream_output())

            # stray output should be in stderr, not the job's output
            log_text = log_sink.getvalue()
            self.assertIn('stray output', log_text)
            self.assertNotIn(b'stray output', job_output)
def test_archive_upload(self):
    """Upload ``self.foo_tar_gz`` with ``--archive``, plain and as ``foo``.

    The run itself happens with the ``mrjob.local`` logger's handlers
    removed. Expects ``foo.py`` to be reported under both
    ``./foo.tar.gz/`` and ``./foo/`` with size ``self.foo_py_size``.
    """
    archive = self.foo_tar_gz
    args = ['-r', 'local', '--archive', archive, '--archive', archive + '#foo']

    job = MROSWalkJob(args)
    job.sandbox()

    path_to_size = {}
    with job.make_runner() as runner:
        # only the run is silenced; output is read with logging restored
        with no_handlers_for_logger('mrjob.local'):
            runner.run()

        for line in runner.stream_output():
            path, size = job.parse_output_line(line)
            path_to_size[path] = size

    for unpacked_path in ('./foo.tar.gz/foo.py', './foo/foo.py'):
        self.assertEqual(path_to_size.get(unpacked_path), self.foo_py_size)