def test_no_jar_steps(self):
    """The local runner can't run jar steps; building it should fail."""
    jar_path = self.makefile('dora.jar')

    job = MRJustAJar(['-r', 'local', '--jar', jar_path])
    job.sandbox()

    # make_runner() itself raises, before any step runs
    with self.assertRaises(NotImplementedError):
        job.make_runner()
def test_local_jar(self):
    """A local jar path should produce exactly one `hadoop jar` call."""
    fake_jar = os.path.join(self.tmp_dir, "fake.jar")
    # the jar just has to exist; contents don't matter to the mock
    open(fake_jar, "w").close()

    job = MRJustAJar(["-r", "hadoop", "--jar", fake_jar])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

    # the mock hadoop binary logs each invocation as one line
    with open(os.environ["MOCK_HADOOP_LOG"]) as hadoop_log:
        jar_invocations = [
            log_line for log_line in hadoop_log
            if log_line.startswith("jar ")]

    self.assertEqual(len(jar_invocations), 1)
    self.assertEqual(jar_invocations[0].rstrip(), "jar " + fake_jar)
def test_local_jar(self):
    """Running a jar step locally should log a single `hadoop jar` line."""
    fake_jar = os.path.join(self.tmp_dir, 'fake.jar')
    # create an empty placeholder jar
    open(fake_jar, 'w').close()

    job = MRJustAJar(['-r', 'hadoop', '--jar', fake_jar])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

    expected = 'jar ' + fake_jar

    with open(os.environ['MOCK_HADOOP_LOG']) as log_fh:
        matching = [entry for entry in log_fh if entry.startswith('jar ')]

    self.assertEqual(len(matching), 1)
    self.assertEqual(matching[0].rstrip(), expected)
def test_local_jar(self):
    """The hadoop runner should call `hadoop jar <path>` exactly once."""
    fake_jar = os.path.join(self.tmp_dir, 'fake.jar')
    # an empty file is enough for the mock hadoop binary
    open(fake_jar, 'w').close()

    job = MRJustAJar(['-r', 'hadoop', '--jar', fake_jar])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

    # pick out the `jar` subcommand invocations from everything
    # the mock hadoop binary recorded
    jar_invocations = [
        cmd_args for cmd_args in get_mock_hadoop_cmd_args()
        if cmd_args and cmd_args[0] == 'jar']

    self.assertEqual(len(jar_invocations), 1)
    self.assertEqual(jar_invocations[0], ['jar', fake_jar])
def test_jar_step_without_mr_job_script(self):
    """A runner built from a raw steps description (no MRJob script)
    should be able to run a jar step.

    cleanup() is guaranteed via try/finally so a failed run() doesn't
    leak the runner's temporary resources.
    """
    jar_path = self.makefile('dora.jar')

    # build the steps description without ever writing a job script
    steps = MRJustAJar(['--jar', jar_path])._steps_desc()

    runner = EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack'))
    try:
        runner.run()
    finally:
        # original code skipped cleanup when run() raised
        runner.cleanup()
def test_hdfs_jar_uri(self):
    """An hdfs:// jar URI is passed straight through to `hadoop jar`."""
    # this could change, but for now, we pass URIs straight through
    mock_hdfs_jar = os.path.join(os.environ["MOCK_HDFS_ROOT"], "fake.jar")
    open(mock_hdfs_jar, "w").close()

    jar_uri = "hdfs:///fake.jar"

    job = MRJustAJar(["-r", "hadoop", "--jar", jar_uri])
    job.sandbox()

    with job.make_runner() as runner:
        with logger_disabled("mrjob.hadoop"):
            # `hadoop jar` doesn't actually accept URIs
            self.assertRaises(CalledProcessError, runner.run)

    with open(os.environ["MOCK_HADOOP_LOG"]) as log_fh:
        jar_invocations = [
            log_line for log_line in log_fh
            if log_line.startswith("jar ")]

    self.assertEqual(len(jar_invocations), 1)
    self.assertEqual(jar_invocations[0].rstrip(), "jar " + jar_uri)
def test_hdfs_jar_uri(self):
    """HDFS jar URIs are handed to `hadoop jar` unmodified."""
    # this could change, but for now, we pass URIs straight through
    mock_hdfs_jar = os.path.join(os.environ['MOCK_HDFS_ROOT'], 'fake.jar')
    open(mock_hdfs_jar, 'w').close()

    jar_uri = 'hdfs:///fake.jar'
    expected = 'jar ' + jar_uri

    job = MRJustAJar(['-r', 'hadoop', '--jar', jar_uri])
    job.sandbox()

    with job.make_runner() as runner:
        with logger_disabled('mrjob.hadoop'):
            # `hadoop jar` doesn't actually accept URIs
            self.assertRaises(CalledProcessError, runner.run)

    with open(os.environ['MOCK_HADOOP_LOG']) as hadoop_log:
        matching = [entry for entry in hadoop_log
                    if entry.startswith('jar ')]

    self.assertEqual(len(matching), 1)
    self.assertEqual(matching[0].rstrip(), expected)
def test_hdfs_jar_uri(self):
    """The runner passes an hdfs:// jar URI through; `hadoop jar` fails."""
    # this could change, but for now, we pass URIs straight through
    mock_hdfs_jar = os.path.join(os.environ['MOCK_HDFS_ROOT'], 'fake.jar')
    open(mock_hdfs_jar, 'w').close()

    jar_uri = 'hdfs:///fake.jar'

    job = MRJustAJar(['-r', 'hadoop', '--jar', jar_uri])
    job.sandbox()

    with job.make_runner() as runner:
        with logger_disabled('mrjob.hadoop'):
            # `hadoop jar` doesn't actually accept URIs
            self.assertRaises(CalledProcessError, runner.run)

    # even though the step failed, the invocation should be logged once
    with open(os.environ['MOCK_HADOOP_LOG']) as log_fh:
        jar_log_entries = [
            log_line for log_line in log_fh
            if log_line.startswith('jar ')]

    self.assertEqual(len(jar_log_entries), 1)
    self.assertEqual(jar_log_entries[0].rstrip(), 'jar ' + jar_uri)
def test_hdfs_jar_uri(self):
    """An hdfs:// jar URI is forwarded verbatim, causing the step to fail."""
    # this could change, but for now, we pass URIs straight through
    mock_hdfs_jar = os.path.join(get_mock_hdfs_root(), 'fake.jar')
    open(mock_hdfs_jar, 'w').close()

    jar_uri = 'hdfs:///fake.jar'

    job = MRJustAJar(['-r', 'hadoop', '--jar', jar_uri])
    job.sandbox()

    with job.make_runner() as runner:
        with logger_disabled('mrjob.hadoop'):
            # `hadoop jar` doesn't actually accept URIs
            self.assertRaises(StepFailedException, runner.run)

    # the mock hadoop binary should still have recorded one jar call
    jar_invocations = [
        cmd_args for cmd_args in get_mock_hadoop_cmd_args()
        if cmd_args and cmd_args[0] == 'jar']

    self.assertEqual(len(jar_invocations), 1)
    self.assertEqual(jar_invocations[0], ['jar', jar_uri])