def test_input_output_interpolation(self):
    fake_jar = os.path.join(self.tmp_dir, 'fake.jar')
    open(fake_jar, 'w').close()
    input1 = os.path.join(self.tmp_dir, 'input1')
    open(input1, 'w').close()
    input2 = os.path.join(self.tmp_dir, 'input2')
    open(input2, 'w').close()

    job = MRJarAndStreaming(
        ['-r', 'hadoop', '--jar', fake_jar, input1, input2])
    job.sandbox()

    add_mock_hadoop_output([b''])  # need this for streaming step

    with job.make_runner() as runner:
        runner.run()

        hadoop_cmd_args = get_mock_hadoop_cmd_args()

        hadoop_jar_cmd_args = [
            args for args in hadoop_cmd_args
            if args and args[0] == 'jar']

        self.assertEqual(len(hadoop_jar_cmd_args), 2)

        jar_args, streaming_args = hadoop_jar_cmd_args

        self.assertEqual(len(jar_args), 5)
        self.assertEqual(jar_args[0], 'jar')
        self.assertEqual(jar_args[1], fake_jar)
        self.assertEqual(jar_args[2], 'stuff')

        # check input is interpolated
        input_arg = ','.join(
            runner._upload_mgr.uri(path) for path in (input1, input2))
        self.assertEqual(jar_args[3], input_arg)

        # check output of jar is input of next step
        jar_output_arg = jar_args[4]
        streaming_input_arg = streaming_args[
            streaming_args.index('-input') + 1]
        self.assertEqual(jar_output_arg, streaming_input_arg)
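
# A minimal sketch of the kind of two-step job the test above exercises,
# inferred from its assertions: a JarStep whose args are
# ['stuff', INPUT, OUTPUT], followed by an ordinary streaming step. The real
# MRJarAndStreaming is defined elsewhere in the test suite; this hypothetical
# stand-in (note the different name) only illustrates the expected step
# layout, using mrjob 0.6+ spellings (configure_args / add_passthru_arg).
from mrjob.job import MRJob
from mrjob.step import INPUT, OUTPUT, JarStep, MRStep


def _identity_mapper(key, line):
    yield key, line


class _MRJarAndStreamingSketch(MRJob):

    def configure_args(self):
        super(_MRJarAndStreamingSketch, self).configure_args()
        self.add_passthru_arg('--jar')

    def steps(self):
        return [
            # the runner replaces INPUT with the comma-joined URIs of the
            # uploaded input files and OUTPUT with the step's output dir,
            # which is what jar_args[3] and jar_args[4] check above
            JarStep(jar=self.options.jar, args=['stuff', INPUT, OUTPUT]),
            # the streaming step consumes the jar step's output as its -input
            MRStep(mapper=_identity_mapper),
        ]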