def test_spark_mr_job(self):
    job = MRNullSpark()
    job.sandbox()

    with job.make_runner() as runner:
        self.assertEqual(
            runner._spark_script_args(0),
            ['--step-num=0', '--spark',
             '<step 0 input>', '<step 0 output>'])

def test_spark_passthrough_arg(self):
    job = MRNullSpark(['--extra-spark-arg=--verbose'])
    job.sandbox()

    with job.make_runner() as runner:
        self.assertEqual(
            runner._spark_script_args(0),
            ['--step-num=0', '--spark',
             '--extra-spark-arg=--verbose',
             '<step 0 input>', '<step 0 output>'])
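
# For reference, a minimal sketch of how a helper job like MRNullSpark
# might declare the passthrough arg exercised above (an assumption; the
# real MRNullSpark lives in the test support modules):
#
#     class MRNullSpark(MRJob):
#         def configure_args(self):
#             super(MRNullSpark, self).configure_args()
#             # passthrough args are forwarded verbatim to the job script
#             self.add_passthru_arg('--extra-spark-arg')
#
#         def spark(self, input_path, output_path):
#             pass  # no-op Spark step
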
def test_dont_upload_mrjob_zip(self):
    job = MRNullSpark(['-r', 'spark', '--spark-master', 'yarn'])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

        self.assertTrue(exists(runner._mrjob_zip_path))

        self.assertNotIn(runner._mrjob_zip_path,
                         runner._upload_mgr.path_to_uri())

        self.assertIn(runner._mrjob_zip_path,
                      runner._spark_submit_args(0))
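
# The three assertions above encode the expectation that mrjob.zip is
# built locally and handed to spark-submit via --py-files, rather than
# being uploaded through the runner's upload manager. Roughly (assumed
# shape of the resulting command line):
#
#     spark-submit --master yarn --py-files <tmp>/mrjob.zip ... mr_null_spark.py ...
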
def test_eggs(self):
    egg1_path = self.makefile('dragon.egg')
    egg2_path = self.makefile('horton.egg')

    job = MRNullSpark([
        '-r', 'spark',
        '--py-files', '%s,%s' % (egg1_path, egg2_path)])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

        py_files_arg = '%s,%s,%s' % (
            egg1_path, egg2_path, runner._mrjob_zip_path)
        self.assertIn(py_files_arg, runner._spark_submit_args(0))
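
# The expected value of py_files_arg implies spark-submit sees the
# user's eggs first, with mrjob.zip appended last, e.g. (a sketch with
# hypothetical temp paths):
#
#     spark-submit --py-files /tmp/dragon.egg,/tmp/horton.egg,/tmp/mrjob.zip ...
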
def test_spark_file_arg(self):
    foo_path = self.makefile('foo')

    job = MRNullSpark(['--extra-file', foo_path])
    job.sandbox()

    with job.make_runner() as runner:
        self.assertEqual(
            runner._spark_script_args(0),
            ['--step-num=0', '--spark',
             '--extra-file', 'foo',
             '<step 0 input>', '<step 0 output>'])

        name_to_path = runner._working_dir_mgr.name_to_path('file')
        self.assertIn('foo', name_to_path)
        self.assertEqual(name_to_path['foo'], foo_path)
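
# A sketch of how --extra-file might be declared on MRNullSpark (an
# assumption; add_file_arg() is the mrjob API for file args):
#
#     def configure_args(self):
#         super(MRNullSpark, self).configure_args()
#         self.add_file_arg('--extra-file')
#
# A file arg's local path gets registered with the working dir manager
# and rewritten to its basename ('foo'), which is what the assertions
# above check.
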
def test_no_spark_steps(self):
    # just a sanity check; _STEP_TYPES is tested in a lot of ways
    job = MRNullSpark(['-r', 'local'])
    job.sandbox()

    self.assertRaises(NotImplementedError, job.make_runner)
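
# Presumably the local runner's _STEP_TYPES doesn't include 'spark', so
# runner construction fails fast, e.g. (assumed logic, not the actual
# implementation):
#
#     if step_type not in runner._STEP_TYPES:
#         raise NotImplementedError(
#             'step type %r not supported by this runner' % step_type)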