def _test_spark_executor_memory(self, conf_value, megs):
    """Check that *conf_value* for spark.executor.memory yields a
    local-cluster master URL sized to *megs* megabytes per worker."""
    job_runner = LocalMRJobRunner(
        jobconf={'spark.executor.memory': conf_value})
    expected_master = 'local-cluster[%d,1,%d]' % (cpu_count(), megs)
    self.assertEqual(job_runner._spark_master(), expected_master)
def test_default_spark_master(self):
    """With no options set, the Spark master defaults to a local-cluster
    with one worker per CPU and 1024 MB each."""
    job_runner = LocalMRJobRunner()
    expected_master = 'local-cluster[%d,1,1024]' % cpu_count()
    self.assertEqual(job_runner._spark_master(), expected_master)
def test_partitioner(self): partitioner = 'org.apache.hadoop.mapreduce.Partitioner' runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner) self.assertEqual(runner._hadoop_conf_args(0, 1), ['-partitioner', partitioner])
def test_num_cores(self):
    """An explicit num_cores overrides the CPU count in the master URL."""
    job_runner = LocalMRJobRunner(num_cores=3)
    self.assertEqual(
        job_runner._spark_master(), 'local-cluster[3,1,1024]')
def test_hadoop_extra_args(self):
    """hadoop_extra_args should be passed through to the conf args."""
    passthrough_args = ['-foo', 'bar']
    job_runner = LocalMRJobRunner(
        conf_paths=[], hadoop_extra_args=passthrough_args)
    self.assertEqual(
        job_runner._hadoop_conf_args(0, 1), passthrough_args)
def test_empty(self):
    """With no relevant options, the conf args list is empty."""
    job_runner = LocalMRJobRunner(conf_paths=[])
    self.assertEqual(job_runner._hadoop_conf_args(0, 1), [])
def test_environment_variables_021(self):
    """On Hadoop 0.21, subprocess env vars use the underscore-style
    (mapreduce_*) naming rather than the dotted 0.18/0.20 style."""
    runner = LocalMRJobRunner(hadoop_version='0.21', conf_paths=[])
    # `as runner` kept: rebinds to whatever __enter__ returns —
    # presumably the runner itself; TODO confirm before simplifying.
    with runner as runner:
        runner._setup_working_dir()
        # assertIn on the dict directly — membership in a dict is
        # equivalent to membership in .keys(), without the extra call.
        self.assertIn(
            'mapreduce_job_cache_local_archives',
            runner._subprocess_env('mapper', 0, 0))
def test_empty(self):
    """A runner with no script should produce a job name starting with
    'no_script' followed by the current user's name."""
    job_runner = LocalMRJobRunner(conf_path=False)
    name_match = JOB_NAME_RE.match(job_runner.get_job_name())
    self.assertEqual(name_match.group(1), 'no_script')
    self.assertEqual(name_match.group(2), getpass.getuser())
def test_extra_kwargs_in_mrjob_conf_okay(self):
    """Unknown keys in mrjob.conf are ignored (with logging disabled)
    while recognized opts like setup_cmds still load."""
    with logger_disabled('mrjob.runner'), \
            LocalMRJobRunner(conf_path=self.mrjob_conf_path) as runner:
        self.assertEqual(runner._opts['setup_cmds'], ['echo foo'])
        self.assertNotIn('qux', runner._opts)