def test_hadoop_output_format(self):
    """-outputformat is emitted only for a job's final step."""
    # renamed from 'format' to avoid shadowing the builtin
    output_format = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_output_format=output_format)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ["-outputformat", output_format])
    # test multi-step job: no -outputformat on the first of two steps
    self.assertEqual(runner._hadoop_conf_args({}, 0, 2), [])
    self.assertEqual(runner._hadoop_conf_args({}, 1, 2),
                     ["-outputformat", output_format])
def test_jobconf(self):
    """jobconf entries become -D KEY=VALUE switches in sorted key order;
    Hadoop 0.18 uses -jobconf instead of -D."""
    jobconf = {"FOO": "bar", "BAZ": "qux", "BAX": "Arnold"}

    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ["-D", "BAX=Arnold", "-D", "BAZ=qux", "-D", "FOO=bar"])

    runner = LocalMRJobRunner(
        conf_paths=[], jobconf=jobconf, hadoop_version="0.18")
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ["-jobconf", "BAX=Arnold", "-jobconf", "BAZ=qux",
         "-jobconf", "FOO=bar"])
def test_hadoop_output_format(self):
    """-outputformat is emitted only for a job's final step (old API)."""
    # renamed from 'format' to avoid shadowing the builtin
    output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
    runner = LocalMRJobRunner(conf_path=False,
                              hadoop_output_format=output_format)
    assert_equal(runner._hadoop_conf_args(0, 1),
                 ['-outputformat', output_format])
    # test multi-step job: no -outputformat on the first of two steps
    assert_equal(runner._hadoop_conf_args(0, 2), [])
    assert_equal(runner._hadoop_conf_args(1, 2),
                 ['-outputformat', output_format])
def test_hadoop_input_format(self):
    """-inputformat is emitted only for a job's first step."""
    # renamed from 'format' to avoid shadowing the builtin
    input_format = 'org.apache.hadoop.mapred.SequenceFileInputFormat'
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_input_format=input_format)
    self.assertEqual(runner._hadoop_conf_args(0, 1),
                     ['-inputformat', input_format])
    # test multi-step job: -inputformat only on the first of two steps
    self.assertEqual(runner._hadoop_conf_args(0, 2),
                     ['-inputformat', input_format])
    self.assertEqual(runner._hadoop_conf_args(1, 2), [])
def test_hadoop_output_format(self):
    """-outputformat is emitted only for a job's final step."""
    # renamed from 'format' to avoid shadowing the builtin
    output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_output_format=output_format)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-outputformat', output_format])
    # test multi-step job: no -outputformat on the first of two steps
    self.assertEqual(runner._hadoop_conf_args({}, 0, 2), [])
    self.assertEqual(runner._hadoop_conf_args({}, 1, 2),
                     ['-outputformat', output_format])
def test_hadoop_output_format(self):
    """-outputformat is emitted only for the final step, after the
    auto-generated mapred.job.name switch."""
    # renamed from 'format' to avoid shadowing the builtin
    output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_output_format=output_format)
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-D', 'mapred.job.name=None > None',
         '-outputformat', output_format])
    # test multi-step job: no -outputformat on the first of two steps
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 2),
        ['-D', 'mapred.job.name=None > None (step 1 of 2)'])
    self.assertEqual(
        runner._hadoop_conf_args({}, 1, 2),
        ['-D', 'mapred.job.name=None > None (step 2 of 2)',
         '-outputformat', output_format])
def test_jobconf(self):
    """jobconf entries become -D switches (sorted by key); version 0.18
    gets -jobconf switches instead."""
    jobconf = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}

    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    expected = ['-D', 'BAX=Arnold', '-D', 'BAZ=qux', '-D', 'FOO=bar']
    self.assertEqual(runner._hadoop_conf_args(0, 1), expected)

    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                              hadoop_version='0.18')
    expected = ['-jobconf', 'BAX=Arnold', '-jobconf', 'BAZ=qux',
                '-jobconf', 'FOO=bar']
    self.assertEqual(runner._hadoop_conf_args(0, 1), expected)
def test_jobconf(self):
    """jobconf entries become -D switches (sorted by key); version 0.18
    gets -jobconf switches instead."""
    jobconf = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}

    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    expected = ['-D', 'BAX=Arnold', '-D', 'BAZ=qux', '-D', 'FOO=bar']
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), expected)

    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                              hadoop_version='0.18')
    expected = ['-jobconf', 'BAX=Arnold', '-jobconf', 'BAZ=qux',
                '-jobconf', 'FOO=bar']
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), expected)
def test_empty_jobconf_values(self):
    """A jobconf value of None is dropped; an empty string is kept."""
    # value of None means to omit that jobconf
    jobconf = {'foo': '', 'bar': None}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-D', 'foo='])
def test_cmdenv(self):
    """cmdenv entries become -cmdenv KEY=VALUE switches, sorted by key."""
    cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
    runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-cmdenv', 'BAX=Arnold',
         '-cmdenv', 'BAZ=qux',
         '-cmdenv', 'FOO=bar'])
def test_cmdenv(self):
    """cmdenv entries become -cmdenv KEY=VALUE switches, sorted by key."""
    cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
    runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
    self.assertEqual(
        runner._hadoop_conf_args(0, 1),
        ['-cmdenv', 'BAX=Arnold',
         '-cmdenv', 'BAZ=qux',
         '-cmdenv', 'FOO=bar'])
def test_partitioner(self):
    """-partitioner follows the auto-generated mapred.job.name switch."""
    partitioner = 'org.apache.hadoop.mapreduce.Partitioner'
    runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-D', 'mapred.job.name=None > None',
         '-partitioner', partitioner])
def test_jobconf_job_name_custom(self):
    """An explicit mapred.job.name in jobconf is passed through as-is."""
    jobconf = {'BAX': 'Arnold', 'mapred.job.name': 'Foo'}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                              hadoop_version='0.18')
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-jobconf', 'BAX=Arnold',
         '-jobconf', 'mapred.job.name=Foo'])
def test_jobconf_from_step(self):
    """Per-step jobconf entries are merged over runner-level jobconf
    (the step's 'quux' wins over the runner's 'qux' for BAZ)."""
    jobconf = {'FOO': 'bar', 'BAZ': 'qux'}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    step = {'jobconf': {'BAZ': 'quux', 'BAX': 'Arnold'}}
    self.assertEqual(
        runner._hadoop_conf_args(step, 0, 1),
        ['-D', 'BAX=Arnold', '-D', 'BAZ=quux', '-D', 'FOO=bar'])
def test_configuration_translation(self):
    """A jobconf key is also emitted under its equivalent name for the
    configured Hadoop version (0.21 here)."""
    jobconf = {'mapred.jobtracker.maxtasks.per.job': 1}
    # suppress mrjob.compat's warning output during the test
    with no_handlers_for_logger('mrjob.compat'):
        runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                                  hadoop_version='0.21')
        self.assertEqual(
            runner._hadoop_conf_args({}, 0, 1),
            ['-D', 'mapred.jobtracker.maxtasks.per.job=1',
             '-D', 'mapreduce.jobtracker.maxtasks.perjob=1'])
def test_hadoop_extra_args_comes_first(self):
    """hadoop_extra_args must precede every other generated switch."""
    runner = LocalMRJobRunner(
        cmdenv={"FOO": "bar"},
        conf_paths=[],
        hadoop_extra_args=["-libjar", "qux.jar"],
        hadoop_input_format="FooInputFormat",
        hadoop_output_format="BarOutputFormat",
        jobconf={"baz": "quz"},
        partitioner="java.lang.Object",
    )
    # hadoop_extra_args should come first
    conf_args = runner._hadoop_conf_args({}, 0, 1)
    self.assertEqual(conf_args[:2], ["-libjar", "qux.jar"])
    # every option above contributes: 12 switch tokens total
    self.assertEqual(len(conf_args), 12)
def test_hadoop_extra_args_comes_first(self):
    """hadoop_extra_args must precede every other generated switch."""
    runner = LocalMRJobRunner(
        cmdenv={'FOO': 'bar'},
        conf_paths=[],
        hadoop_extra_args=['-libjar', 'qux.jar'],
        hadoop_input_format='FooInputFormat',
        hadoop_output_format='BarOutputFormat',
        jobconf={'baz': 'quz'},
        partitioner='java.lang.Object',
    )
    # hadoop_extra_args should come first
    conf_args = runner._hadoop_conf_args({}, 0, 1)
    self.assertEqual(conf_args[:2], ['-libjar', 'qux.jar'])
    # every option above contributes: 14 switch tokens total
    self.assertEqual(len(conf_args), 14)
def test_hadoop_extra_args_comes_first(self):
    """hadoop_extra_args must precede every other generated switch."""
    runner = LocalMRJobRunner(
        cmdenv={'FOO': 'bar'},
        conf_paths=[],
        hadoop_extra_args=['-libjar', 'qux.jar'],
        hadoop_input_format='FooInputFormat',
        hadoop_output_format='BarOutputFormat',
        jobconf={'baz': 'quz'},
        partitioner='java.lang.Object',
    )
    # hadoop_extra_args should come first
    conf_args = runner._hadoop_conf_args({}, 0, 1)
    self.assertEqual(conf_args[:2], ['-libjar', 'qux.jar'])
    # every option above contributes: 12 switch tokens total
    self.assertEqual(len(conf_args), 12)
def test_hadoop_extra_args(self):
    """hadoop_extra_args are passed straight through (old API)."""
    extra_args = ['-foo', 'bar']
    runner = LocalMRJobRunner(conf_path=False,
                              hadoop_extra_args=extra_args)
    assert_equal(runner._hadoop_conf_args(0, 1), extra_args)
def test_jobconf_job_name_default(self):
    """With no options, only the default mapred.job.name is emitted
    (as -jobconf, since hadoop_version is 0.18)."""
    runner = LocalMRJobRunner(conf_paths=[], hadoop_version='0.18')
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-jobconf', 'mapred.job.name=None > None'])
def test_partitioner(self):
    """The partitioner option becomes a -partitioner switch (old API)."""
    partitioner = 'org.apache.hadoop.mapreduce.Partitioner'
    runner = LocalMRJobRunner(conf_path=False, partitioner=partitioner)
    assert_equal(runner._hadoop_conf_args(0, 1),
                 ['-partitioner', partitioner])
def test_hadoop_extra_args(self):
    """Extra args come first, followed by the default job name switch."""
    extra_args = ['-foo', 'bar']
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_extra_args=extra_args)
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        extra_args + ['-D', 'mapred.job.name=None > None'])
def test_empty(self):
    """No options at all still yields the default job name switch."""
    runner = LocalMRJobRunner(conf_paths=[])
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ['-D', 'mapred.job.name=None > None'])
def test_partitioner(self):
    """The partitioner option becomes a -partitioner switch."""
    partitioner = 'org.apache.hadoop.mapreduce.Partitioner'
    runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
    self.assertEqual(runner._hadoop_conf_args(0, 1),
                     ['-partitioner', partitioner])
def test_empty(self):
    """With no options configured, no switches are generated."""
    runner = LocalMRJobRunner(conf_paths=[])
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), [])
def test_cmdenv(self):
    """cmdenv entries become -cmdenv KEY=VALUE switches, sorted by key."""
    cmdenv = {"FOO": "bar", "BAZ": "qux", "BAX": "Arnold"}
    runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
    expected = ["-cmdenv", "BAX=Arnold",
                "-cmdenv", "BAZ=qux",
                "-cmdenv", "FOO=bar"]
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), expected)
def test_jobconf_from_step(self):
    """Per-step jobconf entries are merged over runner-level jobconf
    (the step's 'quux' wins over the runner's 'qux' for BAZ)."""
    jobconf = {"FOO": "bar", "BAZ": "qux"}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    step = {"jobconf": {"BAZ": "quux", "BAX": "Arnold"}}
    self.assertEqual(
        runner._hadoop_conf_args(step, 0, 1),
        ["-D", "BAX=Arnold", "-D", "BAZ=quux", "-D", "FOO=bar"])
def test_hadoop_extra_args(self):
    """hadoop_extra_args are passed straight through."""
    extra_args = ['-foo', 'bar']
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_extra_args=extra_args)
    self.assertEqual(runner._hadoop_conf_args(0, 1), extra_args)
def test_empty(self):
    """With no options configured, no switches are generated."""
    runner = LocalMRJobRunner(conf_paths=[])
    self.assertEqual(runner._hadoop_conf_args(0, 1), [])
def test_partitioner(self):
    """The partitioner option becomes a -partitioner switch."""
    partitioner = 'org.apache.hadoop.mapreduce.Partitioner'
    runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-partitioner', partitioner])
def test_empty(self):
    """With no options configured, no switches are generated (old API)."""
    runner = LocalMRJobRunner(conf_path=False)
    assert_equal(runner._hadoop_conf_args(0, 1), [])
def test_hadoop_extra_args(self):
    """hadoop_extra_args are passed straight through."""
    extra_args = ["-foo", "bar"]
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_extra_args=extra_args)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), extra_args)
def test_hadoop_extra_args(self):
    """hadoop_extra_args are passed straight through."""
    extra_args = ['-foo', 'bar']
    runner = LocalMRJobRunner(conf_paths=[],
                              hadoop_extra_args=extra_args)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1), extra_args)