def steps(self):
    """Return the job's two steps.

    The first step wires up this job's mapper, combiner and reducer; the
    second is a reducer-only step that runs ``reducer_top5``.
    """
    first_step = MRJobStep(
        mapper=self.mapper,
        combiner=self.combiner,
        reducer=self.reducer,
    )
    top5_step = MRJobStep(reducer=self.reducer_top5)
    return [first_step, top5_step]
def steps(self):
    """Return the job's two steps.

    The first step uses ``mapper1``/``combiner1``/``reducer1``; the second
    has no mapper and pairs ``reducer2_init`` with ``reducer2``.
    """
    first_step = MRJobStep(
        mapper=self.mapper1,
        combiner=self.combiner1,
        reducer=self.reducer1,
    )
    second_step = MRJobStep(
        reducer_init=self.reducer2_init,
        reducer=self.reducer2,
    )
    return [first_step, second_step]
def test_render_jobconf(self):
    # A jobconf dict given to the step is expected to appear as-is under
    # the 'jobconf' key of description(0), next to the script mapper.
    rendered = MRJobStep(
        mapper=identity_mapper,
        jobconf={'dfs.block.size': '134217728'},
    ).description(0)

    expected = {
        'type': 'streaming',
        'mapper': {
            'type': 'script',
        },
        'jobconf': {
            'dfs.block.size': '134217728',
        },
    }
    self.assertEqual(rendered, expected)
def steps(self):
    """Return a single step using this job's mapper_init, mapper,
    combiner and reducer."""
    only_step = MRJobStep(
        mapper_init=self.mapper_init,
        mapper=self.mapper,
        combiner=self.combiner,
        reducer=self.reducer,
    )
    return [only_step]
def test_render_reducer_first_mapper_not_implied(self):
    # description(1) of a reducer-only step is expected to contain a
    # script reducer and no 'mapper' entry.
    rendered = MRJobStep(reducer=identity_reducer).description(1)
    expected = {
        'type': 'streaming',
        'reducer': {
            'type': 'script',
        },
    }
    self.assertEqual(rendered, expected)
def test_render_mapper(self):
    # description(0) of a mapper-only step is expected to contain just a
    # script mapper.
    rendered = MRJobStep(mapper=identity_mapper).description(0)
    expected = {
        'type': 'streaming',
        'mapper': {
            'type': 'script',
        },
    }
    self.assertEqual(rendered, expected)
def test_steps(self):
    # SteppyJob is expected to define a streaming step followed by a
    # jar step; each is compared against an equivalent step built here.
    job = self.SteppyJob(['--no-conf'])

    actual_streaming = job.steps()[0]
    expected_streaming = MRJobStep(
        mapper_init=job._yield_none,
        mapper_pre_filter='cat',
        reducer_cmd='wc -l',
    )
    self.assertEqual(actual_streaming, expected_streaming)

    # steps() is deliberately called again rather than reusing the
    # earlier result.
    actual_jar = job.steps()[1]
    expected_jar = JarStep('oh my jar', 's3://bookat/binks_jar.jar')
    self.assertEqual(actual_jar, expected_jar)
def test_render_reducer_cmd_first_mapper_not_implied(self):
    # description(1) of a step with only reducer_cmd is expected to
    # contain a command reducer and no 'mapper' entry.
    rendered = MRJobStep(reducer_cmd='cat').description(1)
    expected = {
        'type': 'streaming',
        'reducer': {
            'type': 'command',
            'command': 'cat',
        },
    }
    self.assertEqual(rendered, expected)
def test_render_mapper_cmd(self):
    # description(0) of a step with only mapper_cmd is expected to
    # contain a command mapper carrying that command string.
    rendered = MRJobStep(mapper_cmd='cat').description(0)
    expected = {
        'type': 'streaming',
        'mapper': {
            'type': 'command',
            'command': 'cat',
        },
    }
    self.assertEqual(rendered, expected)
def test_render_reducer_pre_filter(self):
    # A reducer_pre_filter is expected to be rendered as the
    # 'pre_filter' entry of the script reducer in description(1).
    rendered = MRJobStep(
        reducer=identity_reducer,
        reducer_pre_filter='cat',
    ).description(1)
    expected = {
        'type': 'streaming',
        'reducer': {
            'type': 'script',
            'pre_filter': 'cat',
        },
    }
    self.assertEqual(rendered, expected)
def test_render_combiner(self):
    # description(1) of a combiner-only step is expected to list a
    # script mapper alongside the script combiner.
    rendered = MRJobStep(combiner=identity_reducer).description(1)
    expected = {
        'type': 'streaming',
        'mapper': {
            'type': 'script',
        },
        'combiner': {
            'type': 'script',
        },
    }
    self.assertEqual(rendered, expected)
def test_can_override_jobconf_method(self):
    # regression test for #656
    job = self.SingleStepJobConfMethodJob(['--no-conf'])

    # overriding jobconf() should affect job_runner_kwargs()
    # but not step definitions
    runner_jobconf = job.job_runner_kwargs()['jobconf']
    self.assertEqual(runner_jobconf, {'mapred.baz': 'bar'})

    first_step = job.steps()[0]
    self.assertEqual(first_step, MRJobStep(mapper=job.mapper))
def _test_explicit(self, m=False, c=False, r=False, **kwargs):
    """Build an MRJobStep from ``kwargs`` and check its explicit flags.

    ``m``, ``c`` and ``r`` are the expected values of
    ``has_explicit_mapper``, ``has_explicit_combiner`` and
    ``has_explicit_reducer`` respectively.
    """
    step = MRJobStep(**kwargs)
    # Checked in mapper, combiner, reducer order; the first mismatch
    # raises and stops the remaining checks.
    expectations = (
        ('has_explicit_mapper', m),
        ('has_explicit_combiner', c),
        ('has_explicit_reducer', r),
    )
    for attr_name, wanted in expectations:
        self.assertEqual(getattr(step, attr_name), wanted)
def test_get_regular_mapper(self):
    # this is the normal behavior: looking up 'mapper' on the step
    # gives back the mapper it was constructed with
    step = MRJobStep(mapper=identity_mapper)
    self.assertEqual(step['mapper'], identity_mapper)
def test_get_identity_mapper(self):
    # this is the weird behavior: with only mapper_final supplied,
    # looking up 'mapper' is expected to give _IDENTITY_MAPPER
    step = MRJobStep(mapper_final=identity_mapper)
    self.assertEqual(step['mapper'], _IDENTITY_MAPPER)
def _streaming_step(self, n, *args, **kwargs):
    """Construct an MRJobStep from the given positional and keyword
    arguments and return the result of its ``description(n)``."""
    step = MRJobStep(*args, **kwargs)
    return step.description(n)
def steps(self):
    """Return a single step made of this job's mapper_init and mapper."""
    only_step = MRJobStep(
        mapper_init=self.mapper_init,
        mapper=self.mapper,
    )
    return [only_step]
def steps(self):
    """Return a single map-only step using this job's mapper_init,
    mapper and mapper_final."""
    only_step = MRJobStep(
        mapper_init=self.mapper_init,
        mapper=self.mapper,
        mapper_final=self.mapper_final,
    )
    return [only_step]