Ejemplo n.º 1
0
    def test_hadoop_extra_args_comes_first(self):
        # Values given via --hadoop-arg are expected at the very front of the
        # Hadoop args built for the step.
        extra_args = ['-libjar', 'qux.jar']

        cli_args = ['--cmdenv', 'FOO=bar']
        for extra_arg in extra_args:
            cli_args.extend(['--hadoop-arg', extra_arg])
        cli_args.extend(['--jobconf', 'baz=qux'])
        cli_args.extend(['--partitioner', 'java.lang.Object'])

        word_count = MRWordCount(cli_args)
        # input/output formats are set as attributes on the job object
        word_count.HADOOP_INPUT_FORMAT = 'FooInputFormat'
        word_count.HADOOP_OUTPUT_FORMAT = 'BarOutputFormat'

        with word_count.make_runner() as mr_runner:
            step_args = mr_runner._hadoop_args_for_step(0)
            leading_args = step_args[:len(extra_args)]
            self.assertEqual(leading_args, extra_args)
            # the complete argument list for step 0 is pinned at 12 entries
            self.assertEqual(len(step_args), 12)
Ejemplo n.º 2
0
    def test_hadoop_extra_args_comes_first(self):
        # Each command-line switch paired with its value, in the order they
        # are handed to the job.
        option_pairs = (
            ('--cmdenv', 'FOO=bar'),
            ('--hadoop-arg', '-libjar'),
            ('--hadoop-arg', 'qux.jar'),
            ('--jobconf', 'baz=qux'),
            ('--partitioner', 'java.lang.Object'),
        )
        argv = [token for pair in option_pairs for token in pair]

        mr_job = MRWordCount(argv)
        mr_job.HADOOP_INPUT_FORMAT = 'FooInputFormat'
        mr_job.HADOOP_OUTPUT_FORMAT = 'BarOutputFormat'

        with mr_job.make_runner() as job_runner:
            args_for_step = job_runner._hadoop_args_for_step(0)

            # the two --hadoop-arg values must open the list
            first_two = args_for_step[:2]
            self.assertEqual(first_two, ['-libjar', 'qux.jar'])

            # overall size of the step's argument list
            self.assertEqual(len(args_for_step), 12)
Ejemplo n.º 3
0
    def test_hadoop_input_format(self):
        # The input format is assigned on the job object itself; there is no
        # cmd-line argument for this because it's part of job semantics.
        sequence_format = "org.apache.hadoop.mapred.SequenceFileInputFormat"
        expected_with_format = ["-inputformat", sequence_format]

        # job with a single step
        single_step_job = MRWordCount()
        single_step_job.HADOOP_INPUT_FORMAT = sequence_format
        with single_step_job.make_runner() as single_runner:
            only_step_args = single_runner._hadoop_args_for_step(0)
            self.assertEqual(only_step_args, expected_with_format)

        # job with two steps: -inputformat shows up for step 0 only
        two_step_job = MRTwoStepJob()
        two_step_job.HADOOP_INPUT_FORMAT = sequence_format
        with two_step_job.make_runner() as two_step_runner:
            first_step_args = two_step_runner._hadoop_args_for_step(0)
            self.assertEqual(first_step_args, expected_with_format)

            second_step_args = two_step_runner._hadoop_args_for_step(1)
            self.assertEqual(second_step_args, [])
Ejemplo n.º 4
0
    def test_hadoop_input_format(self):
        fmt = 'org.apache.hadoop.mapred.SequenceFileInputFormat'

        # Each case: (job class, expected hadoop args for each step in order).
        # The one-step job gets -inputformat on its only step; the multi-step
        # job gets it on the first step only.
        cases = (
            (MRWordCount, (['-inputformat', fmt],)),
            (MRTwoStepJob, (['-inputformat', fmt], [])),
        )

        for job_class, expected_per_step in cases:
            mr_job = job_class()
            # set as an attribute rather than a cmd-line argument because
            # it's part of job semantics
            mr_job.HADOOP_INPUT_FORMAT = fmt
            with mr_job.make_runner() as job_runner:
                for step_num, expected_args in enumerate(expected_per_step):
                    actual_args = job_runner._hadoop_args_for_step(step_num)
                    self.assertEqual(actual_args, expected_args)
Ejemplo n.º 5
0
    def test_hadoop_extra_args_comes_first(self):
        # Pass-through values supplied with --hadoop-arg should be the first
        # entries of the Hadoop args produced for the step.
        passthrough_args = ["-libjar", "qux.jar"]
        first_extra, second_extra = passthrough_args

        argv = (
            ["--cmdenv", "FOO=bar"]
            + ["--hadoop-arg", first_extra, "--hadoop-arg", second_extra]
            + ["--jobconf", "baz=qux"]
            + ["--partitioner", "java.lang.Object"]
        )

        counting_job = MRWordCount(argv)
        counting_job.HADOOP_INPUT_FORMAT = "FooInputFormat"
        counting_job.HADOOP_OUTPUT_FORMAT = "BarOutputFormat"

        with counting_job.make_runner() as active_runner:
            args_for_first_step = active_runner._hadoop_args_for_step(0)
            head = args_for_first_step[:2]
            self.assertEqual(head, passthrough_args)
            # total number of generated arguments for the step
            self.assertEqual(len(args_for_first_step), 12)