예제 #1
0
    def test_spark_mr_job(self):
        # with no extra switches, the script args for step 0 should be
        # exactly these four entries, in this order
        expected_args = [
            '--step-num=0',
            '--spark',
            '<step 0 input>',
            '<step 0 output>',
        ]

        mr_job = MRNullSpark()
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            actual_args = runner._spark_script_args(0)
            self.assertEqual(actual_args, expected_args)
예제 #2
0
    def test_spark_passthrough_arg(self):
        # the switch given to the job should reappear verbatim in the
        # script args, after --spark and before the input/output entries
        passthrough = '--extra-spark-arg=--verbose'

        mr_job = MRNullSpark([passthrough])
        mr_job.sandbox()

        expected_args = [
            '--step-num=0',
            '--spark',
            passthrough,
            '<step 0 input>',
            '<step 0 output>',
        ]

        with mr_job.make_runner() as runner:
            self.assertEqual(runner._spark_script_args(0), expected_args)
예제 #3
0
파일: test_runner.py 프로젝트: Yelp/mrjob
    def test_spark_mr_job(self):
        null_spark_job = MRNullSpark()
        null_spark_job.sandbox()

        with null_spark_job.make_runner() as runner:
            script_args = runner._spark_script_args(0)

            # step number first, then --spark, then the input and output
            # entries for step 0; nothing else
            self.assertEqual(script_args, [
                '--step-num=0', '--spark',
                '<step 0 input>', '<step 0 output>',
            ])
예제 #4
0
파일: test_runner.py 프로젝트: Yelp/mrjob
    def test_spark_passthrough_arg(self):
        job_args = ['--extra-spark-arg=--verbose']
        null_spark_job = MRNullSpark(job_args)
        null_spark_job.sandbox()

        with null_spark_job.make_runner() as runner:
            script_args = runner._spark_script_args(0)

            # the extra switch is expected between --spark and the
            # input/output entries
            self.assertEqual(script_args, [
                '--step-num=0', '--spark',
                '--extra-spark-arg=--verbose',
                '<step 0 input>', '<step 0 output>',
            ])
예제 #5
0
    def test_dont_upload_mrjob_zip(self):
        cli_args = ['-r', 'spark', '--spark-master', 'yarn']
        mr_job = MRNullSpark(cli_args)
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            runner.run()

            zip_path = runner._mrjob_zip_path

            # the zip path must exist after the run...
            self.assertTrue(exists(zip_path))

            # ...must not be among the upload manager's paths...
            uploaded = runner._upload_mgr.path_to_uri()
            self.assertNotIn(zip_path, uploaded)

            # ...and must appear in the spark-submit args for step 0
            submit_args = runner._spark_submit_args(0)
            self.assertIn(zip_path, submit_args)
예제 #6
0
파일: test_runner.py 프로젝트: Affirm/mrjob
    def test_dont_upload_mrjob_zip(self):
        null_spark_job = MRNullSpark(
            ['-r', 'spark', '--spark-master', 'yarn'])
        null_spark_job.sandbox()

        with null_spark_job.make_runner() as runner:
            runner.run()

            mrjob_zip = runner._mrjob_zip_path
            zip_was_created = exists(mrjob_zip)
            self.assertTrue(zip_was_created)

            # the zip is passed to spark-submit directly rather than being
            # handed to the upload manager
            upload_map = runner._upload_mgr.path_to_uri()
            self.assertNotIn(mrjob_zip, upload_map)
            self.assertIn(mrjob_zip, runner._spark_submit_args(0))
예제 #7
0
    def test_eggs(self):
        dragon_egg = self.makefile('dragon.egg')
        horton_egg = self.makefile('horton.egg')

        # both eggs go in as a single comma-separated --py-files value
        py_files_in = '%s,%s' % (dragon_egg, horton_egg)

        mr_job = MRNullSpark(['-r', 'spark', '--py-files', py_files_in])
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            runner.run()

            # spark-submit should get the eggs followed by the mrjob zip,
            # joined into one comma-separated argument
            zip_path = runner._mrjob_zip_path
            py_files_out = '%s,%s,%s' % (dragon_egg, horton_egg, zip_path)

            submit_args = runner._spark_submit_args(0)
            self.assertIn(py_files_out, submit_args)
예제 #8
0
파일: test_runner.py 프로젝트: Affirm/mrjob
    def test_eggs(self):
        first_egg = self.makefile('dragon.egg')
        second_egg = self.makefile('horton.egg')

        job_args = ['-r', 'spark']
        job_args.append('--py-files')
        job_args.append('%s,%s' % (first_egg, second_egg))

        null_spark_job = MRNullSpark(job_args)
        null_spark_job.sandbox()

        with null_spark_job.make_runner() as runner:
            runner.run()

            # expected: one argument listing both eggs and then the
            # runner's mrjob zip, comma-separated
            expected_py_files = '%s,%s,%s' % (
                first_egg, second_egg, runner._mrjob_zip_path)
            self.assertIn(expected_py_files, runner._spark_submit_args(0))
예제 #9
0
    def test_spark_file_arg(self):
        local_foo = self.makefile('foo')

        mr_job = MRNullSpark(['--extra-file', local_foo])
        mr_job.sandbox()

        # the script args should carry the name 'foo', not the path that
        # was given on the command line
        expected_args = [
            '--step-num=0',
            '--spark',
            '--extra-file',
            'foo',
            '<step 0 input>',
            '<step 0 output>',
        ]

        with mr_job.make_runner() as runner:
            self.assertEqual(runner._spark_script_args(0), expected_args)

            # the working dir manager should map that name back to the
            # original path
            files_by_name = runner._working_dir_mgr.name_to_path('file')
            self.assertIn('foo', files_by_name)
            self.assertEqual(files_by_name['foo'], local_foo)
예제 #10
0
    def test_spark_step_without_mr_job_script(self):
        spark_steps = MRNullSpark()._steps_desc()

        # need to be able to call the script's spark() method, so
        # building a runner from steps and stdin alone should be rejected
        with self.assertRaises(ValueError):
            EMRJobRunner(steps=spark_steps, stdin=BytesIO())
예제 #11
0
파일: test_runner.py 프로젝트: Yelp/mrjob
    def test_spark_file_arg(self):
        extra_file_path = self.makefile('foo')

        null_spark_job = MRNullSpark(['--extra-file', extra_file_path])
        null_spark_job.sandbox()

        with null_spark_job.make_runner() as runner:
            script_args = runner._spark_script_args(0)

            # --extra-file is followed by the name 'foo' rather than the
            # path passed in
            self.assertEqual(script_args, [
                '--step-num=0', '--spark',
                '--extra-file', 'foo',
                '<step 0 input>', '<step 0 output>',
            ])

            working_dir_files = runner._working_dir_mgr.name_to_path('file')
            self.assertIn('foo', working_dir_files)
            self.assertEqual(working_dir_files['foo'], extra_file_path)
예제 #12
0
    def test_no_spark_steps(self):
        # sanity check only; _STEP_TYPES is tested in a lot of other ways
        mr_job = MRNullSpark(['-r', 'local'])
        mr_job.sandbox()

        # creating the runner is the call expected to raise
        with self.assertRaises(NotImplementedError):
            mr_job.make_runner()