Example #1
0
 def test_jobconf(self):
     # Every jobconf entry should surface as its own '-jobconf KEY=VALUE'
     # pair; the expected list has the keys in sorted order.
     settings = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     job_runner = MRJobRunner(conf_path=False, jobconf=settings)

     actual_args = job_runner._hadoop_conf_args(0, 1)
     expected_args = [
         '-jobconf', 'BAX=Arnold',
         '-jobconf', 'BAZ=qux',
         '-jobconf', 'FOO=bar',
     ]
     assert_equal(actual_args, expected_args)
Example #2
0
 def test_cmdenv(self):
     # Every cmdenv entry should surface as its own '-cmdenv KEY=VALUE'
     # pair; the expected list has the keys in sorted order.
     env_vars = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     job_runner = MRJobRunner(conf_path=False, cmdenv=env_vars)

     actual_args = job_runner._hadoop_conf_args(0, 1)
     expected_args = [
         '-cmdenv', 'BAX=Arnold',
         '-cmdenv', 'BAZ=qux',
         '-cmdenv', 'FOO=bar',
     ]
     assert_equal(actual_args, expected_args)
Example #3
0
    def test_two_files(self):
        # Sorting two input files should write the combined lines of both
        # to self.out in the order listed below.
        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        job_runner._invoke_sort([self.a, self.b], self.out)

        with open(self.out) as sorted_file:
            sorted_lines = list(sorted_file)
            self.assertEqual(
                sorted_lines,
                [
                    "A\n",
                    "B\n",
                    "alligator\n",
                    "apple\n",
                    "ball\n",
                    "banana\n",
                ])
Example #4
0
    def test_one_file(self):
        # Sorting a single input file should write that file's lines to
        # self.out in the order listed below.
        runner = MRJobRunner(conf_paths=[])
        runner._invoke_sort([self.a], self.out)

        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n',
                              'alligator\n',
                              'apple\n'])
Example #5
0
 def test_hadoop_output_format(self):
     # The configured output format should be passed as '-outputformat'
     # for a single-step job, and only for the last step of a two-step job.
     #
     # Named `output_format` rather than `format` so the builtin format()
     # is not shadowed inside this test.
     output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     runner = MRJobRunner(conf_path=False,
                          hadoop_output_format=output_format)
     assert_equal(runner._hadoop_conf_args(0, 1),
                  ['-outputformat', output_format])
     # test multi-step job: step 0 of 2 gets no output format, step 1 does
     assert_equal(runner._hadoop_conf_args(0, 2), [])
     assert_equal(runner._hadoop_conf_args(1, 2),
                  ['-outputformat', output_format])
Example #6
0
    def test_one_file(self):
        # Sorting a single input file should write that file's lines to
        # self.out in the order listed below.
        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        job_runner._invoke_sort([self.a], self.out)

        with open(self.out) as sorted_file:
            sorted_lines = list(sorted_file)
            self.assertEqual(sorted_lines, ['A\n', 'alligator\n', 'apple\n'])
Example #7
0
    def test_two_files(self):
        # Sorting two input files should write the combined lines of both
        # to self.out in the order listed below.
        runner = MRJobRunner(conf_paths=[])
        runner._invoke_sort([self.a, self.b], self.out)

        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n',
                              'B\n',
                              'alligator\n',
                              'apple\n',
                              'ball\n',
                              'banana\n'])
Example #8
0
 def test_hadoop_extra_args_comes_first(self):
     # With several kinds of Hadoop options set at once, the user-supplied
     # hadoop_extra_args must lead the generated argument list.
     options = dict(
         conf_path=False,
         cmdenv={'FOO': 'bar'},
         hadoop_input_format='FooInputFormat',
         hadoop_output_format='BarOutputFormat',
         jobconf={'baz': 'quz'},
         hadoop_extra_args=['-libjar', 'qux.jar'],
     )
     job_runner = MRJobRunner(**options)

     generated = job_runner._hadoop_conf_args(0, 1)

     # hadoop_extra_args should come first
     leading_pair = generated[:2]
     assert_equal(leading_pair, ['-libjar', 'qux.jar'])
     # five settings were supplied; 10 args total (presumably one
     # flag/value pair per setting)
     assert_equal(len(generated), 10)
Example #9
0
    def test_bad_sort(self):
        # After use_bad_sort() (presumably swaps in a sort command that
        # fails -- confirm against the fixture), invoking the sort must
        # raise instead of succeeding.
        self.use_bad_sort()

        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        # sometimes we get a broken pipe error (IOError) on PyPy
        acceptable_errors = (CalledProcessError, IOError)

        with no_handlers_for_logger():
            with self.assertRaises(acceptable_errors):
                job_runner._invoke_sort([self.a, self.b], self.out)
Example #10
0
 def test_default(self):
     # With no interpreter options, the task interpreter is the default
     # python bin and the steps interpreter is the current executable.
     job_runner = MRJobRunner()

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, self.default_python_bin())

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
Example #11
0
 def test_environment_variables_non_windows(self):
     # Run the shared environment-variable checks against TEMP and TMPDIR
     # on a runner left in its default (non-Windows-sort) state.
     expected_vars = ['TEMP', 'TMPDIR']
     job_runner = MRJobRunner(conf_path=False)
     self.environment_variable_checks(job_runner, expected_vars)
Example #12
0
 def test_hadoop_extra_args(self):
     # When hadoop_extra_args is the only option set, the generated
     # arguments should equal exactly those extra args.
     passthrough = ['-foo', 'bar']
     job_runner = MRJobRunner(conf_path=False,
                              hadoop_extra_args=passthrough)

     generated = job_runner._hadoop_conf_args(0, 1)
     assert_equal(generated, passthrough)
Example #13
0
    def test_environment_variables_non_windows(self):
        # Run the shared environment-variable checks against TEMP and
        # TMPDIR on a runner left in its default (non-Windows-sort) state.
        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        expected_vars = ['TEMP', 'TMPDIR']
        self.environment_variable_checks(job_runner, expected_vars)
Example #14
0
 def test_no_files(self):
     # Asking to sort an empty list of input files must raise ValueError.
     job_runner = MRJobRunner(conf_paths=[])
     with self.assertRaises(ValueError):
         job_runner._invoke_sort([], self.out)
Example #15
0
 def test_default(self):
     # With no relevant options set, _bootstrap_mrjob() should be True.
     job_runner = MRJobRunner(conf_paths=[])
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
Example #16
0
 def test_interpreter(self):
     # Setting a custom interpreter should make _bootstrap_mrjob() False.
     job_runner = MRJobRunner(conf_paths=[], interpreter=['ruby'])
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
Example #17
0
 def test_steps_python_bin(self):
     # steps_python_bin should only affect the steps interpreter; the
     # task interpreter stays at the default python bin.
     job_runner = MRJobRunner(steps_python_bin=['python', '-v'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, self.default_python_bin())

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['python', '-v'])
Example #18
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     options = {
         'interpreter': ['ruby', '-v'],
         'steps_interpreter': ['ruby'],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby', '-v'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #19
0
 def test_default(self):
     # With no interpreter options, the task interpreter is the default
     # python bin and the steps interpreter is the current executable.
     job_runner = MRJobRunner()

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, self.default_python_bin())

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
Example #20
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     options = {
         "interpreter": ["ruby", "-v"],
         "steps_interpreter": ["ruby"],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ["ruby", "-v"])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ["ruby"])
Example #21
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both are given, interpreter wins over steps_python_bin for
     # the task interpreter and the steps interpreter alike.
     options = {
         "interpreter": ["ruby"],
         "steps_python_bin": ["python", "-v"],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ["ruby"])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ["ruby"])
Example #22
0
    def test_bad_sort(self):
        # After use_bad_sort() (presumably swaps in a sort command that
        # fails -- confirm against the fixture), invoking the sort must
        # raise CalledProcessError.
        self.use_bad_sort()

        job_runner = MRJobRunner(conf_path=False)
        sort_inputs = [self.a, self.b]
        assert_raises(
            CalledProcessError,
            job_runner._invoke_sort, sort_inputs, self.out)
Example #23
0
 def test_no_files(self):
     # Asking to sort an empty list of input files must raise ValueError.
     job_runner = MRJobRunner(conf_path=False)
     no_inputs = []
     assert_raises(
         ValueError, job_runner._invoke_sort, no_inputs, self.out)
Example #24
0
 def test_environment_variables_windows(self):
     # Flag the runner as using the Windows sort, then run the shared
     # environment-variable checks against TMP.
     job_runner = MRJobRunner(conf_path=False)
     job_runner._sort_is_windows_sort = True

     expected_vars = ['TMP']
     self.environment_variable_checks(job_runner, expected_vars)
Example #25
0
 def test_python_bin(self):
     # python_bin should only affect the task interpreter; the steps
     # interpreter stays at the current executable.
     job_runner = MRJobRunner(python_bin=['python', '-v'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['python', '-v'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
Example #26
0
    def test_environment_variables_windows(self):
        # Flag the runner as using the Windows sort, then run the shared
        # environment-variable checks against TMP.
        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        job_runner._sort_is_windows_sort = True
        expected_vars = ['TMP']
        self.environment_variable_checks(job_runner, expected_vars)
Example #27
0
 def test_steps_python_bin(self):
     # steps_python_bin should only affect the steps interpreter; the
     # task interpreter stays at the default python bin.
     job_runner = MRJobRunner(steps_python_bin=['python', '-v'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, self.default_python_bin())

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['python', '-v'])
Example #28
0
 def test_python_bin(self):
     # python_bin should only affect the task interpreter; the steps
     # interpreter stays at the current executable.
     job_runner = MRJobRunner(python_bin=['python', '-v'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['python', '-v'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
Example #29
0
 def test_interpreter(self):
     # A custom interpreter should be used for both the task interpreter
     # and the steps interpreter.
     job_runner = MRJobRunner(interpreter=['ruby'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #30
0
 def test_interpreter(self):
     # A custom interpreter should be used for both the task interpreter
     # and the steps interpreter.
     job_runner = MRJobRunner(interpreter=['ruby'])

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #31
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     options = {
         'interpreter': ['ruby', '-v'],
         'steps_interpreter': ['ruby'],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby', '-v'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #32
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both are given, interpreter wins over steps_python_bin for
     # the task interpreter and the steps interpreter alike.
     options = {
         'interpreter': ['ruby'],
         'steps_python_bin': ['python', '-v'],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #33
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both are given, interpreter wins over steps_python_bin for
     # the task interpreter and the steps interpreter alike.
     options = {
         'interpreter': ['ruby'],
         'steps_python_bin': ['python', '-v'],
     }
     job_runner = MRJobRunner(**options)

     task_interpreter = job_runner._interpreter()
     self.assertEqual(task_interpreter, ['ruby'])

     steps_interpreter = job_runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
Example #34
0
 def test_no_bootstrap_mrjob(self):
     # Explicitly passing bootstrap_mrjob=False should be reported back
     # as False by _bootstrap_mrjob().
     job_runner = MRJobRunner(conf_paths=[], bootstrap_mrjob=False)
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
Example #35
0
 def test_default(self):
     # With no relevant options set, _bootstrap_mrjob() should be True.
     job_runner = MRJobRunner(conf_paths=[])
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
Example #36
0
 def test_bootstrap_mrjob_overrides_interpreter(self):
     # An explicit bootstrap_mrjob=True should win even when a custom
     # interpreter is also configured.
     options = {
         'conf_paths': [],
         'interpreter': ['ruby'],
         'bootstrap_mrjob': True,
     }
     job_runner = MRJobRunner(**options)
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
Example #37
0
 def test_no_bootstrap_mrjob(self):
     # Explicitly passing bootstrap_mrjob=False should be reported back
     # as False by _bootstrap_mrjob().
     job_runner = MRJobRunner(conf_paths=[], bootstrap_mrjob=False)
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
Example #38
0
    def test_environment_variables_windows(self):
        # Flag the runner as using the Windows sort, then run the shared
        # environment-variable checks against TMP.
        job_runner = MRJobRunner(conf_paths=[])
        self.addCleanup(job_runner.cleanup)

        job_runner._sort_is_windows_sort = True
        expected_vars = ['TMP']
        self.environment_variable_checks(job_runner, expected_vars)
Example #39
0
 def test_interpreter(self):
     # Setting a custom interpreter should make _bootstrap_mrjob() False.
     job_runner = MRJobRunner(conf_paths=[], interpreter=['ruby'])
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
Example #40
0
    def test_one_file(self):
        # Sorting a single input file should write that file's lines to
        # self.out in the order listed below.
        runner = MRJobRunner(conf_paths=[])
        runner._invoke_sort([self.a], self.out)

        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n', 'alligator\n', 'apple\n'])
Example #41
0
 def test_bootstrap_mrjob_overrides_interpreter(self):
     # An explicit bootstrap_mrjob=True should win even when a custom
     # interpreter is also configured.
     options = {
         'conf_paths': [],
         'interpreter': ['ruby'],
         'bootstrap_mrjob': True,
     }
     job_runner = MRJobRunner(**options)
     should_bootstrap = job_runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
Example #42
0
 def test_environment_variables_windows(self):
     # Flag the runner as using the Windows sort, then run the shared
     # environment-variable checks against TMP.
     job_runner = MRJobRunner(conf_path=False)
     job_runner._sort_is_windows_sort = True

     expected_vars = ['TMP']
     self.environment_variable_checks(job_runner, expected_vars)
Example #43
0
 def test_empty(self):
     # A runner with no Hadoop-related options should generate no
     # configuration arguments at all.
     job_runner = MRJobRunner(conf_path=False)
     generated = job_runner._hadoop_conf_args(0, 1)
     assert_equal(generated, [])