コード例 #1
0
ファイル: test_dataproc.py プロジェクト: davidmarin/mrjob
    def test_default(self):
        """Run a dataproc job with no idle options and check the timeout args.

        Asserts (via ``assertRanIdleTimeoutScriptWith``) that the idle-timeout
        script was run with ``mrjob-max-secs-idle`` set to ``"360"``.
        """
        job = MRWordCount(["-r", "dataproc"])
        job.sandbox()

        expected_script_args = {"mrjob-max-secs-idle": "360"}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #2
0
ファイル: test_dataproc.py プロジェクト: davidmarin/mrjob
    def test_persistent_cluster(self):
        """Pass ``--max-hours-idle 0.01`` and check the timeout script args.

        0.01 hours is 36 seconds, so the idle-timeout script is expected to
        have been run with ``mrjob-max-secs-idle`` set to ``"36"``.
        """
        cli_args = ["-r", "dataproc", "--max-hours-idle", "0.01"]
        job = MRWordCount(cli_args)
        job.sandbox()

        expected_script_args = {"mrjob-max-secs-idle": "36"}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #3
0
ファイル: test_sim.py プロジェクト: yangzilong1986/mrjob
    def test_input_files(self):
        """Run the job over a plain file and a gzipped file.

        The output is expected to pair each input's ``file://`` URI with a
        count (3 for the plain file, 1 for the gzipped one), and the
        ``combiners`` counter of the first step must exceed 2.
        """
        # plain-text input: three lines
        plain_path = os.path.join(self.tmp_dir, 'input')
        with open(plain_path, 'wb') as plain_file:
            plain_file.write(b'bar\nqux\nfoo\n')

        # gzipped input: one line
        gz_path = os.path.join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gz_path, 'wb') as gz_file:
            gz_file.write(b'foo\n')

        job = MRWordCount(['-r', self.RUNNER, plain_path, gz_path])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            output_pairs = list(job.parse_output(runner.cat_output()))

            first_step_counters = runner.counters()[0]
            self.assertGreater(first_step_counters['count']['combiners'], 2)

        expected_pairs = [
            ('file://' + plain_path, 3),
            ('file://' + gz_path, 1),
        ]
        self.assertEqual(sorted(output_pairs), expected_pairs)
コード例 #4
0
ファイル: test_inline.py プロジェクト: wanglt311/mrjob
    def test_input_files_and_setting_number_of_tasks(self):
        """Run the job over two inputs with map/reduce task counts set to 3.

        Writes a three-line plain file and a one-line gzipped file, runs the
        job with ``mapred.map.tasks=3`` and ``mapred.reduce.tasks=3``, then
        checks that the output pairs each input path with its count and that
        the ``combiners`` counter of the first step equals 3.
        """
        input_path = os.path.join(self.tmp_dir, 'input')
        with open(input_path, 'wb') as input_file:
            input_file.write(b'bar\nqux\nfoo\n')

        input_gz_path = os.path.join(self.tmp_dir, 'input.gz')
        # context manager guarantees the gzip handle is closed (and the
        # trailer flushed) even if write() raises
        with gzip.GzipFile(input_gz_path, 'wb') as input_gz:
            input_gz.write(b'foo\n')

        mr_job = MRWordCount(['-r', self.RUNNER,
                              '--jobconf=mapred.map.tasks=3',
                              '--jobconf=mapred.reduce.tasks=3',
                              input_path, input_gz_path])
        mr_job.sandbox()

        results = []

        with mr_job.make_runner() as runner:
            runner.run()

            for line in runner.stream_output():
                key, value = mr_job.parse_output_line(line)
                results.append((key, value))

            self.assertEqual(runner.counters()[0]['count']['combiners'], 3)

        self.assertEqual(sorted(results),
                         [(input_path, 3), (input_gz_path, 1)])
コード例 #5
0
    def test_default(self):
        """Run a plain dataproc job and check ``mock_Popen`` was never called."""
        mr_job = MRWordCount(['-r', 'dataproc'])
        mr_job.sandbox()

        with mr_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        popen_was_called = self.mock_Popen.called
        self.assertFalse(popen_was_called)
コード例 #6
0
ファイル: test_dataproc.py プロジェクト: rpmirish12/Learning
    def test_default(self):
        """Run a dataproc job with no idle options and check the timeout args.

        Asserts (via ``assertRanIdleTimeoutScriptWith``) that the idle-timeout
        script was run with ``mrjob-max-secs-idle`` set to ``'360'``.
        """
        job = MRWordCount(['-r', 'dataproc'])
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '360'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #7
0
ファイル: test_hadoop.py プロジェクト: Pandafisher/mrjob
    def test_empty(self):
        """With no ``--libjar`` given, step 0's streaming args omit ``-libjars``."""
        mr_job = MRWordCount(['-r', 'hadoop'])
        mr_job.sandbox()

        with mr_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            streaming_args = hadoop_runner._args_for_streaming_step(0)

            self.assertNotIn('-libjars', streaming_args)
コード例 #8
0
ファイル: test_hadoop.py プロジェクト: etiennebatise/mrjob
    def test_empty(self):
        """With no ``--libjar`` given, step 0's streaming args omit ``-libjars``."""
        mr_job = MRWordCount(['-r', 'hadoop'])
        mr_job.sandbox()

        with mr_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            step_args = hadoop_runner._args_for_streaming_step(0)

            self.assertNotIn('-libjars', step_args)
コード例 #9
0
ファイル: test_dataproc.py プロジェクト: Jeremyfanfan/mrjob
    def test_default(self):
        """Run a dataproc job with no idle options and check the timeout args.

        Asserts (via ``assertRanIdleTimeoutScriptWith``) that the idle-timeout
        script was run with ``mrjob-max-secs-idle`` set to ``'360'``.
        """
        job = MRWordCount(['-r', 'dataproc'])
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '360'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #10
0
ファイル: test_dataproc.py プロジェクト: Jeremyfanfan/mrjob
    def test_persistent_cluster(self):
        """Pass ``--max-hours-idle 0.01`` and check the timeout script args.

        0.01 hours is 36 seconds, so the idle-timeout script is expected to
        have been run with ``mrjob-max-secs-idle`` set to ``'36'``.
        """
        cli_args = ['-r', 'dataproc', '--max-hours-idle', '0.01']
        job = MRWordCount(cli_args)
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '36'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #11
0
ファイル: test_dataproc.py プロジェクト: rpmirish12/Learning
    def test_persistent_cluster(self):
        """Pass ``--max-hours-idle 0.01`` and check the timeout script args.

        0.01 hours is 36 seconds, so the idle-timeout script is expected to
        have been run with ``mrjob-max-secs-idle`` set to ``'36'``.
        """
        cli_args = ['-r', 'dataproc', '--max-hours-idle', '0.01']
        job = MRWordCount(cli_args)
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '36'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)
コード例 #12
0
ファイル: test_runner.py プロジェクト: yzhanggithub/mrjob
    def test_job_passes_in_steps(self):
        """The runner has ``_steps`` set before running and logs no warning."""
        mr_job = MRWordCount([])
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            # steps must already be present before run() is called
            self.assertTrue(job_runner._steps)

            job_runner.run()

            warning_was_logged = self.log.warning.called
            self.assertFalse(warning_was_logged)
コード例 #13
0
ファイル: test_runner.py プロジェクト: Affirm/mrjob
    def test_job_passes_in_steps(self):
        """The runner has ``_steps`` set before running and logs no warning."""
        mr_job = MRWordCount()
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            # steps must already be present before run() is called
            self.assertTrue(job_runner._steps)

            job_runner.run()

            warning_was_logged = self.log.warning.called
            self.assertFalse(warning_was_logged)
コード例 #14
0
ファイル: test_runner.py プロジェクト: Affirm/mrjob
    def test_load_steps(self):
        """Clear ``_steps`` before running; expect steps back plus a warning."""
        mr_job = MRWordCount()
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            # wipe the steps the job handed to the runner
            job_runner._steps = None

            job_runner.run()

            self.assertTrue(job_runner._steps)
            warning_was_logged = self.log.warning.called
            self.assertTrue(warning_was_logged)
コード例 #15
0
    def test_missing_gcloud_bin(self):
        """Make ``mock_Popen`` raise OSError(2); expect one attempt, then give up.

        After the run, ``mock_Popen`` must have been called exactly once and
        the runner's ``_give_up_on_ssh_tunnel`` flag must be truthy.
        """
        missing_binary_error = OSError(2, 'No such file or directory')
        self.mock_Popen.side_effect = missing_binary_error

        mr_job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        mr_job.sandbox()

        with mr_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)
        self.assertTrue(dataproc_runner._give_up_on_ssh_tunnel)
コード例 #16
0
ファイル: test_runner.py プロジェクト: dasalgadoc/mrjob
    def test_load_steps(self):
        """Clear ``_steps`` before running; expect steps back plus a warning."""
        mr_job = MRWordCount()
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            # wipe the steps the job handed to the runner
            job_runner._steps = None

            job_runner.run()

            self.assertTrue(job_runner._steps)
            warning_was_logged = self.log.warning.called
            self.assertTrue(warning_was_logged)
コード例 #17
0
    def test_error_from_gcloud_bin(self):
        """Give the mocked process returncode 255; expect repeated attempts.

        After the run, ``mock_Popen`` must have been called more than once
        and the runner's ``_give_up_on_ssh_tunnel`` flag must be falsy.
        """
        mocked_proc = self.mock_Popen.return_value
        mocked_proc.returncode = 255

        mr_job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        mr_job.sandbox()

        with mr_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        self.assertGreater(self.mock_Popen.call_count, 1)
        self.assertFalse(dataproc_runner._give_up_on_ssh_tunnel)
コード例 #18
0
ファイル: test_hadoop.py プロジェクト: etiennebatise/mrjob
    def test_one_jar(self):
        """With one ``--libjar``, step 0's args contain ``-libjars`` and the jar."""
        cli_args = ['-r', 'hadoop', '--libjar', '/path/to/a.jar']
        mr_job = MRWordCount(cli_args)
        mr_job.sandbox()

        with mr_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            streaming_args = hadoop_runner._args_for_streaming_step(0)

            for expected_arg in ('-libjars', '/path/to/a.jar'):
                self.assertIn(expected_arg, streaming_args)
コード例 #19
0
ファイル: test_hadoop.py プロジェクト: Pandafisher/mrjob
    def test_one_jar(self):
        """With one ``--libjar``, step 0's args contain ``-libjars`` and the jar."""
        cli_args = ['-r', 'hadoop', '--libjar', '/path/to/a.jar']
        mr_job = MRWordCount(cli_args)
        mr_job.sandbox()

        with mr_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            step_args = hadoop_runner._args_for_streaming_step(0)

            for expected_arg in ('-libjars', '/path/to/a.jar'):
                self.assertIn(expected_arg, step_args)
コード例 #20
0
    def test_custom_gcloud_bin(self):
        """Pass ``--gcloud-bin '/path/to/gcloud -v'`` and inspect the command.

        ``mock_Popen`` must be called exactly once, and the first four
        elements of its command list must be the custom binary, its ``-v``
        argument, then ``compute`` and ``ssh``.
        """
        cli_args = [
            '-r', 'dataproc',
            '--ssh-tunnel',
            '--gcloud-bin', '/path/to/gcloud -v',
        ]
        mr_job = MRWordCount(cli_args)
        mr_job.sandbox()

        with mr_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)

        # first positional argument of the Popen call is the command list
        positional_args, _ = self.mock_Popen.call_args
        command = positional_args[0]

        expected_prefix = ['/path/to/gcloud', '-v', 'compute', 'ssh']
        self.assertEqual(command[:4], expected_prefix)
コード例 #21
0
    def test_open_ssh_tunnel(self):
        """With ``--ssh-tunnel-is-open``, the command includes ``-g`` and ``-4``.

        ``mock_Popen`` must be called exactly once, and its command list
        must contain ``-L``, ``-N``, ``-n``, ``-q``, ``-g`` and ``-4``.
        """
        cli_args = ['-r', 'dataproc', '--ssh-tunnel', '--ssh-tunnel-is-open']
        mr_job = MRWordCount(cli_args)
        mr_job.sandbox()

        with mr_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)

        # first positional argument of the Popen call is the command list
        positional_args, _ = self.mock_Popen.call_args
        command = positional_args[0]

        for flag in ('-L', '-N', '-n', '-q', '-g', '-4'):
            self.assertIn(flag, command)
コード例 #22
0
    def test_log_messages(self):
        """Feed canned log lines through ``get_lines`` and check ``log.info``.

        Expects one info call with the progress message and one with the
        counters header joined to its two tab-indented continuation lines
        (without the trailing newline).
        """
        canned_lines = [
            '18/04/17 22:06:15 INFO mapreduce.Job:  map 100% reduce 0%\n',
            '18/04/17 22:07:34 INFO mapreduce.Job: Counters: 1\n',
            '\tFile System Counters\n',
            '\t\tFILE: Number of bytes read=819\n',
        ]
        self.get_lines.return_value = canned_lines

        job = MRWordCount(['-r', 'dataproc'])
        job.sandbox()

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        info_calls = self.log.info.call_args_list

        progress_call = call('  map 100% reduce 0%')
        counters_call = call('Counters: 1\n\tFile System Counters\n\t\tFILE:'
                             ' Number of bytes read=819')

        self.assertIn(progress_call, info_calls)
        self.assertIn(counters_call, info_calls)
コード例 #23
0
ファイル: test_sim.py プロジェクト: okomestudio/mrjob
    def test_input_files(self):
        """Run the job over a plain file and a gzipped file.

        The output is expected to pair each input path with a count (3 for
        the plain file, 1 for the gzipped one), and the ``combiners``
        counter of the first step must exceed 2.
        """
        # plain-text input: three lines
        plain_path = os.path.join(self.tmp_dir, 'input')
        with open(plain_path, 'wb') as plain_file:
            plain_file.write(b'bar\nqux\nfoo\n')

        # gzipped input: one line
        gz_path = os.path.join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gz_path, 'wb') as gz_file:
            gz_file.write(b'foo\n')

        job = MRWordCount(['-r', self.RUNNER, plain_path, gz_path])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            output_pairs = list(job.parse_output(runner.cat_output()))

            first_step_counters = runner.counters()[0]
            self.assertGreater(first_step_counters['count']['combiners'], 2)

        expected_pairs = [(plain_path, 3), (gz_path, 1)]
        self.assertEqual(sorted(output_pairs), expected_pairs)
コード例 #24
0
    def test_default_ssh_tunnel(self):
        """Run with ``--ssh-tunnel`` only and inspect the single Popen call.

        Checks that ``mock_Popen`` was called exactly once with
        stdin/stdout/stderr all set to ``PIPE``, that the command starts
        with ``gcloud compute ssh``, that it contains ``-L``, ``-N``, ``-n``
        and ``-q`` but neither ``-g`` nor ``-4``, and that two newlines were
        written to the spawned process's stdin.
        """
        job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)
        args_tuple, kwargs = self.mock_Popen.call_args
        args = args_tuple[0]

        self.assertEqual(kwargs, dict(stdin=PIPE, stdout=PIPE, stderr=PIPE))

        self.assertEqual(args[:3], ['gcloud', 'compute', 'ssh'])

        self.assertIn('-L', args)
        self.assertIn('-N', args)
        self.assertIn('-n', args)
        self.assertIn('-q', args)

        self.assertNotIn('-g', args)
        self.assertNotIn('-4', args)

        # Use a real assert_* method: a Mock's ``called_once_with`` is not an
        # assertion (it verifies nothing, and raises AttributeError on
        # Python 3.12+).
        # NOTE(review): assumes the runner sends the input through the Popen
        # *instance's* stdin.write -- confirm against the runner code.
        self.mock_Popen.return_value.stdin.write.assert_called_once_with(
            b'\n\n')
コード例 #25
0
ファイル: test_runner.py プロジェクト: okomestudio/mrjob
    def test_dash_for_stdin(self):
        """Running with ``'-'`` as the only argument must not raise."""
        mr_job = MRWordCount(['-'])
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            job_runner.run()
コード例 #26
0
ファイル: test_runner.py プロジェクト: Affirm/mrjob
 def make_runner(self, *args):
     """Build an MRWordCount from *args*, sandbox it, and return its runner."""
     job = MRWordCount(args)
     job.sandbox()
     runner = job.make_runner()
     return runner
コード例 #27
0
ファイル: test_runner.py プロジェクト: yzhanggithub/mrjob
 def make_runner(self, *args):
     """Build an MRWordCount from *args*, sandbox it, and return its runner."""
     job = MRWordCount(args)
     job.sandbox()
     runner = job.make_runner()
     return runner
コード例 #28
0
ファイル: test_runner.py プロジェクト: yzhanggithub/mrjob
    def test_dash_for_stdin(self):
        """Running with ``'-'`` as the only argument must not raise."""
        mr_job = MRWordCount(['-'])
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            job_runner.run()
コード例 #29
0
ファイル: test_runner.py プロジェクト: yzhanggithub/mrjob
    def test_stdin_is_fine(self):
        """Running with an empty argument list must not raise."""
        mr_job = MRWordCount([])
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            job_runner.run()
コード例 #30
0
ファイル: test_runner.py プロジェクト: okomestudio/mrjob
    def test_stdin_is_fine(self):
        """Running with no arguments passed to the job must not raise."""
        mr_job = MRWordCount()
        mr_job.sandbox()

        with mr_job.make_runner() as job_runner:
            job_runner.run()