Esempi in Python per MRWordCount.sandbox, esempi in Python per tests.mr_word_count.MRWordCount.sandbox

Esempio n. 1

0

Mostra file

File: test_dataproc.py Progetto: davidmarin/mrjob

    def test_default(self):
        # With no idle option on the command line, the idle-timeout script
        # is expected to have been run with a 360-second limit.
        expected_script_args = {"mrjob-max-secs-idle": "360"}

        job = MRWordCount(["-r", "dataproc"])
        job.sandbox()

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)

Esempio n. 2

0

Mostra file

File: test_dataproc.py Progetto: davidmarin/mrjob

    def test_persistent_cluster(self):
        # 0.01 hours of allowed idle time should reach the idle-timeout
        # script as 36 seconds.
        argv = ["-r", "dataproc", "--max-hours-idle", "0.01"]
        expected_script_args = {"mrjob-max-secs-idle": "36"}

        job = MRWordCount(argv)
        job.sandbox()

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)

Esempio n. 3

0

Mostra file

File: test_sim.py Progetto: yangzilong1986/mrjob

    def test_input_files(self):
        # Fixture 1: a plain-text input with three lines.
        plain_path = os.path.join(self.tmp_dir, 'input')
        with open(plain_path, 'wb') as plain_file:
            plain_file.write(b'bar\nqux\nfoo\n')

        # Fixture 2: a gzipped input with a single line.
        gz_path = os.path.join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gz_path, 'wb') as gz_file:
            gz_file.write(b'foo\n')

        job = MRWordCount(['-r', self.RUNNER, plain_path, gz_path])
        job.sandbox()

        with job.make_runner() as sim_runner:
            sim_runner.run()

            word_counts = list(job.parse_output(sim_runner.cat_output()))

            first_step_counters = sim_runner.counters()[0]
            combiner_count = first_step_counters['count']['combiners']
            self.assertGreater(combiner_count, 2)

        # Output is keyed by the file:// URI of each input file.
        expected = [
            ('file://' + plain_path, 3),
            ('file://' + gz_path, 1),
        ]
        self.assertEqual(sorted(word_counts), expected)

Esempio n. 4

0

Mostra file

File: test_inline.py Progetto: wanglt311/mrjob

    def test_input_files_and_setting_number_of_tasks(self):
        # Fixture 1: a plain-text input with three lines.
        input_path = os.path.join(self.tmp_dir, 'input')
        with open(input_path, 'wb') as input_file:
            input_file.write(b'bar\nqux\nfoo\n')

        # Fixture 2: a gzipped input with a single line.  The context
        # manager closes the file even if write() raises.
        input_gz_path = os.path.join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(input_gz_path, 'wb') as input_gz:
            input_gz.write(b'foo\n')

        # Ask for three map tasks and three reduce tasks via jobconf.
        mr_job = MRWordCount(['-r', self.RUNNER,
                              '--jobconf=mapred.map.tasks=3',
                              '--jobconf=mapred.reduce.tasks=3',
                              input_path, input_gz_path])
        mr_job.sandbox()

        results = []

        with mr_job.make_runner() as runner:
            runner.run()

            for line in runner.stream_output():
                key, value = mr_job.parse_output_line(line)
                results.append((key, value))

            # The first step's 'combiners' counter is expected to be 3.
            self.assertEqual(runner.counters()[0]['count']['combiners'], 3)

        # One output record per input file: (path, line count).
        self.assertEqual(sorted(results),
                         [(input_path, 3), (input_gz_path, 1)])

Esempio n. 5

0

Mostra file

    def test_default(self):
        # A plain dataproc run (no --ssh-tunnel) must never invoke Popen.
        dataproc_job = MRWordCount(['-r', 'dataproc'])
        dataproc_job.sandbox()

        with dataproc_job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        popen_was_called = self.mock_Popen.called
        self.assertFalse(popen_was_called)

Esempio n. 6

0

Mostra file

File: test_dataproc.py Progetto: rpmirish12/Learning

    def test_default(self):
        # Default settings: the idle-timeout script should have been run
        # with a 360-second idle limit.
        job = MRWordCount(['-r', 'dataproc'])
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '360'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)

Esempio n. 7

0

Mostra file

File: test_hadoop.py Progetto: Pandafisher/mrjob

    def test_empty(self):
        # No --libjar option given, so the streaming args for step 0
        # should not mention -libjars.
        hadoop_job = MRWordCount(['-r', 'hadoop'])
        hadoop_job.sandbox()

        with hadoop_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            step_args = hadoop_runner._args_for_streaming_step(0)

            self.assertNotIn('-libjars', step_args)

Esempio n. 8

0

Mostra file

File: test_hadoop.py Progetto: etiennebatise/mrjob

    def test_empty(self):
        # Without any --libjar option, -libjars must be absent from the
        # streaming command for the first step.
        argv = ['-r', 'hadoop']
        wc_job = MRWordCount(argv)
        wc_job.sandbox()

        with wc_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            streaming_args = hadoop_runner._args_for_streaming_step(0)

            self.assertNotIn('-libjars', streaming_args)

Esempio n. 9

0

Mostra file

File: test_dataproc.py Progetto: Jeremyfanfan/mrjob

    def test_default(self):
        # No idle option supplied: expect the idle-timeout script to have
        # been given 360 seconds.
        argv = ['-r', 'dataproc']
        expected_script_args = {'mrjob-max-secs-idle': '360'}

        word_count_job = MRWordCount(argv)
        word_count_job.sandbox()

        with word_count_job.make_runner() as job_runner:
            job_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                job_runner, expected_script_args)

Esempio n. 10

0

Mostra file

File: test_dataproc.py Progetto: Jeremyfanfan/mrjob

    def test_persistent_cluster(self):
        # --max-hours-idle 0.01 (i.e. 36 seconds) should be forwarded to
        # the idle-timeout script.
        argv = ['-r', 'dataproc', '--max-hours-idle', '0.01']
        expected_script_args = {'mrjob-max-secs-idle': '36'}

        word_count_job = MRWordCount(argv)
        word_count_job.sandbox()

        with word_count_job.make_runner() as job_runner:
            job_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                job_runner, expected_script_args)

Esempio n. 11

0

Mostra file

File: test_dataproc.py Progetto: rpmirish12/Learning

    def test_persistent_cluster(self):
        # An explicit idle limit of 0.01 hours is expected to show up as
        # 36 seconds in the idle-timeout script's arguments.
        job = MRWordCount(
            ['-r', 'dataproc', '--max-hours-idle', '0.01'])
        job.sandbox()

        expected_script_args = {'mrjob-max-secs-idle': '36'}

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()
            self.assertRanIdleTimeoutScriptWith(
                dataproc_runner, expected_script_args)

Esempio n. 12

0

Mostra file

File: test_runner.py Progetto: yzhanggithub/mrjob

    def test_job_passes_in_steps(self):
        # A runner built from the job should already have its steps, and
        # running it should not log any warning.
        wc_job = MRWordCount([])
        wc_job.sandbox()

        with wc_job.make_runner() as job_runner:
            steps_before_run = job_runner._steps
            self.assertTrue(steps_before_run)

            job_runner.run()

            warned = self.log.warning.called
            self.assertFalse(warned)

Esempio n. 13

0

Mostra file

File: test_runner.py Progetto: Affirm/mrjob

    def test_job_passes_in_steps(self):
        # The runner should receive non-empty steps from the job up front,
        # so run() has no reason to log a warning.
        word_count_job = MRWordCount()
        word_count_job.sandbox()

        with word_count_job.make_runner() as job_runner:
            self.assertTrue(job_runner._steps)

            job_runner.run()

            warning_logged = self.log.warning.called
            self.assertFalse(warning_logged)

Esempio n. 14

0

Mostra file

File: test_runner.py Progetto: Affirm/mrjob

    def test_load_steps(self):
        # If the runner's steps are wiped before run(), it is expected to
        # repopulate them and to log a warning while doing so.
        word_count_job = MRWordCount()
        word_count_job.sandbox()

        with word_count_job.make_runner() as job_runner:
            job_runner._steps = None  # simulate steps not being passed in

            job_runner.run()

            steps_after_run = job_runner._steps
            self.assertTrue(steps_after_run)
            self.assertTrue(self.log.warning.called)

Esempio n. 15

0

Mostra file

    def test_missing_gcloud_bin(self):
        # Make every Popen attempt fail as if the binary did not exist.
        missing_binary_error = OSError(2, 'No such file or directory')
        self.mock_Popen.side_effect = missing_binary_error

        tunnel_job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        tunnel_job.sandbox()

        with tunnel_job.make_runner() as tunnel_runner:
            tunnel_runner.run()

        # Exactly one launch attempt, after which the runner gives up.
        popen_attempts = self.mock_Popen.call_count
        self.assertEqual(popen_attempts, 1)
        self.assertTrue(tunnel_runner._give_up_on_ssh_tunnel)

Esempio n. 16

0

Mostra file

File: test_runner.py Progetto: dasalgadoc/mrjob

    def test_load_steps(self):
        # Clearing the runner's steps before run() should make it reload
        # them, with a warning logged along the way.
        wc_job = MRWordCount()
        wc_job.sandbox()

        with wc_job.make_runner() as wc_runner:
            wc_runner._steps = None

            wc_runner.run()

            self.assertTrue(wc_runner._steps)

            warning_logged = self.log.warning.called
            self.assertTrue(warning_logged)

Esempio n. 17

0

Mostra file

    def test_error_from_gcloud_bin(self):
        # The launched process reports failure through its exit status
        # (255) rather than Popen itself raising.
        fake_proc = self.mock_Popen.return_value
        fake_proc.returncode = 255

        tunnel_job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        tunnel_job.sandbox()

        with tunnel_job.make_runner() as tunnel_runner:
            tunnel_runner.run()

        # Popen should be tried more than once, and the runner should not
        # have given up on the tunnel.
        popen_attempts = self.mock_Popen.call_count
        self.assertGreater(popen_attempts, 1)
        self.assertFalse(tunnel_runner._give_up_on_ssh_tunnel)

Esempio n. 18

0

Mostra file

File: test_hadoop.py Progetto: etiennebatise/mrjob

    def test_one_jar(self):
        # A single --libjar should surface in the streaming args for step 0
        # as -libjars along with the jar's path.
        jar_path = '/path/to/a.jar'
        hadoop_job = MRWordCount(['-r', 'hadoop', '--libjar', jar_path])
        hadoop_job.sandbox()

        with hadoop_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            step_args = hadoop_runner._args_for_streaming_step(0)

            for expected_arg in ('-libjars', jar_path):
                self.assertIn(expected_arg, step_args)

Esempio n. 19

0

Mostra file

File: test_hadoop.py Progetto: Pandafisher/mrjob

    def test_one_jar(self):
        # Passing one --libjar should put both -libjars and the jar path
        # into the first step's streaming arguments.
        argv = ['-r', 'hadoop', '--libjar', '/path/to/a.jar']
        wc_job = MRWordCount(argv)
        wc_job.sandbox()

        with wc_job.make_runner() as hadoop_runner:
            hadoop_runner._add_job_files_for_upload()
            streaming_args = hadoop_runner._args_for_streaming_step(0)

            self.assertIn('-libjars', streaming_args)
            self.assertIn('/path/to/a.jar', streaming_args)

Esempio n. 20

0

Mostra file

    def test_custom_gcloud_bin(self):
        # --gcloud-bin carries a binary path plus an extra switch in one
        # string; both should lead the launched command as separate items.
        argv = [
            '-r', 'dataproc',
            '--ssh-tunnel',
            '--gcloud-bin', '/path/to/gcloud -v',
        ]
        tunnel_job = MRWordCount(argv)
        tunnel_job.sandbox()

        with tunnel_job.make_runner() as tunnel_runner:
            tunnel_runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)

        # First positional argument of the Popen call is the command list.
        command = self.mock_Popen.call_args[0][0]
        expected_prefix = ['/path/to/gcloud', '-v', 'compute', 'ssh']

        self.assertEqual(command[:4], expected_prefix)

Esempio n. 21

0

Mostra file

    def test_open_ssh_tunnel(self):
        argv = ['-r', 'dataproc', '--ssh-tunnel', '--ssh-tunnel-is-open']
        tunnel_job = MRWordCount(argv)
        tunnel_job.sandbox()

        with tunnel_job.make_runner() as tunnel_runner:
            tunnel_runner.run()

        self.assertEqual(self.mock_Popen.call_count, 1)

        # First positional argument of the Popen call is the command list.
        command = self.mock_Popen.call_args[0][0]

        # The usual tunnel switches, followed by the two that are expected
        # only when --ssh-tunnel-is-open is set.
        expected_switches = ('-L', '-N', '-n', '-q', '-g', '-4')
        for switch in expected_switches:
            self.assertIn(switch, command)

Esempio n. 22

0

Mostra file

    def test_log_messages(self):
        # Fake driver output: one progress line, then a counters line
        # followed by two tab-indented continuation lines.
        driver_output = [
            '18/04/17 22:06:15 INFO mapreduce.Job:  map 100% reduce 0%\n',
            '18/04/17 22:07:34 INFO mapreduce.Job: Counters: 1\n',
            '\tFile System Counters\n',
            '\t\tFILE: Number of bytes read=819\n',
        ]
        self.get_lines.return_value = driver_output

        job = MRWordCount(['-r', 'dataproc'])
        job.sandbox()

        with job.make_runner() as dataproc_runner:
            dataproc_runner.run()

        info_calls = self.log.info.call_args_list

        # The progress line is logged without its timestamp/logger prefix.
        progress_call = call('  map 100% reduce 0%')
        self.assertIn(progress_call, info_calls)

        # The counters line and its continuations are logged as a single
        # multi-line message.
        counters_call = call(
            'Counters: 1\n\tFile System Counters\n\t\tFILE:'
            ' Number of bytes read=819')
        self.assertIn(counters_call, info_calls)

Esempio n. 23

0

Mostra file

File: test_sim.py Progetto: okomestudio/mrjob

    def test_input_files(self):
        # Plain-text fixture: three lines.
        text_path = os.path.join(self.tmp_dir, 'input')
        with open(text_path, 'wb') as text_file:
            text_file.write(b'bar\nqux\nfoo\n')

        # Gzipped fixture: one line.
        gzipped_path = os.path.join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gzipped_path, 'wb') as gzipped_file:
            gzipped_file.write(b'foo\n')

        argv = ['-r', self.RUNNER, text_path, gzipped_path]
        job = MRWordCount(argv)
        job.sandbox()

        with job.make_runner() as sim_runner:
            sim_runner.run()

            output_pairs = list(job.parse_output(sim_runner.cat_output()))

            step_counters = sim_runner.counters()[0]
            self.assertGreater(step_counters['count']['combiners'], 2)

        # One record per input file: (path, number of lines).
        expected = [(text_path, 3), (gzipped_path, 1)]
        self.assertEqual(sorted(output_pairs), expected)

Esempio n. 24

0

Mostra file

    def test_default_ssh_tunnel(self):
        job = MRWordCount(['-r', 'dataproc', '--ssh-tunnel'])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

        # Exactly one process should be launched for the tunnel.
        self.assertEqual(self.mock_Popen.call_count, 1)
        args_tuple, kwargs = self.mock_Popen.call_args
        args = args_tuple[0]  # the command list passed to Popen

        # All three standard streams are expected to be piped.
        self.assertEqual(kwargs, dict(stdin=PIPE, stdout=PIPE, stderr=PIPE))

        self.assertEqual(args[:3], ['gcloud', 'compute', 'ssh'])

        self.assertIn('-L', args)
        self.assertIn('-N', args)
        self.assertIn('-n', args)
        self.assertIn('-q', args)

        # These two switches should be absent unless the tunnel is open.
        self.assertNotIn('-g', args)
        self.assertNotIn('-4', args)

        # An empty passphrase should be fed to the ssh process.  This has to
        # be a real ``assert_*`` call on the Popen *instance* (return_value);
        # a bare ``called_once_with`` on a mock verifies nothing.
        # NOTE(review): assumes the runner sends it with a single
        # stdin.write() call -- confirm against the runner implementation.
        self.mock_Popen.return_value.stdin.write.assert_called_once_with(
            b'\n\n')

Esempio n. 25

0

Mostra file

File: test_runner.py Progetto: okomestudio/mrjob

    def test_dash_for_stdin(self):
        # Smoke test: a lone '-' argument should run without raising.
        stdin_job = MRWordCount(['-'])
        stdin_job.sandbox()

        with stdin_job.make_runner() as stdin_runner:
            stdin_runner.run()

Esempio n. 26

0

Mostra file

File: test_runner.py Progetto: Affirm/mrjob

 def make_runner(self, *args):
     # Helper: build a sandboxed MRWordCount from *args* and return its
     # runner (not yet entered as a context manager).
     word_count_job = MRWordCount(args)
     word_count_job.sandbox()
     job_runner = word_count_job.make_runner()
     return job_runner

Esempio n. 27

0

Mostra file

File: test_runner.py Progetto: yzhanggithub/mrjob

 def make_runner(self, *args):
     # Helper: sandbox a fresh MRWordCount created with *args*, then hand
     # back the runner it makes.
     job = MRWordCount(args)
     job.sandbox()
     runner = job.make_runner()
     return runner

Esempio n. 28

0

Mostra file

File: test_runner.py Progetto: yzhanggithub/mrjob

    def test_dash_for_stdin(self):
        # Passing only '-' should be accepted; the test passes as long as
        # run() does not raise.
        argv = ['-']
        wc_job = MRWordCount(argv)
        wc_job.sandbox()

        with wc_job.make_runner() as wc_runner:
            wc_runner.run()

Esempio n. 29

0

Mostra file

File: test_runner.py Progetto: yzhanggithub/mrjob

    def test_stdin_is_fine(self):
        # Smoke test: an empty argument list (no input paths) should run
        # without raising.
        no_args_job = MRWordCount([])
        no_args_job.sandbox()

        with no_args_job.make_runner() as no_args_runner:
            no_args_runner.run()

Esempio n. 30

0

Mostra file

File: test_runner.py Progetto: okomestudio/mrjob

    def test_stdin_is_fine(self):
        # Smoke test: a job constructed without explicit arguments should
        # run without raising.
        word_count_job = MRWordCount()
        word_count_job.sandbox()

        with word_count_job.make_runner() as job_runner:
            job_runner.run()