Example #1
0
    def test_no_mapper(self):
        """Run MRNoMapper with ``-r dataproc`` on three kinds of input.

        The job is handed STDIN (``-``), a local file, and a ``gs://`` URI.
        The job output is faked after the run, so this checks that the
        runner's output is read back and parsed into the expected pairs.
        """
        stdin = BytesIO(b'foo\nbar\n')

        # local file input
        local_path = os.path.join(self.tmp_dir, 'input')
        with open(local_path, 'wb') as f:
            f.write(b'one fish two fish\nred fish blue fish\n')

        # remote (fake GCS) input
        gcs_uri = 'gs://walrus/data/foo'
        self.put_gcs_multi({gcs_uri: b'foo\n'})

        job_args = ['-r', 'dataproc', '-v', '-', local_path, gcs_uri]
        mr_job = MRNoMapper(job_args)
        mr_job.sandbox(stdin=stdin)

        fake_parts = [
            b'1\t["blue", "one", "red", "two"]\n',
            b'4\t["fish"]\n',
        ]
        expected = [
            (1, ['blue', 'one', 'red', 'two']),
            (4, ['fish']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            # the output we read back below is planted here
            self.put_job_output_parts(runner, fake_parts)

            results = list(mr_job.parse_output(runner.cat_output()))

        self.assertEqual(sorted(results), expected)
Example #2
0
    def test_no_mapper(self):
        """Feed MRNoMapper STDIN, a local file and a GCS path on dataproc.

        Output parts are planted by hand once the runner has finished, then
        parsed through the job and compared (sorted) with the expected
        key/value pairs.
        """
        fake_stdin = BytesIO(b'foo\nbar\n')

        # input source 2: a file under the test's temp dir
        input_path = os.path.join(self.tmp_dir, 'input')
        with open(input_path, 'wb') as input_file:
            input_file.write(b'one fish two fish\nred fish blue fish\n')

        # input source 3: an object in the fake GCS
        remote_uri = 'gs://walrus/data/foo'
        remote_objects = {remote_uri: b'foo\n'}
        self.put_gcs_multi(remote_objects)

        # '-' tells the job to read from STDIN as well
        mr_job = MRNoMapper(
            ['-r', 'dataproc', '-v', '-', input_path, remote_uri])
        mr_job.sandbox(stdin=fake_stdin)

        with mr_job.make_runner() as runner:
            runner.run()

            # setup fake output
            output_parts = [
                b'1\t["blue", "one", "red", "two"]\n',
                b'4\t["fish"]\n',
            ]
            self.put_job_output_parts(runner, output_parts)

            parsed = mr_job.parse_output(runner.cat_output())
            results = list(parsed)

        expected = [
            (1, ['blue', 'one', 'red', 'two']),
            (4, ['fish']),
        ]
        self.assertEqual(sorted(results), expected)
Example #3
0
    def test_step_with_no_mapper(self):
        """Run MRNoMapper on ``self.RUNNER`` with input from STDIN only.

        The sorted, parsed output must equal the expected key/value pairs.
        """
        mr_job = MRNoMapper(['-r', self.RUNNER])

        job_input = BytesIO(b'one fish two fish\nred fish blue fish\n')
        mr_job.sandbox(stdin=job_input)

        expected = [
            (1, ['blue', 'one', 'red', 'two']),
            (4, ['fish']),
        ]

        with mr_job.make_runner() as runner:
            runner.run()

            # read the output while the runner is still open
            parsed = mr_job.parse_output(runner.cat_output())
            self.assertEqual(sorted(parsed), expected)
Example #4
0
    def test_step_with_no_mapper(self):
        """Check MRNoMapper's output on the runner named by ``self.RUNNER``.

        Two lines of text are supplied on STDIN; the job's parsed output,
        once sorted, is compared against the expected pairs.
        """
        runner_args = ['-r', self.RUNNER]
        mr_job = MRNoMapper(runner_args)

        stdin_lines = b'one fish two fish\nred fish blue fish\n'
        mr_job.sandbox(stdin=BytesIO(stdin_lines))

        with mr_job.make_runner() as runner:
            runner.run()

            actual = sorted(mr_job.parse_output(runner.cat_output()))
            expected = [
                (1, ['blue', 'one', 'red', 'two']),
                (4, ['fish']),
            ]
            self.assertEqual(actual, expected)