예제 #1
0
파일: test_local.py 프로젝트: alanhdu/mrjob
    def test_mapper_pre_filter(self):
        # Passing --mapper-filter should record the command as the mapper's
        # "pre_filter" in the step description, and the local runner should
        # produce output that reflects the filter having been applied.
        data = b"x\ny\nz\n"
        job = FilterJob(["--mapper-filter", "cat -e", "--runner=local"])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            self.assertEqual(
                r._get_steps(), [{"type": "streaming", "mapper": {"type": "script", "pre_filter": "cat -e"}}]
            )

            r.run()

            # Iterate the output stream directly; wrapping it in list() first
            # only built a throwaway copy.
            lines = [line.strip() for line in r.stream_output()]
            # Output order is not relied upon, hence the sort. Each expected
            # line carries the "$" end-of-line marker added by "cat -e".
            self.assertEqual(sorted(lines), [b"x$", b"y$", b"z$"])
예제 #2
0
    def test_mapper_pre_filter(self):
        # Check that --mapper-filter ends up as the mapper's "pre_filter" in
        # the step description and that running locally yields the filtered
        # lines.
        data = b'x\ny\nz\n'
        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'cat -e'
                }
            }])

            r.run()

            # No need to materialize the stream with list() before looping
            # over it; the comprehension consumes it directly.
            lines = [line.strip() for line in r.stream_output()]
            # Compared sorted so the assertion does not depend on output
            # order; "cat -e" is what appends "$" to each line.
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])
예제 #3
0
파일: test_local.py 프로젝트: SeanOC/mrjob
    def test_mapper_pre_filter(self):
        # A mapper pre-filter supplied on the command line should show up in
        # the step description and be reflected in the local runner's output.
        data = 'x\ny\nz\n'
        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO(data))
        with job.make_runner() as r:
            self.assertEqual(
                r._get_steps(),
                [{
                    'type': 'streaming',
                    'mapper': {
                        'type': 'script',
                        'pre_filter': 'cat -e'}}])

            r.run()

            # Consume the output stream directly instead of copying it into
            # a list first.
            lines = [line.strip() for line in r.stream_output()]
            # Order-insensitive comparison; every line should end with the
            # "$" marker that "cat -e" adds.
            self.assertItemsEqual(lines, ['x$', 'y$', 'z$'])
예제 #4
0
    def test_mapper_pre_filter(self):
        # Run FilterJob locally with "cat -e" as the mapper pre-filter and
        # verify both the step description and the exact concatenated output.
        expected_steps = [{
            'type': 'streaming',
            'mapper': {
                'type': 'script',
                'pre_filter': 'cat -e',
            },
        }]

        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO('x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            # The output is compared as one string, so line order matters
            # here.
            output = ''.join(runner.stream_output())
            self.assertEqual(output, 'x$\ny$\nz$\n')
예제 #5
0
    def test_mapper_pre_filter(self):
        # The mapper pre-filter passed via --mapper-filter must appear in the
        # step description, and the joined output must match exactly.
        args = ['--mapper-filter', 'cat -e', '--runner=local']
        mapper_step = {'type': 'script', 'pre_filter': 'cat -e'}
        steps_wanted = [{'type': 'streaming', 'mapper': mapper_step}]

        job = FilterJob(args)
        stdin = StringIO('x\ny\nz\n')
        job.sandbox(stdin=stdin)

        with job.make_runner() as local_runner:
            self.assertEqual(local_runner._get_steps(), steps_wanted)

            local_runner.run()

            # Join all output chunks and compare against the full expected
            # text, including ordering and trailing newline.
            joined = ''.join(local_runner.stream_output())
            self.assertEqual(joined, 'x$\ny$\nz$\n')
예제 #6
0
    def test_combiner_pre_filter(self):
        # Passing --combiner-filter should attach the command as the
        # combiner's "pre_filter" (the mapper gets none), and the local
        # runner's output should reflect the filter.
        data = 'x\ny\nz\n'
        job = FilterJob(['--combiner-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO(data))
        with job.make_runner() as r:
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                },
                'combiner': {
                    'type': 'script',
                    'pre_filter': 'cat -e',
                }
            }])

            r.run()
            # Read straight from the output stream; the extra list() copy
            # served no purpose.
            lines = [line.strip() for line in r.stream_output()]
            # Order-insensitive check that each line gained the "$" marker.
            self.assertItemsEqual(lines, ['x$', 'y$', 'z$'])
예제 #7
0
    def test_reducer_pre_filter(self):
        # Passing --reducer-filter should attach the command as the
        # reducer's "pre_filter" (the mapper gets none), and the local
        # runner's output should reflect the filter.
        data = b'x\ny\nz\n'
        job = FilterJob(['--reducer-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            self.assertEqual(
                r._get_steps(),
                [{
                    'type': 'streaming',
                    'mapper': {
                        'type': 'script',
                    },
                    'reducer': {
                        'type': 'script',
                        'pre_filter': 'cat -e'}}])

            r.run()

            # Iterate the stream directly rather than copying it into a
            # list first.
            lines = [line.strip() for line in r.stream_output()]
            # Sorted so the assertion is independent of output order.
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])
예제 #8
0
    def test_pre_filter_failure(self):
        # regression test for #1524
        #
        # A pre-filter that exits non-zero simply because it matched nothing
        # should not break the run; the job is expected to finish with no
        # output.

        data = b'x\ny\nz\n'
        # grep will return exit code 1 because there are no matches
        job = FilterJob(['--mapper-filter', 'grep w', '--runner=local'])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'grep w'
                }
            }])

            r.run()

            # Consume the stream directly; no intermediate list() is needed.
            lines = [line.strip() for line in r.stream_output()]
            # None of the input lines contain "w", so nothing should come out.
            self.assertEqual(sorted(lines), [])
예제 #9
0
    def test_pre_filter_on_compressed_data(self):
        # regression test for #1061
        #
        # The job reads a gzipped input file (given as a path argument
        # rather than stdin) with a mapper pre-filter; the expected output
        # is the decompressed lines with the filter applied.
        # NOTE(review): self.makefile() presumably creates the file inside
        # the test's temp dir and returns its path -- confirm in the base
        # test case.
        input_gz_path = self.makefile('data.gz')
        # The with-block guarantees the gzip stream is closed (and therefore
        # fully written) even if write() raises.
        with gzip.GzipFile(input_gz_path, 'wb') as input_gz:
            input_gz.write(b'x\ny\nz\n')

        job = FilterJob(
            ['--mapper-filter', 'cat -e', '--runner=local', input_gz_path])
        with job.make_runner() as r:
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'cat -e'
                }
            }])

            r.run()

            # Iterate the output stream directly instead of copying it into
            # a list first.
            lines = [line.strip() for line in r.stream_output()]
            # Sorted so the check does not depend on output order.
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])