コード例 #1
0
ファイル: test_local.py プロジェクト: alanhdu/mrjob
    def test_mapper_pre_filter(self):
        """A ``--mapper-filter`` command is recorded as the mapper pre_filter.

        Verifies the step description first, then runs the job with the
        local runner and compares the sorted, stripped output lines.
        """
        stdin_data = b"x\ny\nz\n"
        expected_steps = [
            {
                "type": "streaming",
                "mapper": {"type": "script", "pre_filter": "cat -e"},
            }
        ]

        job = FilterJob(["--mapper-filter", "cat -e", "--runner=local"])
        job.sandbox(stdin=BytesIO(stdin_data))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            # Compare sorted output so the check does not depend on line order.
            output = sorted(line.strip() for line in runner.stream_output())
            self.assertEqual(output, [b"x$", b"y$", b"z$"])
コード例 #2
0
    def test_mapper_pre_filter(self):
        """Run FilterJob locally with ``cat -e`` as the mapper pre-filter.

        The step description must list the filter under the mapper, and
        each output line is expected to end with ``$``.
        """
        mapper_step = {'type': 'script', 'pre_filter': 'cat -e'}
        expected_steps = [{'type': 'streaming', 'mapper': mapper_step}]
        expected_lines = [b'x$', b'y$', b'z$']

        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            stripped = [raw.strip() for raw in runner.stream_output()]
            self.assertEqual(sorted(stripped), expected_lines)
コード例 #3
0
ファイル: test_local.py プロジェクト: SeanOC/mrjob
    def test_mapper_pre_filter(self):
        """Mapper pre-filter appears in the steps and is applied to the output.

        Output lines are compared without regard to order.
        """
        argv = ['--mapper-filter', 'cat -e', '--runner=local']
        expected_steps = [
            {
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'cat -e',
                },
            },
        ]

        job = FilterJob(argv)
        job.sandbox(stdin=StringIO('x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            stripped = [out_line.strip() for out_line in runner.stream_output()]
            self.assertItemsEqual(stripped, ['x$', 'y$', 'z$'])
コード例 #4
0
ファイル: test_local.py プロジェクト: eklitzke/mrjob
    def test_mapper_pre_filter(self):
        """Mapper pre-filter is listed in the steps and shapes the output.

        The concatenated output is compared verbatim, so this check is
        sensitive to line order.
        """
        expected_steps = [
            {
                'type': 'streaming',
                'mapper': {'type': 'script', 'pre_filter': 'cat -e'},
            },
        ]

        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO('x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            combined = ''.join(runner.stream_output())
            self.assertEqual(combined, 'x$\ny$\nz$\n')
コード例 #5
0
ファイル: test_local.py プロジェクト: gcorreaq/mrjob
    def test_mapper_pre_filter(self):
        """Check ``--mapper-filter`` end to end on the local runner.

        First the step description is compared, then the job is run and
        its whole output is matched as a single string.
        """
        stdin_text = 'x\ny\nz\n'
        expected_output = 'x$\ny$\nz$\n'
        mapper_desc = {'type': 'script', 'pre_filter': 'cat -e'}

        job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO(stdin_text))

        with job.make_runner() as runner:
            steps = runner._get_steps()
            self.assertEqual(
                steps, [{'type': 'streaming', 'mapper': mapper_desc}])

            runner.run()

            self.assertEqual(''.join(runner.stream_output()), expected_output)
コード例 #6
0
    def test_combiner_pre_filter(self):
        """A ``--combiner-filter`` command is attached to the combiner only.

        The mapper entry in the step description carries no pre_filter;
        output lines are compared without regard to order.
        """
        expected_steps = [
            {
                'type': 'streaming',
                'mapper': {'type': 'script'},
                'combiner': {'type': 'script', 'pre_filter': 'cat -e'},
            },
        ]

        job = FilterJob(['--combiner-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=StringIO('x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            stripped = [out_line.strip() for out_line in runner.stream_output()]
            self.assertItemsEqual(stripped, ['x$', 'y$', 'z$'])
コード例 #7
0
ファイル: test_local.py プロジェクト: northaviva/mrjob
    def test_reducer_pre_filter(self):
        """A ``--reducer-filter`` command is attached to the reducer only.

        The mapper entry in the step description carries no pre_filter;
        the sorted, stripped output lines are then compared.
        """
        expected_steps = [
            {
                'type': 'streaming',
                'mapper': {'type': 'script'},
                'reducer': {'type': 'script', 'pre_filter': 'cat -e'},
            },
        ]

        job = FilterJob(['--reducer-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            output = sorted(raw.strip() for raw in runner.stream_output())
            self.assertEqual(output, [b'x$', b'y$', b'z$'])
コード例 #8
0
    def test_pre_filter_failure(self):
        """Regression test for #1524: a pre-filter that matches nothing.

        The job is expected to run and produce no output lines.
        """
        # grep will return exit code 1 because there are no matches
        filter_cmd = 'grep w'
        expected_steps = [
            {
                'type': 'streaming',
                'mapper': {'type': 'script', 'pre_filter': 'grep w'},
            },
        ]

        job = FilterJob(['--mapper-filter', filter_cmd, '--runner=local'])
        job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            output = sorted(raw.strip() for raw in runner.stream_output())
            self.assertEqual(output, [])