def test_mapper_pre_filter(self):
    # --mapper-filter should be reported as the mapper's pre_filter in the
    # step description, and the job output should be the input lines with
    # the "$" end-of-line marker that the `cat -e` filter is expected to add.
    expected_steps = [
        {
            "type": "streaming",
            "mapper": {"type": "script", "pre_filter": "cat -e"},
        }
    ]
    expected_lines = [b"x$", b"y$", b"z$"]

    job = FilterJob(["--mapper-filter", "cat -e", "--runner=local"])
    job.sandbox(stdin=BytesIO(b"x\ny\nz\n"))

    with job.make_runner() as runner:
        # the step description is checked before the job is run
        self.assertEqual(runner._get_steps(), expected_steps)

        runner.run()

        # output is stripped and sorted, so line order does not matter
        stripped = sorted(raw.strip() for raw in runner.stream_output())
        self.assertEqual(stripped, expected_lines)
def test_mapper_pre_filter(self):
    # Run FilterJob locally with `cat -e` as the mapper pre-filter, then
    # check both the step description and the (order-insensitive) output.
    args = ['--mapper-filter', 'cat -e', '--runner=local']
    stdin = BytesIO(b'x\ny\nz\n')

    job = FilterJob(args)
    job.sandbox(stdin=stdin)

    with job.make_runner() as runner:
        # the filter command must appear under the mapper as 'pre_filter'
        mapper_desc = {'type': 'script', 'pre_filter': 'cat -e'}
        self.assertEqual(
            runner._get_steps(),
            [{'type': 'streaming', 'mapper': mapper_desc}])

        runner.run()

        # strip trailing whitespace/newlines, then sort before comparing
        out_lines = [chunk.strip() for chunk in runner.stream_output()]
        out_lines.sort()
        self.assertEqual(out_lines, [b'x$', b'y$', b'z$'])
def test_mapper_pre_filter(self):
    # Text-mode variant: feed three lines through FilterJob with `cat -e`
    # as the mapper pre-filter and verify the steps and the output lines
    # (compared without regard to order).
    job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
    job.sandbox(stdin=StringIO('x\ny\nz\n'))

    wanted_steps = [{
        'type': 'streaming',
        'mapper': {
            'type': 'script',
            'pre_filter': 'cat -e',
        },
    }]

    with job.make_runner() as runner:
        # step description is inspected before the job actually runs
        self.assertEqual(runner._get_steps(), wanted_steps)

        runner.run()

        stripped = [raw.strip() for raw in runner.stream_output()]
        # assertItemsEqual ignores ordering of the output lines
        self.assertItemsEqual(stripped, ['x$', 'y$', 'z$'])
def test_mapper_pre_filter(self):
    # Verify that `cat -e` given via --mapper-filter is recorded as the
    # mapper's pre_filter, and that the concatenated job output matches
    # the expected text exactly (including line order and newlines).
    source_text = 'x\ny\nz\n'
    job = FilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
    job.sandbox(stdin=StringIO(source_text))

    with job.make_runner() as runner:
        steps = runner._get_steps()
        self.assertEqual(
            steps,
            [{'type': 'streaming',
              'mapper': {'type': 'script', 'pre_filter': 'cat -e'}}])

        runner.run()

        # join all output chunks into one string; no stripping or sorting
        combined = ''.join(runner.stream_output())
        self.assertEqual(combined, 'x$\ny$\nz$\n')
def test_combiner_pre_filter(self):
    # --combiner-filter should attach the command to the combiner (not the
    # mapper) in the step description; the output lines are then compared
    # without regard to order.
    expected_steps = [{
        'type': 'streaming',
        'mapper': {'type': 'script'},
        'combiner': {'type': 'script', 'pre_filter': 'cat -e'},
    }]

    job = FilterJob(['--combiner-filter', 'cat -e', '--runner=local'])
    job.sandbox(stdin=StringIO('x\ny\nz\n'))

    with job.make_runner() as runner:
        # the mapper entry carries no pre_filter key at all
        self.assertEqual(runner._get_steps(), expected_steps)

        runner.run()

        stripped = [raw.strip() for raw in runner.stream_output()]
        self.assertItemsEqual(stripped, ['x$', 'y$', 'z$'])
def test_reducer_pre_filter(self):
    # --reducer-filter should attach the command to the reducer in the
    # step description while the mapper stays a plain script; the sorted,
    # stripped output must match the expected "$"-terminated lines.
    job = FilterJob(['--reducer-filter', 'cat -e', '--runner=local'])
    job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

    plain_mapper = {'type': 'script'}
    filtered_reducer = {'type': 'script', 'pre_filter': 'cat -e'}

    with job.make_runner() as runner:
        # step description is checked before running the job
        self.assertEqual(
            runner._get_steps(),
            [{'type': 'streaming',
              'mapper': plain_mapper,
              'reducer': filtered_reducer}])

        runner.run()

        # sort so the comparison does not depend on output order
        result = sorted(raw.strip() for raw in runner.stream_output())
        self.assertEqual(result, [b'x$', b'y$', b'z$'])
def test_pre_filter_failure(self):
    # regression test for #1524
    #
    # The pre-filter matches none of the input lines; the job is still
    # expected to run and simply produce no output.
    job_input = BytesIO(b'x\ny\nz\n')

    # grep will return exit code 1 because there are no matches
    job = FilterJob(['--mapper-filter', 'grep w', '--runner=local'])
    job.sandbox(stdin=job_input)

    expected_steps = [{
        'type': 'streaming',
        'mapper': {'type': 'script', 'pre_filter': 'grep w'},
    }]

    with job.make_runner() as runner:
        self.assertEqual(runner._get_steps(), expected_steps)

        runner.run()

        # nothing matched, so the stripped and sorted output must be empty
        remaining = sorted(raw.strip() for raw in runner.stream_output())
        self.assertEqual(remaining, [])