def test_multi_step_counters(self):
    # Feed two lines to MRCountingJob on a sandboxed stdin using the
    # local runner, then compare the runner's counters against three
    # identical entries (presumably one per step of the job).
    job = MRCountingJob(["-r", "local", "-"])
    job.sandbox(stdin=BytesIO(b"foo\nbar\n"))

    expected_counters = [{"group": {"counter_name": 2}} for _ in range(3)]

    with job.make_runner() as runner:
        runner.run()

        self.assertEqual(runner.counters(), expected_counters)
def test_multi_step_counters(self):
    # Local-runner run of MRCountingJob; the trailing '-' argument is
    # paired with a sandboxed stdin holding two lines.
    two_lines = StringIO('foo\nbar\n')
    counting_job = MRCountingJob(['-r', 'local', '-'])
    counting_job.sandbox(stdin=two_lines)

    with counting_job.make_runner() as runner:
        runner.run()
        observed = runner.counters()

        # the same counter dict is expected three times over
        self.assertEqual(
            observed,
            [
                {'group': {'counter_name': 2}},
                {'group': {'counter_name': 2}},
                {'group': {'counter_name': 2}},
            ],
        )
def test_multi_step_counters(self):
    # Input comes only from the sandboxed stdin; the job is built with
    # the arguments "-c <self.mrjob_conf_path> -".
    mr_job = MRCountingJob(['-c', self.mrjob_conf_path, '-'])
    mr_job.sandbox(stdin=StringIO('foo\nbar\n'))

    with mr_job.make_runner() as runner:
        runner.run()

        # Drain the runner's output and parse every line into a
        # (key, value) pair. The pairs are collected but never asserted
        # on; only the counters are checked below.
        results = [
            (key, value)
            for key, value in (
                mr_job.parse_output_line(line)
                for line in runner.stream_output()
            )
        ]

        expected_counters = [{'group': {'counter_name': 2}}
                             for _ in range(3)]
        assert_equal(runner._counters, expected_counters)
def test_local_output_dir_and_step_output_dir(self):
    # Verify the input and output URIs the local runner assigns to each
    # step when both --output-dir and --step-output-dir are supplied.
    first_input = self.makefile('input1')
    second_input = self.makefile('input2')
    final_dir = self.makedirs('output')
    intermediate_dir = self.makedirs('step_output')

    # this has three steps, which lets us test step numbering
    args = ['-r', 'local']
    args += ['--output-dir', final_dir]
    args += ['--step-output-dir', intermediate_dir]
    args += [first_input, second_input]

    job = MRCountingJob(args)
    job.sandbox()

    with job.make_runner() as runner:
        self.assertEqual(runner._num_steps(), 3)

        # step 0 reads the two original input files, as file:/// URIs
        step0_inputs = runner._step_input_uris(0)
        step0_basenames = [os.path.basename(uri) for uri in step0_inputs]
        step0_prefixes = [uri[:8] for uri in step0_inputs]
        self.assertEqual(step0_basenames, ['input1', 'input2'])
        self.assertEqual(step0_prefixes, ['file:///', 'file:///'])

        # step 0 writes to a numbered subdirectory of the step output dir
        step0_output = runner._step_output_uri(0)
        expected_step0_output = to_uri(
            os.path.join(intermediate_dir, '0000'))
        self.assertEqual(step0_output, expected_step0_output)

        # step 1 consumes step 0's output and writes the next numbered dir
        step1_inputs = runner._step_input_uris(1)
        self.assertEqual(step1_inputs, [step0_output])

        step1_output = runner._step_output_uri(1)
        expected_step1_output = to_uri(
            os.path.join(intermediate_dir, '0001'))
        self.assertEqual(step1_output, expected_step1_output)

        # the last step consumes step 1's output and writes to --output-dir
        step2_inputs = runner._step_input_uris(2)
        self.assertEqual(step2_inputs, [step1_output])

        step2_output = runner._step_output_uri(2)
        self.assertEqual(step2_output, to_uri(final_dir))
def test_multi_step_counters(self):
    # Run MRCountingJob with the local runner on two lines of sandboxed
    # stdin and check that counters() reports the same dict three times.
    job_args = ['-r', 'local', '-']
    fake_stdin = StringIO('foo\nbar\n')

    mr_job = MRCountingJob(job_args)
    mr_job.sandbox(stdin=fake_stdin)

    single_entry = {'group': {'counter_name': 2}}

    with mr_job.make_runner() as runner:
        runner.run()

        self.assertEqual(runner.counters(), [single_entry] * 3)
def test_local_output_dir_and_step_output_dir(self):
    # Walk the steps of a local-runner job in order, checking that each
    # step reads the previous step's output and writes where expected.
    input1_path = self.makefile('input1')
    input2_path = self.makefile('input2')
    output_dir = self.makedirs('output')
    step_output_dir = self.makedirs('step_output')

    # this has three steps, which lets us test step numbering
    job = MRCountingJob([
        '-r', 'local',
        '--output-dir', output_dir,
        '--step-output-dir', step_output_dir,
        input1_path, input2_path,
    ])
    job.sandbox()

    # Local paths each step is expected to write to: numbered
    # subdirectories of the step output dir, then the final output dir.
    expected_output_paths = [
        os.path.join(step_output_dir, '0000'),
        os.path.join(step_output_dir, '0001'),
        output_dir,
    ]

    with job.make_runner() as runner:
        self.assertEqual(runner._num_steps(), 3)

        previous_output_uri = None

        for step_num, expected_path in enumerate(expected_output_paths):
            input_uris = runner._step_input_uris(step_num)

            if step_num == 0:
                # the first step reads the original files as file:/// URIs
                self.assertEqual(
                    [os.path.basename(uri) for uri in input_uris],
                    ['input1', 'input2'])
                self.assertEqual(
                    [uri[:8] for uri in input_uris],
                    ['file:///', 'file:///'])
            else:
                # later steps read exactly what the prior step wrote
                self.assertEqual(input_uris, [previous_output_uri])

            output_uri = runner._step_output_uri(step_num)
            self.assertEqual(output_uri, to_uri(expected_path))

            previous_output_uri = output_uri
def test_output_dir_and_step_output_dir(self):
    # Check the per-step input and output URIs of a hadoop-runner job
    # given explicit --output-dir and --step-output-dir URIs. The job is
    # never run; only the runner's URI bookkeeping is inspected.
    local_input_a = self.makefile('input1')
    local_input_b = self.makefile('input2')

    # this has three steps, which lets us test step numbering
    hadoop_args = [
        '-r', 'hadoop',
        '--hadoop-bin', 'false',  # shouldn't run; just in case
        '--output-dir', 'hdfs:///tmp/output',
        '--step-output-dir', 'hdfs://tmp/step-output',
        local_input_a,
        local_input_b,
    ]
    job = MRCountingJob(hadoop_args)
    job.sandbox()

    with job.make_runner() as runner:
        self.assertEqual(runner._num_steps(), 3)

        # called before asking for step URIs, as in the original ordering
        runner._add_job_files_for_upload()

        # first step: reads the two input files
        inputs_for_first = runner._step_input_uris(0)
        self.assertEqual(
            [os.path.basename(uri) for uri in inputs_for_first],
            ['input1', 'input2'])

        output_of_first = runner._step_output_uri(0)
        self.assertEqual(output_of_first, 'hdfs://tmp/step-output/0000')

        # middle step: chained onto the first step's output
        inputs_for_second = runner._step_input_uris(1)
        self.assertEqual(inputs_for_second, [output_of_first])

        output_of_second = runner._step_output_uri(1)
        self.assertEqual(output_of_second, 'hdfs://tmp/step-output/0001')

        # final step: chained onto the middle step, writes to --output-dir
        inputs_for_third = runner._step_input_uris(2)
        self.assertEqual(inputs_for_third, [output_of_second])

        output_of_third = runner._step_output_uri(2)
        self.assertEqual(output_of_third, 'hdfs:///tmp/output')