def test_dont_take_down_cluster_on_failure(self):
    runner1 = DataprocJobRunner(conf_paths=[])
    runner1._launch_cluster()

    cluster_id = runner1._cluster_id

    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v',
                           '--cluster-id', cluster_id])
    mr_job.sandbox()

    self.mock_jobs_succeed = False

    with mr_job.make_runner() as runner2:
        self.assertIsInstance(runner2, DataprocJobRunner)

        with logger_disabled('mrjob.dataproc'):
            self.assertRaises(StepFailedException, runner2.run)

        cluster2 = runner2._get_cluster(runner2._cluster_id)
        self.assertEqual(_cluster_state_name(cluster2.status.state),
                         'RUNNING')

    # runner1's cluster shouldn't get terminated by runner2's cleanup
    cluster1 = runner1._get_cluster(runner1._cluster_id)
    self.assertEqual(_cluster_state_name(cluster1.status.state),
                     'RUNNING')
def _simulate_progress(self, project_id, region, cluster_name):
    cluster_key = (project_id, region, cluster_name)
    cluster = self.mock_clusters[cluster_key]

    state_name = _cluster_state_name(cluster.status.state)
    if state_name == 'DELETING':
        del self.mock_clusters[cluster_key]
    else:
        # just move from STARTING to RUNNING
        cluster.status.state = _cluster_state_value('RUNNING')
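# For reference, a minimal sketch of what the _cluster_state_name() /
# _cluster_state_value() helpers used throughout these tests might look
# like. This is an assumption, not mrjob's actual implementation; it
# assumes the helpers wrap the protobuf enum
# google.cloud.dataproc_v1.types.ClusterStatus.State. The _sketch_ prefix
# keeps these from shadowing the real imports.

def _sketch_cluster_state_name(state_value):
    """Map a numeric cluster state to its name (e.g. 'RUNNING')."""
    from google.cloud.dataproc_v1.types import ClusterStatus

    return ClusterStatus.State(state_value).name


def _sketch_cluster_state_value(state_name):
    """Map a cluster state name back to its numeric value."""
    from google.cloud.dataproc_v1.types import ClusterStatus

    return ClusterStatus.State[state_name].value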
def test_failed_job(self):
    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v'])
    mr_job.sandbox()

    with no_handlers_for_logger('mrjob.dataproc'):
        stderr = StringIO()
        log_to_stream('mrjob.dataproc', stderr)

        self.mock_jobs_succeed = False

        with mr_job.make_runner() as runner:
            self.assertIsInstance(runner, DataprocJobRunner)

            self.assertRaises(StepFailedException, runner.run)

            self.assertIn(' => ERROR\n', stderr.getvalue())

            cluster_id = runner.get_cluster_id()

    # cluster should get terminated by cleanup
    cluster = runner._get_cluster(cluster_id)
    self.assertEqual(_cluster_state_name(cluster.status.state),
                     'DELETING')
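# test_failed_job() and test_dont_take_down_cluster_on_failure() flip
# self.mock_jobs_succeed to force the mock job to fail. A hypothetical
# sketch of how the mock harness might honor that flag, modeled on
# _simulate_progress() for clusters above (this is an assumption about
# the test harness, not its actual code; DONE and ERROR are real Dataproc
# JobStatus.State names):

def _sketch_simulate_job_progress(mock_jobs_succeed, job):
    """Move a mock job to a terminal state, honoring the success flag."""
    from google.cloud.dataproc_v1.types import JobStatus

    if mock_jobs_succeed:
        job.status.state = JobStatus.State.DONE
    else:
        # a job stuck in ERROR is what makes runner.run() raise
        # StepFailedException and log the ' => ERROR' line asserted above
        job.status.state = JobStatus.State.ERROR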
def test_end_to_end(self):
    # read from STDIN, a local file, and a remote file
    stdin = BytesIO(b'foo\nbar\n')

    local_input_path = os.path.join(self.tmp_dir, 'input')
    with open(local_input_path, 'wb') as local_input_file:
        local_input_file.write(b'bar\nqux\n')

    remote_input_path = 'gs://walrus/data/foo'
    self.put_gcs_multi({
        remote_input_path: b'foo\n'
    })

    mr_job = MRHadoopFormatJob(['-r', 'dataproc', '-v',
                                '-', local_input_path, remote_input_path,
                                '--jobconf', 'x=y'])
    mr_job.sandbox(stdin=stdin)

    results = []

    mock_gcs_fs_snapshot = deepcopy(self.mock_gcs_fs)

    fake_gcs_output = [
        b'1\t"qux"\n2\t"bar"\n',
        b'2\t"foo"\n5\tnull\n']

    with mr_job.make_runner() as runner:
        self.assertIsInstance(runner, DataprocJobRunner)

        # make sure that initializing the runner doesn't affect GCS
        # (Issue #50)
        self.assertEqual(self.mock_gcs_fs, mock_gcs_fs_snapshot)

        runner.run()

        # set up fake output
        self.put_job_output_parts(runner, fake_gcs_output)

        results.extend(mr_job.parse_output(runner.cat_output()))

        local_tmp_dir = runner._get_local_tmp_dir()
        # make sure cleanup hasn't happened yet
        self.assertTrue(os.path.exists(local_tmp_dir))
        self.assertTrue(any(runner.fs.ls(runner.get_output_dir())))

        name_match = _JOB_KEY_RE.match(runner._job_key)
        self.assertEqual(name_match.group(1), 'mr_hadoop_format_job')
        self.assertEqual(name_match.group(2), getpass.getuser())

        # make sure our input and output formats are attached to
        # the correct steps
        jobs = list(runner._list_jobs())
        self.assertEqual(len(jobs), 2)

        # put earliest job first
        jobs.sort(key=lambda j: j.reference.job_id)

        step_0_args = jobs[0].hadoop_job.args
        step_1_args = jobs[1].hadoop_job.args

        self.assertIn('-inputformat', step_0_args)
        self.assertNotIn('-outputformat', step_0_args)
        self.assertNotIn('-inputformat', step_1_args)
        self.assertIn('-outputformat', step_1_args)

        # make sure jobconf got through
        self.assertIn('-D', step_0_args)
        self.assertIn('x=y', step_0_args)
        self.assertIn('-D', step_1_args)
        # job overrides jobconf in step 1
        self.assertIn('x=z', step_1_args)

        # make sure mrjob.zip is created and uploaded as a bootstrap file
        self.assertTrue(os.path.exists(runner._mrjob_zip_path))
        self.assertIn(runner._mrjob_zip_path,
                      runner._upload_mgr.path_to_uri())
        self.assertIn(runner._mrjob_zip_path,
                      runner._bootstrap_dir_mgr.paths())

        cluster_id = runner.get_cluster_id()

    self.assertEqual(sorted(results),
                     [(1, 'qux'), (2, 'bar'), (2, 'foo'), (5, None)])

    # make sure cleanup happens
    self.assertFalse(os.path.exists(local_tmp_dir))

    # we don't clean up the output dir, since we rely on GCS lifecycle
    # management
    output_dirs = list(runner.fs.ls(runner.get_output_dir()))
    self.assertEqual(len(fake_gcs_output), len(output_dirs))

    # cluster should get terminated by cleanup
    cluster = runner._get_cluster(cluster_id)
    self.assertEqual(_cluster_state_name(cluster.status.state),
                     'DELETING')
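# put_job_output_parts(), used above to fake the job's output, presumably
# drops each bytes chunk into the mock GCS filesystem as a Hadoop-style
# part file under the runner's output dir. A hypothetical standalone
# sketch (an assumption about the test harness, not its real code;
# put_blob stands in for whatever the harness uses to write mock bytes):

def _sketch_put_job_output_parts(put_blob, output_dir_uri, parts):
    """Write each chunk of bytes as gs://.../part-NNNNN."""
    for part_num, data in enumerate(parts):
        part_uri = '%s/part-%05d' % (output_dir_uri.rstrip('/'), part_num)
        put_blob(part_uri, data)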