def test_attach_to_existing_cluster(self):
    runner1 = DataprocJobRunner(conf_paths=[])
    runner1._launch_cluster()
    cluster_id = runner1._cluster_id

    stdin = BytesIO(b'foo\nbar\n')
    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v',
                           '--cluster-id', cluster_id])
    mr_job.sandbox(stdin=stdin)

    results = []

    with mr_job.make_runner() as runner2:
        runner2.run()

        # generate fake output
        self.put_job_output_parts(runner2, [
            b'1\t"bar"\n1\t"foo"\n2\tnull\n'
        ])

        # Issue 182: don't create the bootstrap script when
        # attaching to another cluster
        self.assertIsNone(runner2._master_bootstrap_script_path)

        results.extend(mr_job.parse_output(runner2.cat_output()))

    self.assertEqual(sorted(results),
                     [(1, 'bar'), (1, 'foo'), (2, None)])
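
# A minimal sketch of the put_job_output_parts() helper the test above
# relies on: it plants fake part-NNNNN files in the runner's output dir
# so cat_output() has something to read. The put_gcs_file() call is a
# hypothetical stand-in for whatever write primitive the mock GCS
# filesystem in this test case actually exposes.
def put_job_output_parts(self, runner, raw_parts):
    """Write each bytestring in *raw_parts* as a fake output part file."""
    assert isinstance(raw_parts, list)

    for part_num, part_data in enumerate(raw_parts):
        part_uri = posixpath.join(runner.get_output_dir(),
                                  'part-%05d' % part_num)
        # hypothetical mock-fs write; substitute the real helper here
        self.put_gcs_file(part_uri, part_data)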
def test_dont_take_down_cluster_on_failure(self):
    runner1 = DataprocJobRunner(conf_paths=[])
    runner1._launch_cluster()
    cluster_id = runner1._cluster_id

    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v',
                           '--cluster-id', cluster_id])
    mr_job.sandbox()

    self.mock_jobs_succeed = False

    with mr_job.make_runner() as runner2:
        self.assertIsInstance(runner2, DataprocJobRunner)

        with logger_disabled('mrjob.dataproc'):
            self.assertRaises(StepFailedException, runner2.run)

        cluster2 = runner2._get_cluster(runner2._cluster_id)
        self.assertEqual(_cluster_state_name(cluster2.status.state),
                         'RUNNING')

    # cluster shouldn't get terminated by cleanup
    cluster1 = runner1._get_cluster(runner1._cluster_id)
    self.assertEqual(_cluster_state_name(cluster1.status.state),
                     'RUNNING')
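
# The RUNNING assertions above go through _cluster_state_name(), which
# maps a numeric Dataproc cluster state to its string name. A minimal
# sketch, assuming the google-cloud-dataproc protobuf enum is available;
# the real mrjob helper may be implemented differently.
def _cluster_state_name(state_value):
    from google.cloud.dataproc_v1.types import ClusterStatus

    # protobuf enum members expose .name ('RUNNING', 'ERROR', ...)
    return ClusterStatus.State(state_value).name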
def _test_instance_groups(self, opts, **kwargs):
    """Run a job with the given option dictionary, and check the
    number of instances, instance type, and (optionally) bid price
    for each instance role.

    Specify expected instance group info like:

    <role>=(num_instances, instance_type, bid_price)
    """
    runner = DataprocJobRunner(**opts)

    fake_bootstrap_script = 'gs://fake-bucket/fake-script.sh'
    runner._master_bootstrap_script_path = fake_bootstrap_script
    runner._upload_mgr.add(fake_bootstrap_script)
    runner._upload_mgr.add(_MAX_HOURS_IDLE_BOOTSTRAP_ACTION_PATH)

    cluster_id = runner._launch_cluster()

    cluster_body = runner._api_cluster_get(cluster_id)
    conf = cluster_body['config']

    role_to_actual = dict(
        master=self._gce_instance_group_summary(conf['masterConfig']),
        core=self._gce_instance_group_summary(conf['workerConfig']),
        task=self._gce_instance_group_summary(
            conf.get('secondaryWorkerConfig')))

    role_to_expected = kwargs.copy()
    role_to_expected.setdefault('master', (1, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault('core', (2, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault(
        'task', self._gce_instance_group_summary(dict()))

    self.assertEqual(role_to_actual, role_to_expected)
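
# The role summaries above compare (num_instances, instance_type) tuples
# built from the Dataproc cluster config. A minimal sketch of that helper,
# assuming the config dict shape ('numInstances', 'machineTypeUri'); an
# empty or missing group summarizes to (0, None), matching the 'task'
# default above.
def _gce_instance_group_summary(self, instance_group):
    if not instance_group:
        return (0, None)

    num_instances = instance_group.get('numInstances', 0)

    # machineTypeUri looks like '.../machineTypes/n1-standard-1';
    # keep only the short type name
    machine_type_uri = instance_group.get('machineTypeUri', '')
    instance_type = machine_type_uri.split('/')[-1] or None

    return (num_instances, instance_type)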