def _test_instance_groups(self, opts, **kwargs):
    """Launch a cluster with the given runner option dict and check the
    number of instances and instance type for each instance role.

    Specify expected instance group info as keyword args like:

        <role>=(num_instances, instance_type)

    where role is one of ``master``, ``core``, or ``task``. Roles not
    given fall back to the defaults (1 master, 2 core workers, and no
    task workers).
    """
    runner = DataprocJobRunner(**opts)

    # _launch_cluster() expects the master bootstrap script to already
    # be registered with the upload manager, so fake one up
    fake_bootstrap_script = 'gs://fake-bucket/fake-script.sh'
    runner._master_bootstrap_script_path = fake_bootstrap_script
    runner._upload_mgr.add(fake_bootstrap_script)
    runner._upload_mgr.add(_MAX_MINS_IDLE_BOOTSTRAP_ACTION_PATH)

    cluster_id = runner._launch_cluster()

    cluster = runner._get_cluster(cluster_id)
    conf = cluster.config

    # summarize what was actually launched, per role
    role_to_actual = dict(
        master=self._gce_instance_group_summary(conf.master_config),
        core=self._gce_instance_group_summary(conf.worker_config),
        task=self._gce_instance_group_summary(conf.secondary_worker_config),
    )

    # fill in defaults for any roles the caller didn't specify
    role_to_expected = kwargs.copy()
    role_to_expected.setdefault('master', (1, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault('core', (2, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault(
        'task', self._gce_instance_group_summary(dict()))

    self.assertEqual(role_to_actual, role_to_expected)
def test_dont_take_down_cluster_on_failure(self):
    """A failed job on a pre-existing cluster must leave it running."""
    # launch a cluster directly, outside of any job
    launcher = DataprocJobRunner(conf_paths=[])
    launcher._launch_cluster()
    cluster_id = launcher._cluster_id

    # set up a job pointed at that cluster, and make its steps fail
    job = MRTwoStepJob(
        ['-r', 'dataproc', '-v', '--cluster-id', cluster_id])
    job.sandbox()

    self.mock_jobs_succeed = False

    with job.make_runner() as job_runner:
        self.assertIsInstance(job_runner, DataprocJobRunner)

        with logger_disabled('mrjob.dataproc'):
            self.assertRaises(StepFailedException, job_runner.run)

        # the failed run shouldn't have touched the cluster
        cluster = job_runner._get_cluster(job_runner._cluster_id)
        self.assertEqual(
            _cluster_state_name(cluster.status.state), 'RUNNING')

    # nor should cleanup on runner exit have terminated it
    cluster = launcher._get_cluster(launcher._cluster_id)
    self.assertEqual(
        _cluster_state_name(cluster.status.state), 'RUNNING')