def assert_new_tmp_bucket(self, location, **runner_kwargs):
    """Assert that if we create a DataprocJobRunner with the given
    keyword args, it'll create a new tmp bucket with the given
    location constraint.
    """
    bucket_cache = self._gcs_client._cache_buckets
    existing_buckets = set(bucket_cache.keys())

    runner = DataprocJobRunner(conf_paths=[], **runner_kwargs)

    bucket_name, path = parse_gcs_uri(runner._cloud_tmp_dir)
    runner._create_fs_tmp_bucket(bucket_name, location=location)

    self.assertTrue(bucket_name.startswith('mrjob-'))
    self.assertNotIn(bucket_name, existing_buckets)
    self.assertEqual(path, 'tmp/')

    current_bucket = bucket_cache[bucket_name]
    self.assertEqual(current_bucket['location'], location)

    # verify that we set up bucket lifecycle rules of 28-day retention
    first_lifecycle_rule = current_bucket['lifecycle']['rule'][0]
    self.assertEqual(first_lifecycle_rule['action'], dict(type='Delete'))
    self.assertEqual(first_lifecycle_rule['condition'],
                     dict(age=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS))
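# A minimal usage sketch for assert_new_tmp_bucket() above (an
# illustration, not part of the original suite): with no runner kwargs,
# the helper still pins the new bucket to whatever location we pass.
def test_default_tmp_bucket(self):
    self.assert_new_tmp_bucket(DEFAULT_GCE_REGION)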
def test_attach_to_existing_cluster(self):
    runner = DataprocJobRunner(conf_paths=[])

    cluster_body = runner.api_client.cluster_create()
    cluster_id = cluster_body['clusterName']

    stdin = BytesIO(b'foo\nbar\n')

    mr_job = MRTwoStepJob(
        ['-r', 'dataproc', '-v', '--cluster-id', cluster_id])
    mr_job.sandbox(stdin=stdin)

    results = []

    with mr_job.make_runner() as runner:
        runner.run()

        # generate fake output
        self.put_job_output_parts(runner, [
            b'1\t"bar"\n1\t"foo"\n2\tnull\n'])

        # Issue 182: don't create the bootstrap script when
        # attaching to another cluster
        self.assertIsNone(runner._master_bootstrap_script_path)

        results.extend(mr_job.parse_output(runner.cat_output()))

    self.assertEqual(sorted(results),
                     [(1, 'bar'), (1, 'foo'), (2, None)])
def test_attach_to_existing_cluster(self):
    runner = DataprocJobRunner(conf_paths=[])

    cluster_body = runner.api_client.cluster_create()
    cluster_id = cluster_body['clusterName']

    stdin = BytesIO(b'foo\nbar\n')

    mr_job = MRTwoStepJob(
        ['-r', 'dataproc', '-v', '--cluster-id', cluster_id])
    mr_job.sandbox(stdin=stdin)

    results = []

    with mr_job.make_runner() as runner:
        runner.run()

        # generate fake output
        self.put_job_output_parts(runner, [
            b'1\t"bar"\n1\t"foo"\n2\tnull\n'])

        # Issue 182: don't create the bootstrap script when
        # attaching to another cluster
        self.assertIsNone(runner._master_bootstrap_script_path)

        for line in runner.stream_output():
            key, value = mr_job.parse_output_line(line)
            results.append((key, value))

    self.assertEqual(sorted(results),
                     [(1, 'bar'), (1, 'foo'), (2, None)])
def test_no_bootstrap_script_if_not_needed(self):
    runner = DataprocJobRunner(conf_paths=[], bootstrap_mrjob=False,
                               bootstrap_python=False)

    runner._add_bootstrap_files_for_upload()
    self.assertIsNone(runner._master_bootstrap_script_path)
def test_dont_take_down_cluster_on_failure(self):
    runner1 = DataprocJobRunner(conf_paths=[])
    runner1._launch_cluster()

    cluster_id = runner1._cluster_id

    mr_job = MRTwoStepJob(
        ['-r', 'dataproc', '-v', '--cluster-id', cluster_id])
    mr_job.sandbox()

    self.mock_jobs_succeed = False

    with mr_job.make_runner() as runner2:
        self.assertIsInstance(runner2, DataprocJobRunner)

        with logger_disabled('mrjob.dataproc'):
            self.assertRaises(StepFailedException, runner2.run)

        cluster2 = runner2._get_cluster(runner2._cluster_id)
        self.assertEqual(_cluster_state_name(cluster2.status.state),
                         'RUNNING')

    # job shouldn't get terminated by cleanup
    cluster1 = runner1._get_cluster(runner1._cluster_id)
    self.assertEqual(_cluster_state_name(cluster1.status.state),
                     'RUNNING')
def _test_instance_groups(self, opts, **kwargs):
    """Run a job with the given option dictionary, and check the number
    of instances, instance type, and optional bid price for each
    instance role.

    Specify expected instance group info like:

    <role>=(num_instances, instance_type, bid_price)
    """
    runner = DataprocJobRunner(**opts)

    fake_bootstrap_script = 'gs://fake-bucket/fake-script.sh'
    runner._master_bootstrap_script_path = fake_bootstrap_script
    runner._upload_mgr.add(fake_bootstrap_script)
    runner._upload_mgr.add(_MAX_MINS_IDLE_BOOTSTRAP_ACTION_PATH)

    cluster_id = runner._launch_cluster()

    cluster = runner._get_cluster(cluster_id)
    conf = cluster.config

    role_to_actual = dict(
        master=self._gce_instance_group_summary(conf.master_config),
        core=self._gce_instance_group_summary(conf.worker_config),
        task=self._gce_instance_group_summary(
            conf.secondary_worker_config),
    )

    role_to_expected = kwargs.copy()
    role_to_expected.setdefault('master', (1, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault('core', (2, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault(
        'task', self._gce_instance_group_summary(dict()))

    self.assertEqual(role_to_actual, role_to_expected)
def _test_instance_groups(self, opts, **kwargs):
    """Run a job with the given option dictionary, and check the number
    of instances, instance type, and optional bid price for each
    instance role.

    Specify expected instance group info like:

    <role>=(num_instances, instance_type, bid_price)
    """
    runner = DataprocJobRunner(**opts)

    fake_bootstrap_script = 'gs://fake-bucket/fake-script.sh'
    runner._master_bootstrap_script_path = fake_bootstrap_script
    runner._upload_mgr.add(fake_bootstrap_script)
    runner._upload_mgr.add(_MAX_HOURS_IDLE_BOOTSTRAP_ACTION_PATH)

    cluster_id = runner._launch_cluster()

    cluster_body = runner._api_cluster_get(cluster_id)
    conf = cluster_body['config']

    role_to_actual = dict(
        master=self._gce_instance_group_summary(conf['masterConfig']),
        core=self._gce_instance_group_summary(conf['workerConfig']),
        task=self._gce_instance_group_summary(
            conf.get('secondaryWorkerConfig')),
    )

    role_to_expected = kwargs.copy()
    role_to_expected.setdefault('master', (1, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault('core', (2, DEFAULT_GCE_INSTANCE))
    role_to_expected.setdefault(
        'task', self._gce_instance_group_summary(dict()))

    self.assertEqual(role_to_actual, role_to_expected)
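# A minimal usage sketch for _test_instance_groups() (an illustration,
# not part of the original suite). The instance_type opt and the
# n1-highcpu-8 machine type are assumptions; expected tuples follow the
# <role>=(num_instances, instance_type, bid_price) form described in
# the docstring above.
def test_explicit_instance_type(self):
    self._test_instance_groups(
        dict(conf_paths=[], instance_type='n1-highcpu-8'),
        master=(1, DEFAULT_GCE_INSTANCE),
        core=(2, 'n1-highcpu-8'))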
def assert_new_tmp_bucket(self, location, **runner_kwargs):
    """Assert that if we create a DataprocJobRunner with the given
    keyword args, it'll create a new tmp bucket with the given
    location constraint.
    """
    existing_buckets = set(self.mock_gcs_fs)

    runner = DataprocJobRunner(conf_paths=[], **runner_kwargs)

    bucket_name, path = parse_gcs_uri(runner._cloud_tmp_dir)
    runner._create_fs_tmp_bucket(bucket_name, location=location)

    self.assertTrue(bucket_name.startswith('mrjob-'))
    self.assertNotIn(bucket_name, existing_buckets)
    self.assertEqual(path, 'tmp/')

    current_bucket = runner.fs.get_bucket(bucket_name)
    self.assertEqual(current_bucket.location, location.upper())

    # verify that we set up bucket lifecycle rules of 28-day retention
    first_lifecycle_rule = current_bucket.lifecycle_rules[0]
    self.assertEqual(first_lifecycle_rule['action'], dict(type='Delete'))
    self.assertEqual(first_lifecycle_rule['condition'],
                     dict(age=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS))
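# A hedged usage sketch (the region literal is illustrative): GCS
# reports bucket locations uppercased, which is why the helper above
# compares against location.upper() even when we pass lowercase.
def test_tmp_bucket_with_explicit_region(self):
    self.assert_new_tmp_bucket('europe-west1', region='europe-west1')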
def setUp(self):
    super(UpdateStepInterpretationTestCase, self).setUp()

    self.runner = DataprocJobRunner()

    self.get_lines = self.start(patch(
        'mrjob.dataproc.DataprocJobRunner._get_new_driver_output_lines',
        return_value=[]))

    self.step_interpretation = {}
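# A small illustrative helper (an assumption, not part of the original
# test case): tests can simulate fresh driver output by reassigning the
# patched mock's return value before exercising the runner.
def _feed_driver_output(self, *lines):
    self.get_lines.return_value = list(lines)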
def test_cross_region_explicit_tmp_uri(self):
    self._make_bucket('walrus', EU_WEST_GCE_REGION)

    runner = DataprocJobRunner(region=US_EAST_GCE_REGION,
                               cloud_tmp_dir='gs://walrus/tmp/')

    self.assertEqual(runner._cloud_tmp_dir, 'gs://walrus/tmp/')

    # tmp bucket shouldn't influence region (it did in 0.4.x)
    self.assertEqual(runner._region(), US_EAST_GCE_REGION)
def _test_mode(self, mode):
    r = DataprocJobRunner(conf_paths=[])

    with patch.multiple(r,
                        _cleanup_cluster=mock.DEFAULT,
                        _cleanup_job=mock.DEFAULT,
                        _cleanup_local_tmp=mock.DEFAULT,
                        _cleanup_logs=mock.DEFAULT,
                        _cleanup_cloud_tmp=mock.DEFAULT) as mock_dict:
        r.cleanup(mode=mode)
        yield mock_dict
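# A minimal sketch of how _test_mode() might be consumed, assuming it
# is wrapped with contextlib.contextmanager in the full test case; the
# exact expectations per mode depend on how cleanup() maps modes to
# the _cleanup_* helpers.
def test_cleanup_none(self):
    with self._test_mode('NONE') as mock_dict:
        self.assertFalse(mock_dict['_cleanup_cloud_tmp'].called)
        self.assertFalse(mock_dict['_cleanup_logs'].called)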
def test_bootstrap_mrjob_uses_python_bin(self):
    # use all the bootstrap options
    runner = DataprocJobRunner(conf_paths=[],
                               bootstrap_mrjob=True,
                               python_bin=['anaconda'])

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)

    with open(runner._master_bootstrap_script_path, 'r') as f:
        content = f.read()

    self.assertIn('sudo anaconda -m compileall -q -f', content)
def test_bootstrap_script_respects_sh_bin(self):
    runner = DataprocJobRunner(conf_paths=[])

    self.start(patch('mrjob.dataproc.DataprocJobRunner._sh_bin',
                     return_value=['/bin/bash']))

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)

    with open(runner._master_bootstrap_script_path) as f:
        lines = list(f)

    self.assertEqual(lines[0].strip(), '#!/bin/bash')
def test_bootstrap_script_respects_sh_pre_commands(self):
    runner = DataprocJobRunner(conf_paths=[])

    self.start(patch('mrjob.dataproc.DataprocJobRunner._sh_pre_commands',
                     return_value=['garply', 'quux']))

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)

    with open(runner._master_bootstrap_script_path) as f:
        lines = list(f)

    self.assertEqual([line.strip() for line in lines[1:3]],
                     ['garply', 'quux'])
def test_usr_bin_env(self):
    runner = DataprocJobRunner(conf_paths=[],
                               bootstrap_mrjob=True,
                               sh_bin='bash -e')

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)
    self.assertTrue(os.path.exists(runner._master_bootstrap_script_path))

    with open(runner._master_bootstrap_script_path) as f:
        lines = [line.rstrip() for line in f]

    self.assertEqual(lines[0], '#!/usr/bin/env bash -e')
def make_runner(self):
    """Make a runner based on command-line arguments, so we can
    launch this job on EMR, on Hadoop, or locally.

    :rtype: :py:class:`mrjob.runner.MRJobRunner`
    """
    if self.options.runner == 'emr':
        # avoid requiring dependencies (such as boto3) for other runners
        from mrjob.emr import EMRJobRunner
        return EMRJobRunner(**self.emr_job_runner_kwargs())

    elif self.options.runner == 'dataproc':
        from mrjob.dataproc import DataprocJobRunner
        return DataprocJobRunner(**self.dataproc_job_runner_kwargs())

    elif self.options.runner == 'hadoop':
        from mrjob.hadoop import HadoopJobRunner
        return HadoopJobRunner(**self.hadoop_job_runner_kwargs())

    elif self.options.runner == 'inline':
        raise ValueError("inline is not supported in the multi-lingual"
                         " launcher.")

    else:
        # run locally by default
        from mrjob.local import LocalMRJobRunner
        return LocalMRJobRunner(**self.local_job_runner_kwargs())
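# A minimal usage sketch (a hedged illustration, not part of the
# launcher): the -r flag is what selects a branch in make_runner()
# above. The job class and input path below are placeholders.
#
#     job = MRYourJob(['-r', 'dataproc', 'input.txt'])
#     with job.make_runner() as runner:
#         runner.run()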
def test_dont_take_down_cluster_on_failure(self):
    runner = DataprocJobRunner(conf_paths=[])

    cluster_body = runner.api_client.cluster_create()
    cluster_id = cluster_body['clusterName']

    mr_job = MRTwoStepJob(
        ['-r', 'dataproc', '-v', '--cluster-id', cluster_id])
    mr_job.sandbox()

    self._dataproc_client.job_get_advances_states = collections.deque(
        ['SETUP_DONE', 'RUNNING', 'ERROR'])

    with mr_job.make_runner() as runner:
        self.assertIsInstance(runner, DataprocJobRunner)

        with logger_disabled('mrjob.dataproc'):
            self.assertRaises(StepFailedException, runner.run)

        cluster = self.get_cluster_from_runner(runner, cluster_id)
        cluster_state = self._dataproc_client.get_state(cluster)
        self.assertEqual(cluster_state, 'RUNNING')

    # job shouldn't get terminated by cleanup
    cluster = (
        self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id])
    cluster_state = self._dataproc_client.get_state(cluster)
    self.assertEqual(cluster_state, 'RUNNING')
def test_zone_beats_region(self):
    runner = DataprocJobRunner(region='europe-west1',
                               zone='europe-west1-a')

    self.assertTrue(self.log.warning.called)
    self.assertEqual(runner._opts['region'], None)
    self.assertEqual(runner._opts['zone'], 'europe-west1-a')
def test_gcs_cat(self):
    self.put_gcs_multi({
        'gs://walrus/one': b'one_text',
        'gs://walrus/two': b'two_text',
        'gs://walrus/three': b'three_text',
    })

    runner = DataprocJobRunner(cloud_tmp_dir='gs://walrus/tmp',
                               conf_paths=[])

    self.assertEqual(list(runner.fs.cat('gs://walrus/one')),
                     [b'one_text'])
def test_command_line_beats_config(self):
    ZONE_CONF = dict(runners=dict(dataproc=dict(zone='us-west1-a')))

    with mrjob_conf_patcher(ZONE_CONF):
        runner = DataprocJobRunner(region='europe-west1')

        # region takes precedence because it was set on the command line
        self.assertEqual(runner._opts['region'], 'europe-west1')
        self.assertEqual(runner._opts['zone'], None)

        # only a problem if you set region and zone
        # in the same config
        self.assertFalse(self.log.warning.called)
def test_create_master_bootstrap_script(self):
    # create a fake src file to upload
    foo_py_path = os.path.join(self.tmp_dir, 'foo.py')
    with open(foo_py_path, 'w'):
        pass

    # use all the bootstrap options
    runner = DataprocJobRunner(
        conf_paths=[],
        bootstrap=[
            PYTHON_BIN + ' ' + foo_py_path + '#bar.py',
            'gs://walrus/scripts/ohnoes.sh#',
            # bootstrap_cmds
            'echo "Hi!"',
            'true',
            'ls',
            # bootstrap_scripts
            'speedups.sh',
            '/tmp/s.sh',
        ],
        bootstrap_mrjob=True)

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)
    self.assertTrue(os.path.exists(runner._master_bootstrap_script_path))

    with open(runner._master_bootstrap_script_path) as f:
        lines = [line.rstrip() for line in f]

    self.assertEqual(lines[0], '#!/bin/sh -ex')

    # check that PWD gets stored
    self.assertIn('__mrjob_PWD=$PWD', lines)

    def assertScriptDownloads(path, name=None):
        uri = runner._upload_mgr.uri(path)
        name = runner._bootstrap_dir_mgr.name('file', path, name=name)

        self.assertIn(
            'hadoop fs -copyToLocal %s $__mrjob_PWD/%s' % (uri, name),
            lines)
        self.assertIn('chmod a+x $__mrjob_PWD/%s' % (name,), lines)

    # check that files get downloaded
    assertScriptDownloads(foo_py_path, 'bar.py')
    assertScriptDownloads('gs://walrus/scripts/ohnoes.sh')
    assertScriptDownloads(runner._mrjob_tar_gz_path)

    # check that scripts get run

    # bootstrap
    self.assertIn(PYTHON_BIN + ' $__mrjob_PWD/bar.py', lines)
    self.assertIn('$__mrjob_PWD/ohnoes.sh', lines)
    self.assertIn('echo "Hi!"', lines)
    self.assertIn('true', lines)
    self.assertIn('ls', lines)
    self.assertIn('speedups.sh', lines)
    self.assertIn('/tmp/s.sh', lines)

    # bootstrap_mrjob
    mrjob_tar_gz_name = runner._bootstrap_dir_mgr.name(
        'file', runner._mrjob_tar_gz_path)
    self.assertIn(
        "__mrjob_PYTHON_LIB=$(" + PYTHON_BIN + " -c 'from"
        " distutils.sysconfig import get_python_lib;"
        " print(get_python_lib())')",
        lines)
    self.assertIn(
        'sudo tar xfz $__mrjob_PWD/' + mrjob_tar_gz_name +
        ' -C $__mrjob_PYTHON_LIB',
        lines)
    self.assertIn(
        'sudo ' + PYTHON_BIN + ' -m compileall -f'
        ' $__mrjob_PYTHON_LIB/mrjob && true',
        lines)

    # bootstrap_python
    if PY2:
        self.assertIn(
            'sudo apt-get install -y python-pip python-dev',
            lines)
    else:
        self.assertIn(
            'sudo apt-get install -y python3 python3-pip python3-dev',
            lines)
def test_create_master_bootstrap_script(self):
    # create a fake src file to upload
    foo_py_path = os.path.join(self.tmp_dir, 'foo.py')
    with open(foo_py_path, 'w'):
        pass

    runner = DataprocJobRunner(
        conf_paths=[],
        bootstrap=[
            PYTHON_BIN + ' ' + foo_py_path + '#bar.py',
            'gs://walrus/scripts/ohnoes.sh#',
            'echo "Hi!"',
            'true',
            'ls',
            'speedups.sh',
            '/tmp/s.sh',
        ],
        bootstrap_mrjob=True)

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)
    self.assertTrue(os.path.exists(runner._master_bootstrap_script_path))

    with open(runner._master_bootstrap_script_path) as f:
        lines = [line.rstrip() for line in f]

    self.assertEqual(lines[0], '#!/bin/sh -ex')

    # check that PWD gets stored
    self.assertIn('__mrjob_PWD=$PWD', lines)

    def assertScriptDownloads(path, name=None):
        uri = runner._upload_mgr.uri(path)
        name = runner._bootstrap_dir_mgr.name('file', path, name=name)

        self.assertIn(
            ' hadoop fs -copyToLocal %s $__mrjob_PWD/%s' % (uri, name),
            lines)
        self.assertIn(' chmod u+rx $__mrjob_PWD/%s' % (name,), lines)

    # check that files get downloaded
    assertScriptDownloads(foo_py_path, 'bar.py')
    assertScriptDownloads('gs://walrus/scripts/ohnoes.sh')
    assertScriptDownloads(runner._mrjob_zip_path)

    # check that scripts get run

    # bootstrap
    self.assertIn(' ' + PYTHON_BIN + ' $__mrjob_PWD/bar.py', lines)
    self.assertIn(' $__mrjob_PWD/ohnoes.sh', lines)
    self.assertIn(' echo "Hi!"', lines)
    self.assertIn(' true', lines)
    self.assertIn(' ls', lines)
    self.assertIn(' speedups.sh', lines)
    self.assertIn(' /tmp/s.sh', lines)

    # bootstrap_mrjob
    mrjob_zip_name = runner._bootstrap_dir_mgr.name(
        'file', runner._mrjob_zip_path)
    self.assertIn(
        " __mrjob_PYTHON_LIB=$(" + PYTHON_BIN + " -c 'from"
        " distutils.sysconfig import get_python_lib;"
        " print(get_python_lib())')",
        lines)
    self.assertIn(
        ' sudo unzip $__mrjob_PWD/' + mrjob_zip_name +
        ' -d $__mrjob_PYTHON_LIB',
        lines)
    self.assertIn(
        ' sudo ' + PYTHON_BIN + ' -m compileall -q -f'
        ' $__mrjob_PYTHON_LIB/mrjob && true',
        lines)

    # bootstrap_python
    if PY2:
        self.assertIn(
            ' sudo apt-get install -y python-pip python-dev',
            lines)
    else:
        self.assertIn(
            ' sudo apt-get install -y python3 python3-pip python3-dev',
            lines)
def test_explicit_tmp_uri(self):
    self._make_bucket('walrus', US_EAST_GCE_REGION)

    runner = DataprocJobRunner(cloud_tmp_dir='gs://walrus/tmp/')

    self.assertEqual(runner._cloud_tmp_dir, 'gs://walrus/tmp/')
def test_reuse_mrjob_bucket_in_same_region(self):
    self._make_bucket('mrjob-1', DEFAULT_GCE_REGION)

    runner = DataprocJobRunner()

    self.assertEqual(runner._cloud_tmp_dir, 'gs://mrjob-1/tmp/')
def test_cannot_be_empty(self):
    runner = DataprocJobRunner(region='')

    self.assertEqual(runner._gce_region, 'us-central1')
def _quick_runner(self):
    r = DataprocJobRunner(conf_paths=[])
    r._cluster_id = 'j-ESSEOWENS'
    r._ran_job = False
    return r
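# A minimal sketch of how _quick_runner() might be used in a cleanup
# test; _api_cluster_delete is an assumed method name, patched here
# only to observe that cleanup touches the pre-set cluster.
def test_cleanup_cluster(self):
    r = self._quick_runner()
    with patch.object(r, '_api_cluster_delete') as m:
        r._cleanup_cluster()
        self.assertTrue(m.called)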
def test_default(self):
    runner = DataprocJobRunner()

    self.assertEqual(runner._opts['region'], 'us-west1')
    self.assertEqual(runner._opts['zone'], None)
    self.assertFalse(self.log.warning.called)
def test_explicit_zone(self):
    runner = DataprocJobRunner(zone='europe-west1-a')

    self.assertEqual(runner._opts['zone'], 'europe-west1-a')
def test_region_from_environment(self):
    with save_current_environment():
        os.environ['CLOUDSDK_COMPUTE_REGION'] = 'us-east1'
        runner = DataprocJobRunner()

    self.assertEqual(runner._opts['region'], 'us-east1')
def test_explicit_region_beats_environment(self):
    with save_current_environment():
        os.environ['CLOUDSDK_COMPUTE_REGION'] = 'us-east1'
        runner = DataprocJobRunner(region='europe-west1')

    self.assertEqual(runner._opts['region'], 'europe-west1')
def test_zone_from_environment(self):
    with save_current_environment():
        os.environ['CLOUDSDK_COMPUTE_ZONE'] = 'us-west1-b'
        runner = DataprocJobRunner()

    self.assertEqual(runner._opts['zone'], 'us-west1-b')
def test_create_master_bootstrap_script(self):
    # create a fake src file to upload
    foo_py_path = os.path.join(self.tmp_dir, 'foo.py')
    with open(foo_py_path, 'w'):
        pass

    # use all the bootstrap options
    runner = DataprocJobRunner(
        conf_paths=[],
        bootstrap=[
            PYTHON_BIN + ' ' + foo_py_path + '#bar.py',
            'gs://walrus/scripts/ohnoes.sh#',
            # bootstrap_cmds
            'echo "Hi!"',
            'true',
            'ls',
            # bootstrap_scripts
            'speedups.sh',
            '/tmp/s.sh',
        ],
        bootstrap_mrjob=True,
    )

    runner._add_bootstrap_files_for_upload()

    self.assertIsNotNone(runner._master_bootstrap_script_path)
    self.assertTrue(os.path.exists(runner._master_bootstrap_script_path))

    with open(runner._master_bootstrap_script_path) as f:
        lines = [line.rstrip() for line in f]

    self.assertEqual(lines[0], '#!/bin/sh -ex')

    # check that PWD gets stored
    self.assertIn('__mrjob_PWD=$PWD', lines)

    def assertScriptDownloads(path, name=None):
        uri = runner._upload_mgr.uri(path)
        name = runner._bootstrap_dir_mgr.name('file', path, name=name)

        self.assertIn(
            'hadoop fs -copyToLocal %s $__mrjob_PWD/%s' % (uri, name),
            lines)
        self.assertIn('chmod a+x $__mrjob_PWD/%s' % (name,), lines)

    # check that files get downloaded
    assertScriptDownloads(foo_py_path, 'bar.py')
    assertScriptDownloads('gs://walrus/scripts/ohnoes.sh')
    assertScriptDownloads(runner._mrjob_zip_path)

    # check that scripts get run

    # bootstrap
    self.assertIn(PYTHON_BIN + ' $__mrjob_PWD/bar.py', lines)
    self.assertIn('$__mrjob_PWD/ohnoes.sh', lines)
    self.assertIn('echo "Hi!"', lines)
    self.assertIn('true', lines)
    self.assertIn('ls', lines)
    self.assertIn('speedups.sh', lines)
    self.assertIn('/tmp/s.sh', lines)

    # bootstrap_mrjob
    mrjob_zip_name = runner._bootstrap_dir_mgr.name(
        'file', runner._mrjob_zip_path)
    self.assertIn(
        "__mrjob_PYTHON_LIB=$(" + PYTHON_BIN + " -c 'from"
        " distutils.sysconfig import get_python_lib;"
        " print(get_python_lib())')",
        lines)
    self.assertIn(
        'sudo unzip $__mrjob_PWD/' + mrjob_zip_name +
        ' -d $__mrjob_PYTHON_LIB',
        lines)
    self.assertIn(
        'sudo ' + PYTHON_BIN + ' -m compileall -q -f'
        ' $__mrjob_PYTHON_LIB/mrjob && true',
        lines)

    # bootstrap_python
    if PY2:
        self.assertIn(
            'sudo apt-get install -y python-pip python-dev',
            lines)
    else:
        self.assertIn(
            'sudo apt-get install -y python3 python3-pip python3-dev',
            lines)
def test_default(self):
    runner = DataprocJobRunner()

    self.assertEqual(runner._gce_region, 'us-central1')
def test_explicit_region(self):
    runner = DataprocJobRunner(region='europe-west1')

    self.assertEqual(runner._gce_region, 'europe-west1')
def test_explicit_zone_beats_environment(self):
    with save_current_environment():
        os.environ['CLOUDSDK_COMPUTE_ZONE'] = 'us-west1-b'
        runner = DataprocJobRunner(zone='europe-west1-a')

    self.assertEqual(runner._opts['zone'], 'europe-west1-a')