def test_no_temp_location(self):
    """Staging must fail with a clear error when --temp_location is unset."""
    staging_dir = tempfile.mkdtemp()
    pipeline_options = PipelineOptions()
    gcloud_options = pipeline_options.view_as(GoogleCloudOptions)
    gcloud_options.staging_location = staging_dir
    self.update_options(pipeline_options)
    gcloud_options.temp_location = None
    with self.assertRaises(RuntimeError) as ctx:
        dependency.stage_job_resources(pipeline_options)
    self.assertEqual('The --temp_location option must be specified.',
                     ctx.exception.message)
def test_requirements_file_not_present(self):
    """A nonexistent --requirements_file must raise a pointed RuntimeError."""
    staging_dir = tempfile.mkdtemp()
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).requirements_file = 'nosuchfile'
    # Only the call under test lives inside assertRaises; the original put
    # option setup in the block too, so a setup failure could have satisfied
    # the expectation by accident.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The file %s cannot be found. It was specified in the '
        '--requirements_file command line option.' % 'nosuchfile')
def test_sdk_location_local_not_present(self):
    """A bogus local --sdk_location must raise a pointed RuntimeError."""
    staging_dir = tempfile.mkdtemp()
    sdk_location = 'nosuchdir'
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).sdk_location = sdk_location
    # Keep only the call under test inside assertRaises so that a failure
    # during option setup cannot masquerade as the expected error.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        'The file "%s" cannot be found. Its '
        'location was specified by the --sdk_location command-line option.' %
        sdk_location,
        cm.exception.message)
def test_with_extra_packages_missing_files(self):
    """A missing extra package must raise a pointed RuntimeError."""
    staging_dir = tempfile.mkdtemp()
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = ['nosuchfile.tar.gz']
    # Only the staging call belongs inside assertRaises; setup errors must
    # not be able to satisfy the expectation.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The file %s cannot be found. It was specified in the '
        '--extra_packages command line option.' % 'nosuchfile.tar.gz')
def test_with_extra_packages_missing_files(self):
    """A missing extra package must raise a pointed RuntimeError."""
    staging_dir = tempfile.mkdtemp()
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = ['nosuchfile.tar.gz']
    # Setup moved out of assertRaises: only the operation expected to raise
    # should be in the block, so setup failures surface as real failures.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The file %s cannot be found. It was specified in the '
        '--extra_packages command line option.' % 'nosuchfile.tar.gz')
def test_with_setup_file(self):
    """stage_job_resources builds the workflow tarball from --setup_file."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    setup_path = os.path.join(source_dir, 'setup.py')
    self.create_temp_file(setup_path, 'notused')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).setup_file = setup_path
    # We replace the build setup command because a realistic one would
    # require the setuptools package to be installed. Note that we can't
    # use "touch" here to create the expected output tarball file, since
    # touch is not available on Windows, so we invoke python to produce
    # equivalent behavior.
    fake_build_args = [
        'python', '-c', 'open(__import__("sys").argv[1], "a")',
        os.path.join(source_dir, dependency.WORKFLOW_TARBALL_FILE)]
    staged = dependency.stage_job_resources(
        options, build_setup_args=fake_build_args, temp_dir=source_dir)
    self.assertEqual(
        [dependency.WORKFLOW_TARBALL_FILE, names.PICKLED_MAIN_SESSION_FILE],
        staged)
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, dependency.WORKFLOW_TARBALL_FILE)))
def test_with_setup_file(self):
    """A valid setup.py produces a staged workflow tarball."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(os.path.join(source_dir, 'setup.py'), 'notused')
    pipeline_options = PipelineOptions()
    pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(pipeline_options)
    pipeline_options.view_as(SetupOptions).setup_file = os.path.join(
        source_dir, 'setup.py')
    expected_resources = [
        dependency.WORKFLOW_TARBALL_FILE, names.PICKLED_MAIN_SESSION_FILE]
    # We replace the build setup command because a realistic one would
    # require the setuptools package to be installed. Note that we can't
    # use "touch" here to create the expected output tarball file, since
    # touch is not available on Windows, so we invoke python to produce
    # equivalent behavior.
    self.assertEqual(
        expected_resources,
        dependency.stage_job_resources(
            pipeline_options,
            build_setup_args=[
                'python', '-c', 'open(__import__("sys").argv[1], "a")',
                os.path.join(source_dir, dependency.WORKFLOW_TARBALL_FILE)],
            temp_dir=source_dir))
    tarball = os.path.join(staging_dir, dependency.WORKFLOW_TARBALL_FILE)
    self.assertTrue(os.path.isfile(tarball))
def create_job(self, job):
    """Submits for remote execution a job described by the workflow proto."""
    # Stage job resources and add an environment proto with their paths.
    resources = dependency.stage_job_resources(
        job.options, file_copy=self._gcs_file_copy)
    job.proto.environment = Environment(
        packages=resources, options=job.options,
        environment_version=self.environment_version).proto
    # TODO(silviuc): Remove the debug logging eventually.
    logging.info('JOB: %s', job)
    # Build the Create request from the (already staged) job proto.
    request = dataflow.DataflowProjectsJobsCreateRequest()
    request.projectId = self.google_cloud_options.project
    request.job = job.proto
    try:
        response = self._client.projects_jobs.Create(request)
    except exceptions.BadStatusCodeError as e:
        # Log both the status code and full server details, then re-raise so
        # the caller still sees the failure.
        logging.error('HTTP status %d trying to create job'
                      ' at dataflow service endpoint %s',
                      e.response.status,
                      self.google_cloud_options.dataflow_endpoint)
        logging.fatal('details of server error: %s', e)
        raise
    logging.info('Create job: %s', response)
    # The response is a Job proto with the id for the new job.
    logging.info('Created job with id: [%s]', response.id)
    # NOTE(review): 'accesss' typo below is in the runtime log message;
    # left untouched here since this edit only adds comments.
    logging.info(
        'To accesss the Dataflow monitoring console, please navigate to '
        'https://console.developers.google.com/project/%s/dataflow/job/%s',
        self.google_cloud_options.project, response.id)
    # Show the whitelisting warning. Projects should be whitelisted prior to
    # submitting jobs to Google Cloud Dataflow service. Please see documentation
    # for more information.
    #
    # TODO(altay): Remove once the whitelisting requirements are lifted.
    # NOTE(review): the padding inside the banner line below was garbled in
    # the source; reconstructed as spaces to the 63-char box width — confirm
    # against the original file.
    logging.warning(
        '\n\n***************************************************************\n'
        '* WARNING: PROJECT WHITELISTING REQUIRED.                     *'
        '\n***************************************************************\n'
        'Please make sure your project is whitelisted for running\n'
        'Python-based pipelines using the Google Cloud Dataflow service.\n\n'
        'You may ignore this message if you have successfully ran\n'
        'Python-based pipelines with this project on Google Cloud\n'
        'Dataflow service before.\n\n'
        'If your project is not whitelisted, your job will attempt to run\n'
        'however it will fail to make any progress. Google Cloud Dataflow\n'
        'service will automatically cancel your non-whitelisted job\n'
        'after some time due to inactivity. You can also manually cancel\n'
        'your job using the following command:\n\n'
        'gcloud alpha dataflow jobs --project=%s cancel %s\n\n'
        'Please refer to the documentation to learn more about whitelisting\n'
        'your project at: %s'
        '\n***************************************************************\n\n',
        request.projectId, response.id, 'http://goo.gl/forms/o4w14whz9x')
    return response
def test_with_extra_packages_invalid_file_name(self):
    """An extra package not ending in .tar.gz must raise a pointed error."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(os.path.join(source_dir, 'abc.tgz'), 'nothing')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tgz')]
    # Only the call under test sits inside assertRaises; the original also
    # ran option setup there, which could mask setup failures.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The --extra_packages option expects a full path ending with '
        '\'.tar.gz\' instead of %s' % os.path.join(source_dir, 'abc.tgz'))
def test_with_extra_packages_invalid_file_name(self):
    """An extra package not ending in .tar.gz must raise a pointed error."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(os.path.join(source_dir, 'abc.tgz'), 'nothing')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tgz')]
    # Setup moved outside assertRaises so only the staging call can satisfy
    # the expected-exception check.
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The --extra_packages option expects a full path ending with '
        '\'.tar.gz\' instead of %s' % os.path.join(source_dir, 'abc.tgz'))
def create_job(self, job):
    """Submits for remote execution a job described by the workflow proto."""
    # Stage job resources and add an environment proto with their paths.
    resources = dependency.stage_job_resources(
        job.options, file_copy=self._gcs_file_copy)
    job.proto.environment = Environment(
        packages=resources, options=job.options,
        environment_version=self.environment_version).proto
    # TODO(silviuc): Remove the debug logging eventually.
    logging.info('JOB: %s', job)
    # Build the Create request for the Dataflow service.
    request = dataflow.DataflowProjectsJobsCreateRequest()
    request.projectId = self.google_cloud_options.project
    request.job = job.proto
    try:
        response = self._client.projects_jobs.Create(request)
    except exceptions.BadStatusCodeError as e:
        # Log status code plus full server details before re-raising.
        logging.error(
            'HTTP status %d trying to create job'
            ' at dataflow service endpoint %s',
            e.response.status,
            self.google_cloud_options.dataflow_endpoint)
        logging.fatal('details of server error: %s', e)
        raise
    logging.info('Create job: %s', response)
    # The response is a Job proto with the id for the new job.
    logging.info('Created job with id: [%s]', response.id)
    # NOTE(review): 'accesss' typo below is in the runtime log string;
    # intentionally left unchanged by this comments-only edit.
    logging.info(
        'To accesss the Dataflow monitoring console, please navigate to '
        'https://console.developers.google.com/project/%s/dataflow/job/%s',
        self.google_cloud_options.project, response.id)
    # Show the whitelisting warning. Projects should be whitelisted prior to
    # submitting jobs to Google Cloud Dataflow service. Please see documentation
    # for more information.
    #
    # TODO(altay): Remove once the whitelisting requirements are lifted.
    # NOTE(review): padding inside the banner line below was garbled in the
    # source; reconstructed as spaces to match the 63-char box — confirm.
    logging.warning(
        '\n\n***************************************************************\n'
        '* WARNING: PROJECT WHITELISTING REQUIRED.                     *'
        '\n***************************************************************\n'
        'Please make sure your project is whitelisted for running\n'
        'Python-based pipelines using the Google Cloud Dataflow service.\n\n'
        'You may ignore this message if you have successfully ran\n'
        'Python-based pipelines with this project on Google Cloud\n'
        'Dataflow service before.\n\n'
        'If your project is not whitelisted, your job will attempt to run\n'
        'however it will fail to make any progress. Google Cloud Dataflow\n'
        'service will automatically cancel your non-whitelisted job\n'
        'after some time due to inactivity. You can also manually cancel\n'
        'your job using the following command:\n\n'
        'gcloud alpha dataflow jobs --project=%s cancel %s\n\n'
        'Please refer to the documentation to learn more about whitelisting\n'
        'your project at: %s'
        '\n***************************************************************\n\n',
        request.projectId, response.id, 'http://goo.gl/forms/o4w14whz9x')
    return response
def create_job(self, job):
    """Submits for remote execution a job described by the workflow proto."""
    # Checks the whitelisting status of this account. This is just an early
    # courtesy check to show a warning in case of potential whitelisting errors.
    # It will not block job submission. Jobs submitted from non-whitelisted
    # projects will fail to download required files, make no progress and fail
    # eventually.
    #
    # This check will provide a false warning if a project is whitelisted but
    # not the current user. In that case job will still execute successfully
    # in the service.
    #
    # TODO(altay): Remove once the whitelisting requirements are lifted.
    try:
        # Probe a known bucket; a 403 here is taken as "not whitelisted".
        request = storage.StorageObjectsListRequest(
            bucket='dataflow-python-docker')
        self._storage_client.objects.List(request)
    except exceptions.HttpError as e:
        if e.status_code == 403:
            logging.error(
                '\n*************************************************************\n'
                'This account is not whitelisted to run Python-based pipelines '
                'using the Google Cloud Dataflow service. '
                'Make sure that your project is whitelisted before submitting your '
                'job. \nPlease see documentation for getting more information on '
                'getting your project whitelisted.'
                '\n*************************************************************\n')
        else:
            # Any other HTTP error: whitelisting status is unknown, warn only.
            logging.warning('Could not verify whitelisting status.')
    # Stage job resources and add an environment proto with their paths.
    resources = dependency.stage_job_resources(
        job.options, file_copy=self._gcs_file_copy)
    job.proto.environment = Environment(
        packages=resources, options=job.options,
        environment_version=self.environment_version).proto
    # TODO(silviuc): Remove the debug logging eventually.
    logging.info('JOB: %s', job)
    # Build and send the Create request; note 'request' is reused here after
    # the whitelisting probe above.
    request = dataflow.DataflowProjectsJobsCreateRequest()
    request.projectId = self.google_cloud_options.project
    request.job = job.proto
    try:
        response = self._client.projects_jobs.Create(request)
    except exceptions.BadStatusCodeError as e:
        # Log status code and full server details, then re-raise.
        logging.error('HTTP status %d trying to create job'
                      ' at dataflow service endpoint %s',
                      e.response.status,
                      self.google_cloud_options.dataflow_endpoint)
        logging.fatal('details of server error: %s', e)
        raise
    logging.info('Create job: %s', response)
    # The response is a Job proto with the id for the new job.
    logging.info('Created job with id: [%s]', response.id)
    return response
def test_setup_file_not_named_setup_dot_py(self):
    """A --setup_file not literally named setup.py must be rejected."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    bad_setup_path = os.path.join(source_dir, 'xyz-setup.py')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).setup_file = bad_setup_path
    self.create_temp_file(bad_setup_path, 'notused')
    with self.assertRaises(RuntimeError) as ctx:
        dependency.stage_job_resources(options)
    expected_prefix = (
        'The --setup_file option expects the full path to a file named '
        'setup.py instead of ')
    self.assertTrue(ctx.exception.message.startswith(expected_prefix))
def test_no_main_session(self):
    """With save_main_session disabled, nothing needs to be staged."""
    staging_dir = tempfile.mkdtemp()
    pipeline_options = PipelineOptions()
    pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
    pipeline_options.view_as(SetupOptions).save_main_session = False
    self.update_options(pipeline_options)
    staged = dependency.stage_job_resources(pipeline_options)
    self.assertEqual([], staged)
def test_setup_file_not_named_setup_dot_py(self):
    """Only a file named exactly setup.py is accepted for --setup_file."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    misnamed = os.path.join(source_dir, 'xyz-setup.py')
    options.view_as(SetupOptions).setup_file = misnamed
    self.create_temp_file(misnamed, 'notused')
    with self.assertRaises(RuntimeError) as cm:
        dependency.stage_job_resources(options)
    self.assertTrue(
        cm.exception.message.startswith(
            'The --setup_file option expects the full path to a file named '
            'setup.py instead of '))
def test_no_main_session(self):
    """No resources are staged when the main session is not saved."""
    staging_dir = tempfile.mkdtemp()
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    opts.view_as(SetupOptions).save_main_session = False
    self.update_options(opts)
    self.assertEqual([], dependency.stage_job_resources(opts))
def test_default_resources(self):
    """By default only the pickled main session is staged, as a real file."""
    staging_dir = tempfile.mkdtemp()
    pipeline_options = PipelineOptions()
    pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(pipeline_options)
    staged = dependency.stage_job_resources(pipeline_options)
    self.assertEqual([names.PICKLED_MAIN_SESSION_FILE], staged)
    session_file = os.path.join(staging_dir, names.PICKLED_MAIN_SESSION_FILE)
    self.assertTrue(os.path.isfile(session_file))
def test_default_resources(self):
    """Default staging produces exactly the pickled main session file."""
    staging_dir = tempfile.mkdtemp()
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(opts)
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE],
        dependency.stage_job_resources(opts))
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, names.PICKLED_MAIN_SESSION_FILE)))
def test_sdk_location_gcs(self):
    """A GCS tarball --sdk_location is copied into the staging area."""
    staging_dir = tempfile.mkdtemp()
    sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
    self.override_file_copy(sdk_location, staging_dir)
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(opts)
    opts.view_as(SetupOptions).sdk_location = sdk_location
    staged = dependency.stage_job_resources(opts)
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        staged)
def test_sdk_location_default(self):
    """sdk_location 'default' downloads the versioned SDK tarball."""
    staging_dir = tempfile.mkdtemp()
    expected_from_url = '%s/v%s.tar.gz' % (
        dependency.PACKAGES_URL_PREFIX, __version__)
    expected_from_path = self.override_file_download(
        expected_from_url, staging_dir)
    self.override_file_copy(expected_from_path, staging_dir)
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(opts)
    opts.view_as(SetupOptions).sdk_location = 'default'
    staged = dependency.stage_job_resources(
        opts, file_copy=dependency._dependency_file_copy)
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        staged)
def test_with_extra_packages(self):
    """Local and GCS extra packages are staged and listed in the manifest."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(os.path.join(source_dir, 'abc.tar.gz'), 'nothing')
    self.create_temp_file(os.path.join(source_dir, 'xyz.tar.gz'), 'nothing')
    self.create_temp_file(
        os.path.join(source_dir, dependency.EXTRA_PACKAGES_FILE), 'nothing')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tar.gz'),
        os.path.join(source_dir, 'xyz.tar.gz'),
        'gs://my-gcs-bucket/gcs.tar.gz']

    gcs_copied_files = []

    def file_copy(from_path, to_path):
        # Fake GCS transfers; delegate purely local copies to shutil.
        if from_path.startswith('gs://'):
            gcs_copied_files.append(from_path)
            _, from_name = os.path.split(from_path)
            self.create_temp_file(os.path.join(to_path, from_name), 'nothing')
            logging.info('Fake copied GCS file: %s to %s', from_path, to_path)
        elif to_path.startswith('gs://'):
            logging.info('Faking file_copy(%s, %s)', from_path, to_path)
        else:
            shutil.copyfile(from_path, to_path)

    # Patch the module-level copy hook and restore it afterwards; the
    # original test left the fake installed, leaking into later tests.
    original_file_copy = dependency._dependency_file_copy
    dependency._dependency_file_copy = file_copy
    try:
        self.assertEqual(
            ['abc.tar.gz', 'xyz.tar.gz', 'gcs.tar.gz',
             dependency.EXTRA_PACKAGES_FILE,
             names.PICKLED_MAIN_SESSION_FILE],
            dependency.stage_job_resources(options))
    finally:
        dependency._dependency_file_copy = original_file_copy
    with open(os.path.join(staging_dir, dependency.EXTRA_PACKAGES_FILE)) as f:
        self.assertEqual(['abc.tar.gz\n', 'xyz.tar.gz\n', 'gcs.tar.gz\n'],
                         f.readlines())
    self.assertEqual(['gs://my-gcs-bucket/gcs.tar.gz'], gcs_copied_files)
def test_with_requirements_file(self):
    """A valid --requirements_file is staged alongside the main session."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    requirements_path = os.path.join(source_dir, dependency.REQUIREMENTS_FILE)
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).requirements_file = requirements_path
    self.create_temp_file(requirements_path, 'nothing')
    staged = dependency.stage_job_resources(options)
    self.assertEqual(
        [dependency.REQUIREMENTS_FILE, names.PICKLED_MAIN_SESSION_FILE],
        staged)
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, dependency.REQUIREMENTS_FILE)))
def test_sdk_location_gcs(self):
    """A GCS bucket --sdk_location resolves to the versioned SDK tarball."""
    staging_dir = tempfile.mkdtemp()
    sdk_location = 'gs://my-gcs-bucket'
    expected_from_path = utils.path.join(
        sdk_location,
        'google-cloud-dataflow-python-sdk-%s.tgz' % __version__)
    self.override_file_copy(expected_from_path, staging_dir)
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(opts)
    opts.view_as(SetupOptions).sdk_location = sdk_location
    staged = dependency.stage_job_resources(opts)
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        staged)
def test_sdk_location_local(self):
    """A local directory --sdk_location stages the tarball found inside it."""
    staging_dir = tempfile.mkdtemp()
    sdk_location = tempfile.mkdtemp()
    local_tarball = os.path.join(
        sdk_location, names.DATAFLOW_SDK_TARBALL_FILE)
    self.create_temp_file(local_tarball, 'contents')
    opts = PipelineOptions()
    opts.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(opts)
    opts.view_as(SetupOptions).sdk_location = sdk_location
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        dependency.stage_job_resources(opts))
    # Verify the tarball content survived the copy into the staging area.
    with open(os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)) as f:
        self.assertEqual(f.read(), 'contents')
def test_sdk_location_default(self):
    """'default' sdk_location downloads then stages the release tarball."""
    staging_dir = tempfile.mkdtemp()
    download_url = '%s/v%s.tar.gz' % (
        dependency.PACKAGES_URL_PREFIX, __version__)
    downloaded_path = self.override_file_download(download_url, staging_dir)
    self.override_file_copy(downloaded_path, staging_dir)
    pipeline_options = PipelineOptions()
    pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(pipeline_options)
    pipeline_options.view_as(SetupOptions).sdk_location = 'default'
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        dependency.stage_job_resources(
            pipeline_options, file_copy=dependency._dependency_file_copy))
def test_with_extra_packages(self):
    """Local and GCS extra packages are staged and listed in the manifest."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(os.path.join(source_dir, 'abc.tar.gz'), 'nothing')
    self.create_temp_file(os.path.join(source_dir, 'xyz.tar.gz'), 'nothing')
    self.create_temp_file(
        os.path.join(source_dir, dependency.EXTRA_PACKAGES_FILE), 'nothing')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tar.gz'),
        os.path.join(source_dir, 'xyz.tar.gz'),
        'gs://my-gcs-bucket/gcs.tar.gz']

    gcs_copied_files = []

    def file_copy(from_path, to_path):
        # Record and fake GCS copies; real copy only for local->local.
        if from_path.startswith('gs://'):
            gcs_copied_files.append(from_path)
            _, from_name = os.path.split(from_path)
            self.create_temp_file(os.path.join(to_path, from_name), 'nothing')
            logging.info('Fake copied GCS file: %s to %s', from_path, to_path)
        elif to_path.startswith('gs://'):
            logging.info('Faking file_copy(%s, %s)', from_path, to_path)
        else:
            shutil.copyfile(from_path, to_path)

    # Save and restore the module-level hook; the original assignment was
    # never undone and leaked the fake into subsequent tests.
    saved_file_copy = dependency._dependency_file_copy
    dependency._dependency_file_copy = file_copy
    try:
        self.assertEqual(
            ['abc.tar.gz', 'xyz.tar.gz', 'gcs.tar.gz',
             dependency.EXTRA_PACKAGES_FILE,
             names.PICKLED_MAIN_SESSION_FILE],
            dependency.stage_job_resources(options))
    finally:
        dependency._dependency_file_copy = saved_file_copy
    with open(os.path.join(staging_dir, dependency.EXTRA_PACKAGES_FILE)) as f:
        self.assertEqual(['abc.tar.gz\n', 'xyz.tar.gz\n', 'gcs.tar.gz\n'],
                         f.readlines())
    self.assertEqual(['gs://my-gcs-bucket/gcs.tar.gz'], gcs_copied_files)
def test_sdk_location_local(self):
    """A local sdk_location directory yields a staged SDK tarball copy."""
    staging_dir = tempfile.mkdtemp()
    sdk_location = tempfile.mkdtemp()
    self.create_temp_file(
        os.path.join(sdk_location, names.DATAFLOW_SDK_TARBALL_FILE),
        'contents')
    pipeline_options = PipelineOptions()
    pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(pipeline_options)
    pipeline_options.view_as(SetupOptions).sdk_location = sdk_location
    staged = dependency.stage_job_resources(pipeline_options)
    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE, names.DATAFLOW_SDK_TARBALL_FILE],
        staged)
    tarball_path = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
    with open(tarball_path) as f:
        self.assertEqual(f.read(), 'contents')
def test_with_extra_packages(self):
    """Local extra packages are staged and recorded in the manifest file."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    for name in ('abc.tar.gz', 'xyz.tar.gz', dependency.EXTRA_PACKAGES_FILE):
        self.create_temp_file(os.path.join(source_dir, name), 'nothing')
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tar.gz'),
        os.path.join(source_dir, 'xyz.tar.gz')]
    staged = dependency.stage_job_resources(options)
    self.assertEqual(
        ['abc.tar.gz', 'xyz.tar.gz', dependency.EXTRA_PACKAGES_FILE,
         names.PICKLED_MAIN_SESSION_FILE],
        staged)
    manifest = os.path.join(staging_dir, dependency.EXTRA_PACKAGES_FILE)
    with open(manifest) as f:
        self.assertEqual(['abc.tar.gz\n', 'xyz.tar.gz\n'], f.readlines())
def test_with_requirements_file_and_cache(self):
    """Cached requirement packages are staged next to the requirements file."""
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    setup_options = options.view_as(SetupOptions)
    setup_options.requirements_file = os.path.join(
        source_dir, dependency.REQUIREMENTS_FILE)
    setup_options.requirements_cache = os.path.join(
        tempfile.gettempdir(), 'alternative-cache-dir')
    self.create_temp_file(
        os.path.join(source_dir, dependency.REQUIREMENTS_FILE), 'nothing')
    staged = dependency.stage_job_resources(
        options, populate_requirements_cache=self.populate_requirements_cache)
    expected = [dependency.REQUIREMENTS_FILE, names.PICKLED_MAIN_SESSION_FILE,
                'abc.txt', 'def.txt']
    self.assertEqual(sorted(expected), sorted(staged))
    for staged_name in (dependency.REQUIREMENTS_FILE, 'abc.txt', 'def.txt'):
        self.assertTrue(os.path.isfile(os.path.join(staging_dir, staged_name)))
def test_no_staging_location(self):
    """Staging with no --staging_location must raise a clear RuntimeError."""
    with self.assertRaises(RuntimeError) as ctx:
        dependency.stage_job_resources(PipelineOptions())
    self.assertEqual('The --staging_location option must be specified.',
                     ctx.exception.message)