def test_no_temp_location(self):
  """stage_job_resources raises when --temp_location is not set."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  google_cloud_options = options.view_as(GoogleCloudOptions)
  google_cloud_options.staging_location = staging_dir
  self.update_options(options)
  google_cloud_options.temp_location = None
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual('The --temp_location option must be specified.',
                   cm.exception.args[0])
def test_no_temp_location(self):
  """stage_job_resources raises when --temp_location is not set."""
  staging_dir = tempfile.mkdtemp()
  options = PipelineOptions()
  google_cloud_options = options.view_as(GoogleCloudOptions)
  google_cloud_options.staging_location = staging_dir
  self.update_options(options)
  google_cloud_options.temp_location = None
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual('The --temp_location option must be specified.',
                   cm.exception.args[0])
def test_requirements_file_not_present(self):
  """A missing --requirements_file is reported with a clear error."""
  staging_dir = self.make_temp_dir()
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).requirements_file = 'nosuchfile'
    dependency.stage_job_resources(
        options, populate_requirements_cache=self.populate_requirements_cache)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The file %s cannot be found. It was specified in the '
      '--requirements_file command line option.' % 'nosuchfile')
def test_with_extra_packages_missing_files(self):
  """A missing file in --extra_packages is reported with a clear error."""
  staging_dir = self.make_temp_dir()
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = ['nosuchfile.tar.gz']
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The file %s cannot be found. It was specified in the '
      '--extra_packages command line option.' % 'nosuchfile.tar.gz')
def test_setup_file_not_present(self):
  """A missing --setup_file is reported with a clear error."""
  staging_dir = tempfile.mkdtemp()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).setup_file = 'nosuchfile'
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The file %s cannot be found. It was specified in the '
      '--setup_file command line option.' % 'nosuchfile')
def test_setup_file_not_present(self):
  """A missing --setup_file is reported with a clear error."""
  staging_dir = tempfile.mkdtemp()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).setup_file = 'nosuchfile'
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The file %s cannot be found. It was specified in the '
      '--setup_file command line option.' % 'nosuchfile')
def test_sdk_location_local_not_present(self):
  """A non-existent local --sdk_location is reported with a clear error."""
  staging_dir = tempfile.mkdtemp()
  sdk_location = 'nosuchdir'
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).sdk_location = sdk_location
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      'The file "%s" cannot be found. Its '
      'location was specified by the --sdk_location command-line option.' %
      sdk_location,
      cm.exception.args[0])
def test_sdk_location_local_not_present(self):
  """A non-existent local --sdk_location is reported with a clear error."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'nosuchdir'
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).sdk_location = sdk_location
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      'The file "%s" cannot be found. Its '
      'location was specified by the --sdk_location command-line option.' %
      sdk_location,
      cm.exception.args[0])
def test_default_resources(self):
  """With no extra options set, nothing needs to be staged."""
  staging_dir = tempfile.mkdtemp()
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(pipeline_options)
  self.assertEqual([], dependency.stage_job_resources(pipeline_options))
def test_sdk_location_http(self):
  """An http:// SDK location is downloaded and then staged."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'http://storage.googleapis.com/my-gcs-bucket/tarball.tar.gz'
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  def fake_download(_, to_folder):
    # Simulate the download by writing a small file into to_folder.
    downloaded = os.path.join(to_folder, 'sdk-tarball')
    with open(downloaded, 'w') as f:
      f.write('Package content.')
    return downloaded

  with mock.patch(
      'apache_beam.runners.dataflow.internal.'
      'dependency._dependency_file_download', fake_download):
    self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                     dependency.stage_job_resources(options))

  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_with_requirements_file_and_cache(self):
  """Requirements staging honors a custom requirements_cache directory."""
  staging_dir = tempfile.mkdtemp()
  source_dir = tempfile.mkdtemp()

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  setup_options = options.view_as(SetupOptions)
  setup_options.requirements_file = os.path.join(
      source_dir, dependency.REQUIREMENTS_FILE)
  setup_options.requirements_cache = os.path.join(
      tempfile.gettempdir(), 'alternative-cache-dir')
  self.create_temp_file(
      os.path.join(source_dir, dependency.REQUIREMENTS_FILE), 'nothing')

  staged = dependency.stage_job_resources(
      options, populate_requirements_cache=self.populate_requirements_cache)
  # The requirements file and the fake cached packages are all staged.
  self.assertEqual(
      sorted([dependency.REQUIREMENTS_FILE, 'abc.txt', 'def.txt']),
      sorted(staged))
  self.assertTrue(
      os.path.isfile(os.path.join(staging_dir, dependency.REQUIREMENTS_FILE)))
  self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'abc.txt')))
  self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'def.txt')))
def test_sdk_location_http(self):
  """An http:// SDK location is downloaded and then staged."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'http://storage.googleapis.com/my-gcs-bucket/tarball.tar.gz'
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  def fake_download(_, to_folder):
    # Simulate the download by writing a small file into to_folder.
    downloaded = os.path.join(to_folder, 'sdk-tarball')
    with open(downloaded, 'w') as f:
      f.write('Package content.')
    return downloaded

  with mock.patch('apache_beam.runners.dataflow.internal.'
                  'dependency._dependency_file_download', fake_download):
    self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                     dependency.stage_job_resources(options))

  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_with_setup_file(self):
  """A setup.py file triggers building and staging a workflow tarball."""
  staging_dir = tempfile.mkdtemp()
  source_dir = tempfile.mkdtemp()
  self.create_temp_file(os.path.join(source_dir, 'setup.py'), 'notused')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).setup_file = os.path.join(
      source_dir, 'setup.py')

  # We replace the build setup command because a realistic one would
  # require the setuptools package to be installed. Note that we can't
  # use "touch" here to create the expected output tarball file, since
  # touch is not available on Windows, so we invoke python to produce
  # equivalent behavior.
  fake_build_args = [
      'python', '-c', 'open(__import__("sys").argv[1], "a")',
      os.path.join(source_dir, dependency.WORKFLOW_TARBALL_FILE)]
  self.assertEqual(
      [dependency.WORKFLOW_TARBALL_FILE],
      dependency.stage_job_resources(
          options, build_setup_args=fake_build_args, temp_dir=source_dir))
  self.assertTrue(
      os.path.isfile(
          os.path.join(staging_dir, dependency.WORKFLOW_TARBALL_FILE)))
def test_with_extra_packages_invalid_file_name(self):
  """An extra package with an unsupported extension is rejected."""
  staging_dir = tempfile.mkdtemp()
  source_dir = tempfile.mkdtemp()
  self.create_temp_file(os.path.join(source_dir, 'abc.tgz'), 'nothing')
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tgz')
    ]
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The --extra_package option expects a full path ending with ".tar" or '
      '".tar.gz" instead of %s' % os.path.join(source_dir, 'abc.tgz'))
def test_with_requirements_file(self):
  """A requirements file is staged along with cached dependencies."""
  try:
    staging_dir = tempfile.mkdtemp()
    requirements_cache_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()

    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    setup_options = options.view_as(SetupOptions)
    setup_options.requirements_cache = requirements_cache_dir
    setup_options.requirements_file = os.path.join(
        source_dir, dependency.REQUIREMENTS_FILE)
    self.create_temp_file(
        os.path.join(source_dir, dependency.REQUIREMENTS_FILE), 'nothing')

    staged = dependency.stage_job_resources(
        options, populate_requirements_cache=self.populate_requirements_cache)
    # The requirements file and the fake cached packages are all staged.
    self.assertEqual(
        sorted([dependency.REQUIREMENTS_FILE, 'abc.txt', 'def.txt']),
        sorted(staged))
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, dependency.REQUIREMENTS_FILE)))
    self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'abc.txt')))
    self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'def.txt')))
  finally:
    # Clean up the temporary directories even if an assertion fails.
    shutil.rmtree(staging_dir)
    shutil.rmtree(requirements_cache_dir)
    shutil.rmtree(source_dir)
def test_with_extra_packages_invalid_file_name(self):
  """An extra package with an unsupported extension is rejected."""
  staging_dir = tempfile.mkdtemp()
  source_dir = tempfile.mkdtemp()
  self.create_temp_file(
      os.path.join(source_dir, 'abc.tgz'), 'nothing')
  with self.assertRaises(RuntimeError) as cm:
    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).extra_packages = [
        os.path.join(source_dir, 'abc.tgz')]
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual(
      cm.exception.args[0],
      'The --extra_package option expects a full path ending with ".tar" or '
      '".tar.gz" instead of %s' % os.path.join(source_dir, 'abc.tgz'))
def test_with_setup_file(self):
  """A setup.py file triggers building and staging a workflow tarball."""
  staging_dir = self.make_temp_dir()
  source_dir = self.make_temp_dir()
  self.create_temp_file(os.path.join(source_dir, 'setup.py'), 'notused')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).setup_file = os.path.join(
      source_dir, 'setup.py')

  # We replace the build setup command because a realistic one would
  # require the setuptools package to be installed. Note that we can't
  # use "touch" here to create the expected output tarball file, since
  # touch is not available on Windows, so we invoke python to produce
  # equivalent behavior.
  fake_build_args = [
      'python', '-c', 'open(__import__("sys").argv[1], "a")',
      os.path.join(source_dir, dependency.WORKFLOW_TARBALL_FILE)]
  self.assertEqual(
      [dependency.WORKFLOW_TARBALL_FILE],
      dependency.stage_job_resources(
          options, build_setup_args=fake_build_args, temp_dir=source_dir))
  self.assertTrue(
      os.path.isfile(
          os.path.join(staging_dir, dependency.WORKFLOW_TARBALL_FILE)))
def create_job_description(self, job):
  """Creates a job described by the workflow proto."""
  # Stage dependencies first so the environment can reference them.
  staged_resources = dependency.stage_job_resources(
      job.options, file_copy=self._gcs_file_copy)
  job.proto.environment = Environment(
      packages=staged_resources,
      options=job.options,
      environment_version=self.environment_version).proto
  logging.debug('JOB: %s', job)
def test_setup_file_not_named_setup_dot_py(self):
  """A setup file not literally named setup.py is rejected."""
  staging_dir = self.make_temp_dir()
  source_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  bad_setup_file = os.path.join(source_dir, 'xyz-setup.py')
  options.view_as(SetupOptions).setup_file = bad_setup_file
  self.create_temp_file(bad_setup_file, 'notused')

  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  self.assertTrue(cm.exception.args[0].startswith(
      'The --setup_file option expects the full path to a file named '
      'setup.py instead of '))
def create_job_description(self, job):
  """Creates a job described by the workflow proto."""
  # Stage dependencies first so the environment can reference them.
  staged_resources = dependency.stage_job_resources(
      job.options, file_copy=self._gcs_file_copy)
  job.proto.environment = Environment(
      packages=staged_resources,
      options=job.options,
      environment_version=self.environment_version).proto
  logging.debug('JOB: %s', job)
def test_default_resources(self):
  """With no extra options set, nothing needs to be staged."""
  staging_dir = self.make_temp_dir()
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(pipeline_options)
  self.assertEqual([], dependency.stage_job_resources(pipeline_options))
def test_no_main_session(self):
  """With save_main_session disabled, no session file is staged."""
  staging_dir = tempfile.mkdtemp()
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
  pipeline_options.view_as(SetupOptions).save_main_session = False
  self.update_options(pipeline_options)
  self.assertEqual([], dependency.stage_job_resources(pipeline_options))
def test_setup_file_not_named_setup_dot_py(self):
  """A setup file not literally named setup.py is rejected."""
  staging_dir = self.make_temp_dir()
  source_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).setup_file = (
      os.path.join(source_dir, 'xyz-setup.py'))
  self.create_temp_file(
      os.path.join(source_dir, 'xyz-setup.py'), 'notused')
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(options)
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertTrue(
      cm.exception.args[0].startswith(
          'The --setup_file option expects the full path to a file named '
          'setup.py instead of '))
def create_job_description(self, job):
  """Creates a job described by the workflow proto."""
  # Stage dependencies first so the environment can reference them.
  staged_resources = dependency.stage_job_resources(
      job.options, file_copy=self._gcs_file_copy)
  job.proto.environment = Environment(
      packages=staged_resources,
      options=job.options,
      environment_version=self.environment_version).proto
  # TODO(silviuc): Remove the debug logging eventually.
  logging.info('JOB: %s', job)
def create_job_description(self, job):
  """Creates a job described by the workflow proto."""
  # Stage dependencies first so the environment can reference them.
  staged_resources = dependency.stage_job_resources(
      job.options, file_copy=self._gcs_file_copy)
  job.proto.environment = Environment(
      packages=staged_resources,
      options=job.options,
      environment_version=self.environment_version).proto
  # TODO(silviuc): Remove the debug logging eventually.
  logging.info('JOB: %s', job)
def test_no_main_session(self):
  """With save_main_session disabled, no session file is staged."""
  staging_dir = self.make_temp_dir()
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(GoogleCloudOptions).staging_location = staging_dir
  pipeline_options.view_as(SetupOptions).save_main_session = False
  self.update_options(pipeline_options)
  self.assertEqual([], dependency.stage_job_resources(pipeline_options))
def test_with_extra_packages(self):
  """Local, tar, wheel and gs:// extra packages are all staged."""
  staging_dir = self.make_temp_dir()
  source_dir = self.make_temp_dir()
  self.create_temp_file(os.path.join(source_dir, 'abc.tar.gz'), 'nothing')
  self.create_temp_file(os.path.join(source_dir, 'xyz.tar.gz'), 'nothing')
  self.create_temp_file(os.path.join(source_dir, 'xyz2.tar'), 'nothing')
  self.create_temp_file(os.path.join(source_dir, 'whl.whl'), 'nothing')
  self.create_temp_file(
      os.path.join(source_dir, dependency.EXTRA_PACKAGES_FILE), 'nothing')
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).extra_packages = [
      os.path.join(source_dir, 'abc.tar.gz'),
      os.path.join(source_dir, 'xyz.tar.gz'),
      os.path.join(source_dir, 'xyz2.tar'),
      os.path.join(source_dir, 'whl.whl'),
      'gs://my-gcs-bucket/gcs.tar.gz'
  ]
  # Records every gs:// source path the fake copy below sees.
  gcs_copied_files = []

  def file_copy(from_path, to_path):
    # Fake GCS download: create a placeholder file instead of fetching.
    if from_path.startswith('gs://'):
      gcs_copied_files.append(from_path)
      _, from_name = os.path.split(from_path)
      if os.path.isdir(to_path):
        to_path = os.path.join(to_path, from_name)
      self.create_temp_file(to_path, 'nothing')
      logging.info('Fake copied GCS file: %s to %s', from_path, to_path)
    elif to_path.startswith('gs://'):
      # Fake GCS upload: log only, nothing to write locally.
      logging.info('Faking file_copy(%s, %s)', from_path, to_path)
    else:
      # Local-to-local copies are performed for real.
      shutil.copyfile(from_path, to_path)

  # NOTE(review): module-level monkey-patch is never restored — presumably
  # later tests tolerate the fake copy; consider mock.patch instead.
  dependency._dependency_file_copy = file_copy

  self.assertEqual([
      'abc.tar.gz', 'xyz.tar.gz', 'xyz2.tar', 'whl.whl', 'gcs.tar.gz',
      dependency.EXTRA_PACKAGES_FILE
  ], dependency.stage_job_resources(options))
  # The manifest file lists each staged package, one per line.
  with open(os.path.join(staging_dir, dependency.EXTRA_PACKAGES_FILE)) as f:
    self.assertEqual([
        'abc.tar.gz\n', 'xyz.tar.gz\n', 'xyz2.tar\n', 'whl.whl\n',
        'gcs.tar.gz\n'
    ], f.readlines())
  self.assertEqual(['gs://my-gcs-bucket/gcs.tar.gz'], gcs_copied_files)
def test_sdk_location_gcs(self):
  """A gs:// tarball sdk_location is copied into the staging dir."""
  staging_dir = tempfile.mkdtemp()
  sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
  self.override_file_copy(sdk_location, staging_dir)

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
def test_with_main_session(self):
  """save_main_session=True stages the pickled session file."""
  staging_dir = tempfile.mkdtemp()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  options.view_as(SetupOptions).save_main_session = True
  self.update_options(options)

  self.assertEqual([names.PICKLED_MAIN_SESSION_FILE],
                   dependency.stage_job_resources(options))
  self.assertTrue(
      os.path.isfile(
          os.path.join(staging_dir, names.PICKLED_MAIN_SESSION_FILE)))
def test_sdk_location_gcs_source_file(self):
  """A gs:// source tarball is staged under the canonical SDK name."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  # Patch out the GCS copy; only the staged file name is checked here.
  with mock.patch('apache_beam.runners.dataflow.internal.'
                  'dependency._dependency_file_copy'):
    self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                     dependency.stage_job_resources(options))
def test_sdk_location_gcs(self):
  """A gs:// tarball sdk_location is copied into the staging dir."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
  self.override_file_copy(sdk_location, staging_dir)

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
def test_sdk_location_gcs_wheel_file(self):
  """A gs:// wheel sdk_location is staged under its own file name."""
  staging_dir = self.make_temp_dir()
  sdk_filename = 'apache_beam-1.0.0-cp27-cp27mu-manylinux1_x86_64.whl'
  sdk_location = 'gs://my-gcs-bucket/' + sdk_filename
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  # Patch out the GCS copy; only the staged file name is checked here.
  with mock.patch('apache_beam.runners.dataflow.internal.'
                  'dependency._dependency_file_copy'):
    self.assertEqual([sdk_filename],
                     dependency.stage_job_resources(options))
def test_with_main_session(self):
  """save_main_session=True stages the pickled session file."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  options.view_as(SetupOptions).save_main_session = True
  self.update_options(options)

  self.assertEqual([names.PICKLED_MAIN_SESSION_FILE],
                   dependency.stage_job_resources(options))
  self.assertTrue(
      os.path.isfile(
          os.path.join(staging_dir, names.PICKLED_MAIN_SESSION_FILE)))
def test_sdk_location_gcs_source_file(self):
  """A gs:// source tarball is staged under the canonical SDK name."""
  staging_dir = self.make_temp_dir()
  sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  # Patch out the GCS copy; only the staged file name is checked here.
  with mock.patch('apache_beam.runners.dataflow.internal.'
                  'dependency._dependency_file_copy'):
    self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                     dependency.stage_job_resources(options))
def test_with_extra_packages(self):
  """Local, tar, wheel and gs:// extra packages are all staged."""
  staging_dir = self.make_temp_dir()
  source_dir = self.make_temp_dir()
  self.create_temp_file(
      os.path.join(source_dir, 'abc.tar.gz'), 'nothing')
  self.create_temp_file(
      os.path.join(source_dir, 'xyz.tar.gz'), 'nothing')
  self.create_temp_file(
      os.path.join(source_dir, 'xyz2.tar'), 'nothing')
  self.create_temp_file(
      os.path.join(source_dir, 'whl.whl'), 'nothing')
  self.create_temp_file(
      os.path.join(source_dir, dependency.EXTRA_PACKAGES_FILE), 'nothing')
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).extra_packages = [
      os.path.join(source_dir, 'abc.tar.gz'),
      os.path.join(source_dir, 'xyz.tar.gz'),
      os.path.join(source_dir, 'xyz2.tar'),
      os.path.join(source_dir, 'whl.whl'),
      'gs://my-gcs-bucket/gcs.tar.gz']
  # Records every gs:// source path the fake copy below sees.
  gcs_copied_files = []

  def file_copy(from_path, to_path):
    # Fake GCS download: create a placeholder file instead of fetching.
    if from_path.startswith('gs://'):
      gcs_copied_files.append(from_path)
      _, from_name = os.path.split(from_path)
      if os.path.isdir(to_path):
        to_path = os.path.join(to_path, from_name)
      self.create_temp_file(to_path, 'nothing')
      logging.info('Fake copied GCS file: %s to %s', from_path, to_path)
    elif to_path.startswith('gs://'):
      # Fake GCS upload: log only, nothing to write locally.
      logging.info('Faking file_copy(%s, %s)', from_path, to_path)
    else:
      # Local-to-local copies are performed for real.
      shutil.copyfile(from_path, to_path)

  # NOTE(review): module-level monkey-patch is never restored — presumably
  # later tests tolerate the fake copy; consider mock.patch instead.
  dependency._dependency_file_copy = file_copy

  self.assertEqual(
      ['abc.tar.gz', 'xyz.tar.gz', 'xyz2.tar', 'whl.whl', 'gcs.tar.gz',
       dependency.EXTRA_PACKAGES_FILE],
      dependency.stage_job_resources(options))
  # The manifest file lists each staged package, one per line.
  with open(os.path.join(staging_dir, dependency.EXTRA_PACKAGES_FILE)) as f:
    self.assertEqual(['abc.tar.gz\n', 'xyz.tar.gz\n', 'xyz2.tar\n',
                      'whl.whl\n', 'gcs.tar.gz\n'], f.readlines())
  self.assertEqual(['gs://my-gcs-bucket/gcs.tar.gz'], gcs_copied_files)
def test_sdk_location_gcs_wheel_file(self):
  """A gs:// wheel sdk_location is staged under its own file name."""
  staging_dir = self.make_temp_dir()
  sdk_filename = 'apache_beam-1.0.0-cp27-cp27mu-manylinux1_x86_64.whl'
  sdk_location = 'gs://my-gcs-bucket/' + sdk_filename
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  # Patch out the GCS copy; only the staged file name is checked here.
  with mock.patch('apache_beam.runners.dataflow.internal.'
                  'dependency._dependency_file_copy'):
    self.assertEqual([sdk_filename],
                     dependency.stage_job_resources(options))
def test_sdk_location_default(self):
  """'default' sdk_location downloads the SDK from PyPI and stages it."""
  staging_dir = tempfile.mkdtemp()
  expected_from_url = 'pypi'
  expected_from_path = self.override_pypi_download(
      expected_from_url, staging_dir)
  self.override_file_copy(expected_from_path, staging_dir)

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  self.assertEqual(
      [names.DATAFLOW_SDK_TARBALL_FILE],
      dependency.stage_job_resources(
          options, file_copy=dependency._dependency_file_copy))
def test_sdk_location_default(self):
  """'default' sdk_location downloads the SDK from PyPI and stages it."""
  staging_dir = self.make_temp_dir()
  expected_from_url = 'pypi'
  expected_from_path = self.override_pypi_download(
      expected_from_url, staging_dir)
  self.override_file_copy(expected_from_path, staging_dir)

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  self.assertEqual(
      [names.DATAFLOW_SDK_TARBALL_FILE],
      dependency.stage_job_resources(
          options, file_copy=dependency._dependency_file_copy))
def test_sdk_location_local_wheel_file(self):
  """A local wheel sdk_location is copied into the staging directory."""
  staging_dir = self.make_temp_dir()
  sdk_directory = self.make_temp_dir()
  sdk_filename = 'apache_beam-1.0.0-cp27-cp27mu-manylinux1_x86_64.whl'
  sdk_location = os.path.join(sdk_directory, sdk_filename)
  self.create_temp_file(sdk_location, 'Package content.')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([sdk_filename], dependency.stage_job_resources(options))
  staged_wheel = os.path.join(staging_dir, sdk_filename)
  with open(staged_wheel) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_local_source_file(self):
  """A local source tarball is staged under the canonical SDK name."""
  staging_dir = self.make_temp_dir()
  sdk_directory = self.make_temp_dir()
  sdk_filename = 'apache-beam-3.0.0.tar.gz'
  sdk_location = os.path.join(sdk_directory, sdk_filename)
  self.create_temp_file(sdk_location, 'Package content.')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_local(self):
  """A local directory sdk_location stages the tarball found inside it."""
  staging_dir = tempfile.mkdtemp()
  sdk_location = tempfile.mkdtemp()
  self.create_temp_file(
      os.path.join(sdk_location, names.DATAFLOW_SDK_TARBALL_FILE),
      'contents')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'contents')
def create_job_description(self, job):
  """Creates a job described by the workflow proto."""
  # Stage the pipeline for the runner harness
  self.stage_file(job.google_cloud_options.staging_location,
                  names.STAGED_PIPELINE_FILENAME,
                  StringIO(job.proto_pipeline.SerializeToString()))

  # Stage other resources for the SDK harness
  staged_resources = dependency.stage_job_resources(
      job.options, file_copy=self._gcs_file_copy)

  pipeline_url = FileSystems.join(
      job.google_cloud_options.staging_location,
      names.STAGED_PIPELINE_FILENAME)
  job.proto.environment = Environment(
      pipeline_url=pipeline_url,
      packages=staged_resources,
      options=job.options,
      environment_version=self.environment_version).proto
  logging.debug('JOB: %s', job)
def test_sdk_location_local_source_file(self):
  """A local source tarball is staged under the canonical SDK name."""
  staging_dir = self.make_temp_dir()
  sdk_directory = self.make_temp_dir()
  sdk_filename = 'apache-beam-3.0.0.tar.gz'
  sdk_location = os.path.join(sdk_directory, sdk_filename)
  self.create_temp_file(sdk_location, 'Package content.')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_local_wheel_file(self):
  """A local wheel sdk_location is copied into the staging directory."""
  staging_dir = self.make_temp_dir()
  sdk_directory = self.make_temp_dir()
  sdk_filename = 'apache_beam-1.0.0-cp27-cp27mu-manylinux1_x86_64.whl'
  sdk_location = os.path.join(sdk_directory, sdk_filename)
  self.create_temp_file(sdk_location, 'Package content.')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([sdk_filename], dependency.stage_job_resources(options))
  staged_wheel = os.path.join(staging_dir, sdk_filename)
  with open(staged_wheel) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_default(self):
  """'default' sdk_location builds the SDK tarball via pip download."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  fake_check_call = self.build_fake_pip_download_command_handler(
      has_wheels=False)
  with mock.patch('apache_beam.utils.processes.check_call', fake_check_call):
    staged_resources = dependency.stage_job_resources(
        options, temp_dir=self.make_temp_dir())

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE], staged_resources)
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_default(self):
  """'default' sdk_location builds the SDK tarball via pip download."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  fake_check_call = self.build_fake_pip_download_command_handler(
      has_wheels=False)
  with mock.patch('apache_beam.utils.processes.check_call', fake_check_call):
    staged_resources = dependency.stage_job_resources(
        options, temp_dir=self.make_temp_dir())

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE], staged_resources)
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_local(self):
  """A local directory sdk_location stages the tarball found inside it."""
  staging_dir = self.make_temp_dir()
  sdk_location = self.make_temp_dir()
  self.create_temp_file(
      os.path.join(sdk_location, names.DATAFLOW_SDK_TARBALL_FILE),
      'contents')

  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = sdk_location

  self.assertEqual([names.DATAFLOW_SDK_TARBALL_FILE],
                   dependency.stage_job_resources(options))
  staged_tarball = os.path.join(staging_dir, names.DATAFLOW_SDK_TARBALL_FILE)
  with open(staged_tarball) as f:
    self.assertEqual(f.read(), 'contents')
def test_sdk_location_default_with_wheels(self):
  """'default' sdk_location stages the tarball plus a wheel when available."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  with mock.patch(
      'apache_beam.utils.processes.check_call',
      self.build_fake_pip_download_command_handler(has_wheels=True)):
    staged_resources = dependency.stage_job_resources(
        options, temp_dir=self.make_temp_dir())

    # Was assertTrue(len(...), 2): the 2 was treated as the failure message
    # and the count was never checked. assertEqual is what was intended.
    self.assertEqual(len(staged_resources), 2)
    self.assertEqual(staged_resources[0], names.DATAFLOW_SDK_TARBALL_FILE)
    # Exact name depends on the version of the SDK.
    self.assertTrue(staged_resources[1].endswith('whl'))
    for name in staged_resources:
      with open(os.path.join(staging_dir, name)) as f:
        self.assertEqual(f.read(), 'Package content.')
def test_sdk_location_default_with_wheels(self):
  """'default' sdk_location stages the tarball plus a wheel when available."""
  staging_dir = self.make_temp_dir()
  options = PipelineOptions()
  options.view_as(GoogleCloudOptions).staging_location = staging_dir
  self.update_options(options)
  options.view_as(SetupOptions).sdk_location = 'default'

  with mock.patch(
      'apache_beam.utils.processes.check_call',
      self.build_fake_pip_download_command_handler(has_wheels=True)):
    staged_resources = dependency.stage_job_resources(
        options, temp_dir=self.make_temp_dir())

    # Was assertTrue(len(...), 2): the 2 was treated as the failure message
    # and the count was never checked. assertEqual is what was intended.
    self.assertEqual(len(staged_resources), 2)
    self.assertEqual(staged_resources[0], names.DATAFLOW_SDK_TARBALL_FILE)
    # Exact name depends on the version of the SDK.
    self.assertTrue(staged_resources[1].endswith('whl'))
    for name in staged_resources:
      with open(os.path.join(staging_dir, name)) as f:
        self.assertEqual(f.read(), 'Package content.')
def test_no_staging_location(self):
  """stage_job_resources raises when --staging_location is not set."""
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(PipelineOptions())
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual('The --staging_location option must be specified.',
                   cm.exception.args[0])
def test_no_staging_location(self):
  """stage_job_resources raises when --staging_location is not set."""
  with self.assertRaises(RuntimeError) as cm:
    dependency.stage_job_resources(PipelineOptions())
  # Use args[0]: BaseException.message does not exist on Python 3.
  self.assertEqual('The --staging_location option must be specified.',
                   cm.exception.args[0])