def test_type_check_additional(self):
    """Check that accepted ``--type_check_additional`` values validate cleanly.

    Both ``all`` and the empty value are validated against the same
    ``OtherRunner`` instance, and each must produce a falsy error result.
    """
    mock_runner = MockRunners.OtherRunner()
    accepted_flags = (
        '--type_check_additional=all',
        '--type_check_additional=',
    )
    for flag in accepted_flags:
        parsed = PipelineOptions([flag])
        problems = PipelineOptionsValidator(parsed, mock_runner).validate()
        self.assertFalse(problems)
def test_streaming(self):
    """Check that ``--streaming`` on ``TestDataflowRunner`` is reported.

    The validation result must contain the exact message
    'Streaming pipelines are not supported.'.
    """
    streaming_opts = PipelineOptions(['--streaming'])
    mock_runner = MockRunners.TestDataflowRunner()
    problems = PipelineOptionsValidator(streaming_opts, mock_runner).validate()
    expected_message = 'Streaming pipelines are not supported.'
    self.assertIn(expected_message, problems)
def test_dataflow_job_file_and_template_location_mutually_exclusive(self):
    """Check that giving both template and job-file options is an error.

    ``--template_location`` and ``--dataflow_job_file`` are passed together
    and the validator must return a truthy error result.
    """
    mock_runner = MockRunners.OtherRunner()
    argv = ['--template_location', 'abc', '--dataflow_job_file', 'def']
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertTrue(problems)
def test_validate_dataflow_job_file(self):
    """Check that ``--dataflow_job_file`` on its own validates cleanly."""
    mock_runner = MockRunners.OtherRunner()
    argv = ['--dataflow_job_file', 'abc']
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertFalse(problems)
def test_validate_template_location(self):
    """Check that ``--template_location`` on its own validates cleanly."""
    mock_runner = MockRunners.OtherRunner()
    argv = ['--template_location', 'abc']
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertFalse(problems)
def test_missing_required_options(self):
    """Check the errors produced when no real options are supplied.

    Options are built from a single empty-string argument and validated with
    ``DataflowRunner``. The ``check_errors_for_arguments`` helper must then
    return an empty list for ``project``, ``staging_location`` and
    ``temp_location``.
    """
    parsed = PipelineOptions([''])
    mock_runner = MockRunners.DataflowRunner()
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    required_names = ['project', 'staging_location', 'temp_location']
    leftover = self.check_errors_for_arguments(problems, required_names)
    self.assertEqual(leftover, [])
def test_worker_region_and_worker_zone_mutually_exclusive(self):
    """Check that giving both ``--worker_region`` and ``--worker_zone`` fails.

    Validation with ``DataflowRunner`` must return a truthy error result.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--worker_region',
        'us-east1',
        '--worker_zone',
        'us-east1-b',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertTrue(problems)
def test_dataflow_job_file_and_template_location_mutually_exclusive(self):
    """Check that template location and job file cannot be combined.

    Both options are supplied to an ``OtherRunner`` validation, which must
    return a truthy error result.
    """
    mock_runner = MockRunners.OtherRunner()
    argv = [
        '--template_location',
        'abc',
        '--dataflow_job_file',
        'def',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertTrue(problems)
def test_missing_required_options(self):
    """Check the errors produced when no options and no default region exist.

    The runner's ``get_default_gcp_region`` is replaced with a callable that
    returns ``None``. The ``check_errors_for_arguments`` helper must then
    return an empty list for ``project``, ``staging_location``,
    ``temp_location`` and ``region``.
    """
    parsed = PipelineOptions([''])
    mock_runner = MockRunners.DataflowRunner()
    # Remove default region for this test.
    mock_runner.get_default_gcp_region = lambda: None
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    required_names = [
        'project',
        'staging_location',
        'temp_location',
        'region',
    ]
    leftover = self.check_errors_for_arguments(problems, required_names)
    self.assertEqual(leftover, [])
def test_region_optional_for_non_service_runner(self):
    """Check that no region is needed with a localhost ``--dataflow_endpoint``.

    The runner's default region lookup is stubbed to return ``None`` and no
    region option is passed; validation must still return zero errors.
    """
    mock_runner = MockRunners.DataflowRunner()
    # Remove default region for this test.
    mock_runner.get_default_gcp_region = lambda: None
    argv = [
        '--project=example:example',
        '--temp_location=gs://foo/bar',
        '--dataflow_endpoint=http://localhost:20281',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 0)
def test_num_workers_can_equal_max_num_workers(self):
    """Check that equal ``--num_workers`` and ``--max_num_workers`` is valid.

    Both are set to 42 and validation must return zero errors.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--num_workers=42',
        '--max_num_workers=42',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 0)
def test_zone_alias_worker_zone(self):
    """Check that ``--zone`` ends up in ``worker_zone`` after validation.

    Validation must return zero errors. Afterwards the ``WorkerOptions`` view
    must have ``zone`` set to ``None`` and ``worker_zone`` equal to
    'us-east1-b'.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--zone=us-east1-b',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 0)

    legacy_zone = parsed.view_as(WorkerOptions).zone
    self.assertIsNone(legacy_zone)
    current_zone = parsed.view_as(WorkerOptions).worker_zone
    self.assertEqual(current_zone, 'us-east1-b')
def test_transform_name_mapping_without_update(self):
    """Check the error for ``--transform_name_mapping`` without update flags.

    Neither ``--update`` nor ``--streaming`` is passed. Every reported error
    must contain the "only useful when --update and --streaming" message.
    """
    argv = [
        '--project=example:example',
        '--staging_location=gs://foo/bar',
        '--temp_location=gs://foo/bar',
        '--transform_name_mapping={\"fromPardo\":\"toPardo\"}',
    ]
    parsed = PipelineOptions(argv)
    mock_runner = MockRunners.DataflowRunner()
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    message_matcher = contains_string(
        'Transform name mapping option is only useful when '
        '--update and --streaming is specified')
    assert_that(problems, only_contains(message_matcher))
def test_max_num_workers_is_positive(self):
    """Check that a negative ``--max_num_workers`` yields exactly one error.

    The single error must mention both ``max_num_workers`` and the rejected
    value ``-1``.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--max_num_workers=-1',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 1)
    for fragment in ('max_num_workers', '-1'):
        self.assertIn(fragment, problems[0])
def test_worker_harness_sdk_container_image_mutually_exclusive(self):
    """Check that two different container image options yield one error.

    ``--worker_harness_container_image`` and ``--sdk_container_image`` are
    given different values. The single error must name both options.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--worker_harness_container_image=WORKER',
        '--sdk_container_image=SDK_ONLY',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 1)
    for option_name in ('sdk_container_image', 'worker_harness_container_image'):
        self.assertIn(option_name, problems[0])
def test_transform_name_mapping_invalid_format(self):
    """Check the error for a transform name mapping with a non-string value.

    ``--update``, ``--job_name`` and ``--streaming`` are all supplied, so only
    the mapping payload (``"fromPardo"`` mapped to the number 123) is at
    fault. Every reported error must contain the invalid-format message.
    """
    argv = [
        '--project=example:example',
        '--staging_location=gs://foo/bar',
        '--temp_location=gs://foo/bar',
        '--update',
        '--job_name=test',
        '--streaming',
        '--transform_name_mapping={\"fromPardo\":123}',
    ]
    parsed = PipelineOptions(argv)
    mock_runner = MockRunners.DataflowRunner()
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    message_matcher = contains_string('Invalid transform name mapping format.')
    assert_that(problems, only_contains(message_matcher))
def test_worker_region_and_worker_zone_mutually_exclusive(self):
    """Check that combining worker region and worker zone yields one error.

    Project and temp location are also supplied. The single error must
    mention both ``worker_region`` and ``worker_zone``.
    """
    mock_runner = MockRunners.DataflowRunner()
    argv = [
        '--worker_region',
        'us-east1',
        '--worker_zone',
        'us-east1-b',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 1)
    for option_name in ('worker_region', 'worker_zone'):
        self.assertIn(option_name, problems[0])
def test_alias_worker_harness_sdk_container_image(self):
    """Check that the worker-harness image value is mirrored after validation.

    Only ``--worker_harness_container_image`` is supplied. Validation must
    return zero errors, and the ``WorkerOptions`` view must then expose the
    same value on both ``worker_harness_container_image`` and
    ``sdk_container_image``.
    """
    mock_runner = MockRunners.DataflowRunner()
    test_image = "WORKER_HARNESS"
    image_flag = '--worker_harness_container_image=%s' % test_image
    argv = [
        image_flag,
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ]
    parsed = PipelineOptions(argv)
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 0)

    harness_image = parsed.view_as(WorkerOptions).worker_harness_container_image
    self.assertEqual(harness_image, test_image)
    sdk_image = parsed.view_as(WorkerOptions).sdk_container_image
    self.assertEqual(sdk_image, test_image)
def test_local_runner(self):
    """Check that an empty option list on ``OtherRunner`` yields zero errors."""
    mock_runner = MockRunners.OtherRunner()
    parsed = PipelineOptions([])
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertEqual(len(problems), 0)
def test_type_check_additional_unrecognized_feature(self):
    """Check that an unknown ``--type_check_additional`` entry is an error.

    The value ``all,dfgdf`` is validated with ``OtherRunner`` and must return
    a truthy error result.
    """
    mock_runner = MockRunners.OtherRunner()
    parsed = PipelineOptions(['--type_check_additional=all,dfgdf'])
    problems = PipelineOptionsValidator(parsed, mock_runner).validate()
    self.assertTrue(problems)