def test_incarceration_pipeline_specify_person_id_filters(self):
    """Check that ids passed after --person_filter_ids are parsed as ints.

    All other pipeline args and the Beam options are expected to match the
    class-level DEFAULT_* fixtures.
    """
    # Arrange
    cli_args = [
        "--job_name", "incarceration-args-test",
        "--project", "recidiviz-staging",
        "--person_filter_ids", "685253", "12345", "99999",
        "--setup_file", "./setup.py",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)
    beam_options = get_apache_beam_pipeline_options_from_args(beam_args)

    # Assert
    # Start from the default namespace and override only the filter ids.
    expected_pipeline_args = Namespace(
        **{
            **vars(self.DEFAULT_INCARCERATION_PIPELINE_ARGS),
            "person_filter_ids": [685253, 12345, 99999],
        }
    )

    self.assertEqual(parsed_pipeline_args, expected_pipeline_args)
    self.assertEqual(
        beam_options.get_all_options(drop_default=True),
        self.DEFAULT_APACHE_BEAM_OPTIONS_DICT,
    )
def test_minimal_incarceration_pipeline_args_save_to_template(self):
    """Check the Beam options produced when --save_as_template is passed.

    The pipeline args are expected to stay at their defaults, while the Beam
    options are expected to be the defaults plus a template_location entry.
    """
    # Arrange
    cli_args = [
        "--job_name",
        "incarceration-args-test",
        "--project",
        "recidiviz-staging",
        "--save_as_template",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)
    beam_options = get_apache_beam_pipeline_options_from_args(beam_args)

    # Assert
    self.assertEqual(
        parsed_pipeline_args, self.DEFAULT_INCARCERATION_PIPELINE_ARGS
    )

    # Defaults plus the template location; the shared fixture is not mutated.
    expected_beam_options = {
        **self.DEFAULT_APACHE_BEAM_OPTIONS_DICT,
        "template_location": "gs://recidiviz-staging-dataflow-templates/templates/incarceration-args-test",
    }
    self.assertEqual(
        beam_options.get_all_options(drop_default=True),
        expected_beam_options,
    )
def test_incarceration_pipeline_args_missing_arg(self):
    """Check that building Beam options without --project exits with code 2."""
    # Arrange
    cli_args = [
        "--job_name",
        "incarceration-args-test",
        "--runner",
        "DirectRunner",
        # --project is deliberately left out
        "--setup_file",
        "./setup2.py",
        "--bucket",
        "recidiviz-123-my-bucket",
        "--region=us-central1",
        "--data_input",
        "county",
        "--reference_view_input",
        "reference_views_2",
        "--output",
        "dataflow_metrics_2",
        "--calculation_month_count=6",
        "--calculation_end_month=2009-07",
        "--save_as_template",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    _parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)

    # Assert
    with self.assertRaises(SystemExit) as raised:
        _ = get_apache_beam_pipeline_options_from_args(beam_args)

    # Checked after the context manager exits, once the exception is captured.
    self.assertEqual(2, raised.exception.code)
def test_incarceration_pipeline_args_missing_arg(self):
    """Check that building Beam options without --project exits with code 2."""
    # Arrange
    cli_args = [
        "--job_name", "incarceration-args-test",
        "--runner", "DirectRunner",
        # --project is deliberately left out
        "--setup_file", "./setup2.py",
        "--bucket", "recidiviz-123-my-bucket",
        "--region=us-central1",
        "--data_input", "county",
        "--reference_view_input", "reference_views_2",
        "--output", "dataflow_metrics_2",
        "--calculation_month_count=6",
        "--calculation_end_month=2009-07",
        "--save_as_template",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    _parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)

    # Assert
    with self.assertRaises(SystemExit) as raised:
        _ = get_apache_beam_pipeline_options_from_args(beam_args)

    # Checked after the context manager exits, once the exception is captured.
    self.assertEqual(2, raised.exception.code)
def test_incarceration_pipeline_args_defaults_changed(self):
    """Check parsing when the command line overrides the default arguments.

    Verifies both the parsed pipeline namespace and the non-default Beam
    options built from the remaining arguments.
    """
    # Arrange
    cli_args = [
        "--job_name", "incarceration-args-test",
        "--runner", "DirectRunner",
        "--project", "recidiviz-staging",
        "--setup_file", "./setup2.py",
        "--bucket", "recidiviz-123-my-bucket",
        "--region=us-central1",
        "--data_input", "county",
        "--reference_view_input", "reference_views_2",
        "--static_reference_input", "static_reference_2",
        "--output", "dataflow_metrics_2",
        "--calculation_month_count=6",
        "--calculation_end_month=2009-07",
        "--save_as_template",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)
    beam_options = get_apache_beam_pipeline_options_from_args(beam_args)

    # Assert
    expected_pipeline_args = Namespace(
        calculation_month_count=6,
        calculation_end_month="2009-07",
        data_input="county",
        output="dataflow_metrics_2",
        metric_types={"ALL"},
        person_filter_ids=None,
        reference_view_input="reference_views_2",
        static_reference_input="static_reference_2",
        state_code=None,
    )
    self.assertEqual(parsed_pipeline_args, expected_pipeline_args)

    expected_beam_options = {
        "runner": "DirectRunner",
        "project": "recidiviz-staging",
        "job_name": "incarceration-args-test",
        # Locations are expected to use the overridden bucket, not the project.
        "staging_location": "gs://recidiviz-123-my-bucket/staging/",
        "temp_location": "gs://recidiviz-123-my-bucket/temp/",
        "template_location": "gs://recidiviz-123-my-bucket/templates/incarceration-args-test",
        "region": "us-central1",
        "machine_type": "n1-standard-4",
        "network": "default",
        "subnetwork": "https://www.googleapis.com/compute/v1/projects/recidiviz-staging/regions/us-central1/subnetworks/default",
        "use_public_ips": False,
        "experiments": ["shuffle_mode=service", "use_beam_bq_sink"],
        "setup_file": "./setup2.py",
        "disk_size_gb": 50,
    }
    self.assertEqual(
        expected_beam_options,
        beam_options.get_all_options(drop_default=True),
    )
def test_minimal_incarceration_pipeline_args(self):
    """Check that only --job_name and --project yield the default fixtures."""
    # Arrange
    cli_args = [
        "--job_name", "incarceration-args-test",
        "--project", "recidiviz-staging",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)
    beam_options = get_apache_beam_pipeline_options_from_args(beam_args)

    # Assert
    self.assertEqual(
        parsed_pipeline_args, self.DEFAULT_INCARCERATION_PIPELINE_ARGS
    )
    self.assertEqual(
        beam_options.get_all_options(drop_default=True),
        self.DEFAULT_APACHE_BEAM_OPTIONS_DICT,
    )
def test_incarceration_pipeline_args_defaults_changed(self):
    """Check parsing when the command line overrides the default arguments.

    Verifies both the parsed pipeline namespace and the non-default Beam
    options built from the remaining arguments.
    """
    # Arrange
    cli_args = [
        "--job_name",
        "incarceration-args-test",
        "--runner",
        "DirectRunner",
        "--project",
        "recidiviz-staging",
        "--setup_file",
        "./setup2.py",
        "--bucket",
        "recidiviz-123-my-bucket",
        "--region=us-central1",
        "--data_input",
        "county",
        "--reference_view_input",
        "reference_views_2",
        "--static_reference_input",
        "static_reference_2",
        "--output",
        "dataflow_metrics_2",
        "--calculation_month_count=6",
        "--calculation_end_month=2009-07",
        "--save_as_template",
    ]

    # Act
    parser = incarceration_pipeline.get_arg_parser()
    parsed_pipeline_args, beam_args = parser.parse_known_args(cli_args)
    beam_options = get_apache_beam_pipeline_options_from_args(beam_args)

    # Assert
    expected_pipeline_args = Namespace(
        calculation_month_count=6,
        calculation_end_month="2009-07",
        data_input="county",
        output="dataflow_metrics_2",
        metric_types={"ALL"},
        person_filter_ids=None,
        reference_view_input="reference_views_2",
        static_reference_input="static_reference_2",
        state_code=None,
    )
    self.assertEqual(parsed_pipeline_args, expected_pipeline_args)

    expected_beam_options = {
        "runner": "DirectRunner",
        "project": "recidiviz-staging",
        "job_name": "incarceration-args-test",
        # Locations are expected to use the overridden bucket, not the project.
        "staging_location": "gs://recidiviz-123-my-bucket/staging/",
        "temp_location": "gs://recidiviz-123-my-bucket/temp/",
        "template_location": "gs://recidiviz-123-my-bucket/templates/incarceration-args-test",
        "region": "us-central1",
        "machine_type": "n1-standard-4",
        "network": "default",
        "subnetwork": (
            "https://www.googleapis.com/compute/v1/projects/recidiviz-staging/"
            "regions/us-central1/subnetworks/default"
        ),
        "use_public_ips": False,
        "experiments": ["shuffle_mode=service", "use_beam_bq_sink"],
        "setup_file": "./setup2.py",
        "disk_size_gb": 50,
    }
    self.assertEqual(
        expected_beam_options,
        beam_options.get_all_options(drop_default=True),
    )