def run_test_pipeline(
        self,
        dataset: str,
        fake_supervision_period_id: int,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None):
    """Runs a test version of the program pipeline.

    Builds the same Beam graph as the production program pipeline —
    load root entities, join them per person, classify program
    assignments into events, compute metrics — and asserts the
    resulting metrics validate.
    """
    test_pipeline = TestPipeline()

    # Load each root entity type, keyed on person_id so the
    # collections can be co-grouped below.
    persons = (
        test_pipeline
        | 'Load Persons' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True))

    program_assignments = (
        test_pipeline
        | 'Load Program Assignments' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateProgramAssignment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    assessments = (
        test_pipeline
        | 'Load Assessments' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateAssessment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    supervision_periods = (
        test_pipeline
        | 'Load SupervisionPeriods' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    # Fake row of the supervision-period-to-agent association table,
    # tied to the caller-supplied supervision period id.
    agent_association_row = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': fake_supervision_period_id
    }

    agent_associations = (
        test_pipeline
        | 'Create SupervisionPeriod to Agent table' >> beam.Create(
            [agent_association_row]))

    agent_associations_kv = (
        agent_associations
        | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
        beam.ParDo(pipeline.ConvertDictToKVTuple(),
                   'supervision_period_id'))

    # Join each StatePerson with their other entities by person_id.
    persons_entities = (
        {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        | 'Group StatePerson to StateProgramAssignments and' >>
        beam.CoGroupByKey())

    # Identify ProgramEvents from each person's
    # StateProgramAssignments, with the agent table as a side input.
    person_program_events = (
        persons_entities
        | beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(agent_associations_kv)))

    # Pipeline job details, needed downstream for accessing job_id.
    all_pipeline_options = PipelineOptions().get_all_options()

    # Local jobs need an explicit timestamp.
    job_timestamp = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    # No filter means every metric type is produced.
    metric_types = metric_types_filter if metric_types_filter else {'ALL'}

    program_metrics = (
        person_program_events
        | 'Get Program Metrics' >>  # type: ignore
        pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            metric_types=metric_types,
            calculation_end_month=None,
            calculation_month_count=-1))

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()
def testProgramPipeline(self):
    """Tests the program pipeline.

    Seeds a data_dict with one fake person plus related rows, builds
    the full program pipeline graph against it, and asserts the
    produced metrics validate.
    """
    fake_person_id = 12345

    fake_person = schema.StatePerson(
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)

    persons_data = [normalized_database_base_dict(fake_person)]

    # NOTE(review): both race rows share person_race_id=111 —
    # presumably fine for this in-memory data_dict, but confirm it is
    # not a copy-paste slip.
    race_1 = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=fake_person_id)
    race_2 = schema.StatePersonRace(
        person_race_id=111,
        state_code='ND',
        race=Race.WHITE,
        person_id=fake_person_id)

    races_data = normalized_database_base_dict_list([race_1, race_2])

    ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=fake_person_id)

    ethnicity_data = normalized_database_base_dict_list([ethnicity])

    program_assignment = schema.StateProgramAssignment(
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=fake_person_id)

    assessment = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=fake_person_id)

    supervision_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=fake_person_id)

    program_assignment_data = [
        normalized_database_base_dict(program_assignment)
    ]

    assessment_data = [normalized_database_base_dict(assessment)]

    supervision_periods_data = [
        normalized_database_base_dict(supervision_period)
    ]

    supervision_violation_response = \
        database_test_utils.generate_test_supervision_violation_response(
            fake_person_id)

    supervision_violation_response_data = [
        normalized_database_base_dict(supervision_violation_response)
    ]

    # Table name -> fake rows for that table.
    data_dict = {
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSupervisionViolationResponse.__tablename__:
            supervision_violation_response_data,
        schema.StateSupervisionPeriod.__tablename__:
            supervision_periods_data,
        schema.StateProgramAssignment.__tablename__:
            program_assignment_data,
        schema.StateAssessment.__tablename__: assessment_data
    }

    test_pipeline = TestPipeline()

    # Load each root entity type from the data_dict, keyed on
    # person_id for the co-group below.
    persons = (
        test_pipeline
        | 'Load Persons' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StatePerson,
            root_entity_class=entities.StatePerson,
            unifying_id_field='person_id',
            build_related_entities=True))

    program_assignments = (
        test_pipeline
        | 'Load Program Assignments' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateProgramAssignment,
            root_entity_class=entities.StateProgramAssignment,
            unifying_id_field='person_id',
            build_related_entities=True))

    assessments = (
        test_pipeline
        | 'Load Assessments' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateAssessment,
            root_entity_class=entities.StateAssessment,
            unifying_id_field='person_id',
            build_related_entities=False))

    supervision_periods = (
        test_pipeline
        | 'Load SupervisionPeriods' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateSupervisionPeriod,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field='person_id',
            build_related_entities=False))

    # Fake supervision-period-to-agent association row, tied to the
    # supervision period created above.
    agent_association_row = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': supervision_period.supervision_period_id
    }

    agent_associations = (
        test_pipeline
        | 'Create SupervisionPeriod to Agent table' >> beam.Create(
            [agent_association_row]))

    agent_associations_kv = (
        agent_associations
        | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
        beam.ParDo(pipeline.ConvertDictToKVTuple(),
                   'supervision_period_id'))

    # Join each StatePerson with their other entities by person_id.
    persons_entities = (
        {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        | 'Group StatePerson to StateProgramAssignments and' >>
        beam.CoGroupByKey())

    # Identify ProgramEvents from each person's
    # StateProgramAssignments, with the agent table as a side input.
    person_program_events = (
        persons_entities
        | beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(agent_associations_kv)))

    # Pipeline job details, needed downstream for accessing job_id.
    all_pipeline_options = PipelineOptions().get_all_options()

    # Local jobs need an explicit timestamp.
    job_timestamp = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    program_metrics = (
        person_program_events
        | 'Get Program Metrics' >> pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1))

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()