def testClassifyProgramAssignments_NoReferrals(self):
    """Checks that ClassifyProgramAssignments produces no output when the
    person's list of program assignments is empty."""
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        state_code="US_XX",
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    oras_assessment = entities.StateAssessment.new_with_defaults(
        state_code="US_XX",
        assessment_type=StateAssessmentType.ORAS,
        assessment_score=33,
        assessment_date=date(2009, 7, 10),
    )

    parole_period = entities.StateSupervisionPeriod.new_with_defaults(
        supervision_period_id=111,
        status=StateSupervisionPeriodStatus.TERMINATED,
        state_code="US_XX",
        start_date=date(2008, 3, 5),
        termination_date=date(2010, 5, 19),
        termination_reason=StateSupervisionPeriodTerminationReason.DISCHARGE,
        supervision_type=StateSupervisionType.PAROLE,
    )

    agent_association = {
        "agent_id": 1010,
        "person_id": person_id,
        "agent_external_id": "OFFICER0009",
        "supervision_period_id": parole_period.supervision_period_id,
    }

    # The person has an assessment and a supervision period, but the
    # program_assignments list is deliberately left empty.
    person_entities = {
        "person": [person],
        "program_assignments": [],
        "assessments": [oras_assessment],
        "supervision_periods": [parole_period],
        "supervision_period_to_agent_association": [agent_association],
    }

    expected_output = []

    test_pipeline = TestPipeline()

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | "Identify Program Events" >> beam.ParDo(
        pipeline.ClassifyProgramAssignments()
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testClassifyProgramAssignments_NoSupervision(self):
    """Checks the ClassifyProgramAssignments DoFn for a person who has a
    program assignment and an assessment but no supervision periods.

    The agent association table is passed to the DoFn as an AsDict side
    input.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    referral = entities.StateProgramAssignment.new_with_defaults(
        state_code='US_CA',
        program_id='PG3',
        referral_date=date(2009, 10, 3),
    )

    oras_assessment = entities.StateAssessment.new_with_defaults(
        state_code='US_CA',
        assessment_type=StateAssessmentType.ORAS,
        assessment_score=33,
        assessment_date=date(2009, 7, 10),
    )

    person_entities = {
        'person': [person],
        'program_assignments': [referral],
        'assessments': [oras_assessment],
        'supervision_periods': [],
    }

    # The expected referral event carries assessment details only; no
    # supervision fields are set on it.
    expected_event = ProgramReferralEvent(
        state_code=referral.state_code,
        program_id=referral.program_id,
        event_date=referral.referral_date,
        assessment_score=33,
        assessment_type=StateAssessmentType.ORAS,
    )
    expected_output = [(person, [expected_event])]

    test_pipeline = TestPipeline()

    # Placeholder association row fed through the side-input plumbing.
    agent_association = {'fake': 'map'}

    agent_associations = test_pipeline | (
        'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association])
    )

    agent_associations_kv = agent_associations | (
        'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(pipeline.ConvertDictToKVTuple(), 'supervision_period_id')
    )

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | 'Identify Program Events' >> beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_kv),
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testClassifyProgramAssignments_NoAssessments(self):
    """Checks the ClassifyProgramAssignments DoFn for a person who has a
    program assignment and a supervision period but no assessments.

    The agent association table is passed to the DoFn as an AsDict side
    input.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    referral = entities.StateProgramAssignment.new_with_defaults(
        state_code='US_CA',
        program_id='PG3',
        referral_date=date(2009, 10, 3),
    )

    parole_period = entities.StateSupervisionPeriod.new_with_defaults(
        supervision_period_id=111,
        status=StateSupervisionPeriodStatus.TERMINATED,
        state_code='UT',
        start_date=date(2008, 3, 5),
        termination_date=date(2010, 5, 19),
        termination_reason=StateSupervisionPeriodTerminationReason.DISCHARGE,
        supervision_type=StateSupervisionType.PAROLE,
    )

    person_entities = {
        'person': [person],
        'program_assignments': [referral],
        'assessments': [],
        'supervision_periods': [parole_period],
    }

    # The expected referral event has supervision and officer details but
    # no assessment fields.
    expected_event = ProgramReferralEvent(
        state_code=referral.state_code,
        program_id=referral.program_id,
        event_date=referral.referral_date,
        supervision_type=parole_period.supervision_type,
        supervising_officer_external_id='OFFICER0009',
        supervising_district_external_id='10',
    )
    expected_output = [(person, [expected_event])]

    test_pipeline = TestPipeline()

    agent_association = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': parole_period.supervision_period_id,
    }

    agent_associations = test_pipeline | (
        'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association])
    )

    agent_associations_kv = agent_associations | (
        'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(pipeline.ConvertDictToKVTuple(), 'supervision_period_id')
    )

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | 'Identify Program Events' >> beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_kv),
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def run_test_pipeline(
        self,
        dataset: str,
        fake_supervision_period_id: int,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None):
    """Builds and runs a test version of the program pipeline.

    Args:
        dataset: Dataset name handed to every BuildRootEntity loader.
        fake_supervision_period_id: supervision_period_id placed in the single
            supervision-period-to-agent association row.
        unifying_id_field_filter_set: Optional filter set forwarded to the
            program assignment, assessment and supervision period loaders
            (the person loader does not receive it).
        metric_types_filter: Optional set of metric types; when empty or
            None, {'ALL'} is used.
    """
    test_pipeline = TestPipeline()

    def load_root_entity(label, entity_class, build_related, **extra_kwargs):
        """Applies a labelled BuildRootEntity transform to the test pipeline."""
        loader = extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entity_class,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=build_related,
            **extra_kwargs)
        return test_pipeline | label >> loader  # type: ignore

    # StatePersons are loaded without the id filter set.
    persons = load_root_entity('Load Persons', entities.StatePerson, True)

    # StateProgramAssignments
    program_assignments = load_root_entity(
        'Load Program Assignments',
        entities.StateProgramAssignment,
        True,
        unifying_id_field_filter_set=unifying_id_field_filter_set)

    # StateAssessments
    assessments = load_root_entity(
        'Load Assessments',
        entities.StateAssessment,
        False,
        unifying_id_field_filter_set=unifying_id_field_filter_set)

    # StateSupervisionPeriods
    supervision_periods = load_root_entity(
        'Load SupervisionPeriods',
        entities.StateSupervisionPeriod,
        False,
        unifying_id_field_filter_set=unifying_id_field_filter_set)

    agent_association = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': fake_supervision_period_id,
    }

    agent_associations = test_pipeline | (
        'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association]))

    agent_associations_kv = agent_associations | (
        'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(pipeline.ConvertDictToKVTuple(),
                      'supervision_period_id'))

    # Group each StatePerson with their other entities.
    entities_by_kind = {
        'person': persons,
        'program_assignments': program_assignments,
        'assessments': assessments,
        'supervision_periods': supervision_periods,
    }
    persons_entities = entities_by_kind | (
        'Group StatePerson to StateProgramAssignments and'
        >> beam.CoGroupByKey())

    # Identify ProgramEvents from each StatePerson's StateProgramAssignments,
    # with the agent associations supplied as a dict side input.
    person_program_events = persons_entities | beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_kv))

    # Pipeline job details, plus a timestamp for local jobs.
    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')

    metric_types = metric_types_filter or {'ALL'}

    # Program metrics
    metrics_transform = pipeline.GetProgramMetrics(
        pipeline_options=all_pipeline_options,
        metric_types=metric_types,
        calculation_end_month=None,
        calculation_month_count=-1)
    program_metrics = (
        person_program_events
        | 'Get Program Metrics' >> metrics_transform)  # type: ignore

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()
def testClassifyProgramAssignments(self):
    """Checks the ClassifyProgramAssignments DoFn for a person with a program
    assignment, an assessment and a supervision period.

    Expects one referral event and one participation event, keyed by
    person_id.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    assignment = entities.StateProgramAssignment.new_with_defaults(
        state_code='US_XX',
        program_id='PG3',
        program_location_id='XYZ',
        referral_date=date(2009, 10, 3),
        participation_status=StateProgramAssignmentParticipationStatus.IN_PROGRESS,
        start_date=date(2009, 10, 19),
    )

    oras_assessment = entities.StateAssessment.new_with_defaults(
        state_code='US_XX',
        assessment_type=StateAssessmentType.ORAS,
        assessment_score=33,
        assessment_date=date(2009, 7, 10),
    )

    parole_period = entities.StateSupervisionPeriod.new_with_defaults(
        supervision_period_id=111,
        status=StateSupervisionPeriodStatus.TERMINATED,
        state_code='US_XX',
        start_date=date(2008, 3, 5),
        supervision_type=StateSupervisionType.PAROLE,
    )

    person_entities = {
        'person': [person],
        'program_assignments': [assignment],
        'assessments': [oras_assessment],
        'supervision_periods': [parole_period],
    }

    expected_referral = ProgramReferralEvent(
        state_code=assignment.state_code,
        program_id=assignment.program_id,
        event_date=assignment.referral_date,
        participation_status=assignment.participation_status,
        assessment_score=33,
        assessment_type=StateAssessmentType.ORAS,
        supervision_type=parole_period.supervision_type,
        supervising_officer_external_id='OFFICER0009',
        supervising_district_external_id='10',
    )

    # The participation event is expected to be dated with today's date.
    expected_participation = ProgramParticipationEvent(
        state_code=assignment.state_code,
        program_id=assignment.program_id,
        program_location_id=assignment.program_location_id,
        event_date=date.today(),
        is_first_day_in_program=True,
        supervision_type=parole_period.supervision_type,
    )

    expected_events = [expected_referral, expected_participation]
    expected_output = [(person.person_id, (person, expected_events))]

    test_pipeline = TestPipeline()

    agent_association = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': parole_period.supervision_period_id,
    }

    agent_associations = test_pipeline | (
        'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association])
    )

    agent_associations_kv = agent_associations | (
        'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(ConvertDictToKVTuple(), 'supervision_period_id')
    )

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | 'Identify Program Events' >> beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_kv),
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testClassifyProgramAssignments_NoSupervision(self):
    """Checks the ClassifyProgramAssignments DoFn for a person who has a
    program assignment and an assessment but no supervision periods.

    The agent association rows travel inside the grouped person dict.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        state_code="US_XX",
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    referral = entities.StateProgramAssignment.new_with_defaults(
        state_code="US_XX",
        program_id="PG3",
        referral_date=date(2009, 10, 3),
        participation_status=StateProgramAssignmentParticipationStatus.PRESENT_WITHOUT_INFO,
    )

    oras_assessment = entities.StateAssessment.new_with_defaults(
        state_code="US_XX",
        assessment_type=StateAssessmentType.ORAS,
        assessment_score=33,
        assessment_date=date(2009, 7, 10),
    )

    # Placeholder association row; there is no supervision period for it
    # to refer to.
    agent_association = {"supervision_period_id": "fake_map"}

    person_entities = {
        "person": [person],
        "program_assignments": [referral],
        "assessments": [oras_assessment],
        "supervision_periods": [],
        "supervision_period_to_agent_association": [agent_association],
    }

    # The expected referral event carries assessment details and the
    # participation status; no supervision fields are set on it.
    expected_event = ProgramReferralEvent(
        state_code=referral.state_code,
        program_id=referral.program_id,
        event_date=referral.referral_date,
        assessment_score=33,
        assessment_type=StateAssessmentType.ORAS,
        participation_status=referral.participation_status,
    )
    expected_output = [(person.person_id, (person, [expected_event]))]

    test_pipeline = TestPipeline()

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | "Identify Program Events" >> beam.ParDo(
        pipeline.ClassifyProgramAssignments()
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testClassifyProgramAssignments_NoAssessments(self):
    """Checks the ClassifyProgramAssignments DoFn for a person who has a
    program assignment and a supervision period but no assessments.

    The agent association rows travel inside the grouped person dict.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        state_code="US_XX",
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    referral = entities.StateProgramAssignment.new_with_defaults(
        state_code="US_XX",
        program_id="PG3",
        referral_date=date(2009, 10, 3),
        participation_status=StateProgramAssignmentParticipationStatus.PRESENT_WITHOUT_INFO,
    )

    parole_period = entities.StateSupervisionPeriod.new_with_defaults(
        supervision_period_id=111,
        status=StateSupervisionPeriodStatus.TERMINATED,
        state_code="US_XX",
        start_date=date(2008, 3, 5),
        termination_date=date(2010, 5, 19),
        termination_reason=StateSupervisionPeriodTerminationReason.DISCHARGE,
        supervision_type=StateSupervisionType.PAROLE,
        supervision_site="10",
    )

    agent_association = {
        "agent_id": 1010,
        "person_id": person_id,
        "agent_external_id": "OFFICER0009",
        "supervision_period_id": parole_period.supervision_period_id,
    }

    person_entities = {
        "person": [person],
        "program_assignments": [referral],
        "assessments": [],
        "supervision_periods": [parole_period],
        "supervision_period_to_agent_association": [agent_association],
    }

    # The expected referral event has supervision, officer and location
    # details but no assessment fields.
    expected_event = ProgramReferralEvent(
        state_code=referral.state_code,
        program_id=referral.program_id,
        event_date=referral.referral_date,
        participation_status=referral.participation_status,
        supervision_type=parole_period.supervision_type,
        supervising_officer_external_id="OFFICER0009",
        supervising_district_external_id="10",
        level_1_supervision_location_external_id="10",
    )
    expected_output = [(person.person_id, (person, [expected_event]))]

    test_pipeline = TestPipeline()

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | "Identify Program Events" >> beam.ParDo(
        pipeline.ClassifyProgramAssignments()
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testClassifyProgramAssignments_us_nd(self):
    """Checks the ClassifyProgramAssignments DoFn with entities whose
    state_code is US_ND.

    Expects one referral event and one participation event, keyed by
    person_id.
    """
    person_id = 12345

    person = entities.StatePerson.new_with_defaults(
        state_code="US_ND",
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    assignment = entities.StateProgramAssignment.new_with_defaults(
        state_code="US_ND",
        program_id="PG3",
        program_location_id="XYZ",
        referral_date=date(2009, 10, 3),
        participation_status=StateProgramAssignmentParticipationStatus.IN_PROGRESS,
        start_date=date(2009, 10, 19),
    )

    oras_assessment = entities.StateAssessment.new_with_defaults(
        state_code="US_ND",
        assessment_type=StateAssessmentType.ORAS,
        assessment_score=33,
        assessment_date=date(2009, 7, 10),
    )

    parole_period = entities.StateSupervisionPeriod.new_with_defaults(
        supervision_period_id=111,
        status=StateSupervisionPeriodStatus.TERMINATED,
        state_code="US_ND",
        start_date=date(2008, 3, 5),
        termination_date=date(2010, 3, 1),
        supervision_type=StateSupervisionType.PAROLE,
        supervision_site="10",
    )

    agent_association = {
        "agent_id": 1010,
        "person_id": person_id,
        "agent_external_id": "OFFICER0009",
        "supervision_period_id": parole_period.supervision_period_id,
    }

    person_entities = {
        "person": [person],
        "program_assignments": [assignment],
        "assessments": [oras_assessment],
        "supervision_periods": [parole_period],
        "supervision_period_to_agent_association": [agent_association],
    }

    expected_referral = ProgramReferralEvent(
        state_code=assignment.state_code,
        program_id=assignment.program_id,
        event_date=assignment.referral_date,
        participation_status=assignment.participation_status,
        assessment_score=33,
        assessment_type=StateAssessmentType.ORAS,
        supervision_type=parole_period.supervision_type,
        supervising_officer_external_id="OFFICER0009",
        supervising_district_external_id="10",
        level_1_supervision_location_external_id="10",
    )

    # The participation event is expected to be dated with today's date.
    expected_participation = ProgramParticipationEvent(
        state_code=assignment.state_code,
        program_id=assignment.program_id,
        program_location_id=assignment.program_location_id,
        event_date=date.today(),
        is_first_day_in_program=True,
        supervision_type=parole_period.supervision_type,
    )

    expected_events = [expected_referral, expected_participation]
    expected_output = [(person.person_id, (person, expected_events))]

    test_pipeline = TestPipeline()

    grouped_input = test_pipeline | beam.Create([(person_id, person_entities)])
    output = grouped_input | "Identify Program Events" >> beam.ParDo(
        pipeline.ClassifyProgramAssignments()
    )

    assert_that(output, equal_to(expected_output))

    test_pipeline.run()
def testProgramPipeline(self):
    """Runs the program pipeline end to end over an in-memory data_dict of
    schema rows and validates the resulting metrics with
    AssertMatchers.validate_pipeline_test()."""
    person_id = 12345

    person_row = schema.StatePerson(
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)
    persons_data = [normalized_database_base_dict(person_row)]

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=person_id)
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='ND',
        race=Race.WHITE,
        person_id=person_id)
    races_data = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=person_id)
    ethnicity_data = normalized_database_base_dict_list([hispanic_ethnicity])

    assignment_row = schema.StateProgramAssignment(
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=person_id)

    assessment_row = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=person_id)

    probation_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=person_id)

    program_assignment_data = [normalized_database_base_dict(assignment_row)]
    assessment_data = [normalized_database_base_dict(assessment_row)]
    supervision_periods_data = [
        normalized_database_base_dict(probation_period)
    ]

    violation_response = \
        database_test_utils.generate_test_supervision_violation_response(
            person_id)
    supervision_violation_response_data = [
        normalized_database_base_dict(violation_response)
    ]

    # Table name -> list of row dicts; this is what the loaders read from
    # instead of a dataset.
    data_dict = {
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSupervisionViolationResponse.__tablename__:
            supervision_violation_response_data,
        schema.StateSupervisionPeriod.__tablename__:
            supervision_periods_data,
        schema.StateProgramAssignment.__tablename__:
            program_assignment_data,
        schema.StateAssessment.__tablename__: assessment_data,
    }

    test_pipeline = TestPipeline()

    def load_root_entity(label, schema_class, entity_class, build_related):
        """Applies a labelled BuildRootEntity transform reading from data_dict."""
        return test_pipeline | label >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema_class,
            root_entity_class=entity_class,
            unifying_id_field='person_id',
            build_related_entities=build_related)

    # StatePersons
    persons = load_root_entity(
        'Load Persons', schema.StatePerson, entities.StatePerson, True)

    # StateProgramAssignments
    program_assignments = load_root_entity(
        'Load Program Assignments',
        schema.StateProgramAssignment,
        entities.StateProgramAssignment,
        True)

    # StateAssessments
    assessments = load_root_entity(
        'Load Assessments',
        schema.StateAssessment,
        entities.StateAssessment,
        False)

    # StateSupervisionPeriods
    supervision_periods = load_root_entity(
        'Load SupervisionPeriods',
        schema.StateSupervisionPeriod,
        entities.StateSupervisionPeriod,
        False)

    agent_association = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': probation_period.supervision_period_id,
    }

    agent_associations = test_pipeline | (
        'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association]))

    agent_associations_kv = agent_associations | (
        'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(pipeline.ConvertDictToKVTuple(),
                      'supervision_period_id'))

    # Group each StatePerson with their other entities.
    entities_by_kind = {
        'person': persons,
        'program_assignments': program_assignments,
        'assessments': assessments,
        'supervision_periods': supervision_periods,
    }
    persons_entities = entities_by_kind | (
        'Group StatePerson to StateProgramAssignments and'
        >> beam.CoGroupByKey())

    # Identify ProgramEvents from each StatePerson's StateProgramAssignments,
    # with the agent associations supplied as a dict side input.
    person_program_events = persons_entities | beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_kv))

    # Pipeline job details, plus a timestamp for local jobs.
    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')

    # Program metrics
    program_metrics = person_program_events | (
        'Get Program Metrics'
        >> pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1))

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()