def setUp(self) -> None:
    """Create the fake BigQuery source factory and stub the assessment types.

    The started patcher and the mock it produces are both stored on ``self``.
    """
    self.fake_bq_source_factory = FakeReadFromBigQueryFactory()

    patcher = mock.patch(
        'recidiviz.calculator.pipeline.program.identifier.assessment_utils.'
        '_assessment_types_of_class_for_state')
    self.assessment_types_patcher = patcher

    assessment_types_mock = patcher.start()
    self.mock_assessment_types = assessment_types_mock

    # While the patch is active, the patched function returns ORAS only.
    assessment_types_mock.return_value = [StateAssessmentType.ORAS]
def setUp(self) -> None:
    """Create the fake BigQuery I/O factories and stub the violation delegate.

    The started patcher and the mock it produces are both stored on ``self``.
    """
    self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
    self.fake_bq_sink_factory = FakeWriteToBigQueryFactory(FakeWriteToBigQuery)

    patcher = mock.patch(
        "recidiviz.calculator.pipeline.violation.identifier.get_state_specific_violation_delegate"
    )
    self.violation_delegate_patcher = patcher

    delegate_mock = patcher.start()
    self.mock_violation_delegate = delegate_mock

    # While the patch is active, the patched function returns a
    # UsXxViolationDelegate instance.
    delegate_mock.return_value = UsXxViolationDelegate()
def setUp(self) -> None:
    """Create the fake BigQuery source factory used by each test."""
    self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
class TestProgramPipeline(unittest.TestCase):
    """Tests the entire program pipeline."""

    def setUp(self) -> None:
        """Create the fake BigQuery source factory used by each test."""
        self.fake_bq_source_factory = FakeReadFromBigQueryFactory()

    @staticmethod
    def build_data_dict(
            fake_person_id: int,
            fake_supervision_period_id: int,
            *,
            persons_data: Optional[list] = None,
            program_assignment: Optional[
                "schema.StateProgramAssignment"] = None):
        """Builds a data_dict for a basic run of the pipeline.

        Args:
            fake_person_id: person_id set on every person-linked fixture row.
            fake_supervision_period_id: id of the single supervision period.
            persons_data: Optional already-normalized StatePerson rows. When
                omitted, a single person with ``fake_person_id`` is created.
            program_assignment: Optional StateProgramAssignment to use in
                place of the default one referred on 2015-05-10 for
                ``fake_person_id``.

        Returns:
            A dict keyed by table name whose values are the lists of fixture
            rows for that table.
        """
        if persons_data is None:
            fake_person = schema.StatePerson(
                person_id=fake_person_id,
                gender=Gender.MALE,
                birthdate=date(1970, 1, 1),
                residency_status=ResidencyStatus.PERMANENT)
            persons_data = [normalized_database_base_dict(fake_person)]

        if program_assignment is None:
            program_assignment = schema.StateProgramAssignment(
                state_code='CA',
                program_assignment_id=123,
                referral_date=date(2015, 5, 10),
                person_id=fake_person_id)

        # NOTE(review): both race rows share person_race_id=111 and use
        # different state codes - confirm this is intentional.
        race_1 = schema.StatePersonRace(person_race_id=111,
                                        state_code='CA',
                                        race=Race.BLACK,
                                        person_id=fake_person_id)

        race_2 = schema.StatePersonRace(person_race_id=111,
                                        state_code='ND',
                                        race=Race.WHITE,
                                        person_id=fake_person_id)

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(person_ethnicity_id=111,
                                                state_code='CA',
                                                ethnicity=Ethnicity.HISPANIC,
                                                person_id=fake_person_id)

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        assessment = schema.StateAssessment(assessment_id=298374,
                                            assessment_date=date(2015, 3, 19),
                                            assessment_type='LSIR',
                                            person_id=fake_person_id)

        supervision_period = schema.StateSupervisionPeriod(
            supervision_period_id=fake_supervision_period_id,
            state_code='CA',
            county_code='124',
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=fake_person_id)

        program_assignment_data = [
            normalized_database_base_dict(program_assignment)
        ]

        assessment_data = [normalized_database_base_dict(assessment)]

        supervision_periods_data = [
            normalized_database_base_dict(supervision_period)
        ]

        supervision_violation_response = \
            database_test_utils.generate_test_supervision_violation_response(
                fake_person_id)

        supervision_violation_response_data = [
            normalized_database_base_dict(supervision_violation_response)
        ]

        data_dict = {
            schema.StatePerson.__tablename__: persons_data,
            schema.StatePersonRace.__tablename__: races_data,
            schema.StatePersonEthnicity.__tablename__: ethnicity_data,
            schema.StateSupervisionViolationResponse.__tablename__:
                supervision_violation_response_data,
            schema.StateSupervisionPeriod.__tablename__:
                supervision_periods_data,
            schema.StateProgramAssignment.__tablename__:
                program_assignment_data,
            schema.StateAssessment.__tablename__: assessment_data,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
        }

        return data_dict

    def testProgramPipeline(self):
        """Tests the program pipeline."""
        fake_person_id = 12345
        fake_supervision_period_id = 12345

        data_dict = self.build_data_dict(fake_person_id,
                                         fake_supervision_period_id)

        dataset = 'recidiviz-123.state'

        with patch(
                'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery',
                self.fake_bq_source_factory.create_fake_bq_source_constructor(
                    dataset, data_dict)):
            self.run_test_pipeline(dataset, fake_supervision_period_id)

    def testProgramPipelineWithFilterSet(self):
        """Tests the program pipeline when a person-id filter set is given."""
        fake_person_id = 12345
        fake_supervision_period_id = 12345

        data_dict = self.build_data_dict(fake_person_id,
                                         fake_supervision_period_id)

        dataset = 'recidiviz-123.state'

        with patch(
                'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery',
                self.fake_bq_source_factory.create_fake_bq_source_constructor(
                    dataset, data_dict)):
            self.run_test_pipeline(
                dataset,
                fake_supervision_period_id,
                unifying_id_field_filter_set={fake_person_id})

    def run_test_pipeline(
            self,
            dataset: str,
            fake_supervision_period_id: int,
            unifying_id_field_filter_set: Optional[Set[int]] = None,
            metric_types_filter: Optional[Set[str]] = None):
        """Runs a test version of the program pipeline.

        Args:
            dataset: Dataset name handed to every BuildRootEntity step.
            fake_supervision_period_id: supervision_period_id used in the
                in-memory supervision-period-to-agent association row.
            unifying_id_field_filter_set: Optional filter set passed to the
                program assignment, assessment and supervision period loads
                (the person load does not receive it).
            metric_types_filter: Optional metric type names; when empty or
                None the pipeline is asked for {'ALL'}.
        """
        test_pipeline = TestPipeline()

        # Get StatePersons
        persons = (
            test_pipeline
            | 'Load Persons' >>  # type: ignore
            extractor_utils.BuildRootEntity(
                dataset=dataset,
                root_entity_class=entities.StatePerson,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True))

        # Get StateProgramAssignments
        program_assignments = (
            test_pipeline
            | 'Load Program Assignments' >>  # type: ignore
            extractor_utils.BuildRootEntity(
                dataset=dataset,
                root_entity_class=entities.StateProgramAssignment,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=unifying_id_field_filter_set))

        # Get StateAssessments
        assessments = (
            test_pipeline
            | 'Load Assessments' >>  # type: ignore
            extractor_utils.BuildRootEntity(
                dataset=dataset,
                root_entity_class=entities.StateAssessment,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=False,
                unifying_id_field_filter_set=unifying_id_field_filter_set))

        # Get StateSupervisionPeriods
        supervision_periods = (
            test_pipeline
            | 'Load SupervisionPeriods' >>  # type: ignore
            extractor_utils.BuildRootEntity(
                dataset=dataset,
                root_entity_class=entities.StateSupervisionPeriod,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=False,
                unifying_id_field_filter_set=unifying_id_field_filter_set))

        supervision_period_to_agent_map = {
            'agent_id': 1010,
            'agent_external_id': 'OFFICER0009',
            'district_external_id': '10',
            'supervision_period_id': fake_supervision_period_id
        }

        supervision_period_to_agent_associations = (
            test_pipeline
            | 'Create SupervisionPeriod to Agent table' >> beam.Create(
                [supervision_period_to_agent_map]))

        supervision_period_to_agent_associations_as_kv = (
            supervision_period_to_agent_associations
            | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
            beam.ParDo(pipeline.ConvertDictToKVTuple(),
                       'supervision_period_id'))

        # Group each StatePerson with their other entities
        persons_entities = ({
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
                            | 'Group StatePerson to StateProgramAssignments and'
                            >> beam.CoGroupByKey())

        # Identify ProgramEvents from the StatePerson's
        # StateProgramAssignments
        person_program_events = (
            persons_entities
            | beam.ParDo(
                pipeline.ClassifyProgramAssignments(),
                AsDict(supervision_period_to_agent_associations_as_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = PipelineOptions().get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        all_pipeline_options['job_timestamp'] = job_timestamp

        # An empty or missing filter means every metric type is requested.
        metric_types = metric_types_filter or {'ALL'}

        # Get program metrics
        program_metrics = (
            person_program_events
            | 'Get Program Metrics' >>  # type: ignore
            pipeline.GetProgramMetrics(pipeline_options=all_pipeline_options,
                                       metric_types=metric_types,
                                       calculation_end_month=None,
                                       calculation_month_count=-1))

        assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

        test_pipeline.run()

    def testProgramPipelineNoReferrals(self):
        """Tests the program pipeline where one person does not have any
        program assignment entities."""
        fake_person_id = 12345
        fake_person_id_2 = 9876
        fake_supervision_period_id = 1111

        fake_person = schema.StatePerson(
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)

        fake_person_2 = schema.StatePerson(
            person_id=fake_person_id_2,
            gender=Gender.MALE,
            birthdate=date(1974, 3, 12),
            residency_status=ResidencyStatus.PERMANENT)

        persons_data = normalized_database_base_dict_list(
            [fake_person, fake_person_2])

        # Program assignment for a different person
        program_assignment = schema.StateProgramAssignment(
            state_code='CA',
            program_assignment_id=123,
            referral_date=date(2015, 5, 10),
            person_id=fake_person_id_2)

        # All remaining fixture rows (races, ethnicity, assessment,
        # supervision period, violation response) belong to fake_person_id
        # and are shared with the basic fixture.
        data_dict = self.build_data_dict(
            fake_person_id,
            fake_supervision_period_id,
            persons_data=persons_data,
            program_assignment=program_assignment)

        dataset = 'recidiviz-123.state'

        with patch(
                'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery',
                self.fake_bq_source_factory.create_fake_bq_source_constructor(
                    dataset, data_dict)):
            self.run_test_pipeline(dataset, fake_supervision_period_id)
class TestProgramPipeline(unittest.TestCase):
    """Tests the entire program pipeline."""

    def setUp(self) -> None:
        """Create the fake BigQuery I/O factories and stub assessment types."""
        self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
        self.fake_bq_sink_factory = FakeWriteToBigQueryFactory(FakeWriteToBigQuery)

        self.assessment_types_patcher = mock.patch(
            "recidiviz.calculator.pipeline.program.identifier.assessment_utils."
            "_assessment_types_of_class_for_state"
        )
        self.mock_assessment_types = self.assessment_types_patcher.start()
        # Register the stop right after start() so the patch is undone even
        # if a later setUp step raises (tearDown is skipped in that case).
        self.addCleanup(self.assessment_types_patcher.stop)
        self.mock_assessment_types.return_value = [StateAssessmentType.ORAS]

    @staticmethod
    def build_data_dict(
        fake_person_id: int,
        fake_supervision_period_id: int,
        *,
        persons_data: Optional[list] = None,
        program_assignment: Optional["schema.StateProgramAssignment"] = None,
    ):
        """Builds a data_dict for a basic run of the pipeline.

        Args:
            fake_person_id: person_id set on every person-linked fixture row.
            fake_supervision_period_id: id of the single supervision period,
                also used in the supervision-period-to-agent row.
            persons_data: Optional already-normalized StatePerson rows. When
                omitted, a single US_XX person with ``fake_person_id`` is
                created.
            program_assignment: Optional StateProgramAssignment to use in
                place of the default IN_PROGRESS one for ``fake_person_id``.

        Returns:
            A dict keyed by table name whose values are the lists of fixture
            rows for that table.
        """
        if persons_data is None:
            fake_person = schema.StatePerson(
                state_code="US_XX",
                person_id=fake_person_id,
                gender=Gender.MALE,
                birthdate=date(1970, 1, 1),
                residency_status=ResidencyStatus.PERMANENT,
            )
            persons_data = [normalized_database_base_dict(fake_person)]

        if program_assignment is None:
            program_assignment = schema.StateProgramAssignment(
                state_code="US_XX",
                program_assignment_id=123,
                referral_date=date(2015, 5, 10),
                person_id=fake_person_id,
                participation_status=StateProgramAssignmentParticipationStatus.IN_PROGRESS,
            )

        # NOTE(review): both race rows share person_race_id=111 - confirm
        # this is intentional.
        race_1 = schema.StatePersonRace(
            person_race_id=111,
            state_code="US_XX",
            race=Race.BLACK,
            person_id=fake_person_id,
        )

        race_2 = schema.StatePersonRace(
            person_race_id=111,
            state_code="US_XX",
            race=Race.WHITE,
            person_id=fake_person_id,
        )

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code="US_XX",
            ethnicity=Ethnicity.HISPANIC,
            person_id=fake_person_id,
        )

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        assessment = schema.StateAssessment(
            assessment_id=298374,
            state_code="US_XX",
            assessment_date=date(2015, 3, 19),
            assessment_type="LSIR",
            person_id=fake_person_id,
        )

        supervision_period = schema.StateSupervisionPeriod(
            supervision_period_id=fake_supervision_period_id,
            state_code="US_XX",
            county_code="124",
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=fake_person_id,
            status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
        )

        program_assignment_data = [normalized_database_base_dict(program_assignment)]

        assessment_data = [normalized_database_base_dict(assessment)]

        supervision_periods_data = [normalized_database_base_dict(supervision_period)]

        supervision_violation_response = (
            database_test_utils.generate_test_supervision_violation_response(
                fake_person_id
            )
        )

        supervision_violation_response_data = [
            normalized_database_base_dict(supervision_violation_response)
        ]

        supervision_period_to_agent_data = [
            {
                "agent_id": 1010,
                "person_id": fake_person_id,
                "state_code": "US_XX",
                "agent_external_id": "OFFICER0009",
                "supervision_period_id": fake_supervision_period_id,
            }
        ]

        state_race_ethnicity_population_count_data = [
            {
                "state_code": "US_XX",
                "race_or_ethnicity": "BLACK",
                "population_count": 1,
                "representation_priority": 1,
            }
        ]

        data_dict = {
            schema.StatePerson.__tablename__: persons_data,
            schema.StatePersonRace.__tablename__: races_data,
            schema.StatePersonEthnicity.__tablename__: ethnicity_data,
            schema.StateSupervisionViolationResponse.__tablename__: supervision_violation_response_data,
            schema.StateSupervisionPeriod.__tablename__: supervision_periods_data,
            schema.StateProgramAssignment.__tablename__: program_assignment_data,
            schema.StateAssessment.__tablename__: assessment_data,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
            "supervision_period_to_agent_association": supervision_period_to_agent_data,
            "state_race_ethnicity_population_counts": state_race_ethnicity_population_count_data,
        }

        return data_dict

    def testProgramPipeline(self):
        """Tests the program pipeline."""
        fake_person_id = 12345
        fake_supervision_period_id = 12345

        data_dict = self.build_data_dict(fake_person_id, fake_supervision_period_id)

        dataset = "recidiviz-123.state"

        self.run_test_pipeline(dataset, data_dict)

    def testProgramPipelineWithFilterSet(self):
        """Tests the program pipeline when a person-id filter set is given."""
        fake_person_id = 12345
        fake_supervision_period_id = 12345

        data_dict = self.build_data_dict(fake_person_id, fake_supervision_period_id)

        dataset = "recidiviz-123.state"

        self.run_test_pipeline(
            dataset, data_dict, unifying_id_field_filter_set={fake_person_id}
        )

    def run_test_pipeline(
        self,
        dataset: str,
        data_dict: DataTablesDict,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None,
    ):
        """Runs a test version of the program pipeline.

        Builds a fake BigQuery source over ``data_dict`` and a fake sink that
        expects PROGRAM_REFERRAL metrics, then hands both to the shared
        ``run_test_pipeline`` helper for state US_XX.

        Args:
            dataset: Dataset name given to both fake constructors and the run.
            data_dict: Table-name-to-rows fixture served by the fake source.
            unifying_id_field_filter_set: Optional filter set forwarded to the
                pipeline run.
            metric_types_filter: Optional metric type names forwarded to the
                pipeline run.
        """
        expected_metric_types = {
            ProgramMetricType.PROGRAM_REFERRAL,
        }

        read_from_bq_constructor = (
            self.fake_bq_source_factory.create_fake_bq_source_constructor(
                dataset, data_dict
            )
        )
        write_to_bq_constructor = (
            self.fake_bq_sink_factory.create_fake_bq_sink_constructor(
                dataset,
                expected_output_metric_types=expected_metric_types,
            )
        )

        # This resolves to the module-level run_test_pipeline helper, not to
        # this method (there is no ``self.`` prefix).
        run_test_pipeline(
            pipeline_module=pipeline,
            state_code="US_XX",
            dataset=dataset,
            read_from_bq_constructor=read_from_bq_constructor,
            write_to_bq_constructor=write_to_bq_constructor,
            unifying_id_field_filter_set=unifying_id_field_filter_set,
            metric_types_filter=metric_types_filter,
        )

    def testProgramPipelineNoReferrals(self):
        """Tests the program pipeline where one person does not have any
        program assignment entities."""
        fake_person_id = 12345
        fake_person_id_2 = 9876
        fake_supervision_period_id = 1111

        fake_person = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        fake_person_2 = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id_2,
            gender=Gender.MALE,
            birthdate=date(1974, 3, 12),
            residency_status=ResidencyStatus.PERMANENT,
        )

        persons_data = normalized_database_base_dict_list([fake_person, fake_person_2])

        # Program assignment for a different person
        program_assignment = schema.StateProgramAssignment(
            state_code="US_XX",
            program_assignment_id=123,
            referral_date=date(2015, 5, 10),
            person_id=fake_person_id_2,
            participation_status=StateProgramAssignmentParticipationStatus.DENIED,
        )

        # All remaining fixture rows (races, ethnicity, assessment,
        # supervision period, violation response, agent association,
        # population counts) belong to fake_person_id and are shared with
        # the basic fixture.
        data_dict = self.build_data_dict(
            fake_person_id,
            fake_supervision_period_id,
            persons_data=persons_data,
            program_assignment=program_assignment,
        )

        dataset = "recidiviz-123.state"

        self.run_test_pipeline(dataset, data_dict)
class TestIncarcerationPipeline(unittest.TestCase): """Tests the entire incarceration pipeline.""" def setUp(self) -> None: self.fake_bq_source_factory = FakeReadFromBigQueryFactory() @staticmethod def _default_data_dict(): return { schema.StatePerson.__tablename__: [], schema.StatePersonRace.__tablename__: [], schema.StatePersonEthnicity.__tablename__: [], schema.StateSentenceGroup.__tablename__: [], schema.StateIncarcerationSentence.__tablename__: [], schema.StateSupervisionSentence.__tablename__: [], schema.StateIncarcerationPeriod.__tablename__: [], schema.state_incarceration_sentence_incarceration_period_association_table.name: [], schema.state_supervision_sentence_incarceration_period_association_table.name: [], schema.StatePersonExternalId.__tablename__: [], schema.StatePersonAlias.__tablename__: [], schema.StateAssessment.__tablename__: [], schema.StateProgramAssignment.__tablename__: [], schema.StateFine.__tablename__: [], schema.StateCharge.__tablename__: [], schema.StateSupervisionPeriod.__tablename__: [], schema.StateEarlyDischarge.__tablename__: [], schema.state_charge_incarceration_sentence_association_table.name: [], schema.state_charge_supervision_sentence_association_table.name: [], schema.state_incarceration_sentence_supervision_period_association_table.name: [], schema.state_supervision_sentence_supervision_period_association_table.name: [], } def build_incarceration_pipeline_data_dict(self, fake_person_id: int, state_code: str = 'US_XX'): """Builds a data_dict for a basic run of the pipeline.""" fake_person = schema.StatePerson( state_code=state_code, person_id=fake_person_id, gender=Gender.MALE, birthdate=date(1970, 1, 1), residency_status=ResidencyStatus.PERMANENT) persons_data = [normalized_database_base_dict(fake_person)] race_1 = schema.StatePersonRace(person_race_id=111, state_code=state_code, race=Race.BLACK, person_id=fake_person_id) race_2 = schema.StatePersonRace(person_race_id=111, state_code=state_code, race=Race.WHITE, 
person_id=fake_person_id) races_data = normalized_database_base_dict_list([race_1, race_2]) ethnicity = schema.StatePersonEthnicity(person_ethnicity_id=111, state_code=state_code, ethnicity=Ethnicity.HISPANIC, person_id=fake_person_id) ethnicity_data = normalized_database_base_dict_list([ethnicity]) sentence_group = schema.StateSentenceGroup(sentence_group_id=111, person_id=fake_person_id) initial_incarceration = schema.StateIncarcerationPeriod( incarceration_period_id=1111, incarceration_type=StateIncarcerationType.STATE_PRISON, status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY, state_code=state_code, county_code='124', facility='San Quentin', facility_security_level=StateIncarcerationFacilitySecurityLevel. MAXIMUM, admission_reason=StateIncarcerationPeriodAdmissionReason. NEW_ADMISSION, projected_release_reason=StateIncarcerationPeriodReleaseReason. CONDITIONAL_RELEASE, admission_date=date(2008, 11, 20), release_date=date(2010, 12, 4), release_reason=StateIncarcerationPeriodReleaseReason. SENTENCE_SERVED, person_id=fake_person_id, ) first_reincarceration = schema.StateIncarcerationPeriod( incarceration_period_id=2222, incarceration_type=StateIncarcerationType.STATE_PRISON, status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY, state_code=state_code, county_code='124', facility='San Quentin', facility_security_level=StateIncarcerationFacilitySecurityLevel. MAXIMUM, admission_reason=StateIncarcerationPeriodAdmissionReason. NEW_ADMISSION, projected_release_reason=StateIncarcerationPeriodReleaseReason. CONDITIONAL_RELEASE, admission_date=date(2011, 4, 5), release_date=date(2014, 4, 14), release_reason=StateIncarcerationPeriodReleaseReason. 
SENTENCE_SERVED, person_id=fake_person_id) subsequent_reincarceration = schema.StateIncarcerationPeriod( incarceration_period_id=3333, incarceration_type=StateIncarcerationType.STATE_PRISON, status=StateIncarcerationPeriodStatus.IN_CUSTODY, state_code=state_code, county_code='124', facility='San Quentin', facility_security_level=StateIncarcerationFacilitySecurityLevel. MAXIMUM, admission_reason=StateIncarcerationPeriodAdmissionReason. NEW_ADMISSION, projected_release_reason=StateIncarcerationPeriodReleaseReason. CONDITIONAL_RELEASE, admission_date=date(2017, 1, 4), person_id=fake_person_id) incarceration_sentence = schema.StateIncarcerationSentence( incarceration_sentence_id=1111, state_code=state_code, sentence_group_id=sentence_group.sentence_group_id, incarceration_periods=[ initial_incarceration, first_reincarceration, subsequent_reincarceration ], person_id=fake_person_id) supervision_sentence = schema.StateSupervisionSentence( supervision_sentence_id=123, state_code=state_code, person_id=fake_person_id) sentence_group.incarceration_sentences = [incarceration_sentence] sentence_group_data = [normalized_database_base_dict(sentence_group)] incarceration_sentence_data = [ normalized_database_base_dict(incarceration_sentence) ] supervision_sentence_data = [ normalized_database_base_dict(supervision_sentence) ] incarceration_periods_data = [ normalized_database_base_dict(initial_incarceration), normalized_database_base_dict(first_reincarceration), normalized_database_base_dict(subsequent_reincarceration) ] state_incarceration_sentence_incarceration_period_association = [ { 'incarceration_period_id': initial_incarceration.incarceration_period_id, 'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id, }, { 'incarceration_period_id': first_reincarceration.incarceration_period_id, 'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id, }, { 'incarceration_period_id': subsequent_reincarceration.incarceration_period_id, 
'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id, }, ] data_dict = self._default_data_dict() data_dict_overrides = { schema.StatePerson.__tablename__: persons_data, schema.StatePersonRace.__tablename__: races_data, schema.StatePersonEthnicity.__tablename__: ethnicity_data, schema.StateSentenceGroup.__tablename__: sentence_group_data, schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data, schema.StateSupervisionSentence.__tablename__: supervision_sentence_data, schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data, schema.state_incarceration_sentence_incarceration_period_association_table.name: state_incarceration_sentence_incarceration_period_association, } data_dict.update(data_dict_overrides) return data_dict def testIncarcerationPipeline(self): fake_person_id = 12345 data_dict = self.build_incarceration_pipeline_data_dict( fake_person_id=fake_person_id) dataset = 'recidiviz-123.state' with patch( 'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery', self.fake_bq_source_factory.create_fake_bq_source_constructor( dataset, data_dict)): self.run_test_pipeline(fake_person_id, _STATE_CODE, dataset, expected_metric_types=ALL_METRIC_TYPES_SET) def testIncarcerationPipelineFilterMetrics(self): fake_person_id = 12345 data_dict = self.build_incarceration_pipeline_data_dict( fake_person_id=fake_person_id) dataset = 'recidiviz-123.state' expected_metric_types = { IncarcerationMetricType.INCARCERATION_ADMISSION } metric_types_filter = { IncarcerationMetricType.INCARCERATION_ADMISSION.value } with patch( 'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery', self.fake_bq_source_factory.create_fake_bq_source_constructor( dataset, data_dict)): self.run_test_pipeline(fake_person_id, _STATE_CODE, dataset, expected_metric_types=expected_metric_types, metric_types_filter=metric_types_filter) def testIncarcerationPipelineUsMo(self): fake_person_id = 12345 data_dict = 
self.build_incarceration_pipeline_data_dict( fake_person_id=fake_person_id, state_code='US_MO') dataset = 'recidiviz-123.state' with patch( 'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery', self.fake_bq_source_factory.create_fake_bq_source_constructor( dataset, data_dict)): self.run_test_pipeline(fake_person_id, 'US_MO', dataset, expected_metric_types=ALL_METRIC_TYPES_SET) def testIncarcerationPipelineWithFilterSet(self): fake_person_id = 12345 data_dict = self.build_incarceration_pipeline_data_dict( fake_person_id=fake_person_id) dataset = 'recidivz-staging.state' with patch( 'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery', self.fake_bq_source_factory.create_fake_bq_source_constructor( dataset, data_dict)): self.run_test_pipeline( fake_person_id, _STATE_CODE, dataset, unifying_id_field_filter_set={fake_person_id}, expected_metric_types=ALL_METRIC_TYPES_SET) # TODO(#4375): Update tests to run actual pipeline code and only mock BQ I/O @staticmethod def run_test_pipeline( fake_person_id: int, state_code: str, dataset: str, expected_metric_types: Set[IncarcerationMetricType], allow_empty: bool = False, unifying_id_field_filter_set: Optional[Set[int]] = None, metric_types_filter: Optional[Set[str]] = None): """Runs a test version of the incarceration pipeline.""" test_pipeline = TestPipeline() # Get StatePersons persons = ( test_pipeline | 'Load Persons' >> # type: ignore extractor_utils.BuildRootEntity( dataset=dataset, root_entity_class=entities.StatePerson, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True)) # Get StateSentenceGroups sentence_groups = ( test_pipeline | 'Load StateSentenceGroups' >> # type: ignore extractor_utils.BuildRootEntity( dataset=dataset, root_entity_class=entities.StateSentenceGroup, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=unifying_id_field_filter_set)) # Get 
StateIncarcerationSentences incarceration_sentences = ( test_pipeline | 'Load StateIncarcerationSentences' >> # type: ignore extractor_utils.BuildRootEntity( dataset=dataset, root_entity_class=entities.StateIncarcerationSentence, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=unifying_id_field_filter_set)) # Get StateSupervisionSentences supervision_sentences = ( test_pipeline | 'Load StateSupervisionSentences' >> # type: ignore extractor_utils.BuildRootEntity( dataset=dataset, root_entity_class=entities.StateSupervisionSentence, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=unifying_id_field_filter_set)) us_mo_sentence_status_rows: List[Dict[str, Any]] = [{ 'person_id': fake_person_id, 'sentence_external_id': 'XXX', 'sentence_status_external_id': 'YYY', 'status_code': 'ZZZ', 'status_date': 'not_a_date', 'status_description': 'XYZ' }] us_mo_sentence_statuses = (test_pipeline | 'Create MO sentence statuses' >> beam.Create(us_mo_sentence_status_rows)) us_mo_sentence_status_rankings_as_kv = ( us_mo_sentence_statuses | 'Convert sentence status ranking table to KV tuples' >> beam.ParDo(ConvertDictToKVTuple(), 'person_id')) sentences_and_statuses = ( { 'incarceration_sentences': incarceration_sentences, 'supervision_sentences': supervision_sentences, 'sentence_statuses': us_mo_sentence_status_rankings_as_kv } | 'Group sentences to the sentence statuses for that person' >> beam.CoGroupByKey()) sentences_converted = ( sentences_and_statuses | 'Convert to state-specific sentences' >> beam.ParDo( ConvertSentencesToStateSpecificType()).with_outputs( 'incarceration_sentences', 'supervision_sentences')) sentences_and_sentence_groups = ( { 'sentence_groups': sentence_groups, 'incarceration_sentences': sentences_converted.incarceration_sentences, 'supervision_sentences': sentences_converted.supervision_sentences } | 'Group sentences to 
sentence groups' >> beam.CoGroupByKey()) sentence_groups_with_hydrated_sentences = ( sentences_and_sentence_groups | 'Set hydrated sentences on sentence groups' >> beam.ParDo( SetSentencesOnSentenceGroup())) # Identify IncarcerationEvents events from the StatePerson's # StateIncarcerationPeriods fake_person_id_to_county_query_result = [{ 'person_id': fake_person_id, 'county_of_residence': _COUNTY_OF_RESIDENCE }] person_id_to_county_kv = ( test_pipeline | "Read person id to county associations from BigQuery" >> beam.Create(fake_person_id_to_county_query_result) | "Convert person_id to counties to KV" >> beam.ParDo( ConvertDictToKVTuple(), 'person_id')) incarceration_period_judicial_district_association_row = \ {'person_id': fake_person_id, 'incarceration_period_id': 123, 'judicial_district_code': 'NW'} ip_to_judicial_district_kv = ( test_pipeline | "Read incarceration_period to judicial_district associations from BigQuery" >> beam.Create( [incarceration_period_judicial_district_association_row]) | "Convert ips to judicial districts to KV" >> beam.ParDo( ConvertDictToKVTuple(), 'person_id')) state_race_ethnicity_population_count = { 'state_code': state_code, 'race_or_ethnicity': 'BLACK', 'population_count': 1, 'representation_priority': 1 } state_race_ethnicity_population_counts = ( test_pipeline | 'Create state_race_ethnicity_population_count table' >> beam.Create([state_race_ethnicity_population_count])) # Group each StatePerson with their related entities person_entities = ( { 'person': persons, 'sentence_groups': sentence_groups_with_hydrated_sentences, 'incarceration_period_judicial_district_association': ip_to_judicial_district_kv } | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey()) # Identify IncarcerationEvents events from the StatePerson's StateIncarcerationPeriods person_incarceration_events = ( person_entities | 'Classify Incarceration Events' >> beam.ParDo( pipeline.ClassifyIncarcerationEvents(), AsDict(person_id_to_county_kv))) 
person_metadata = ( persons | "Build the person_metadata dictionary" >> beam.ParDo( BuildPersonMetadata(), AsList(state_race_ethnicity_population_counts))) person_incarceration_events_with_metadata = ( { 'person_events': person_incarceration_events, 'person_metadata': person_metadata } | 'Group IncarcerationEvents with person-level metadata' >> beam.CoGroupByKey() | 'Organize StatePerson, PersonMetadata and IncarcerationEvents for calculations' >> beam.ParDo(ExtractPersonEventsMetadata())) # Get pipeline job details for accessing job_id all_pipeline_options = PipelineOptions().get_all_options() # Add timestamp for local jobs job_timestamp = datetime.datetime.now().strftime( '%Y-%m-%d_%H_%M_%S.%f') all_pipeline_options['job_timestamp'] = job_timestamp metric_types = metric_types_filter if metric_types_filter else {'ALL'} # Get IncarcerationMetrics incarceration_metrics = ( person_incarceration_events_with_metadata | 'Get Incarceration Metrics' >> # type: ignore pipeline.GetIncarcerationMetrics( pipeline_options=all_pipeline_options, metric_types=metric_types, calculation_end_month=None, calculation_month_count=-1)) assert_that( incarceration_metrics, AssertMatchers.validate_metric_type(allow_empty=allow_empty), 'Assert that all metrics are of the expected type.') assert_that( incarceration_metrics, AssertMatchers.validate_pipeline_test(expected_metric_types), 'Assert the type of metrics produced are expected') test_pipeline.run() def build_incarceration_pipeline_data_dict_no_incarceration( self, fake_person_id: int): """Builds a data_dict for a run of the pipeline where the person has no incarceration.""" fake_person_1 = schema.StatePerson( state_code='US_XX', person_id=fake_person_id, gender=Gender.MALE, birthdate=date(1970, 1, 1), residency_status=ResidencyStatus.PERMANENT) fake_person_id_2 = 6789 fake_person_2 = schema.StatePerson( state_code='US_XX', person_id=fake_person_id_2, gender=Gender.FEMALE, birthdate=date(1990, 1, 1), 
residency_status=ResidencyStatus.PERMANENT) persons_data = [ normalized_database_base_dict(fake_person_1), normalized_database_base_dict(fake_person_2) ] sentence_group = schema.StateSentenceGroup(sentence_group_id=111, person_id=fake_person_id) incarceration_period = schema.StateIncarcerationPeriod( incarceration_period_id=1111, status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY, state_code='US_XX', county_code='124', facility='San Quentin', facility_security_level=StateIncarcerationFacilitySecurityLevel. MAXIMUM, admission_reason=StateIncarcerationPeriodAdmissionReason. NEW_ADMISSION, projected_release_reason=StateIncarcerationPeriodReleaseReason. CONDITIONAL_RELEASE, admission_date=date(2008, 11, 20), release_date=date(2010, 12, 4), release_reason=StateIncarcerationPeriodReleaseReason. SENTENCE_SERVED, person_id=fake_person_id) incarceration_sentence = schema.StateIncarcerationSentence( incarceration_sentence_id=1111, sentence_group_id=sentence_group.sentence_group_id, incarceration_periods=[incarceration_period], person_id=fake_person_id) supervision_sentence = schema.StateSupervisionSentence( supervision_sentence_id=123, person_id=fake_person_id) sentence_group.incarceration_sentences = [incarceration_sentence] sentence_group_data = [normalized_database_base_dict(sentence_group)] incarceration_sentence_data = [ normalized_database_base_dict(incarceration_sentence) ] supervision_sentence_data = [ normalized_database_base_dict(supervision_sentence) ] incarceration_periods_data = [ normalized_database_base_dict(incarceration_period) ] state_incarceration_sentence_incarceration_period_association = [ { 'incarceration_period_id': incarceration_period.incarceration_period_id, 'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id, }, ] data_dict = self._default_data_dict() data_dict_overrides = { schema.StatePerson.__tablename__: persons_data, schema.StateSentenceGroup.__tablename__: sentence_group_data, 
schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data, schema.StateSupervisionSentence.__tablename__: supervision_sentence_data, schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data, schema.state_incarceration_sentence_incarceration_period_association_table.name: state_incarceration_sentence_incarceration_period_association, } data_dict.update(data_dict_overrides) return data_dict def testIncarcerationPipelineNoIncarceration(self): """Tests the incarceration pipeline when a person doesn't have any incarceration periods.""" fake_person_id = 12345 data_dict = self.build_incarceration_pipeline_data_dict_no_incarceration( fake_person_id) dataset = 'recidiviz-123.state' with patch( 'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery', self.fake_bq_source_factory.create_fake_bq_source_constructor( dataset, data_dict)): self.run_test_pipeline(fake_person_id, _STATE_CODE, dataset, expected_metric_types=set(), allow_empty=True)
class TestViolationPipeline(unittest.TestCase):
    """Runs the violation pipeline end to end against fake BigQuery sources and sinks."""

    def setUp(self) -> None:
        """Creates the fake BigQuery factories and patches the violation delegate lookup."""
        self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
        self.fake_bq_sink_factory = FakeWriteToBigQueryFactory(FakeWriteToBigQuery)

        # Replace get_state_specific_violation_delegate with a mock that hands
        # back a UsXxViolationDelegate for the duration of each test.
        self.violation_delegate_patcher = mock.patch(
            "recidiviz.calculator.pipeline.violation.identifier.get_state_specific_violation_delegate"
        )
        self.mock_violation_delegate = self.violation_delegate_patcher.start()
        self.mock_violation_delegate.return_value = UsXxViolationDelegate()

    def tearDown(self) -> None:
        """Undoes the patch started in setUp."""
        self.violation_delegate_patcher.stop()

    @staticmethod
    def build_data_dict(
        fake_person_id: int, fake_supervision_violation_id: int
    ) -> Dict[str, List[Any]]:
        """Assembles the fake table rows for a basic run of the pipeline.

        Args:
            fake_person_id: person_id stamped on every generated row.
            fake_supervision_violation_id: id given to the single violation and
                set as the foreign key on its type entry and responses.

        Returns:
            A mapping from table name to the list of row dicts for that table.
        """
        person = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id,
            gender=Gender.FEMALE,
            birthdate=date(1985, 2, 1),
        )

        asian_race = schema.StatePersonRace(
            person_race_id=111,
            state_code="US_XX",
            race=Race.ASIAN,
            person_id=fake_person_id,
        )
        native_race = schema.StatePersonRace(
            person_race_id=111,
            state_code="US_XX",
            race=Race.AMERICAN_INDIAN_ALASKAN_NATIVE,
            person_id=fake_person_id,
        )
        not_hispanic = schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code="US_XX",
            ethnicity=Ethnicity.NOT_HISPANIC,
            person_id=fake_person_id,
        )

        felony_type_entry = schema.StateSupervisionViolationTypeEntry(
            state_code="US_XX",
            violation_type=StateSupervisionViolationType.FELONY,
            person_id=fake_person_id,
        )

        # This response (no decisions) is the one attached to the violation
        # object; only the decision-bearing response below is written to the
        # response table.
        response_without_decisions = schema.StateSupervisionViolationResponse(
            state_code="US_XX",
            supervision_violation_response_id=1234,
            response_type=entities.StateSupervisionViolationResponseType.VIOLATION_REPORT,
            response_date=date(2021, 1, 4),
            is_draft=False,
            person_id=fake_person_id,
        )

        violation = schema.StateSupervisionViolation(
            state_code="US_XX",
            supervision_violation_id=fake_supervision_violation_id,
            violation_date=date(2021, 1, 1),
            is_violent=False,
            is_sex_offense=False,
            supervision_violation_types=[felony_type_entry],
            supervision_violation_responses=[response_without_decisions],
            person_id=fake_person_id,
        )

        response_without_decisions.supervision_violation_id = fake_supervision_violation_id
        felony_type_entry.supervision_violation_id = fake_supervision_violation_id

        shock_incarceration_decision = schema.StateSupervisionViolationResponseDecisionEntry(
            state_code="US_XX",
            decision=StateSupervisionViolationResponseDecision.SHOCK_INCARCERATION,
            person_id=fake_person_id,
            supervision_violation_response_decision_entry_id=234,
            supervision_violation_response_id=1234,
        )

        response_with_decisions = schema.StateSupervisionViolationResponse(
            state_code="US_XX",
            supervision_violation_response_id=1234,
            response_type=entities.StateSupervisionViolationResponseType.VIOLATION_REPORT,
            response_date=date(2021, 1, 4),
            is_draft=False,
            supervision_violation_response_decisions=[shock_incarceration_decision],
            person_id=fake_person_id,
        )
        response_with_decisions.supervision_violation_id = fake_supervision_violation_id

        population_count_rows = [
            {
                "state_code": "US_XX",
                "race_or_ethnicity": "ASIAN",
                "population_count": 1,
                "representation_priority": 1,
            }
        ]

        return {
            schema.StatePerson.__tablename__: [normalized_database_base_dict(person)],
            schema.StatePersonRace.__tablename__: normalized_database_base_dict_list(
                [asian_race, native_race]
            ),
            schema.StatePersonEthnicity.__tablename__: normalized_database_base_dict_list(
                [not_hispanic]
            ),
            schema.StateSupervisionViolation.__tablename__: [
                normalized_database_base_dict(violation)
            ],
            schema.StateSupervisionViolationResponse.__tablename__: [
                normalized_database_base_dict(response_with_decisions)
            ],
            schema.StateSupervisionViolationTypeEntry.__tablename__: [
                normalized_database_base_dict(felony_type_entry)
            ],
            schema.StateSupervisionViolatedConditionEntry.__tablename__: [],
            schema.StateSupervisionViolationResponseDecisionEntry.__tablename__: [
                normalized_database_base_dict(shock_incarceration_decision)
            ],
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateAssessment.__tablename__: [],
            schema.StateProgramAssignment.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
            "state_race_ethnicity_population_counts": population_count_rows,
        }

    def run_test_pipeline(
        self,
        dataset: str,
        data_dict: DataTablesDict,
        expected_metric_types: Set[ViolationMetricType],
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None,
    ) -> None:
        """Runs a test version of the violation pipeline.

        Args:
            dataset: dataset name handed to both fake BigQuery constructors and
                to the pipeline run.
            data_dict: table name -> rows served by the fake BigQuery source.
            expected_metric_types: metric types the fake sink is told to expect.
            unifying_id_field_filter_set: optional id filter forwarded to the run.
            metric_types_filter: optional metric type filter forwarded to the run.
        """
        fake_source = self.fake_bq_source_factory.create_fake_bq_source_constructor(
            dataset, data_dict
        )
        fake_sink = self.fake_bq_sink_factory.create_fake_bq_sink_constructor(
            dataset, expected_output_metric_types=expected_metric_types
        )

        run_test_pipeline(
            pipeline=ViolationPipeline(),
            state_code="US_XX",
            dataset=dataset,
            read_from_bq_constructor=fake_source,
            write_to_bq_constructor=fake_sink,
            unifying_id_field_filter_set=unifying_id_field_filter_set,
            metric_types_filter=metric_types_filter,
        )

    def testViolationPipeline(self) -> None:
        """Tests the violations pipeline on the basic data dict."""
        rows_by_table = self.build_data_dict(
            fake_person_id=12345, fake_supervision_violation_id=23456
        )

        self.run_test_pipeline(
            "recidiviz-123.state",
            rows_by_table,
            expected_metric_types={ViolationMetricType.VIOLATION},
        )

    def testViolationPipelineWithFilterSet(self) -> None:
        """Tests the violation pipeline with a filter set containing the fake person."""
        rows_by_table = self.build_data_dict(
            fake_person_id=12345, fake_supervision_violation_id=23456
        )

        self.run_test_pipeline(
            "recidiviz-123.state",
            rows_by_table,
            expected_metric_types={ViolationMetricType.VIOLATION},
            unifying_id_field_filter_set={12345},
        )

    def testViolationPipelineWithNoViolations(self) -> None:
        """Tests the violation pipeline when a person does not have any violations."""
        rows_by_table = self.build_data_dict(
            fake_person_id=12345, fake_supervision_violation_id=23456
        )

        # Blank out every violation-related table so only person rows remain.
        for emptied_table in (
            schema.StateSupervisionViolation,
            schema.StateSupervisionViolationResponse,
            schema.StateSupervisionViolationTypeEntry,
            schema.StateSupervisionViolationResponseDecisionEntry,
        ):
            rows_by_table[emptied_table.__tablename__] = []

        self.run_test_pipeline(
            "recidiviz-123.state",
            rows_by_table,
            expected_metric_types=set(),
        )
def setUp(self) -> None:
    """Creates the fake BigQuery source and sink factories used by each test."""
    self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
    self.fake_bq_sink_factory = FakeWriteToBigQueryFactory(FakeWriteToBigQuery)
class TestIncarcerationPipeline(unittest.TestCase):
    """Tests the entire incarceration pipeline."""

    def setUp(self) -> None:
        """Creates the fake BigQuery source and sink factories used by each test."""
        self.fake_bq_source_factory = FakeReadFromBigQueryFactory()
        self.fake_bq_sink_factory = FakeWriteToBigQueryFactory(FakeWriteToBigQuery)

    @staticmethod
    def _default_data_dict() -> Dict[str, List[Dict]]:
        """Returns a data_dict with an empty row list for every schema table listed here.

        The builders below start from this dict and override only the tables
        they populate.
        """
        return {
            schema.StatePerson.__tablename__: [],
            schema.StatePersonRace.__tablename__: [],
            schema.StatePersonEthnicity.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
            schema.StateIncarcerationSentence.__tablename__: [],
            schema.StateSupervisionSentence.__tablename__: [],
            schema.StateIncarcerationPeriod.__tablename__: [],
            schema.state_incarceration_sentence_incarceration_period_association_table.name: [],
            schema.state_supervision_sentence_incarceration_period_association_table.name: [],
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateAssessment.__tablename__: [],
            schema.StateProgramAssignment.__tablename__: [],
            schema.StateFine.__tablename__: [],
            schema.StateCharge.__tablename__: [],
            schema.StateSupervisionPeriod.__tablename__: [],
            schema.StateEarlyDischarge.__tablename__: [],
            schema.state_charge_incarceration_sentence_association_table.name: [],
            schema.state_charge_supervision_sentence_association_table.name: [],
            schema.state_incarceration_sentence_supervision_period_association_table.name: [],
            schema.state_supervision_sentence_supervision_period_association_table.name: [],
        }

    @staticmethod
    def _reference_table_rows(
        fake_person_id: int, state_code: str
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Builds the rows for the four non-schema tables shared by both data_dict builders.

        Args:
            fake_person_id: person_id stamped on the person-keyed rows.
            state_code: state_code stamped on every row except the
                us_mo_sentence_statuses row.

        Returns:
            A mapping from table name to a one-row list for that table.
        """
        return {
            "persons_to_recent_county_of_residence": [
                {
                    "state_code": state_code,
                    "person_id": fake_person_id,
                    "county_of_residence": _COUNTY_OF_RESIDENCE,
                }
            ],
            "incarceration_period_judicial_district_association": [
                {
                    "state_code": state_code,
                    "person_id": fake_person_id,
                    "incarceration_period_id": 123,
                    "judicial_district_code": "NW",
                }
            ],
            "state_race_ethnicity_population_counts": [
                {
                    "state_code": state_code,
                    "race_or_ethnicity": "BLACK",
                    "population_count": 1,
                    "representation_priority": 1,
                }
            ],
            # The sentence-status row always carries US_MO, whatever state the
            # rest of the fixture uses.
            "us_mo_sentence_statuses": [
                {
                    "state_code": "US_MO",
                    "person_id": fake_person_id,
                    "sentence_external_id": "XXX",
                    "sentence_status_external_id": "YYY",
                    "status_code": "ZZZ",
                    "status_date": "not_a_date",
                    "status_description": "XYZ",
                }
            ],
        }

    def build_incarceration_pipeline_data_dict(
        self, fake_person_id: int, state_code: str = "US_XX"
    ) -> Dict[str, List[Dict]]:
        """Builds a data_dict for a basic run of the pipeline.

        The fixture holds one person with one sentence group, one incarceration
        sentence covering three incarceration periods, and one supervision
        sentence.

        Args:
            fake_person_id: person_id stamped on every generated row.
            state_code: state_code stamped on the generated rows.

        Returns:
            A mapping from table name to the list of row dicts for that table.
        """
        fake_person = schema.StatePerson(
            state_code=state_code,
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        persons_data = [normalized_database_base_dict(fake_person)]

        race_1 = schema.StatePersonRace(
            person_race_id=111,
            state_code=state_code,
            race=Race.BLACK,
            person_id=fake_person_id,
        )

        race_2 = schema.StatePersonRace(
            person_race_id=111,
            state_code=state_code,
            race=Race.WHITE,
            person_id=fake_person_id,
        )

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code=state_code,
            ethnicity=Ethnicity.HISPANIC,
            person_id=fake_person_id,
        )

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        sentence_group = schema.StateSentenceGroup(
            sentence_group_id=98765,
            state_code=state_code,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            person_id=fake_person_id,
        )

        initial_incarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        first_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        # The third period has no release date and is still IN_CUSTODY.
        subsequent_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
            admission_date=date(2017, 1, 4),
            person_id=fake_person_id,
        )

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            state_code=state_code,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[
                initial_incarceration,
                first_reincarceration,
                subsequent_reincarceration,
            ],
            person_id=fake_person_id,
        )

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123,
            state_code=state_code,
            sentence_group_id=sentence_group.sentence_group_id,
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        sentence_group.incarceration_sentences = [incarceration_sentence]
        sentence_group.supervision_sentences = [supervision_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(initial_incarceration),
            normalized_database_base_dict(first_reincarceration),
            normalized_database_base_dict(subsequent_reincarceration),
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                "incarceration_period_id": initial_incarceration.incarceration_period_id,
                "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
            },
            {
                "incarceration_period_id": first_reincarceration.incarceration_period_id,
                "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
            },
            {
                "incarceration_period_id": subsequent_reincarceration.incarceration_period_id,
                "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
            },
        ]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__: persons_data,
            schema.StatePersonRace.__tablename__: races_data,
            schema.StatePersonEthnicity.__tablename__: ethnicity_data,
            schema.StateSentenceGroup.__tablename__: sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name: state_incarceration_sentence_incarceration_period_association,
        }
        data_dict.update(data_dict_overrides)
        # Reference tables go in after the schema tables, in the same key order
        # the fixture has always used.
        data_dict.update(self._reference_table_rows(fake_person_id, state_code))

        return data_dict

    @freeze_time("2015-01-31")
    def testIncarcerationPipeline(self) -> None:
        """Runs the pipeline on the basic data_dict and expects ALL_METRIC_TYPES_SET."""
        fake_person_id = 12345
        data_dict = self.build_incarceration_pipeline_data_dict(
            fake_person_id=fake_person_id
        )
        dataset = "recidiviz-123.state"

        self.run_test_pipeline(
            state_code=_STATE_CODE,
            dataset=dataset,
            data_dict=data_dict,
            expected_metric_types=ALL_METRIC_TYPES_SET,
        )

    @freeze_time("2015-01-31")
    def testIncarcerationPipelineFilterMetrics(self) -> None:
        """Runs the pipeline with a metric type filter and expects only admission metrics."""
        fake_person_id = 12345
        data_dict = self.build_incarceration_pipeline_data_dict(
            fake_person_id=fake_person_id
        )
        dataset = "recidiviz-123.state"

        expected_metric_types = {IncarcerationMetricType.INCARCERATION_ADMISSION}
        metric_types_filter = {IncarcerationMetricType.INCARCERATION_ADMISSION.value}

        self.run_test_pipeline(
            state_code=_STATE_CODE,
            dataset=dataset,
            data_dict=data_dict,
            expected_metric_types=expected_metric_types,
            metric_types_filter=metric_types_filter,
        )

    def testIncarcerationPipelineUsMo(self) -> None:
        """Runs the pipeline with US_MO as the state code for both the data and the run."""
        fake_person_id = 12345
        data_dict = self.build_incarceration_pipeline_data_dict(
            fake_person_id=fake_person_id, state_code="US_MO"
        )
        dataset = "recidiviz-123.state"

        self.run_test_pipeline(
            state_code="US_MO",
            dataset=dataset,
            data_dict=data_dict,
            expected_metric_types=ALL_METRIC_TYPES_SET,
        )

    def testIncarcerationPipelineWithFilterSet(self) -> None:
        """Runs the pipeline with a unifying id filter set containing the fake person."""
        fake_person_id = 12345
        data_dict = self.build_incarceration_pipeline_data_dict(
            fake_person_id=fake_person_id
        )
        dataset = "recidivz-staging.state"

        self.run_test_pipeline(
            state_code=_STATE_CODE,
            dataset=dataset,
            data_dict=data_dict,
            expected_metric_types=ALL_METRIC_TYPES_SET,
            unifying_id_field_filter_set={fake_person_id},
        )

    def run_test_pipeline(
        self,
        state_code: str,
        dataset: str,
        data_dict: Dict[str, List[Dict]],
        expected_metric_types: Set[IncarcerationMetricType],
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None,
    ) -> None:
        """Runs a test version of the incarceration pipeline.

        Args:
            state_code: state code forwarded to the pipeline run.
            dataset: dataset name handed to both fake BigQuery constructors and
                to the pipeline run.
            data_dict: table name -> rows served by the fake BigQuery source.
            expected_metric_types: metric types the fake sink is told to expect.
            unifying_id_field_filter_set: optional id filter forwarded to the run.
            metric_types_filter: optional metric type filter forwarded to the run.
        """
        read_from_bq_constructor = (
            self.fake_bq_source_factory.create_fake_bq_source_constructor(
                dataset, data_dict
            )
        )
        write_to_bq_constructor = (
            self.fake_bq_sink_factory.create_fake_bq_sink_constructor(
                dataset,
                expected_output_metric_types=expected_metric_types,
            )
        )

        # The fake source is also patched in for the ReadFromBigQuery name in
        # the incarceration pipeline package, in addition to being passed to
        # the run explicitly.
        with patch(
            f"{INCARCERATION_PIPELINE_PACKAGE_NAME}.ReadFromBigQuery",
            read_from_bq_constructor,
        ):
            run_test_pipeline(
                pipeline_module=pipeline,
                state_code=state_code,
                dataset=dataset,
                read_from_bq_constructor=read_from_bq_constructor,
                write_to_bq_constructor=write_to_bq_constructor,
                unifying_id_field_filter_set=unifying_id_field_filter_set,
                metric_types_filter=metric_types_filter,
            )

    def build_incarceration_pipeline_data_dict_no_incarceration(
        self, fake_person_id: int
    ) -> Dict[str, List[Dict]]:
        """Builds a data_dict for a run of the pipeline where the person has no
        incarceration.

        Two persons are written: fake_person_id, who owns every sentence and
        period row, and a second person (id 6789) who has no other rows. No
        race or ethnicity rows are written.

        Args:
            fake_person_id: person_id stamped on the sentence and period rows.

        Returns:
            A mapping from table name to the list of row dicts for that table.
        """
        fake_person_1 = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        fake_person_id_2 = 6789

        fake_person_2 = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id_2,
            gender=Gender.FEMALE,
            birthdate=date(1990, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        persons_data = [
            normalized_database_base_dict(fake_person_1),
            normalized_database_base_dict(fake_person_2),
        ]

        sentence_group = schema.StateSentenceGroup(
            sentence_group_id=111,
            state_code="US_XX",
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            person_id=fake_person_id,
        )

        incarceration_period = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code="US_XX",
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            state_code="US_XX",
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[incarceration_period],
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123,
            state_code="US_XX",
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        sentence_group.incarceration_sentences = [incarceration_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(incarceration_period)
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                "incarceration_period_id": incarceration_period.incarceration_period_id,
                "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
            },
        ]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__: persons_data,
            schema.StateSentenceGroup.__tablename__: sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name: state_incarceration_sentence_incarceration_period_association,
        }
        data_dict.update(data_dict_overrides)
        # Reference tables go in after the schema tables, in the same key order
        # the fixture has always used.
        data_dict.update(self._reference_table_rows(fake_person_id, "US_XX"))

        return data_dict

    def testIncarcerationPipelineNoIncarceration(self) -> None:
        """Tests the incarceration pipeline when a person doesn't have any
        incarceration periods."""
        fake_person_id = 12345
        data_dict = self.build_incarceration_pipeline_data_dict_no_incarceration(
            fake_person_id
        )
        dataset = "recidiviz-123.state"

        self.run_test_pipeline(
            _STATE_CODE, dataset, data_dict, expected_metric_types=set()
        )