def test_getExternalIdsOfCls(self):
    """Checks the external ids collected per class from a person tree.

    The second supervision sentence hangs off both sentence groups, yet its
    external id is expected only once in the result.
    """
    first_ss, shared_ss, third_ss = (
        schema.StateSupervisionSentence(external_id=ext_id)
        for ext_id in (_EXTERNAL_ID, _EXTERNAL_ID_2, _EXTERNAL_ID_3)
    )
    groups = [
        schema.StateSentenceGroup(
            external_id=_EXTERNAL_ID,
            supervision_sentences=[first_ss, shared_ss]),
        schema.StateSentenceGroup(
            external_id=_EXTERNAL_ID_2,
            supervision_sentences=[shared_ss, third_ss]),
    ]
    person = schema.StatePerson(
        external_ids=[schema.StatePersonExternalId(external_id=_EXTERNAL_ID)],
        sentence_groups=groups)

    # (expected external ids, class to search for), checked in this order.
    expectations = (
        ([_EXTERNAL_ID, _EXTERNAL_ID_2, _EXTERNAL_ID_3],
         schema.StateSupervisionSentence),
        ([_EXTERNAL_ID, _EXTERNAL_ID_2], schema.StateSentenceGroup),
        ([_EXTERNAL_ID], schema.StatePerson),
    )
    for expected_ids, cls in expectations:
        self.assertCountEqual(
            expected_ids, get_external_ids_of_cls([person], cls))
def _move_supervision_periods_onto_sentences_for_sentence_group(
        sentence_group: schema.StateSentenceGroup):
    """Re-links the SupervisionPeriods under |sentence_group| to its sentences by date.

    Each non-placeholder supervision period is attached to every
    non-placeholder sentence that has a start date and whose
    [start_date, completion_date) window contains the period's start date or
    its termination date. A missing period start/termination date is treated
    as date.min/date.max, and a missing sentence completion date as date.max.
    Periods that match no sentence are assigned to the first placeholder
    sentence on the group; one is created if none exists.
    """
    all_sentences = sentence_group.supervision_sentences + sentence_group.incarceration_sentences

    # Gather every supervision period before the sentence links are reset.
    all_periods = get_all_entities_of_cls([sentence_group], schema.StateSupervisionPeriod)

    # Drop the non-placeholder period links from each sentence; they are
    # rebuilt from the date comparison below.
    for sentence in all_sentences:
        sentence.supervision_periods = [
            period for period in sentence.supervision_periods if is_placeholder(period)]

    orphaned_periods = []
    real_sentences = [sentence for sentence in all_sentences if not is_placeholder(sentence)]
    real_periods = [period for period in all_periods if not is_placeholder(period)]

    # Attach each real period to every real sentence whose window it touches.
    for period in real_periods:
        attached = False
        period_start = period.start_date or datetime.date.min
        period_end = period.termination_date or datetime.date.max

        for sentence in real_sentences:
            # A sentence without a start date has no window to compare against.
            if not sentence.start_date:
                continue

            sentence_end = sentence.completion_date or datetime.date.max
            if (sentence.start_date <= period_start < sentence_end
                    or sentence.start_date <= period_end < sentence_end):
                attached = True
                sentence.supervision_periods.append(period)

        # Periods with no matching sentence end up on a placeholder sentence.
        if not attached:
            orphaned_periods.append(period)

    if not orphaned_periods:
        return

    placeholders = [sentence for sentence in all_sentences if is_placeholder(sentence)]
    if not placeholders:
        # This can happen when an already-committed entity has a date updated
        # in a later run, so that the existing sentences no longer line up
        # with one of the existing supervision periods.
        logging.info(
            'No placeholder sentences exist on sentence group [%s]([%s]), creating a new placeholder sentence.',
            sentence_group.external_id,
            sentence_group.sentence_group_id)
        fresh_placeholder = schema.StateSupervisionSentence(
            state_code=sentence_group.state_code,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            person=sentence_group.person)
        placeholders.append(fresh_placeholder)
        sentence_group.supervision_sentences.append(fresh_placeholder)

    placeholders[0].supervision_periods = orphaned_periods
def generate_supervision_sentence(person, **kwargs) -> schema.StateSupervisionSentence:
    """Builds a StateSupervisionSentence for |person|.

    The sentence defaults to the module's state code and a
    PRESENT_WITHOUT_INFO status; any entry in |kwargs| overrides the default
    of the same name or adds a further constructor argument.
    """
    defaults = {
        "state_code": _STATE_CODE,
        "status": StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
    }
    return schema.StateSupervisionSentence(person=person, **{**defaults, **kwargs})
def test_removeSeosFromViolationIds_unexpectedFormat(self):
    """Expects remove_suffix_from_violation_ids to raise ValueError for the id 'bad_id'.

    The fixture is built outside the pytest.raises context so that only the
    call under test can satisfy the expectation; a ValueError raised while
    constructing the fixture now fails the test instead of passing it.
    """
    sv = schema.StateSupervisionViolation(external_id='bad_id')
    sp = schema.StateSupervisionPeriod(supervision_violation_entries=[sv])
    ss = schema.StateSupervisionSentence(supervision_periods=[sp])
    sg = schema.StateSentenceGroup(supervision_sentences=[ss])
    p = schema.StatePerson(sentence_groups=[sg])

    with pytest.raises(ValueError):
        remove_suffix_from_violation_ids([p])
def test_getTotalEntitiesOfCls(self):
    """Checks per-class entity totals for a tree where one sentence belongs to two groups."""
    first_ss, shared_ss, third_ss = (
        schema.StateSupervisionSentence() for _ in range(3))
    groups = [
        schema.StateSentenceGroup(supervision_sentences=[first_ss, shared_ss]),
        schema.StateSentenceGroup(supervision_sentences=[shared_ss, third_ss]),
    ]
    person = schema.StatePerson(sentence_groups=groups)

    # (expected total, class to count), checked in this order.
    expectations = (
        (3, schema.StateSupervisionSentence),
        (2, schema.StateSentenceGroup),
        (1, schema.StatePerson),
    )
    for expected_total, cls in expectations:
        self.assertEqual(
            expected_total, get_total_entities_of_cls([person], cls))
def generate_test_supervision_sentence(
        person_id, charges, supervision_periods
) -> state_schema.StateSupervisionSentence:
    """Returns a SERVING 'us_ca' supervision sentence (id 1111) for the given person.

    |charges| and |supervision_periods| are passed through to the schema
    object unchanged.
    """
    fields = dict(
        supervision_sentence_id=1111,
        status=StateSentenceStatus.SERVING.value,
        state_code='us_ca',
        person_id=person_id,
        charges=charges,
        supervision_periods=supervision_periods,
    )
    return state_schema.StateSupervisionSentence(**fields)
def generate_test_supervision_sentence(
        person_id, charges, supervision_periods, early_discharges=None
) -> state_schema.StateSupervisionSentence:
    """Returns a SERVING "us_ca" supervision sentence (id 1111) for the given person.

    |charges| and |supervision_periods| are passed through unchanged. A falsy
    |early_discharges| (including the default None) is replaced by a new
    empty list.
    """
    fields = dict(
        supervision_sentence_id=1111,
        status=StateSentenceStatus.SERVING.value,
        state_code="us_ca",
        person_id=person_id,
        charges=charges,
        supervision_periods=supervision_periods,
        early_discharges=early_discharges or [],
    )
    return state_schema.StateSupervisionSentence(**fields)
def test_readPersons_unexpectedRoot_raises(self):
    """Expects read_persons_by_root_entity_cls to raise ValueError for this input.

    The ingested person carries a sentence group with one supervision
    sentence, and StateSentenceGroup is the only allowed root entity class.

    The session is opened outside the pytest.raises context, so a failure
    while opening it cannot satisfy the expectation, and it is always closed
    afterwards so the test does not leak a session.
    """
    ingested_supervision_sentence = schema.StateSupervisionSentence(
        external_id=_EXTERNAL_ID)
    ingested_sentence_group = schema.StateSentenceGroup(
        supervision_sentences=[ingested_supervision_sentence])
    ingested_person = schema.StatePerson(
        sentence_groups=[ingested_sentence_group])

    session = SessionFactory.for_schema_base(StateBase)
    try:
        with pytest.raises(ValueError):
            read_persons_by_root_entity_cls(
                session,
                'us_nd',
                [ingested_person],
                allowed_root_entity_classes=[schema.StateSentenceGroup])
    finally:
        # Release the session whether or not the expectation held.
        session.close()
def test_removeSeosFromViolationIds_unexpectedFormat(self) -> None:
    """Expects remove_suffix_from_violation_ids to raise ValueError for the id "bad_id".

    The raised message is checked as well. The fixture is built outside the
    pytest.raises context so that only the call under test can satisfy the
    expectation; a ValueError raised while constructing the fixture now
    fails the test instead of reaching the message assertion.
    """
    sv = schema.StateSupervisionViolation(external_id="bad_id")
    sp = schema.StateSupervisionPeriod(
        supervision_violation_entries=[sv],
        status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
    )
    ss = schema.StateSupervisionSentence(supervision_periods=[sp])
    sg = schema.StateSentenceGroup(supervision_sentences=[ss])
    p = schema.StatePerson(sentence_groups=[sg])

    with pytest.raises(ValueError) as e:
        remove_suffix_from_violation_ids([p])

    self.assertEqual(
        str(e.value),
        "Unexpected id format [bad_id] for [StateSupervisionViolation(external_id=bad_id)]",
    )
def test_readPersons_unexpectedRoot_raises(self) -> None:
    """Expects a ValueError from read_persons_by_root_entity_cls for this input.

    The ingested person carries a sentence group with one supervision
    sentence, and StateSentenceGroup is the only allowed root entity class.
    """
    sentence = schema.StateSupervisionSentence(external_id=_EXTERNAL_ID)
    group = schema.StateSentenceGroup(supervision_sentences=[sentence])
    person = schema.StatePerson(sentence_groups=[group])

    # pytest.raises is the outer context and the session the inner one, so
    # the error propagates out through the session context first.
    with pytest.raises(ValueError), SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        read_persons_by_root_entity_cls(
            session,
            "us_nd",
            [person],
            allowed_root_entity_classes=[schema.StateSentenceGroup],
        )
def test_removeSeosFromViolationIds(self):
    """Checks that 'DOC-CYC-VSN1-SEO-FSO' ids become 'DOC-CYC-VSN1'.

    Two violations, each with one response, all share the suffixed id; after
    remove_suffix_from_violation_ids runs, the converted person must equal
    an expected tree whose violations and responses use the shortened id.
    """
    suffixed_id = 'DOC-CYC-VSN1-SEO-FSO'
    violations = []
    for _ in range(2):
        response = schema.StateSupervisionViolationResponse(
            external_id=suffixed_id)
        violations.append(schema.StateSupervisionViolation(
            external_id=suffixed_id,
            supervision_violation_responses=[response]))
    period = schema.StateSupervisionPeriod(
        supervision_violation_entries=violations)
    sentence = schema.StateSupervisionSentence(supervision_periods=[period])
    group = schema.StateSentenceGroup(supervision_sentences=[sentence])
    person = schema.StatePerson(sentence_groups=[group])

    # Expected entity tree; the second violation/response pair is a copy of
    # the first.
    trimmed_id = 'DOC-CYC-VSN1'
    want_response = StateSupervisionViolationResponse.new_with_defaults(
        external_id=trimmed_id)
    want_violation = StateSupervisionViolation.new_with_defaults(
        external_id=trimmed_id,
        supervision_violation_responses=[want_response])
    want_response_copy = attr.evolve(want_response)
    want_violation_copy = attr.evolve(
        want_violation,
        supervision_violation_responses=[want_response_copy])
    want_period = StateSupervisionPeriod.new_with_defaults(
        supervision_violation_entries=[want_violation, want_violation_copy])
    want_sentence = StateSupervisionSentence.new_with_defaults(
        supervision_periods=[want_period])
    want_group = StateSentenceGroup.new_with_defaults(
        supervision_sentences=[want_sentence])
    want_person = StatePerson.new_with_defaults(sentence_groups=[want_group])

    remove_suffix_from_violation_ids([person])

    self.assertEqual(want_person, self.to_entity(person))
def build_incarceration_pipeline_data_dict(self, fake_person_id: int,
                                           state_code: str = "US_XX"):
    """Assembles the data_dict for a basic run of the pipeline.

    The fixture holds one person with two races and one ethnicity, one
    sentence group with an incarceration sentence (three incarceration
    periods) and a supervision sentence, plus the county, sentence-status,
    judicial-district and population-count rows. These entries are layered
    over self._default_data_dict() and the resulting dict is returned.
    """
    person = schema.StatePerson(
        state_code=state_code,
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    persons_data = [normalized_database_base_dict(person)]

    races = [
        schema.StatePersonRace(
            person_race_id=111,
            state_code=state_code,
            race=race,
            person_id=fake_person_id,
        )
        for race in (Race.BLACK, Race.WHITE)
    ]
    races_data = normalized_database_base_dict_list(races)

    ethnicity_data = normalized_database_base_dict_list([
        schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code=state_code,
            ethnicity=Ethnicity.HISPANIC,
            person_id=fake_person_id,
        )
    ])

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=98765,
        state_code=state_code,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        person_id=fake_person_id,
    )

    # Field values that are identical on all three incarceration periods.
    shared_period_fields = dict(
        incarceration_type=StateIncarcerationType.STATE_PRISON,
        state_code=state_code,
        county_code="124",
        facility="San Quentin",
        facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
        admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
        person_id=fake_person_id,
    )
    periods = [
        schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields,
        ),
        schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields,
        ),
        # The last period has no release date or release reason.
        schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            admission_date=date(2017, 1, 4),
            **shared_period_fields,
        ),
    ]

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        state_code=state_code,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=list(periods),
        person_id=fake_person_id,
    )
    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123,
        state_code=state_code,
        sentence_group_id=sentence_group.sentence_group_id,
        person_id=fake_person_id,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
    )

    sentence_group.incarceration_sentences = [incarceration_sentence]
    sentence_group.supervision_sentences = [supervision_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]
    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)]
    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)]
    incarceration_periods_data = [
        normalized_database_base_dict(period) for period in periods]

    # One association row per incarceration period on the sentence.
    sentence_period_association = [
        {
            "incarceration_period_id": period.incarceration_period_id,
            "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
        }
        for period in periods
    ]

    county_rows = [{
        "state_code": state_code,
        "person_id": fake_person_id,
        "county_of_residence": _COUNTY_OF_RESIDENCE,
    }]
    us_mo_sentence_status_rows: List[Dict[str, Any]] = [{
        "state_code": "US_MO",
        "person_id": fake_person_id,
        "sentence_external_id": "XXX",
        "sentence_status_external_id": "YYY",
        "status_code": "ZZZ",
        "status_date": "not_a_date",
        "status_description": "XYZ",
    }]
    judicial_district_rows = [{
        "state_code": state_code,
        "person_id": fake_person_id,
        "incarceration_period_id": 123,
        "judicial_district_code": "NW",
    }]
    population_count_rows = [{
        "state_code": state_code,
        "race_or_ethnicity": "BLACK",
        "population_count": 1,
        "representation_priority": 1,
    }]

    data_dict = self._default_data_dict()
    data_dict.update({
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            sentence_period_association,
        "persons_to_recent_county_of_residence": county_rows,
        "incarceration_period_judicial_district_association": judicial_district_rows,
        "state_race_ethnicity_population_counts": population_count_rows,
        "us_mo_sentence_statuses": us_mo_sentence_status_rows,
    })
    return data_dict
def build_incarceration_pipeline_data_dict_no_incarceration(
        self, fake_person_id: int):
    """Assembles the data_dict for a run where a person has no incarceration.

    Two persons are emitted. Every sentence and incarceration-period row is
    keyed to |fake_person_id|; the second person (id 6789) appears only in
    the person table. The entries are layered over
    self._default_data_dict() and the resulting dict is returned.
    """
    person_with_rows = schema.StatePerson(
        state_code='US_XX',
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)

    other_person_id = 6789
    person_without_rows = schema.StatePerson(
        state_code='US_XX',
        person_id=other_person_id,
        gender=Gender.FEMALE,
        birthdate=date(1990, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)

    persons_data = [
        normalized_database_base_dict(each_person)
        for each_person in (person_with_rows, person_without_rows)
    ]

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=111, person_id=fake_person_id)

    period = schema.StateIncarcerationPeriod(
        incarceration_period_id=1111,
        status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
        state_code='US_XX',
        county_code='124',
        facility='San Quentin',
        facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
        admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
        admission_date=date(2008, 11, 20),
        release_date=date(2010, 12, 4),
        release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
        person_id=fake_person_id)

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=[period],
        person_id=fake_person_id)

    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123, person_id=fake_person_id)

    # Only the incarceration sentence is attached to the sentence group.
    sentence_group.incarceration_sentences = [incarceration_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]
    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)]
    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)]
    incarceration_periods_data = [normalized_database_base_dict(period)]

    sentence_period_association = [{
        'incarceration_period_id': period.incarceration_period_id,
        'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id,
    }]

    data_dict = self._default_data_dict()
    data_dict.update({
        schema.StatePerson.__tablename__: persons_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            sentence_period_association,
    })
    return data_dict
def build_incarceration_pipeline_data_dict(self, fake_person_id: int,
                                           state_code: str = 'US_XX'):
    """Assembles the data_dict for a basic run of the pipeline.

    The fixture holds one person with two races and one ethnicity, one
    sentence group with an incarceration sentence (three incarceration
    periods), and a supervision sentence. These entries are layered over
    self._default_data_dict() and the resulting dict is returned.
    """
    person = schema.StatePerson(
        state_code=state_code,
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)
    persons_data = [normalized_database_base_dict(person)]

    races = [
        schema.StatePersonRace(person_race_id=111,
                               state_code=state_code,
                               race=race,
                               person_id=fake_person_id)
        for race in (Race.BLACK, Race.WHITE)
    ]
    races_data = normalized_database_base_dict_list(races)

    ethnicity_data = normalized_database_base_dict_list([
        schema.StatePersonEthnicity(person_ethnicity_id=111,
                                    state_code=state_code,
                                    ethnicity=Ethnicity.HISPANIC,
                                    person_id=fake_person_id)
    ])

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=111, person_id=fake_person_id)

    # Field values that are identical on all three incarceration periods.
    shared_period_fields = dict(
        incarceration_type=StateIncarcerationType.STATE_PRISON,
        state_code=state_code,
        county_code='124',
        facility='San Quentin',
        facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
        admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
        person_id=fake_person_id,
    )
    periods = [
        schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields),
        schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields),
        # The last period has no release date or release reason.
        schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            admission_date=date(2017, 1, 4),
            **shared_period_fields),
    ]

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        state_code=state_code,
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=list(periods),
        person_id=fake_person_id)

    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123,
        state_code=state_code,
        person_id=fake_person_id)

    # Only the incarceration sentence is attached to the sentence group.
    sentence_group.incarceration_sentences = [incarceration_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]
    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)]
    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)]
    incarceration_periods_data = [
        normalized_database_base_dict(period) for period in periods]

    # One association row per incarceration period on the sentence.
    sentence_period_association = [
        {
            'incarceration_period_id': period.incarceration_period_id,
            'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id,
        }
        for period in periods
    ]

    data_dict = self._default_data_dict()
    data_dict.update({
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            sentence_period_association,
    })
    return data_dict
def build_incarceration_pipeline_data_dict_no_incarceration(
        self, fake_person_id: int):
    """Assembles the data_dict for a run where a person has no incarceration.

    Two persons are emitted. Every sentence, incarceration-period and
    auxiliary row is keyed to |fake_person_id|; the second person (id 6789)
    appears only in the person table. The entries are layered over
    self._default_data_dict() and the resulting dict is returned.
    """
    person_with_rows = schema.StatePerson(
        state_code="US_XX",
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    other_person_id = 6789
    person_without_rows = schema.StatePerson(
        state_code="US_XX",
        person_id=other_person_id,
        gender=Gender.FEMALE,
        birthdate=date(1990, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )

    persons_data = [
        normalized_database_base_dict(each_person)
        for each_person in (person_with_rows, person_without_rows)
    ]

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=111,
        state_code="US_XX",
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        person_id=fake_person_id,
    )

    period = schema.StateIncarcerationPeriod(
        incarceration_period_id=1111,
        status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
        state_code="US_XX",
        county_code="124",
        facility="San Quentin",
        facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
        admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
        admission_date=date(2008, 11, 20),
        release_date=date(2010, 12, 4),
        release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
        person_id=fake_person_id,
    )

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        state_code="US_XX",
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=[period],
        person_id=fake_person_id,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
    )

    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123,
        state_code="US_XX",
        person_id=fake_person_id,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
    )

    # Only the incarceration sentence is attached to the sentence group.
    sentence_group.incarceration_sentences = [incarceration_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]
    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)]
    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)]
    incarceration_periods_data = [normalized_database_base_dict(period)]

    sentence_period_association = [{
        "incarceration_period_id": period.incarceration_period_id,
        "incarceration_sentence_id": incarceration_sentence.incarceration_sentence_id,
    }]

    county_rows = [{
        "state_code": "US_XX",
        "person_id": fake_person_id,
        "county_of_residence": _COUNTY_OF_RESIDENCE,
    }]
    us_mo_sentence_status_rows: List[Dict[str, Any]] = [{
        "state_code": "US_MO",
        "person_id": fake_person_id,
        "sentence_external_id": "XXX",
        "sentence_status_external_id": "YYY",
        "status_code": "ZZZ",
        "status_date": "not_a_date",
        "status_description": "XYZ",
    }]
    judicial_district_rows = [{
        "state_code": "US_XX",
        "person_id": fake_person_id,
        "incarceration_period_id": 123,
        "judicial_district_code": "NW",
    }]
    population_count_rows = [{
        "state_code": "US_XX",
        "race_or_ethnicity": "BLACK",
        "population_count": 1,
        "representation_priority": 1,
    }]

    data_dict = self._default_data_dict()
    data_dict.update({
        schema.StatePerson.__tablename__: persons_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            sentence_period_association,
        "persons_to_recent_county_of_residence": county_rows,
        "incarceration_period_judicial_district_association": judicial_district_rows,
        "state_race_ethnicity_population_counts": population_count_rows,
        "us_mo_sentence_statuses": us_mo_sentence_status_rows,
    })
    return data_dict
def testIncarcerationPipeline(self):
    """Runs the incarceration pipeline steps on an in-memory fixture.

    The fixture holds one person with three incarceration periods on one
    incarceration sentence. The pipeline loads the root entities, groups
    sentences onto sentence groups and sentence groups onto the person,
    classifies incarceration events, and produces metrics, which are
    checked with AssertMatchers.validate_metric_type().
    """
    fake_person_id = 12345

    person = schema.StatePerson(
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)
    persons_data = [normalized_database_base_dict(person)]

    races = [
        schema.StatePersonRace(person_race_id=111,
                               state_code=race_state_code,
                               race=race,
                               person_id=fake_person_id)
        for race_state_code, race in (('CA', Race.BLACK), ('ND', Race.WHITE))
    ]
    races_data = normalized_database_base_dict_list(races)

    ethnicity_data = normalized_database_base_dict_list([
        schema.StatePersonEthnicity(person_ethnicity_id=111,
                                    state_code='CA',
                                    ethnicity=Ethnicity.HISPANIC,
                                    person_id=fake_person_id)
    ])

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=111, person_id=fake_person_id)

    # Field values that are identical on all three incarceration periods.
    shared_period_fields = dict(
        state_code='CA',
        county_code='124',
        facility='San Quentin',
        facility_security_level=StateIncarcerationFacilitySecurityLevel.MAXIMUM,
        admission_reason=StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        projected_release_reason=StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE,
        person_id=fake_person_id,
    )
    periods = [
        schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields),
        schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED,
            **shared_period_fields),
        # The last period has no release date or release reason.
        schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            admission_date=date(2017, 1, 4),
            **shared_period_fields),
    ]

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=list(periods),
        person_id=fake_person_id)

    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123, person_id=fake_person_id)

    # Only the incarceration sentence is attached to the sentence group.
    sentence_group.incarceration_sentences = [incarceration_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]
    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)]
    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)]
    incarceration_periods_data = [
        normalized_database_base_dict(period) for period in periods]

    # One association row per incarceration period on the sentence.
    sentence_period_association = [
        {
            'incarceration_period_id': period.incarceration_period_id,
            'incarceration_sentence_id': incarceration_sentence.incarceration_sentence_id,
        }
        for period in periods
    ]

    data_dict = {
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__: incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__: supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__: incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            sentence_period_association,
        schema.state_supervision_sentence_incarceration_period_association_table.name: [{}],
    }

    test_pipeline = TestPipeline()

    def load_root_entities(step_label, schema_cls, entity_cls):
        # Every root entity is loaded the same way; only the step label and
        # the schema/entity class pair differ.
        return (test_pipeline
                | step_label >> extractor_utils.BuildRootEntity(
                    dataset=None,
                    data_dict=data_dict,
                    root_schema_class=schema_cls,
                    root_entity_class=entity_cls,
                    unifying_id_field='person_id',
                    build_related_entities=True))

    persons = load_root_entities(
        'Load Persons', schema.StatePerson, entities.StatePerson)
    sentence_groups = load_root_entities(
        'Load StateSentencegroups',
        schema.StateSentenceGroup,
        entities.StateSentenceGroup)
    incarceration_sentences = load_root_entities(
        'Load StateIncarcerationSentences',
        schema.StateIncarcerationSentence,
        entities.StateIncarcerationSentence)
    supervision_sentences = load_root_entities(
        'Load StateSupervisionSentences',
        schema.StateSupervisionSentence,
        entities.StateSupervisionSentence)

    sentences_by_key = {
        'sentence_groups': sentence_groups,
        'incarceration_sentences': incarceration_sentences,
        'supervision_sentences': supervision_sentences
    }
    sentences_and_sentence_groups = (
        sentences_by_key
        | 'Group sentences to sentence groups' >> beam.CoGroupByKey())

    hydrated_sentence_groups = (
        sentences_and_sentence_groups
        | 'Set hydrated sentences on sentence groups' >> beam.ParDo(
            SetSentencesOnSentenceGroup()))

    # Join each person with their sentence groups.
    person_and_sentence_groups = (
        {
            'person': persons,
            'sentence_groups': hydrated_sentence_groups
        }
        | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey())

    # County-of-residence rows, passed to the classification step as a side
    # input.
    county_rows = [{
        'person_id': fake_person_id,
        'county_of_residence': _COUNTY_OF_RESIDENCE
    }]
    person_id_to_county_kv = (
        test_pipeline
        | "Read person id to county associations from BigQuery" >>
        beam.Create(county_rows)
        | "Convert to KV" >> beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

    person_events = (
        person_and_sentence_groups
        | 'Classify Incarceration Events' >> beam.ParDo(
            pipeline.ClassifyIncarcerationEvents(),
            AsDict(person_id_to_county_kv)))

    # Pipeline options, with a timestamp added for local jobs.
    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')

    incarceration_metrics = (
        person_events
        | 'Get Incarceration Metrics' >> pipeline.GetIncarcerationMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1))

    assert_that(incarceration_metrics,
                AssertMatchers.validate_metric_type())

    test_pipeline.run()
def test_removeSeosFromViolationIds(self) -> None:
    """Checks that remove_suffix_from_violation_ids trims violation ids.

    Builds a person whose single supervision period holds two violations
    (each with one response) that all carry the external id
    "DOC-CYC-VSN1-SEO-FSO", runs remove_suffix_from_violation_ids on it, and
    expects every violation and response to end up with the external id
    "DOC-CYC-VSN1".
    """
    untrimmed_id = "DOC-CYC-VSN1-SEO-FSO"
    trimmed_id = "DOC-CYC-VSN1"

    # Input tree (schema objects), built from the leaves up to the person.
    first_response = schema.StateSupervisionViolationResponse(
        state_code=_STATE_CODE, external_id=untrimmed_id)
    first_violation = schema.StateSupervisionViolation(
        state_code=_STATE_CODE,
        external_id=untrimmed_id,
        supervision_violation_responses=[first_response],
    )
    second_response = schema.StateSupervisionViolationResponse(
        state_code=_STATE_CODE, external_id=untrimmed_id)
    second_violation = schema.StateSupervisionViolation(
        state_code=_STATE_CODE,
        external_id=untrimmed_id,
        supervision_violation_responses=[second_response],
    )
    period = schema.StateSupervisionPeriod(
        state_code=_STATE_CODE,
        supervision_violation_entries=[first_violation, second_violation],
        status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
    )
    sentence = schema.StateSupervisionSentence(
        state_code=_STATE_CODE,
        supervision_periods=[period],
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
    )
    group = schema.StateSentenceGroup(
        state_code=_STATE_CODE,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        supervision_sentences=[sentence],
    )
    person = schema.StatePerson(
        state_code=_STATE_CODE, sentence_groups=[group])

    # Expected tree (entity objects) mirroring the input, with trimmed ids.
    want_first_response = StateSupervisionViolationResponse.new_with_defaults(
        state_code=_STATE_CODE, external_id=trimmed_id)
    want_first_violation = StateSupervisionViolation.new_with_defaults(
        state_code=_STATE_CODE,
        external_id=trimmed_id,
        supervision_violation_responses=[want_first_response],
    )
    # The second violation/response pair is a copy of the first pair.
    want_second_response = attr.evolve(want_first_response)
    want_second_violation = attr.evolve(
        want_first_violation,
        supervision_violation_responses=[want_second_response])
    want_period = StateSupervisionPeriod.new_with_defaults(
        state_code=_STATE_CODE,
        supervision_violation_entries=[
            want_first_violation,
            want_second_violation,
        ],
        status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
    )
    want_sentence = StateSupervisionSentence.new_with_defaults(
        state_code=_STATE_CODE,
        supervision_periods=[want_period],
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
    )
    want_group = StateSentenceGroup.new_with_defaults(
        state_code=_STATE_CODE,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        supervision_sentences=[want_sentence],
    )
    want_person = StatePerson.new_with_defaults(
        state_code=_STATE_CODE, sentence_groups=[want_group])

    # The helper is called for its effect on the input tree; its return
    # value is not used.
    remove_suffix_from_violation_ids([person])

    self.assertEqual(want_person, self.to_entity(person))