def test_readPeopleByRootExternalIds_entireTreeReturnedWithOneMatch(
        self) -> None:
    """Looking up one of a person's two external ids returns that person.

    The stored person carries two external ids; only the first one is passed
    to the DAO, and the result must consist of exactly that person.
    """
    # Arrange
    stored_person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    queried_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    unqueried_id = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID2,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    stored_person.external_ids = [queried_id, unqueried_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(stored_person)
    db_session.commit()

    # Act
    found = dao.read_people_by_cls_external_ids(
        db_session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

    # Assert
    self.assertCountEqual(found, [stored_person])
def test_add_fine_conflicting_external_id_same_session(self) -> None:
    """Two fines sharing state code and external id fail on commit.

    Both fines are added to one session; the flush is expected to succeed and
    the IntegrityError is expected from the commit.
    """
    # Arrange: first person tree.
    first_person = schema.StatePerson(
        full_name=self.FULL_NAME, state_code=self.state_code)
    first_fine = schema.StateFine(
        person=first_person,
        status=StateFineStatus.EXTERNAL_UNKNOWN.value,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_1,
        county_code=self.COUNTY_CODE,
    )
    first_group = schema.StateSentenceGroup(
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=self.EXTERNAL_ID_1,
        state_code=self.state_code,
        county_code=self.COUNTY_CODE,
        fines=[first_fine],
    )
    first_person_id = schema.StatePersonExternalId(
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_1,
        id_type=self.ID_TYPE_1,
    )
    first_person.sentence_groups = [first_group]
    first_person.external_ids = [first_person_id]

    # Arrange: second person tree whose fine reuses EXTERNAL_ID_1.
    second_person = schema.StatePerson(
        full_name=self.FULL_NAME, state_code=self.state_code)
    second_fine = schema.StateFine(
        person=second_person,
        status=StateFineStatus.EXTERNAL_UNKNOWN.value,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_1,
        county_code=self.COUNTY_CODE,
    )
    second_group = schema.StateSentenceGroup(
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=self.EXTERNAL_ID_2,
        state_code=self.state_code,
        county_code=self.COUNTY_CODE,
        fines=[second_fine],
    )
    second_person_id = schema.StatePersonExternalId(
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_2,
        id_type=self.ID_TYPE_1,
    )
    second_person.sentence_groups = [second_group]
    second_person.external_ids = [second_person_id]

    # Act
    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(first_fine)
    db_session.add(second_fine)
    db_session.flush()

    # Assert
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        db_session.commit()
def test_readMultipleIngestedPeopleMatchSamePerson(self) -> None:
    """Two ingested people pointing at one stored person yield it once.

    Each ingested person carries a different external id, and both ids belong
    to the same stored person.
    """
    # Arrange
    stored_person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    first_stored_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    second_stored_id = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID2,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    stored_person.external_ids = [first_stored_id, second_stored_id]

    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        session.add(stored_person)
        session.commit()

        first_ingested = entities.StatePerson.new_with_defaults(
            state_code=_STATE_CODE,
            external_ids=[
                entities.StatePersonExternalId.new_with_defaults(
                    external_id=_EXTERNAL_ID,
                    id_type=external_id_types.US_ND_SID,
                    state_code=_STATE_CODE,
                )
            ],
        )
        second_ingested = entities.StatePerson.new_with_defaults(
            state_code=_STATE_CODE,
            external_ids=[
                entities.StatePersonExternalId.new_with_defaults(
                    external_id=_EXTERNAL_ID2,
                    id_type=external_id_types.US_ND_SID,
                    state_code=_STATE_CODE,
                )
            ],
        )

        # Act
        found = dao.read_people_by_external_ids(
            session, _REGION, [first_ingested, second_ingested])

        # Assert
        self.assertCountEqual(found, [stored_person])
def test_readPersonIdsMatchMultiplePeople(self):
    """One ingested person whose external id is shared by two stored people.

    Both stored people carry _EXTERNAL_ID, so both are expected back
    (compared as converted entities).
    """
    # Arrange
    first_stored = schema.StatePerson(person_id=1)
    first_stored_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=first_stored,
    )
    first_stored.external_ids = [first_stored_id]

    second_stored = schema.StatePerson(person_id=2)
    second_stored_id = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=second_stored,
    )
    second_stored.external_ids = [second_stored_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(first_stored)
    db_session.add(second_stored)
    db_session.commit()

    ingested = entities.StatePerson.new_with_defaults()
    ingested.external_ids = [
        entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE),
        entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID2,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE),
    ]

    # Act
    found = dao.read_people_by_external_ids(db_session, _REGION, [ingested])

    # Assert
    expected = [
        converter.convert_schema_object_to_entity(first_stored),
        converter.convert_schema_object_to_entity(second_stored),
    ]
    self.assertCountEqual(found, expected)
def test_readPersonMultipleIdsMatch(self) -> None:
    """An ingested person matching a stored person on two ids yields it once."""
    # Arrange
    stored_person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    first_stored_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    second_stored_id = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID2,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=stored_person,
    )
    stored_person.external_ids = [first_stored_id, second_stored_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(stored_person)
    db_session.commit()

    ingested = entities.StatePerson.new_with_defaults(state_code=_STATE_CODE)
    ingested.external_ids = [
        entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
        ),
        entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID2,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
        ),
    ]

    # Act
    found = dao.read_people_by_external_ids(db_session, _REGION, [ingested])

    # Assert
    self.assertCountEqual(found, [stored_person])
def generate_external_id(**kwargs) -> schema.StatePersonExternalId:
    """Build a StatePersonExternalId with default state code and id type.

    Any keyword argument is forwarded to the constructor and overrides the
    defaults (`state_code=_STATE_CODE`, `id_type=_ID_TYPE`) when it uses the
    same name.
    """
    defaults = {
        "state_code": _STATE_CODE,
        "id_type": _ID_TYPE,
    }
    return schema.StatePersonExternalId(**{**defaults, **kwargs})
def test_readPeopleByRootExternalIds(self) -> None:
    """Only the person owning the queried external id is returned."""
    # Arrange
    unrelated_person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    matching_person = schema.StatePerson(person_id=2, state_code=_STATE_CODE)
    matching_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=matching_person,
    )
    matching_person.external_ids = [matching_id]

    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        session.add(unrelated_person)
        session.add(matching_person)
        session.commit()

        # Act
        found = dao.read_people_by_cls_external_ids(
            session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

        # Assert
        self.assertCountEqual(found, [matching_person])
def test_readPeopleByExternalId(self):
    """An ingested person's external id selects only the matching stored person."""
    # Arrange
    unrelated_person = schema.StatePerson(person_id=1)
    matching_person = schema.StatePerson(person_id=2)
    matching_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=matching_person,
    )
    matching_person.external_ids = [matching_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(unrelated_person)
    db_session.add(matching_person)
    db_session.commit()

    ingested = entities.StatePerson.new_with_defaults()
    ingested_id = entities.StatePersonExternalId.new_with_defaults(
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=ingested,
    )
    ingested.external_ids = [ingested_id]

    # Act
    found = dao.read_people_by_external_ids(db_session, _REGION, [ingested])

    # Assert
    self.assertCountEqual(found, [matching_person])
def test_getExternalIdsOfCls(self):
    """External ids are collected per class across a person's entity tree.

    The second supervision sentence is attached to both sentence groups; the
    expected id lists contain each external id once.
    """
    sentence_1 = schema.StateSupervisionSentence(external_id=_EXTERNAL_ID)
    sentence_2 = schema.StateSupervisionSentence(external_id=_EXTERNAL_ID_2)
    sentence_3 = schema.StateSupervisionSentence(external_id=_EXTERNAL_ID_3)
    group_1 = schema.StateSentenceGroup(
        external_id=_EXTERNAL_ID,
        supervision_sentences=[sentence_1, sentence_2])
    group_2 = schema.StateSentenceGroup(
        external_id=_EXTERNAL_ID_2,
        supervision_sentences=[sentence_2, sentence_3])
    person_id = schema.StatePersonExternalId(external_id=_EXTERNAL_ID)
    person = schema.StatePerson(
        external_ids=[person_id],
        sentence_groups=[group_1, group_2])

    sentence_ids = get_external_ids_of_cls(
        [person], schema.StateSupervisionSentence)
    self.assertCountEqual(
        [_EXTERNAL_ID, _EXTERNAL_ID_2, _EXTERNAL_ID_3], sentence_ids)

    group_ids = get_external_ids_of_cls([person], schema.StateSentenceGroup)
    self.assertCountEqual([_EXTERNAL_ID, _EXTERNAL_ID_2], group_ids)

    person_ids = get_external_ids_of_cls([person], schema.StatePerson)
    self.assertCountEqual([_EXTERNAL_ID], person_ids)
def test_readPlaceholderPeople(self) -> None:
    """Only the person without an external id is read back as a placeholder."""
    # Arrange
    bare_person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    identified_person = schema.StatePerson(
        person_id=2, state_code=_STATE_CODE)
    identified_person_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=identified_person,
    )
    identified_person.external_ids = [identified_person_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(bare_person)
    db_session.add(identified_person)
    db_session.commit()

    # Act
    found = dao.read_placeholder_persons(db_session, _STATE_CODE)

    # Assert
    self.assertCountEqual(found, [bare_person])
def test_readPeopleByRootExternalIds(self):
    """Only the person owning the queried external id is returned."""
    # Arrange
    unrelated_person = schema.StatePerson(person_id=1)
    matching_person = schema.StatePerson(person_id=2)
    matching_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=matching_person,
    )
    matching_person.external_ids = [matching_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(unrelated_person)
    db_session.add(matching_person)
    db_session.commit()

    # Act
    found = dao.read_people_by_cls_external_ids(
        db_session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

    # Assert
    self.assertCountEqual(found, [matching_person])
def test_readPersonsByRootEntityCls(self) -> None:
    """Reading by sentence-group root returns the match plus the bare person.

    Three people are stored: one owning two sentence groups, one with no
    children at all, and one with only an external id. The ingested person
    references the first sentence group's external id; the first two stored
    people are expected back.
    """
    person_with_groups = schema.StatePerson(
        person_id=1, state_code=_STATE_CODE)
    stored_group_1 = schema.StateSentenceGroup(
        sentence_group_id=_ID,
        external_id=_EXTERNAL_ID,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code=_STATE_CODE,
    )
    stored_group_2 = schema.StateSentenceGroup(
        sentence_group_id=_ID_2,
        external_id=_EXTERNAL_ID_2,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code=_STATE_CODE,
    )
    person_with_groups.sentence_groups = [stored_group_1, stored_group_2]

    bare_person = schema.StatePerson(person_id=_ID_2, state_code=_STATE_CODE)

    # NOTE(review): the original names call this the "other state" person,
    # yet it is built with _STATE_CODE as well - confirm the intent.
    person_with_only_id = schema.StatePerson(
        person_id=_ID_3, state_code=_STATE_CODE)
    only_id = schema.StatePersonExternalId(
        person_external_id_id=_ID_2,
        external_id=_ID,
        id_type=_ID_TYPE,
        state_code=_STATE_CODE,
    )
    person_with_only_id.external_ids = [only_id]

    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        session.add(person_with_groups)
        session.add(bare_person)
        session.add(person_with_only_id)
        session.commit()

        ingested_group = schema.StateSentenceGroup(
            state_code=_STATE_CODE, external_id=_EXTERNAL_ID)
        ingested = schema.StatePerson(sentence_groups=[ingested_group])

        expected = [person_with_groups, bare_person]

        found = read_persons_by_root_entity_cls(
            session,
            _STATE_CODE,
            [ingested],
            allowed_root_entity_classes=[schema.StateSentenceGroup],
        )
        self.assert_schema_object_lists_equal(expected, found)
def test_isMatch_statePersonExternalId_type(self):
    """External ids match until their id_type differs.

    The two ids start out differing only in primary key; changing the
    ingested id's type is expected to break the match.
    """
    ingested_id = schema.StatePersonExternalId(
        person_external_id_id=_ID,
        state_code=_STATE_CODE,
        id_type=_ID_TYPE,
        external_id=_EXTERNAL_ID)
    db_id = schema.StatePersonExternalId(
        person_external_id_id=None,
        state_code=_STATE_CODE,
        id_type=_ID_TYPE,
        external_id=_EXTERNAL_ID)

    matched_before = _is_match(ingested_entity=ingested_id, db_entity=db_id)
    self.assertTrue(matched_before)

    ingested_id.id_type = _ID_TYPE_ANOTHER
    matched_after = _is_match(ingested_entity=ingested_id, db_entity=db_id)
    self.assertFalse(matched_after)
def test_isMatch_statePerson(self):
    """People with different names match when their external ids agree.

    Replacing the db person's external id with a different one is expected to
    break the match.
    """
    ingested_id = schema.StatePersonExternalId(
        state_code=_STATE_CODE, external_id=_EXTERNAL_ID)
    equal_id = schema.StatePersonExternalId(
        state_code=_STATE_CODE, external_id=_EXTERNAL_ID)
    unequal_id = schema.StatePersonExternalId(
        state_code=_STATE_CODE, external_id=_EXTERNAL_ID_2)

    ingested_person = schema.StatePerson(
        full_name='name', external_ids=[ingested_id])
    db_person = schema.StatePerson(
        full_name='name_2', external_ids=[equal_id])

    matched_before = _is_match(
        ingested_entity=ingested_person, db_entity=db_person)
    self.assertTrue(matched_before)

    db_person.external_ids = [unequal_id]
    matched_after = _is_match(
        ingested_entity=ingested_person, db_entity=db_person)
    self.assertFalse(matched_after)
def test_getExternalIdsOfCls_emptyExternalId_raises(self):
    """A sentence group without an external id raises EntityMatchingError."""
    group_with_id = schema.StateSentenceGroup(external_id=_EXTERNAL_ID)
    group_without_id = schema.StateSentenceGroup()
    person_id = schema.StatePersonExternalId(external_id=_EXTERNAL_ID)
    person = schema.StatePerson(
        external_ids=[person_id],
        sentence_groups=[group_with_id, group_without_id])

    with pytest.raises(EntityMatchingError):
        get_external_ids_of_cls([person], schema.StateSentenceGroup)
def test_isPlaceholder_personWithExternalId(self) -> None:
    """A person stops being a placeholder once an external id is attached."""
    only_group = schema.StateSentenceGroup(
        state_code=_STATE_CODE,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO)
    person = schema.StatePerson(
        state_code=_STATE_CODE, sentence_groups=[only_group])

    # With just a sentence group attached the person counts as a placeholder.
    self.assertTrue(is_placeholder(person))

    new_id = schema.StatePersonExternalId(
        state_code=_STATE_CODE,
        external_id=_EXTERNAL_ID,
        id_type=_ID_TYPE)
    person.external_ids.append(new_id)

    self.assertFalse(is_placeholder(person))
def test_readPersonsByRootEntityCls(self):
    """Reading by sentence-group root returns the match plus the bare person.

    The stored sentence groups use the upper-case 'US_ND' state code while
    the ingested group and the query use lower-case 'us_nd'.
    """
    person_with_groups = schema.StatePerson(
        person_id=1, state_code=_STATE_CODE)
    stored_group_1 = schema.StateSentenceGroup(
        sentence_group_id=_ID,
        external_id=_EXTERNAL_ID,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code='US_ND')
    stored_group_2 = schema.StateSentenceGroup(
        sentence_group_id=_ID_2,
        external_id=_EXTERNAL_ID_2,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code='US_ND')
    person_with_groups.sentence_groups = [stored_group_1, stored_group_2]

    bare_person = schema.StatePerson(person_id=_ID_2, state_code=_STATE_CODE)

    # NOTE(review): the original names call this the "other state" person,
    # yet it is built with _STATE_CODE as well - confirm the intent.
    person_with_only_id = schema.StatePerson(
        person_id=_ID_3, state_code=_STATE_CODE)
    only_id = schema.StatePersonExternalId(
        person_external_id_id=_ID_2,
        external_id=_ID,
        id_type=_ID_TYPE,
        state_code=_STATE_CODE)
    person_with_only_id.external_ids = [only_id]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(person_with_groups)
    db_session.add(bare_person)
    db_session.add(person_with_only_id)
    db_session.commit()

    ingested_group = schema.StateSentenceGroup(
        state_code='us_nd', external_id=_EXTERNAL_ID)
    ingested = schema.StatePerson(sentence_groups=[ingested_group])

    expected = [person_with_groups, bare_person]

    found = read_persons_by_root_entity_cls(
        db_session,
        'us_nd',
        [ingested],
        allowed_root_entity_classes=[schema.StateSentenceGroup])
    self.assert_schema_object_lists_equal(expected, found)
def test_add_fine_different_external_id_same_state(self) -> None:
    """A second fine with a different external id in the same state persists.

    The first tree is committed through one session; the second tree, whose
    fine uses EXTERNAL_ID_2, is then flushed and committed through a new
    session. The test passes when no exception is raised.
    """
    # Arrange
    setup_session = SessionFactory.for_schema_base(StateBase)

    first_person = schema.StatePerson(
        person_id=self._ID_1,
        full_name=self.FULL_NAME,
        state_code=self.state_code)
    first_fine = schema.StateFine(
        person=first_person,
        status=StateFineStatus.EXTERNAL_UNKNOWN.value,
        fine_id=self._ID_1,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_1,
        county_code=self.COUNTY_CODE,
    )
    first_group = schema.StateSentenceGroup(
        sentence_group_id=self._ID_1,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=self.EXTERNAL_ID_1,
        state_code=self.state_code,
        county_code=self.COUNTY_CODE,
        fines=[first_fine],
    )
    first_person_id = schema.StatePersonExternalId(
        person_external_id_id=self._ID_1,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_1,
        id_type=self.ID_TYPE_1,
    )
    first_person.sentence_groups = [first_group]
    first_person.external_ids = [first_person_id]

    setup_session.add(first_fine)
    setup_session.commit()

    second_person = schema.StatePerson(
        person_id=self._ID_2,
        full_name=self.FULL_NAME,
        state_code=self.state_code)
    second_fine = schema.StateFine(
        person=second_person,
        status=StateFineStatus.EXTERNAL_UNKNOWN.value,
        fine_id=self._ID_2,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_2,
        county_code=self.COUNTY_CODE,
    )
    second_group = schema.StateSentenceGroup(
        sentence_group_id=self._ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=self.EXTERNAL_ID_2,
        state_code=self.state_code,
        county_code=self.COUNTY_CODE,
        fines=[second_fine],
    )
    second_person_id = schema.StatePersonExternalId(
        person_external_id_id=self._ID_2,
        state_code=self.state_code,
        external_id=self.EXTERNAL_ID_2,
        id_type=self.ID_TYPE_1,
    )
    second_person.sentence_groups = [second_group]
    second_person.external_ids = [second_person_id]

    # Act
    act_session = SessionFactory.for_schema_base(StateBase)
    act_session.add(second_fine)
    act_session.flush()
    act_session.commit()
def test_state_threeSentenceGroups_dontPersistAboveThreshold(self):
    """Checks that stored people are left untouched by persistence.write.

    Two people are stored. The second one owns a sentence group whose
    external id (SENTENCE_GROUP_ID_2) duplicates one owned by the first
    person. The ingest info carries county codes for both of the first
    person's sentence groups, yet the people read back afterwards must equal
    the entity snapshots taken before the write.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(
        state_person_id='1_GENERATE',
        state_sentence_group_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2])
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_2, county_code=COUNTY_CODE)

    db_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
    db_sentence_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE)
    db_sentence_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE)
    db_external_id = schema.StatePersonExternalId(person_external_id_id=ID,
                                                  state_code=REGION_CODE,
                                                  external_id=EXTERNAL_ID,
                                                  id_type=ID_TYPE)
    db_person.sentence_groups = [db_sentence_group, db_sentence_group_2]
    db_person.external_ids = [db_external_id]

    db_person_2 = schema.StatePerson(person_id=ID_2, full_name=FULL_NAME_1)
    # Same external id as db_sentence_group_2, but under a different person.
    # Presumably this duplicate is what makes matching error - TODO confirm.
    db_sentence_group_2_dup = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE)
    db_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE)
    db_person_2.sentence_groups = [db_sentence_group_2_dup]
    db_person_2.external_ids = [db_external_id_2]

    # No updates
    # The expected entities are converted before the commit and the write.
    expected_person = self.to_entity(db_person)
    expected_person_2 = self.to_entity(db_person_2)

    session = SessionFactory.for_schema_base(StateBase)
    session.add(db_person)
    session.add(db_person_2)
    session.commit()

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    # A fresh session is used to read the resulting database state.
    session = SessionFactory.for_schema_base(StateBase)
    persons = dao.read_people(session)

    # Assert
    self.assertEqual([expected_person, expected_person_2],
                     converter.convert_schema_objects_to_entity(persons))
def test_state_threeSentenceGroups_persistsTwoBelowThreshold(self):
    """Checks that two of three ingested sentence groups get their county code.

    The first stored person owns three sentence groups; the second stored
    person owns a group that duplicates the external id of the third
    (SENTENCE_GROUP_ID_3). After persistence.write, the first two groups are
    expected to carry COUNTY_CODE while the third group and its duplicate are
    expected to stay without one.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(state_person_id='1_GENERATE',
                                 state_sentence_group_ids=[
                                     SENTENCE_GROUP_ID,
                                     SENTENCE_GROUP_ID_2,
                                     SENTENCE_GROUP_ID_3
                                 ])
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_2, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_3, county_code=COUNTY_CODE)

    db_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
    db_sentence_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE)
    db_sentence_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE)
    db_sentence_group_3 = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE)
    db_external_id = schema.StatePersonExternalId(person_external_id_id=ID,
                                                  state_code=REGION_CODE,
                                                  external_id=EXTERNAL_ID,
                                                  id_type=ID_TYPE)
    db_person.sentence_groups = [
        db_sentence_group, db_sentence_group_2, db_sentence_group_3
    ]
    db_person.external_ids = [db_external_id]

    db_person_2 = schema.StatePerson(person_id=ID_2, full_name=FULL_NAME_1)
    # Same external id as db_sentence_group_3, but under a different person.
    db_sentence_group_3_dup = schema.StateSentenceGroup(
        sentence_group_id=ID_4,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE)
    db_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE)
    db_person_2.sentence_groups = [db_sentence_group_3_dup]
    db_person_2.external_ids = [db_external_id_2]

    # Expected entity tree for the first person after the write.
    expected_person = StatePerson.new_with_defaults(person_id=ID,
                                                    full_name=FULL_NAME_1,
                                                    external_ids=[],
                                                    sentence_groups=[])
    expected_external_id = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
        person=expected_person)
    expected_sentence_group = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE,
        county_code=COUNTY_CODE,
        person=expected_person)
    expected_sentence_group_2 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE,
        county_code=COUNTY_CODE,
        person=expected_person)
    # No county code because errors during match
    expected_sentence_group_3 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
        person=expected_person)
    expected_person.external_ids = [expected_external_id]
    expected_person.sentence_groups = [
        expected_sentence_group, expected_sentence_group_2,
        expected_sentence_group_3
    ]

    # Expected entity tree for the second person after the write.
    expected_person_2 = StatePerson.new_with_defaults(
        person_id=ID_2, full_name=FULL_NAME_1)
    expected_external_id_2 = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
        person=expected_person_2)
    # No county code because unmatched
    expected_sentence_group_3_dup = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_4,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
        person=expected_person_2)
    expected_person_2.sentence_groups = [expected_sentence_group_3_dup]
    expected_person_2.external_ids = [expected_external_id_2]

    session = SessionFactory.for_schema_base(StateBase)
    session.add(db_person)
    session.add(db_person_2)
    session.commit()

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    # A fresh session is used to read the resulting database state.
    session = SessionFactory.for_schema_base(StateBase)
    persons = dao.read_people(session)

    # Assert
    self.assertEqual([expected_person, expected_person_2],
                     converter.convert_schema_objects_to_entity(persons))
def generate_test_person(person_id, sentence_groups, incarceration_period,
                         agent, supervision_period) -> state_schema.StatePerson:
    """Build a fully populated StatePerson fixture for tests.

    The person gets one external id, alias, race and ethnicity, two
    assessments (one tied to `incarceration_period`, one to
    `supervision_period`, both conducted by `agent`) and one program
    assignment referred by `agent`. `sentence_groups` is attached as given.
    """
    person_external_ids = [
        state_schema.StatePersonExternalId(
            person_external_id_id=234,
            external_id='person_external_id',
            id_type='STATE',
            state_code='us_ny',
            person_id=person_id,
        )
    ]
    person_aliases = [
        state_schema.StatePersonAlias(
            person_alias_id=1456,
            state_code='us_ca',
            full_name='name',
            person_id=person_id,
        )
    ]
    person_races = [
        state_schema.StatePersonRace(
            person_race_id=345,
            state_code='us_ca',
            race=Race.BLACK.value,
            race_raw_text='BLK',
            person_id=person_id,
        )
    ]
    person_ethnicities = [
        state_schema.StatePersonEthnicity(
            person_ethnicity_id=345,
            state_code='us_ca',
            ethnicity=Ethnicity.NOT_HISPANIC.value,
            ethnicity_raw_text='HISP',
            person_id=person_id,
        )
    ]
    incarceration_assessment = state_schema.StateAssessment(
        assessment_id=456,
        person_id=person_id,
        state_code='us_ca',
        incarceration_period=incarceration_period,
        conducting_agent=agent,
    )
    supervision_assessment = state_schema.StateAssessment(
        assessment_id=4567,
        person_id=person_id,
        state_code='us_ca',
        supervision_period=supervision_period,
        conducting_agent=agent,
    )
    program_assignment = state_schema.StateProgramAssignment(
        program_assignment_id=567,
        participation_status=(
            StateProgramAssignmentParticipationStatus
            .PRESENT_WITHOUT_INFO.value),
        state_code='us_ca',
        referring_agent=agent,
    )

    return state_schema.StatePerson(
        person_id=person_id,
        full_name='name',
        birthdate=datetime.date(1980, 1, 5),
        birthdate_inferred_from_age=False,
        external_ids=person_external_ids,
        aliases=person_aliases,
        races=person_races,
        ethnicities=person_ethnicities,
        sentence_groups=sentence_groups,
        assessments=[incarceration_assessment, supervision_assessment],
        program_assignments=[program_assignment],
    )
def test_state_threeSentenceGroups_dontPersistAboveThreshold(
        self, mock_get_matcher):
    """Checks that stored people are left untouched by persistence.write.

    The entity matcher is replaced with one configured to error on
    SENTENCE_GROUP_ID and SENTENCE_GROUP_ID_4, i.e. one sentence group of
    each ingested person. The people read back after the write must equal
    the entity snapshots taken before it.
    """
    # Arrange
    mock_get_matcher.return_value = _PatchedStateEntityMatcher(
        region_code=STATE_CODE,
        erroring_class=schema.StateSentenceGroup,
        erroring_external_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_4],
    )

    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(
        state_person_id="1_GENERATE",
        state_sentence_group_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2],
    )
    ingest_info.state_people.add(
        state_person_id="2_GENERATE",
        state_sentence_group_ids=[
            SENTENCE_GROUP_ID_3, SENTENCE_GROUP_ID_4
        ],
    )
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_2, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_3, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_4, county_code=COUNTY_CODE)

    db_person = schema.StatePerson(person_id=ID,
                                   full_name=FULL_NAME_1,
                                   state_code=STATE_CODE)
    db_sentence_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID,
        state_code=STATE_CODE,
    )
    db_sentence_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=STATE_CODE,
    )
    db_external_id = schema.StatePersonExternalId(
        person_external_id_id=ID,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
    )
    db_person.sentence_groups = [db_sentence_group, db_sentence_group_2]
    db_person.external_ids = [db_external_id]

    db_person_2 = schema.StatePerson(person_id=ID_2,
                                     full_name=FULL_NAME_1,
                                     state_code=STATE_CODE)
    db_sentence_group_3 = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=STATE_CODE,
    )
    db_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
    )
    db_person_2.external_ids = [db_external_id_2]
    db_person_2.sentence_groups = [db_sentence_group_3]

    # No updates
    # The expected entities are converted before the commit and the write.
    expected_person = self.to_entity(db_person)
    expected_person_2 = self.to_entity(db_person_2)

    session = SessionFactory.for_schema_base(StateBase)
    session.add(db_person)
    session.add(db_person_2)
    session.commit()

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    # A fresh session is used to read the resulting database state.
    session = SessionFactory.for_schema_base(StateBase)
    persons = dao.read_people(session)

    # Assert
    self.assertEqual(
        [expected_person, expected_person_2],
        converter.convert_schema_objects_to_entity(persons),
    )
def test_state_threeSentenceGroups_persistsTwoBelowThreshold(
        self, mock_get_matcher):
    """Ensure that the number of errors is below the ND specific threshold.

    The entity matcher is replaced with one configured to error on
    SENTENCE_GROUP_ID only. After persistence.write, the two remaining
    sentence groups are expected to carry COUNTY_CODE while the erroring one
    stays without it.

    The shared threshold entry overridden below is restored once the test
    finishes, so the override cannot leak into other tests.
    """
    mock_get_matcher.return_value = _PatchedStateEntityMatcher(
        region_code=STATE_CODE,
        erroring_class=schema.StateSentenceGroup,
        erroring_external_ids=[SENTENCE_GROUP_ID],
    )

    # Remember the current module-level threshold entry (if any) so it can be
    # put back after the test, whether it passes or fails.
    had_threshold = (
        ENTITY_MATCHING_THRESHOLD
        in STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS)
    previous_threshold = (
        STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS[
            ENTITY_MATCHING_THRESHOLD]
        if had_threshold else None)

    def _restore_threshold():
        if had_threshold:
            STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS[
                ENTITY_MATCHING_THRESHOLD] = previous_threshold
        elif (ENTITY_MATCHING_THRESHOLD
              in STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS):
            del STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS[
                ENTITY_MATCHING_THRESHOLD]

    self.addCleanup(_restore_threshold)

    # Set the ENTITY_MATCHING_THRESHOLD to 0, such that we can verify that the
    # forty percent threshold for ENTITY_MATCHING_THRESHOLD is dictated by the
    # state-specific override in
    # STATE_CODE_TO_ENTITY_MATCHING_THRESHOLD_FORTY_PERCENT.
    STATE_ERROR_THRESHOLDS_WITH_FORTY_PERCENT_RATIOS[
        ENTITY_MATCHING_THRESHOLD] = 0

    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(
        state_person_id="1_GENERATE",
        state_sentence_group_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2],
    )
    ingest_info.state_people.add(
        state_person_id="2_GENERATE",
        state_sentence_group_ids=[SENTENCE_GROUP_ID_3])
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_2, county_code=COUNTY_CODE)
    ingest_info.state_sentence_groups.add(
        state_sentence_group_id=SENTENCE_GROUP_ID_3, county_code=COUNTY_CODE)

    db_person = schema.StatePerson(person_id=ID,
                                   full_name=FULL_NAME_1,
                                   state_code=STATE_CODE)
    db_sentence_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID,
        state_code=STATE_CODE,
    )
    db_sentence_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=STATE_CODE,
    )
    db_external_id = schema.StatePersonExternalId(
        person_external_id_id=ID,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
    )
    db_person.sentence_groups = [db_sentence_group, db_sentence_group_2]
    db_person.external_ids = [db_external_id]

    db_person_2 = schema.StatePerson(person_id=ID_2,
                                     full_name=FULL_NAME_1,
                                     state_code=STATE_CODE)
    db_sentence_group_3 = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=STATE_CODE,
    )
    db_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
    )
    db_person_2.external_ids = [db_external_id_2]
    db_person_2.sentence_groups = [db_sentence_group_3]

    session = SessionFactory.for_schema_base(StateBase)
    session.add(db_person)
    session.add(db_person_2)
    session.commit()

    # Expected entity tree for the first person after the write.
    expected_person = StatePerson.new_with_defaults(
        person_id=ID,
        full_name=FULL_NAME_1,
        external_ids=[],
        sentence_groups=[],
        state_code=STATE_CODE,
    )
    expected_external_id = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
        person=expected_person,
    )
    # No county code because errors during match
    expected_sentence_group = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID,
        state_code=STATE_CODE,
        person=expected_person,
    )
    expected_sentence_group_2 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=STATE_CODE,
        county_code=COUNTY_CODE,
        person=expected_person,
    )
    expected_person.external_ids = [expected_external_id]
    expected_person.sentence_groups = [
        expected_sentence_group,
        expected_sentence_group_2,
    ]

    # Expected entity tree for the second person after the write.
    expected_person_2 = StatePerson.new_with_defaults(
        person_id=ID_2, full_name=FULL_NAME_1, state_code=STATE_CODE)
    expected_external_id_2 = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID_2,
        state_code=STATE_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
        person=expected_person_2,
    )
    expected_sentence_group_3 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=STATE_CODE,
        county_code=COUNTY_CODE,
        person=expected_person_2,
    )
    expected_person_2.sentence_groups = [expected_sentence_group_3]
    expected_person_2.external_ids = [expected_external_id_2]

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    # A fresh session is used to read the resulting database state.
    session = SessionFactory.for_schema_base(StateBase)
    persons = dao.read_people(session)

    # Assert
    self.assertEqual(
        [expected_person, expected_person_2],
        converter.convert_schema_objects_to_entity(persons),
    )
def test_readMultipleIngestedPeople(self) -> None:
    """Look up several ingested people by external id in a single call.

    Two people are stored, each with one US_ND_SID external id. The DAO is
    then queried with three ingested people: one matching each stored person
    and one whose external id ("NONEXISTENT_ID") was never stored. The
    result is expected to contain exactly the two stored people, in any
    order.
    """
    # Arrange
    stored_people = []
    for primary_key, external_id in ((1, _EXTERNAL_ID), (2, _EXTERNAL_ID2)):
        db_person = schema.StatePerson(
            person_id=primary_key, state_code=_STATE_CODE
        )
        db_person.external_ids = [
            schema.StatePersonExternalId(
                person_external_id_id=primary_key,
                external_id=external_id,
                id_type=external_id_types.US_ND_SID,
                state_code=_STATE_CODE,
                person=db_person,
            )
        ]
        stored_people.append(db_person)

    session = SessionFactory.for_schema_base(StateBase)
    for db_person in stored_people:
        session.add(db_person)
    session.commit()

    # One ingested person per stored external id, plus one id that was
    # never written to the database.
    ingested_people = [
        entities.StatePerson.new_with_defaults(
            state_code=_STATE_CODE,
            external_ids=[
                entities.StatePersonExternalId.new_with_defaults(
                    external_id=external_id,
                    id_type=external_id_types.US_ND_SID,
                    state_code=_STATE_CODE,
                )
            ],
        )
        for external_id in (_EXTERNAL_ID, _EXTERNAL_ID2, "NONEXISTENT_ID")
    ]

    # Act
    people = dao.read_people_by_external_ids(session, _REGION, ingested_people)

    # Assert
    self.assertCountEqual(people, stored_people)
def test_add_fine_conflicting_external_id_different_state(self) -> None:
    """Flush a fine that reuses another fine's external id under a different state code.

    A first fine with external id ``EXTERNAL_ID_1`` and ``self.state_code``
    is added in one session. A second fine with the same external id but
    ``self.OTHER_STATE_CODE`` is then added and flushed in a new session.
    The visible body makes no explicit assertion; presumably the expectation
    is that the flush does not raise (confirm against the sibling
    conflicting-external-id tests).
    """

    def build_fine(row_id, state_code, related_external_id):
        # Builds a person -> sentence group -> fine graph and returns the
        # fine. The fine always uses EXTERNAL_ID_1; the sentence group and
        # the person external id use `related_external_id`.
        owner = schema.StatePerson(
            person_id=row_id,
            full_name=self.FULL_NAME,
            state_code=state_code,
        )
        fine = schema.StateFine(
            person=owner,
            status=StateFineStatus.EXTERNAL_UNKNOWN.value,
            fine_id=row_id,
            state_code=state_code,
            external_id=self.EXTERNAL_ID_1,
            county_code=self.COUNTY_CODE,
        )
        group = schema.StateSentenceGroup(
            sentence_group_id=row_id,
            status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
            external_id=related_external_id,
            state_code=state_code,
            county_code=self.COUNTY_CODE,
            fines=[fine],
        )
        person_external_id = schema.StatePersonExternalId(
            person_external_id_id=row_id,
            state_code=state_code,
            external_id=related_external_id,
            id_type=self.ID_TYPE_1,
        )
        owner.sentence_groups = [group]
        owner.external_ids = [person_external_id]
        return fine

    # Arrange
    with SessionFactory.using_database(self.database_key) as arrange_session:
        arrange_session.add(
            build_fine(self._ID_1, self.state_code, self.EXTERNAL_ID_1)
        )

    # Same fine external id as above, but every row carries the other state code.
    fine_in_other_state = build_fine(
        self._ID_2, self.OTHER_STATE_CODE, self.EXTERNAL_ID_2
    )

    # Act
    with SessionFactory.using_database(self.database_key) as session:
        session.add(fine_in_other_state)
        session.flush()