def test_readPeople(self):
    """Check read_people with full_name=None and birthdate=None.

    Three people are stored and all three are expected back.
    """
    # Arrange
    named_and_dated = schema.StatePerson(
        person_id=8, full_name=_FULL_NAME, birthdate=_BIRTHDATE)
    other_name = schema.StatePerson(person_id=9, full_name='diff_name')
    other_birthdate = schema.StatePerson(
        person_id=10,
        birthdate=datetime.date(year=2002, month=1, day=2))
    stored_people = [named_and_dated, other_name, other_birthdate]

    session = SessionFactory.for_schema_base(StateBase)
    for stored in stored_people:
        session.add(stored)
    session.commit()

    # Act
    people = dao.read_people(session, full_name=None, birthdate=None)

    # Assert
    expected_people = [
        converter.convert_schema_object_to_entity(stored)
        for stored in stored_people
    ]
    self.assertCountEqual(people, expected_people)
def test_matchPerson_updateStatusOnOrphanedEntities(self):
    """Check the status given to a bond the ingested charge no longer carries.

    The stored charge has a bond; the ingested charge has bond=None. The
    stored bond is expected in ``orphaned_entities`` with status
    REMOVED_WITHOUT_INFO, and no errors are expected.
    """
    # Arrange: person -> booking -> charge -> bond stored in the database.
    db_bond = schema.Bond(
        bond_id=_BOND_ID,
        status=BondStatus.PENDING.value,
        booking_id=_BOOKING_ID)
    db_charge = schema.Charge(
        charge_id=_CHARGE_ID,
        status=ChargeStatus.PENDING.value,
        bond=db_bond)
    db_booking = schema.Booking(
        admission_date=_DATE_2,
        booking_id=_BOOKING_ID,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        last_seen_time=_DATE,
        first_seen_time=_DATE,
        charges=[db_charge])
    db_person = schema.Person(
        person_id=_PERSON_ID,
        full_name=_FULL_NAME,
        birthdate=_DATE,
        jurisdiction_id=_JURISDICTION_ID,
        region=_REGION,
        bookings=[db_booking])

    session = SessionFactory.for_schema_base(JailsBase)
    session.add(db_person)
    session.commit()

    # Ingested tree: same entities without primary keys, and without the bond.
    charge_entity = converter.convert_schema_object_to_entity(db_charge)
    ingested_charge_no_bond = attr.evolve(
        charge_entity, charge_id=None, bond=None)

    booking_entity = converter.convert_schema_object_to_entity(db_booking)
    ingested_booking = attr.evolve(
        booking_entity,
        booking_id=None,
        custody_status=CustodyStatus.RELEASED,
        charges=[ingested_charge_no_bond])

    person_entity = converter.convert_schema_object_to_entity(db_person)
    ingested_person = attr.evolve(
        person_entity, person_id=None, bookings=[ingested_booking])

    # Act
    out = entity_matching.match(session, _REGION, [ingested_person])

    # Assert
    bond_entity = converter.convert_schema_object_to_entity(db_bond)
    expected_orphaned_bond = attr.evolve(
        bond_entity, status=BondStatus.REMOVED_WITHOUT_INFO)
    expected_charge = attr.evolve(
        ingested_charge_no_bond, charge_id=db_charge.charge_id)
    expected_booking = attr.evolve(
        ingested_booking,
        booking_id=db_booking.booking_id,
        charges=[expected_charge])
    expected_person = attr.evolve(
        ingested_person,
        person_id=db_person.person_id,
        bookings=[expected_booking])

    matched_people = converter.convert_schema_objects_to_entity(out.people)
    self.assertCountEqual(matched_people, [expected_person])

    orphans = converter.convert_schema_objects_to_entity(
        out.orphaned_entities)
    self.assertCountEqual(orphans, [expected_orphaned_bond])

    self.assertEqual(out.error_count, 0)
def test_matchPeople_errorCount(self):
    """Check that match reports one error and still returns the other person.

    The first stored person owns two bookings that are deep copies of each
    other apart from ``booking_id``. The assertions expect only the second
    person in the output, no orphaned entities, and an error count of 1.
    """
    # Arrange
    db_booking = schema.Booking(
        external_id=_EXTERNAL_ID,
        admission_date=_DATE_2,
        booking_id=_BOOKING_ID,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        last_seen_time=_DATE,
        first_seen_time=_DATE)
    db_booking_copy = copy.deepcopy(db_booking)
    db_booking_copy.booking_id = _BOOKING_ID_ANOTHER

    db_person = schema.Person(
        person_id=_PERSON_ID,
        external_id=_EXTERNAL_ID,
        jurisdiction_id=_JURISDICTION_ID,
        full_name=_FULL_NAME,
        birthdate=_DATE,
        region=_REGION,
        bookings=[db_booking, db_booking_copy])
    db_person_other = schema.Person(
        person_id=_PERSON_ID_ANOTHER,
        jurisdiction_id=_JURISDICTION_ID,
        region=_REGION,
        full_name=_NAME_2,
        external_id=_EXTERNAL_ID_ANOTHER)

    session = SessionFactory.for_schema_base(JailsBase)
    for stored in (db_person, db_person_other):
        session.add(stored)
    session.commit()

    booking_entity = converter.convert_schema_object_to_entity(db_booking)
    ingested_booking = attr.evolve(
        booking_entity,
        booking_id=None,
        custody_status=CustodyStatus.RELEASED)

    person_entity = converter.convert_schema_object_to_entity(db_person)
    ingested_person = attr.evolve(
        person_entity, person_id=None, bookings=[ingested_booking])

    other_entity = converter.convert_schema_object_to_entity(db_person_other)
    ingested_person_other = attr.evolve(other_entity, person_id=None)

    # Act
    out = entity_matching.match(
        session, _REGION, [ingested_person, ingested_person_other])

    # Assert
    expected_person = attr.evolve(
        ingested_person_other, person_id=db_person_other.person_id)

    matched_people = converter.convert_schema_objects_to_entity(out.people)
    self.assertCountEqual(matched_people, [expected_person])

    orphans = converter.convert_schema_objects_to_entity(
        out.orphaned_entities)
    self.assertCountEqual(orphans, [])

    self.assertEqual(out.error_count, 1)
def test_readPersonIdsMatchMultiplePeople(self):
    """Check read_people_by_external_ids when two people share an external id.

    Both stored people carry ``_EXTERNAL_ID``. The ingested person carries
    ``_EXTERNAL_ID`` and ``_EXTERNAL_ID2``. Both stored people are expected.
    """
    # Arrange
    first_person = schema.StatePerson(person_id=1)
    first_external_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=first_person,
    )
    first_person.external_ids = [first_external_id]

    second_person = schema.StatePerson(person_id=2)
    second_external_id = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=second_person,
    )
    second_person.external_ids = [second_external_id]

    session = SessionFactory.for_schema_base(StateBase)
    for stored in (first_person, second_person):
        session.add(stored)
    session.commit()

    ingested_person = entities.StatePerson.new_with_defaults()
    ingested_person.external_ids = [
        entities.StatePersonExternalId.new_with_defaults(
            external_id=external_id,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE)
        for external_id in (_EXTERNAL_ID, _EXTERNAL_ID2)
    ]

    # Act
    people = dao.read_people_by_external_ids(
        session, _REGION, [ingested_person])

    # Assert
    expected_people = [
        converter.convert_schema_object_to_entity(stored)
        for stored in (first_person, second_person)
    ]
    self.assertCountEqual(people, expected_people)
def _convert_and_normalize_record_trees(
        people: List[schema.StatePerson],
        populate_back_edges: bool = True) -> List[entities.StatePerson]:
    """Converts schema record trees to persistence layer models and removes
    any duplicate people created by how SQLAlchemy handles joins.

    Only the first occurrence of each ``person_id`` is converted; later
    occurrences are counted and reported through ``logging.error``.

    Args:
        people: Schema people to convert, possibly containing repeats.
        populate_back_edges: Passed through to
            ``converter.convert_schema_object_to_entity``.

    Returns:
        One converted entity per distinct ``person_id``, in first-seen order.

    Raises:
        ValueError: If the converter returns something that is not an
            ``entities.StatePerson``.
    """
    occurrences: Dict[int, int] = defaultdict(int)
    unique_people: List[entities.StatePerson] = []

    for schema_person in people:
        already_seen = occurrences[schema_person.person_id] != 0
        if not already_seen:
            entity = converter.convert_schema_object_to_entity(
                schema_person, populate_back_edges)
            if not isinstance(entity, entities.StatePerson):
                raise ValueError(
                    f"Unexpected return type [{entity.__class__}]")
            unique_people.append(entity)
        occurrences[schema_person.person_id] += 1

    repeated = [
        (person_id, count)
        for person_id, count in occurrences.items()
        if count > 1
    ]
    if repeated:
        id_counts = '\n'.join(
            'ID {} with count {}'.format(person_id, count)
            for person_id, count in repeated)
        logging.error("Duplicate records returned for person IDs:\n%s",
                      id_counts)

    finished_at = datetime.datetime.now()
    logging.info("Finished _convert_and_normalize_record_trees at time [%s]",
                 finished_at.isoformat())
    return unique_people
def test_readPeopleByRootExternalIds_SentenceGroupExternalId(self):
    """Check read_people_by_cls_external_ids keyed on a sentence group id.

    One person owns two sentence groups. Querying StateSentenceGroup by
    ``_EXTERNAL_ID`` is expected to return that person.
    """
    # Arrange
    person = schema.StatePerson(person_id=1)

    def build_sentence_group(sentence_group_id, external_id):
        # Both sentence groups differ only in primary key and external id.
        return schema.StateSentenceGroup(
            sentence_group_id=sentence_group_id,
            external_id=external_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person)

    queried_group = build_sentence_group(1, _EXTERNAL_ID)
    other_group = build_sentence_group(2, _EXTERNAL_ID2)
    person.sentence_groups = [queried_group, other_group]

    session = SessionFactory.for_schema_base(StateBase)
    session.add(person)
    session.commit()

    # Act
    people = dao.read_people_by_cls_external_ids(
        session, _STATE_CODE, entities.StateSentenceGroup, [_EXTERNAL_ID])

    # Assert
    converted_person = converter.convert_schema_object_to_entity(person)
    self.assertCountEqual(people, [converted_person])
def test_readPeopleByRootExternalIds_entireTreeReturnedWithOneMatch(self):
    """Check that one matching external id is enough to return the person.

    The person owns two external ids and only one of them equals the queried
    ``_EXTERNAL_ID``. The expected result is the converted person.
    """
    # Arrange
    person = schema.StatePerson(person_id=1)
    external_id_match = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=person,
    )
    # This row must carry a different external id than the one queried below.
    # Otherwise both rows match and the one-match scenario is not exercised.
    external_id_no_match = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID2,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=person,
    )
    person.external_ids = [external_id_match, external_id_no_match]

    session = SessionFactory.for_schema_base(StateBase)
    session.add(person)
    session.commit()

    # Act
    people = dao.read_people_by_cls_external_ids(
        session, _STATE_CODE, entities.StatePerson, [_EXTERNAL_ID])

    # Assert
    expected_people = [converter.convert_schema_object_to_entity(person)]
    self.assertCountEqual(people, expected_people)
def test_readPeopleByExternalId(self):
    """Check read_people_by_external_ids against county people.

    Two people are stored and only one has ``external_id=_EXTERNAL_ID``.
    Reading by an ingested person with that external id is expected to
    return just that person.
    """
    # Arrange
    admitted_at = datetime.datetime(2018, 6, 20)
    released_on = datetime.date(2018, 7, 20)

    closed_booking = Booking(
        custody_status=CustodyStatus.IN_CUSTODY.value,
        admission_date=admitted_at,
        release_date=released_on,
        first_seen_time=admitted_at,
        last_seen_time=admitted_at)

    person_without_external_id = Person(
        person_id=1,
        region=_REGION,
        jurisdiction_id=_JURISDICTION_ID,
        bookings=[deepcopy(closed_booking)])
    person_with_external_id = Person(
        person_id=2,
        region=_REGION,
        jurisdiction_id=_JURISDICTION_ID,
        bookings=[closed_booking],
        external_id=_EXTERNAL_ID)

    session = SessionFactory.for_schema_base(JailsBase)
    for stored in (person_without_external_id, person_with_external_id):
        session.add(stored)
    session.commit()

    ingested_person = entities.Person.new_with_defaults(
        external_id=_EXTERNAL_ID)

    # Act
    people = dao.read_people_by_external_ids(
        session, _REGION, [ingested_person])

    # Assert
    converted_match = converter.convert_schema_object_to_entity(
        person_with_external_id)
    self.assertCountEqual(people, [converted_match])
def _convert_and_normalize_record_trees(people: List[Person]) -> List[entities.Person]:
    """Converts schema record trees to persistence layer models and removes
    any duplicate people created by how SQLAlchemy handles joins.

    Only the first occurrence of each ``person_id`` is converted; later
    occurrences are counted and reported through ``logging.error``.

    Args:
        people: Schema people to convert, possibly containing repeats.

    Returns:
        One converted entity per distinct ``person_id``, in first-seen order.

    Raises:
        ValueError: If the converter returns something that is not an
            ``entities.Person``.
    """
    occurrences: Dict[int, int] = defaultdict(int)
    unique_people: List[entities.Person] = []

    for schema_person in people:
        already_seen = occurrences[schema_person.person_id] != 0
        if not already_seen:
            entity = converter.convert_schema_object_to_entity(schema_person)
            if not isinstance(entity, entities.Person):
                raise ValueError(f"Unexpected return type [{entity.__class__}]")
            unique_people.append(entity)
        occurrences[schema_person.person_id] += 1

    repeated = [
        (person_id, count)
        for person_id, count in occurrences.items()
        if count > 1
    ]
    if repeated:
        id_counts = "\n".join(
            "ID {} with count {}".format(person_id, count)
            for person_id, count in repeated
        )
        logging.error("Duplicate records returned for person IDs:\n%s", id_counts)

    return unique_people
def test_readPeopleWithOpenBookingsBeforeDate(self):
    """Check read_people_with_open_bookings_scraped_before_time.

    Four people are stored with one booking each. Only the person whose
    booking is IN_CUSTODY, in the queried region, and last seen before the
    most recent scrape time is expected back.
    """
    # Arrange
    released_on = datetime.date(2018, 7, 20)
    most_recent_scrape_date = datetime.datetime(2018, 6, 20)
    one_day_earlier = most_recent_scrape_date - datetime.timedelta(days=1)
    first_seen = most_recent_scrape_date - datetime.timedelta(days=3)

    person = Person(
        person_id=8, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
    person_resolved_booking = Person(
        person_id=9, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
    person_most_recent_scrape = Person(
        person_id=10, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
    person_wrong_region = Person(
        person_id=11,
        region=_REGION_ANOTHER,
        jurisdiction_id=_JURISDICTION_ID)

    # Booking that should be returned
    open_booking_before_last_scrape = Booking(
        person_id=person.person_id,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        first_seen_time=first_seen,
        last_seen_time=one_day_earlier)

    # Bookings that should not be returned
    open_booking_incorrect_region = Booking(
        person_id=person_wrong_region.person_id,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        first_seen_time=first_seen,
        last_seen_time=one_day_earlier)
    open_booking_most_recent_scrape = Booking(
        person_id=person_most_recent_scrape.person_id,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        first_seen_time=first_seen,
        last_seen_time=most_recent_scrape_date)
    resolved_booking = Booking(
        person_id=person_resolved_booking.person_id,
        custody_status=CustodyStatus.RELEASED.value,
        release_date=released_on,
        first_seen_time=first_seen,
        last_seen_time=one_day_earlier)

    session = SessionFactory.for_schema_base(JailsBase)
    rows_in_insert_order = (
        person,
        person_resolved_booking,
        person_most_recent_scrape,
        person_wrong_region,
        open_booking_before_last_scrape,
        open_booking_incorrect_region,
        open_booking_most_recent_scrape,
        resolved_booking,
    )
    for row in rows_in_insert_order:
        session.add(row)
    session.commit()

    # Act
    people = dao.read_people_with_open_bookings_scraped_before_time(
        session, person.region, most_recent_scrape_date)

    # Assert
    converted_person = converter.convert_schema_object_to_entity(person)
    self.assertEqual(people, [converted_person])
def _ingest_file_schema_metadata_as_entity(
        schema_metadata: schema.DirectIngestIngestFileMetadata) -> DirectIngestIngestFileMetadata:
    """Convert schema ingest-file metadata to its entity form.

    Args:
        schema_metadata: The schema object to convert.

    Returns:
        The converted ``DirectIngestIngestFileMetadata`` entity.

    Raises:
        ValueError: If the converter returns an object of any other type.
    """
    converted = convert_schema_object_to_entity(schema_metadata)
    if isinstance(converted, DirectIngestIngestFileMetadata):
        return converted
    raise ValueError(f'Unexpected metadata entity type: {type(converted)}')
def test_readPeopleWithOpenBookings(self):
    """Check read_people_with_open_bookings against three stored people.

    Of the people stored, only the one that has both ``_FULL_NAME`` and an
    IN_CUSTODY booking is expected back.
    """
    # Arrange
    admitted_at = datetime.datetime(2018, 6, 20)
    released_on = datetime.date(2018, 7, 20)

    open_booking = Booking(
        custody_status=CustodyStatus.IN_CUSTODY.value,
        admission_date=admitted_at,
        first_seen_time=admitted_at,
        last_seen_time=admitted_at,
    )
    closed_booking = Booking(
        custody_status=CustodyStatus.RELEASED.value,
        admission_date=admitted_at,
        release_date=released_on,
        first_seen_time=admitted_at,
        last_seen_time=admitted_at,
    )

    person_no_match = Person(
        person_id=1,
        region=_REGION,
        jurisdiction_id=_JURISDICTION_ID,
        bookings=[deepcopy(open_booking)],
    )
    person_match_full_name = Person(
        person_id=2,
        region=_REGION,
        jurisdiction_id=_JURISDICTION_ID,
        bookings=[deepcopy(open_booking)],
        full_name=_FULL_NAME,
    )
    person_no_open_bookings = Person(
        person_id=6,
        region=_REGION,
        jurisdiction_id=_JURISDICTION_ID,
        full_name=_FULL_NAME,
        bookings=[closed_booking],
    )

    with SessionFactory.using_database(self.database_key,
                                       autocommit=False) as session:
        for stored in (person_no_match,
                       person_no_open_bookings,
                       person_match_full_name):
            session.add(stored)
        session.commit()

        ingest_info = IngestInfo()
        ingest_info.create_person(
            full_name=_FULL_NAME, person_id=_EXTERNAL_ID)

        # Act
        people = dao.read_people_with_open_bookings(
            session, _REGION, ingest_info.people)

        # Assert
        expected_people = [
            converter.convert_schema_object_to_entity(matched)
            for matched in [person_match_full_name]
        ]
        self.assertCountEqual(people, expected_people)
def test_matchPeople_twoMatchingPeople_PicksMostSimilar(self):
    """Check which of two matching stored people the ingested person gets.

    The second stored person is a deep copy of the first with a different
    ``person_id`` and gender. After matching, the ingested person is expected
    to carry the first person's ``person_id``.
    """
    # Arrange
    db_person = schema.Person(
        person_id=_PERSON_ID,
        external_id=_EXTERNAL_ID,
        jurisdiction_id=_JURISDICTION_ID,
        full_name=_FULL_NAME,
        birthdate=_DATE,
        region=_REGION,
        gender=Gender.MALE.value,
    )
    db_person_other_gender = copy.deepcopy(db_person)
    db_person_other_gender.person_id = _PERSON_ID_ANOTHER
    db_person_other_gender.gender = Gender.FEMALE.value

    session = SessionFactory.for_schema_base(JailsBase)
    for stored in (db_person, db_person_other_gender):
        session.add(stored)
    session.commit()

    person_entity = converter.convert_schema_object_to_entity(db_person)
    ingested_person = attr.evolve(person_entity, person_id=None)
    expected_person = attr.evolve(
        ingested_person, person_id=db_person.person_id)

    # Act
    matched_entities = entity_matching.match(
        session, _REGION, [ingested_person])

    # Assert both schema objects are matches, but we select the most
    # similar one.
    first_is_match = county_matching_utils.is_person_match(
        db_entity=db_person, ingested_entity=ingested_person)
    self.assertTrue(first_is_match)

    second_is_match = county_matching_utils.is_person_match(
        db_entity=db_person_other_gender, ingested_entity=ingested_person)
    self.assertTrue(second_is_match)

    self.assertEqual(matched_entities.error_count, 0)
    self.assertEqual(len(matched_entities.orphaned_entities), 0)
    self.assertEqual(ingested_person, expected_person)
def test_readPeople_byFullName(self):
    """Check read_people filtered by full name.

    Two people are stored. Only the one whose ``full_name`` equals
    ``_FULL_NAME`` is expected back.
    """
    # Arrange
    matching_person = schema.StatePerson(person_id=8, full_name=_FULL_NAME)
    other_person = schema.StatePerson(person_id=9, full_name='diff_name')

    session = SessionFactory.for_schema_base(StateBase)
    for stored in (matching_person, other_person):
        session.add(stored)
    session.commit()

    # Act
    people = dao.read_people(session, full_name=_FULL_NAME, birthdate=None)

    # Assert
    converted_match = converter.convert_schema_object_to_entity(
        matching_person)
    self.assertCountEqual(people, [converted_match])
def to_entity(self, schema_obj):
    """Convert ``schema_obj`` via the converter with populate_back_edges=False."""
    entity = converter.convert_schema_object_to_entity(
        schema_obj, populate_back_edges=False)
    return entity
def assert_schema_objects_equal(self, expected: StateBase, actual: StateBase):
    """Assert that two schema objects are equal once converted to entities.

    Args:
        expected: Schema object holding the expected state.
        actual: Schema object under test.
    """
    expected_entity = converter.convert_schema_object_to_entity(expected)
    actual_entity = converter.convert_schema_object_to_entity(actual)
    self.assertEqual(expected_entity, actual_entity)
def test_matchPeople(self):
    """Check that match resolves two ingested people to their stored rows.

    One stored person has no external id and the other has ``_EXTERNAL_ID``.
    Each ingested person and booking is expected to receive the primary key
    of its stored counterpart, with no orphaned entities and no errors.
    """
    # Arrange
    db_booking = schema.Booking(
        admission_date=_DATE_2,
        booking_id=_BOOKING_ID,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        last_seen_time=_DATE,
        first_seen_time=_DATE,
    )
    db_person = schema.Person(
        person_id=_PERSON_ID,
        full_name=_FULL_NAME,
        birthdate=_DATE,
        jurisdiction_id=_JURISDICTION_ID,
        region=_REGION,
        bookings=[db_booking],
    )

    db_booking_external_id = schema.Booking(
        admission_date=_DATE_2,
        booking_id=_BOOKING_ID_ANOTHER,
        release_date=_DATE,
        custody_status=CustodyStatus.RELEASED.value,
        last_seen_time=_DATE,
        first_seen_time=_DATE,
    )
    db_person_external_id = schema.Person(
        person_id=_PERSON_ID_ANOTHER,
        external_id=_EXTERNAL_ID,
        full_name=_FULL_NAME,
        birthdate=_DATE,
        jurisdiction_id=_JURISDICTION_ID,
        region=_REGION,
        bookings=[db_booking_external_id],
    )

    with SessionFactory.using_database(self.database_key,
                                       autocommit=False) as session:
        for stored in (db_person, db_person_external_id):
            session.add(stored)
        session.commit()

        booking_entity = converter.convert_schema_object_to_entity(
            db_booking)
        ingested_booking = attr.evolve(
            booking_entity,
            booking_id=None,
            custody_status=CustodyStatus.RELEASED,
        )
        person_entity = converter.convert_schema_object_to_entity(db_person)
        ingested_person = attr.evolve(
            person_entity,
            person_id=None,
            bookings=[ingested_booking],
        )

        booking_external_id_entity = (
            converter.convert_schema_object_to_entity(
                db_booking_external_id))
        ingested_booking_external_id = attr.evolve(
            booking_external_id_entity,
            booking_id=None,
            facility=_FACILITY,
        )
        person_external_id_entity = (
            converter.convert_schema_object_to_entity(
                db_person_external_id))
        ingested_person_external_id = attr.evolve(
            person_external_id_entity,
            person_id=None,
            bookings=[ingested_booking_external_id],
        )

        # Act
        out = entity_matching.match(
            session, _REGION,
            [ingested_person_external_id, ingested_person])

        # Assert
        expected_booking = attr.evolve(
            ingested_booking, booking_id=_BOOKING_ID)
        expected_person = attr.evolve(
            ingested_person,
            person_id=_PERSON_ID,
            bookings=[expected_booking])

        expected_booking_external_id = attr.evolve(
            ingested_booking_external_id, booking_id=_BOOKING_ID_ANOTHER)
        expected_person_external_id = attr.evolve(
            ingested_person_external_id,
            person_id=_PERSON_ID_ANOTHER,
            bookings=[expected_booking_external_id],
        )

        matched_people = converter.convert_schema_objects_to_entity(
            out.people)
        self.assertCountEqual(
            matched_people,
            [expected_person_external_id, expected_person],
        )

        orphans = converter.convert_schema_objects_to_entity(
            out.orphaned_entities)
        self.assertCountEqual(orphans, [])

        self.assertEqual(out.error_count, 0)
def to_entity(self, schema_obj: DatabaseEntity) -> Entity:
    """Convert ``schema_obj`` via the converter with populate_back_edges=False."""
    entity = converter.convert_schema_object_to_entity(
        schema_obj, populate_back_edges=False)
    return entity