def get_non_external_id_match(
        self,
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree]) -> Optional[EntityTree]:
    """ND specific matching that does not rely solely on external_id.

    Chooses a matcher based on the type of the entity held by
    |ingested_entity_tree| and uses it to look for a single match among the
    |db_entity_trees|.

    Returns whatever entity_matching_utils.get_only_match produces for the
    chosen matcher, or None when the entity type has no ND specific matcher.
    """
    ingested_entity = ingested_entity_tree.entity

    # Incarceration periods use their own dedicated matcher; the other
    # listed types are all compared with the non-null-fields matcher.
    if isinstance(ingested_entity, schema.StateIncarcerationPeriod):
        matcher = is_incarceration_period_match
    elif isinstance(
            ingested_entity,
            (
                schema.StateAgent,
                schema.StateIncarcerationSentence,
                schema.StateAssessment,
                schema.StateSupervisionPeriod,
                schema.StateSupervisionViolation,
                schema.StateSupervisionViolationResponse,
            ),
    ):
        matcher = nonnull_fields_entity_match
    else:
        # No ND specific matching is defined for any other entity type.
        return None

    return entity_matching_utils.get_only_match(
        ingested_entity_tree, db_entity_trees, matcher)
def match_holds(
        *,
        db_booking: entities.Booking,
        ingested_booking: entities.Booking):
    """
    Pairs each hold on the |ingested_booking| with a hold on the |db_booking|.

    When an ingested hold matches a db hold, the ingested hold takes on the
    db hold's primary key. Every db hold left without an ingested counterpart
    is passed to _drop_hold and then appended to the |ingested_booking|.

    Raises MatchedMultipleIngestedEntitiesError if two ingested holds match
    the same db hold.
    """
    ingested_by_db_hold_id: Dict[int, entities.Hold] = {}

    for candidate in ingested_booking.holds:
        matched_db_hold = cast(
            entities.Hold,
            get_only_match(candidate, db_booking.holds, is_hold_match))
        if not matched_db_hold:
            continue

        logging.debug(
            'Successfully matched to hold with ID %s',
            matched_db_hold.hold_id)

        db_hold_id = matched_db_hold.hold_id

        # A db hold may be claimed by at most one ingested hold; a second
        # claim on the same db hold is an error.
        if db_hold_id in ingested_by_db_hold_id:
            raise MatchedMultipleIngestedEntitiesError(
                matched_db_hold,
                [candidate, ingested_by_db_hold_id[db_hold_id]])

        candidate.hold_id = db_hold_id
        ingested_by_db_hold_id[cast(int, db_hold_id)] = candidate

    # Collect the db holds nothing matched, dropping each one as it is found.
    unmatched_db_holds = []
    for existing_hold in db_booking.holds:
        if existing_hold.hold_id in ingested_by_db_hold_id:
            continue
        _drop_hold(existing_hold)
        unmatched_db_holds.append(existing_hold)

    ingested_booking.holds.extend(unmatched_db_holds)
def nd_get_incomplete_incarceration_period_match(
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree]) -> Optional[EntityTree]:
    """Looks for an incomplete StateIncarcerationPeriod in |db_entity_trees|
    that matches the StateIncarcerationPeriod in |ingested_entity_tree|.

    Returns None immediately when the ingested period is itself complete.
    Otherwise returns the result of matching the ingested tree against only
    those db trees whose period is incomplete.
    """
    # A complete ingested period can never pair with an incomplete one.
    if is_incarceration_period_complete(
            cast(StateIncarcerationPeriod, ingested_entity_tree.entity)):
        return None

    # Narrow the candidates to db trees that hold an incomplete period.
    incomplete_candidates = [
        candidate_tree
        for candidate_tree in db_entity_trees
        if not is_incarceration_period_complete(
            cast(StateIncarcerationPeriod, candidate_tree.entity))
    ]

    return entity_matching_utils.get_only_match(
        ingested_entity_tree,
        incomplete_candidates,
        is_incomplete_incarceration_period_match)
def match_bookings(
        *,
        db_person: entities.Person,
        ingested_person: entities.Person,
        orphaned_entities: List[Entity]):
    """
    Pairs each booking on the |ingested_person| with a booking on the
    |db_person|.

    When an ingested booking matches a db booking, the ingested booking takes
    on the db booking's primary key and first_seen_time, and the child
    entities (arrest, holds, charges, bonds, sentences) are matched in turn.
    The |orphaned_entities| list is forwarded to the bond and sentence
    matching. Db bookings that no ingested booking matched are appended to
    the |ingested_person|.

    Raises MatchedMultipleIngestedEntitiesError if two ingested bookings
    match the same db booking.
    """
    ingested_by_db_booking_id: Dict[int, entities.Booking] = {}

    for candidate in ingested_person.bookings:
        matched_db_booking: entities.Booking = cast(
            entities.Booking,
            get_only_match(candidate, db_person.bookings, is_booking_match))
        if not matched_db_booking:
            continue

        logging.debug(
            'Successfully matched to booking with ID %s',
            matched_db_booking.booking_id)

        db_booking_id = matched_db_booking.booking_id

        # A db booking may be claimed by at most one ingested booking; a
        # second claim on the same db booking is an error.
        if db_booking_id in ingested_by_db_booking_id:
            raise MatchedMultipleIngestedEntitiesError(
                matched_db_booking,
                [candidate, ingested_by_db_booking_id[db_booking_id]])

        ingested_by_db_booking_id[cast(int, db_booking_id)] = candidate
        candidate.booking_id = db_booking_id

        # The db booking already carries a first_seen_time, so it overrides
        # whatever value arrived on the ingested booking.
        candidate.first_seen_time = matched_db_booking.first_seen_time

        # When both sides have an inferred admission date, keep the db value.
        both_inferred = (matched_db_booking.admission_date_inferred
                         and candidate.admission_date_inferred)
        if both_inferred:
            candidate.admission_date = matched_db_booking.admission_date
            candidate.admission_date_inferred = True

        # Recurse into the children of the matched pair.
        match_arrest(
            db_booking=matched_db_booking, ingested_booking=candidate)
        match_holds(
            db_booking=matched_db_booking, ingested_booking=candidate)
        match_charges(
            db_booking=matched_db_booking, ingested_booking=candidate)
        match_bonds(
            db_booking=matched_db_booking,
            ingested_booking=candidate,
            orphaned_entities=orphaned_entities)
        match_sentences(
            db_booking=matched_db_booking,
            ingested_booking=candidate,
            orphaned_entities=orphaned_entities)

    # Carry over every db booking that no ingested booking claimed.
    ingested_person.bookings.extend([
        existing_booking
        for existing_booking in db_person.bookings
        if existing_booking.booking_id not in ingested_by_db_booking_id
    ])
def _get_match(
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree]) -> Optional[EntityTree]:
    """Searches the |db_entity_trees| for a match to |ingested_entity_tree|.

    An exact match (via is_match) is tried first. If that finds nothing, a
    type-specific fallback is attempted for certain entity types. Returns the
    match if one is found.
    """
    exact_match = entity_matching_utils.get_only_match(
        ingested_entity_tree, db_entity_trees, is_match)
    if exact_match:
        return exact_match

    ingested_entity = ingested_entity_tree.entity

    # Fallback 1: incarceration periods may pair with an incomplete period.
    if isinstance(ingested_entity, StateIncarcerationPeriod):
        return nd_get_incomplete_incarceration_period_match(
            ingested_entity_tree, db_entity_trees)

    # Fallback 2: these types are retried with the base entity matcher.
    base_matched_types = (
        StateAgent,
        StateIncarcerationSentence,
        StateAssessment,
        StateSupervisionPeriod,
        StateSupervisionViolation,
        StateSupervisionViolationResponse,
    )
    if isinstance(ingested_entity, base_matched_types):
        return entity_matching_utils.get_only_match(
            ingested_entity_tree, db_entity_trees, base_entity_match)

    # No fallback applies; hand back the (empty) exact-match result.
    return exact_match
def get_non_external_id_match(
        self,
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree]) -> Optional[EntityTree]:
    """PA specific matching that does not rely solely on external_id.

    Only StateAssessment and StateCharge entities are handled: for those, the
    |ingested_entity_tree| is compared against the |db_entity_trees| with the
    non-null-fields matcher and the result is returned. Every other entity
    type yields None.
    """
    pa_matched_types = (schema.StateAssessment, schema.StateCharge)
    if not isinstance(ingested_entity_tree.entity, pa_matched_types):
        return None

    return entity_matching_utils.get_only_match(
        ingested_entity_tree, db_entity_trees, nonnull_fields_entity_match)
def get_non_external_id_match(
        self,
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree]) -> Optional[EntityTree]:
    """MO specific matching that does not rely solely on external_id.

    Only StateSupervisionViolationResponse entities are handled: for those,
    the |ingested_entity_tree| is compared against the |db_entity_trees|
    using this object's _nonnull_fields_ssvr_entity_match and the result is
    returned. Every other entity type yields None.
    """
    ingested_entity = ingested_entity_tree.entity
    if not isinstance(ingested_entity,
                      schema.StateSupervisionViolationResponse):
        return None

    return entity_matching_utils.get_only_match(
        ingested_entity_tree,
        db_entity_trees,
        self._nonnull_fields_ssvr_entity_match)
def test_get_only_match_duplicates(self):
    """get_only_match returns the entity when the same db entity is listed
    more than once among the candidates."""

    # Parameter names are kept as-is in case the matcher is invoked with
    # keyword arguments.
    def same_birthdate(db_entity, ingested_entity):
        return db_entity.birthdate == ingested_entity.birthdate

    matching_db_person = county_entities.Person.new_with_defaults(
        person_id=1, birthdate=_DATE)
    other_db_person = county_entities.Person.new_with_defaults(
        person_id=2, birthdate=_DATE_OTHER)
    ingested = county_entities.Person.new_with_defaults(birthdate=_DATE)

    # The matching person appears twice in the candidate list.
    candidates = [matching_db_person, other_db_person, matching_db_person]

    self.assertEqual(
        get_only_match(ingested, candidates, same_birthdate),
        matching_db_person)