def test_readPeopleByRootExternalIds_entireTreeReturnedWithOneMatch(
        self) -> None:
    """A person is returned when only one of their external ids is queried.

    The person is stored with two external ids (``_EXTERNAL_ID`` and
    ``_EXTERNAL_ID2``); the lookup is made with ``_EXTERNAL_ID`` only and the
    result must consist of exactly that person.
    """
    # Arrange
    person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    external_id_match = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=person,
    )
    external_id_no_match = schema.StatePersonExternalId(
        person_external_id_id=2,
        external_id=_EXTERNAL_ID2,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=person,
    )
    person.external_ids = [external_id_match, external_id_no_match]

    session = SessionFactory.for_schema_base(StateBase)
    try:
        session.add(person)
        session.commit()

        # Act
        people = dao.read_people_by_cls_external_ids(
            session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

        # Assert
        expected_people = [person]
        self.assertCountEqual(people, expected_people)
    finally:
        # Close the session even when the lookup or the assertion fails so
        # the test does not leave an open session/connection behind.
        session.close()
def test_readPeopleByRootExternalIds_SentenceGroupExternalId(self) -> None:
    """A person is found through the external id of one of their sentence groups.

    The person is stored with two sentence groups (``_EXTERNAL_ID`` and
    ``_EXTERNAL_ID2``); the lookup is made for ``schema.StateSentenceGroup``
    with ``_EXTERNAL_ID`` only and the result must consist of exactly that
    person.
    """
    # Arrange
    person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=1,
        external_id=_EXTERNAL_ID,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code=_STATE_CODE,
        person=person,
    )
    sentence_group_2 = schema.StateSentenceGroup(
        sentence_group_id=2,
        external_id=_EXTERNAL_ID2,
        status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        state_code=_STATE_CODE,
        person=person,
    )
    person.sentence_groups = [sentence_group, sentence_group_2]

    session = SessionFactory.for_schema_base(StateBase)
    try:
        session.add(person)
        session.commit()

        # Act
        people = dao.read_people_by_cls_external_ids(
            session, _STATE_CODE, schema.StateSentenceGroup, [_EXTERNAL_ID])

        # Assert
        expected_people = [person]
        self.assertCountEqual(people, expected_people)
    finally:
        # Close the session even when the lookup or the assertion fails so
        # the test does not leave an open session/connection behind.
        session.close()
def test_readPeopleByRootExternalIds(self) -> None:
    """Only the person owning the queried external id is returned.

    Two people are stored; one has no external ids and the other owns
    ``_EXTERNAL_ID``. Looking up ``_EXTERNAL_ID`` must return only the
    second person.
    """
    # Arrange
    person_no_match = schema.StatePerson(person_id=1)
    person_match_external_id = schema.StatePerson(person_id=2)
    person_external_id = schema.StatePersonExternalId(
        person_external_id_id=1,
        external_id=_EXTERNAL_ID,
        id_type=external_id_types.US_ND_SID,
        state_code=_STATE_CODE,
        person=person_match_external_id,
    )
    person_match_external_id.external_ids = [person_external_id]

    session = SessionFactory.for_schema_base(StateBase)
    try:
        session.add(person_no_match)
        session.add(person_match_external_id)
        session.commit()

        # Act
        people = dao.read_people_by_cls_external_ids(
            session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

        # Assert
        expected_people = [person_match_external_id]
        self.assertCountEqual(people, expected_people)
    finally:
        # Close the session even when the lookup or the assertion fails so
        # the test does not leave an open session/connection behind.
        session.close()
def read_db_entity_trees_of_cls_to_merge(
        session: Session,
        state_code: str,
        schema_cls: Type[StateBase]
) -> List[List[EntityTree]]:
    """Groups the stored entity trees of |schema_cls| that need to be merged.

    Reads the external ids reported by
    dao.read_external_ids_of_cls_with_external_id_match, loads the people
    attached to those ids, and buckets every |schema_cls| entity tree found
    on those people by its external_id.

    Args:
        session: Database session the reads are issued against.
        state_code: State the lookup is restricted to.
        schema_cls: Schema class whose entities are grouped.

    Returns:
        A list of lists of EntityTree. Each inner list holds the trees whose
        entities share one external_id; trees whose external_id was not
        among the ids read from the database are left out.

    Raises:
        ValueError: If a collected tree holds an entity that is not an
            instance of |schema_cls|.

    NOTE(review): the previous docstring stated that this "will assert if
    schema_cls does not have a person_id or external_id field". No such
    check is visible in this function; presumably it lives in the dao
    helpers - confirm.
    """
    external_ids = dao.read_external_ids_of_cls_with_external_id_match(
        session, state_code, schema_cls)
    people = dao.read_people_by_cls_external_ids(
        session, state_code, schema_cls, external_ids)
    all_cls_trees = get_all_entity_trees_of_cls(people, schema_cls)

    # Membership is tested once per tree, so build a set a single time
    # instead of scanning the external id sequence on every iteration.
    external_ids_to_merge = set(external_ids)

    external_ids_map: Dict[str, List[EntityTree]] = defaultdict(list)
    for tree in all_cls_trees:
        if not isinstance(tree.entity, schema_cls):
            raise ValueError(f'Unexpected entity type [{type(tree.entity)}]')

        if tree.entity.external_id in external_ids_to_merge:
            external_ids_map[tree.entity.external_id].append(tree)

    return list(external_ids_map.values())
def test_readPeopleByRootExternalIds(self) -> None:
    """Only the person owning the queried external id is returned.

    Two people are stored for ``_STATE_CODE``; one has no external ids and
    the other owns ``_EXTERNAL_ID``. Looking up ``_EXTERNAL_ID`` must return
    only the second person.
    """
    # Arrange
    unmatched_person = schema.StatePerson(
        person_id=1, state_code=_STATE_CODE)
    matched_person = schema.StatePerson(
        person_id=2, state_code=_STATE_CODE)
    matched_person.external_ids = [
        schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=matched_person,
        ),
    ]

    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        session.add(unmatched_person)
        session.add(matched_person)
        session.commit()

        # Act
        found_people = dao.read_people_by_cls_external_ids(
            session,
            _STATE_CODE,
            schema.StatePerson,
            [_EXTERNAL_ID],
        )

        # Assert
        self.assertCountEqual(found_people, [matched_person])
def _nd_read_people(
        session: Session,
        region: str,
        ingested_people: List[StatePerson]
) -> List[StatePerson]:
    """ND specific lookup of the people needed for entity matching.

    The root entity class of |ingested_people| is determined first; the
    external ids of that class are then collected from |ingested_people| and
    used to read people from the database for |region|, with
    populate_back_edges turned off.

    Raises:
        EntityMatchingError: If the root entity class is neither StatePerson
            nor StateSentenceGroup.
    """
    root_cls = get_root_entity_cls(ingested_people)

    is_supported_root = root_cls in (StatePerson, StateSentenceGroup)
    if not is_supported_root:
        error_message = (
            f'For region [{region}] found unexpected root_entity_cls: '
            f'[{root_cls.__name__}]')
        raise EntityMatchingError(error_message, 'root_entity_cls')

    ids_to_read = get_external_ids_of_cls(ingested_people, root_cls)
    return dao.read_people_by_cls_external_ids(
        session,
        region,
        root_cls,
        ids_to_read,
        populate_back_edges=False,
    )
def read_persons_by_root_entity_cls(
    session: Session,
    region: str,
    ingested_people: List[schema.StatePerson],
    allowed_root_entity_classes: Optional[List[Type[DatabaseEntity]]],
) -> List[schema.StatePerson]:
    """Reads every person needed for entity matching of |ingested_people|.

    People are looked up in |region| by the external ids of the root entity
    class found in |ingested_people|, and the region's placeholder persons
    are read as well. The two results are combined, keeping the first
    occurrence of each person_id.

    Raises:
        ValueError: If |allowed_root_entity_classes| is non-empty and the
            root entity class is not one of them.
    """
    root_cls = get_root_entity_cls(ingested_people)

    if allowed_root_entity_classes:
        if root_cls not in allowed_root_entity_classes:
            raise ValueError(
                f"For region [{region}] found unexpected root_entity_cls: [{root_cls.__name__}]. "
                f"Allowed classes: [{allowed_root_entity_classes}]")

    ids_to_read = get_external_ids_of_cls(ingested_people, root_cls)
    logging.info(
        "[Entity Matching] Reading [%s] external ids of class [%s]",
        len(ids_to_read),
        root_cls.__name__,
    )

    matched_persons = dao.read_people_by_cls_external_ids(
        session, region, root_cls, ids_to_read)
    placeholders = dao.read_placeholder_persons(session, region)

    # When the root entity class is not StatePerson, the same placeholder
    # person(s) may show up in both lists, so only the first occurrence of
    # each person_id is kept.
    unique_people = []
    known_person_ids: Set[int] = set()
    for candidate in matched_persons + placeholders:
        candidate_id = candidate.person_id
        if candidate_id in known_person_ids:
            continue
        known_person_ids.add(candidate_id)
        unique_people.append(candidate)

    return unique_people