예제 #1
0
    def test_readPeopleByRootExternalIds_entireTreeReturnedWithOneMatch(
            self) -> None:
        """Check that a person is returned when one of its external ids matches.

        The person is stored with two external ids, but only the first one is
        passed to the DAO. The read is expected to return exactly that person.
        """
        # Arrange
        person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
        external_id_match = schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=person,
        )
        external_id_no_match = schema.StatePersonExternalId(
            person_external_id_id=2,
            external_id=_EXTERNAL_ID2,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=person,
        )
        person.external_ids = [external_id_match, external_id_no_match]

        session = SessionFactory.for_schema_base(StateBase)
        try:
            session.add(person)
            session.commit()

            # Act
            people = dao.read_people_by_cls_external_ids(
                session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

            # Assert
            expected_people = [person]

            self.assertCountEqual(people, expected_people)
        finally:
            # Release the session even when the assertion above fails, so it
            # is never left open once this test has finished.
            session.close()
예제 #2
0
    def test_readPeopleByRootExternalIds_SentenceGroupExternalId(self) -> None:
        """Check that people can be looked up by a sentence group external id.

        The person is stored with two sentence groups; only the external id of
        the first is passed to the DAO, with StateSentenceGroup as the class to
        match on. The read is expected to return exactly that person.
        """
        # Arrange
        person = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
        sentence_group = schema.StateSentenceGroup(
            sentence_group_id=1,
            external_id=_EXTERNAL_ID,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person,
        )
        sentence_group_2 = schema.StateSentenceGroup(
            sentence_group_id=2,
            external_id=_EXTERNAL_ID2,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person,
        )
        person.sentence_groups = [sentence_group, sentence_group_2]

        session = SessionFactory.for_schema_base(StateBase)
        try:
            session.add(person)
            session.commit()

            # Act
            people = dao.read_people_by_cls_external_ids(
                session, _STATE_CODE, schema.StateSentenceGroup,
                [_EXTERNAL_ID])

            # Assert
            expected_people = [person]

            self.assertCountEqual(people, expected_people)
        finally:
            # Release the session even when the assertion above fails, so it
            # is never left open once this test has finished.
            session.close()
예제 #3
0
    def test_readPeopleByRootExternalIds(self) -> None:
        """Check that only the person owning the requested external id is read.

        Two people are stored: one with no external ids and one with an
        external id equal to the value passed to the DAO. Only the latter is
        expected back.
        """
        # Arrange
        person_no_match = schema.StatePerson(person_id=1)
        person_match_external_id = schema.StatePerson(person_id=2)
        person_external_id = schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=person_match_external_id,
        )
        person_match_external_id.external_ids = [person_external_id]

        session = SessionFactory.for_schema_base(StateBase)
        try:
            session.add(person_no_match)
            session.add(person_match_external_id)
            session.commit()

            # Act
            people = dao.read_people_by_cls_external_ids(
                session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

            # Assert
            expected_people = [person_match_external_id]

            self.assertCountEqual(people, expected_people)
        finally:
            # Release the session even when the assertion above fails, so it
            # is never left open once this test has finished.
            session.close()
예제 #4
0
def read_db_entity_trees_of_cls_to_merge(
        session: Session,
        state_code: str,
        schema_cls: Type[StateBase]
) -> List[List[EntityTree]]:
    """Groups the |schema_cls| entity trees that share an external_id.

    Reads the external ids reported by
    dao.read_external_ids_of_cls_with_external_id_match, reads the people
    attached to those ids, and buckets every EntityTree of class |schema_cls|
    found on those people by its entity's external_id.

    Args:
        session: Database session handed through to the DAO reads.
        state_code: State code handed through to the DAO reads.
        schema_cls: Schema class whose entities should be grouped.

    Returns:
        A list of lists of EntityTree, one inner list per external_id, holding
        the trees whose entities carry that external_id and so need merging.

    Raises:
        ValueError: If a collected tree holds an entity that is not an
            instance of |schema_cls|.

    NOTE(review): the earlier docstring said this will assert if schema_cls
    has no person_id or external_id field; that check is not in this function
    and presumably lives in the DAO helpers - confirm.
    """
    external_ids = dao.read_external_ids_of_cls_with_external_id_match(
        session, state_code, schema_cls)
    people = dao.read_people_by_cls_external_ids(
        session, state_code, schema_cls, external_ids)
    all_cls_trees = get_all_entity_trees_of_cls(people, schema_cls)

    # Build the lookup set once; testing membership against the raw collection
    # inside the loop would rescan it for every tree.
    external_ids_to_merge = set(external_ids)

    external_ids_map: Dict[str, List[EntityTree]] = defaultdict(list)
    for tree in all_cls_trees:
        if not isinstance(tree.entity, schema_cls):
            raise ValueError(f'Unexpected entity type [{type(tree.entity)}]')

        # The people read above may carry other |schema_cls| entities whose
        # external ids were not requested; those are left out of the result.
        if tree.entity.external_id in external_ids_to_merge:
            external_ids_map[tree.entity.external_id].append(tree)

    return list(external_ids_map.values())
예제 #5
0
    def test_readPeopleByRootExternalIds(self) -> None:
        """Check that only the person owning the requested external id is read.

        Two people are stored: one with no external ids and one with an
        external id equal to the value passed to the DAO. Only the latter is
        expected back.
        """
        # Arrange
        unmatched_person = schema.StatePerson(
            person_id=1, state_code=_STATE_CODE)
        matched_person = schema.StatePerson(
            person_id=2, state_code=_STATE_CODE)
        matched_external_id = schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=matched_person,
        )
        matched_person.external_ids = [matched_external_id]

        with SessionFactory.using_database(
                self.database_key, autocommit=False) as db_session:
            for new_person in (unmatched_person, matched_person):
                db_session.add(new_person)
            db_session.commit()

            # Act
            found_people = dao.read_people_by_cls_external_ids(
                db_session, _STATE_CODE, schema.StatePerson, [_EXTERNAL_ID])

            # Assert
            self.assertCountEqual(found_people, [matched_person])
예제 #6
0
def _nd_read_people(session: Session, region: str,
                    ingested_people: List[StatePerson]) -> List[StatePerson]:
    """ND-specific lookup of the people needed for entity matching.

    Determines the root entity class of |ingested_people|, gathers the
    external ids of that class from them, and reads the people for those ids
    in |region| through the DAO with populate_back_edges=False.

    Raises:
        EntityMatchingError: If the root entity class is neither StatePerson
            nor StateSentenceGroup.
    """
    root_entity_cls = get_root_entity_cls(ingested_people)

    if root_entity_cls in (StatePerson, StateSentenceGroup):
        external_ids_to_read = get_external_ids_of_cls(
            ingested_people, root_entity_cls)
        return dao.read_people_by_cls_external_ids(
            session,
            region,
            root_entity_cls,
            external_ids_to_read,
            populate_back_edges=False,
        )

    # Any other root class is not supported for this region.
    raise EntityMatchingError(
        f'For region [{region}] found unexpected root_entity_cls: '
        f'[{root_entity_cls.__name__}]', 'root_entity_cls')
예제 #7
0
def read_persons_by_root_entity_cls(
    session: Session,
    region: str,
    ingested_people: List[schema.StatePerson],
    allowed_root_entity_classes: Optional[List[Type[DatabaseEntity]]],
) -> List[schema.StatePerson]:
    """Reads every person needed to entity-match |ingested_people|.

    The root entity class of |ingested_people| decides which external ids are
    looked up in |region|. Placeholder persons for the region are read as well,
    and the two results are combined with duplicates (by person_id) removed.

    Raises:
        ValueError: If |allowed_root_entity_classes| is non-empty and the root
            entity class is not one of its members.
    """
    root_entity_cls = get_root_entity_cls(ingested_people)

    # A missing or empty allow-list means no restriction is applied.
    has_allow_list = bool(allowed_root_entity_classes)
    if has_allow_list and root_entity_cls not in allowed_root_entity_classes:
        raise ValueError(
            f"For region [{region}] found unexpected root_entity_cls: [{root_entity_cls.__name__}]. "
            f"Allowed classes: [{allowed_root_entity_classes}]")

    ids_to_read = get_external_ids_of_cls(ingested_people, root_entity_cls)
    logging.info(
        "[Entity Matching] Reading [%s] external ids of class [%s]",
        len(ids_to_read),
        root_entity_cls.__name__,
    )

    matched_persons = dao.read_people_by_cls_external_ids(
        session, region, root_entity_cls, ids_to_read)
    placeholder_persons = dao.read_placeholder_persons(session, region)

    # If the root class is something other than StatePerson, the same
    # placeholder person can show up in both reads, so keep only the first
    # occurrence of each person_id.
    unique_persons = []
    visited_person_ids: Set[int] = set()
    for candidate in matched_persons + placeholder_persons:
        if candidate.person_id in visited_person_ids:
            continue
        visited_person_ids.add(candidate.person_id)
        unique_persons.append(candidate)

    return unique_persons