예제 #1
0
def match_people_and_return_error_count(
        *, db_people: List[entities.Person],
        ingested_people: List[entities.Person]) -> MatchedEntities:
    """Matches every person in |ingested_people| against |db_people|.

    Each ingested person is handed to match_person individually. A person
    whose matching raises an EntityMatchingError is logged, counted as an
    error and left out of the result; the remaining people are still
    processed.

    Args:
        db_people: The candidate people to match against.
        ingested_people: The newly ingested people to be matched.

    Returns:
        A MatchedEntities object holding the people that were processed
        without error, the orphaned entities collected while processing
        them, and the number of people whose matching failed.
    """
    processed_people = []
    all_orphans = []
    failures = 0
    # Shared across all match_person calls for this batch.
    matched_people_by_db_id: Dict[int, entities.Person] = {}

    for ingested_person in ingested_people:
        # Orphans are gathered per person so that nothing is kept for a
        # person whose matching fails part-way through.
        person_orphans: List[Entity] = []
        try:
            match_person(ingested_person=ingested_person,
                         db_people=db_people,
                         orphaned_entities=person_orphans,
                         matched_people_by_db_id=matched_people_by_db_id)
        except EntityMatchingError as e:
            logging.exception(
                'Found %s while matching ingested person. \nPerson: %s',
                e.__class__.__name__, ingested_person)
            increment_error(e.entity_name)
            failures += 1
        else:
            processed_people.append(ingested_person)
            all_orphans.extend(person_orphans)

    return MatchedEntities(people=processed_people,
                           orphaned_entities=all_orphans,
                           error_count=failures)
예제 #2
0
def _match_entity_trees(*, ingested_entity_trees: List[EntityTree],
                        db_entity_trees: List[EntityTree],
                        root_entity_cls: Type) -> MatchResults:
    """Attempts to match all of the |ingested_entity_trees| with one of the
    provided |db_entity_trees|. For all matches, merges the ingested entity
    information into the db entity, and continues entity matching for all
    child entities.

    Args:
        ingested_entity_trees: The ingested trees to match, one call to
            _match_entity_tree per tree.
        db_entity_trees: The database trees that are candidates for a match.
        root_entity_cls: The class of the root entity of the overall match.
            When an ingested tree's entity is an instance of this class, an
            EntityMatchingError for that tree is logged and counted instead
            of being propagated.

    Returns:
        A MatchResults object which contains IndividualMatchResults for each
        successfully processed ingested tree, a list of unmatched DB
        entities, and the number of errors encountered while matching these
        trees (including errors reported by the individual match results).

    Raises:
        EntityMatchingError: If matching fails for a tree whose entity is not
            an instance of |root_entity_cls|.
    """
    individual_match_results: List[IndividualMatchResult] = []
    # Shared with every _match_entity_tree call; presumably populated there
    # with the DB entities that were matched, keyed by their DB id.
    matched_entities_by_db_id: Dict[int, Entity] = {}
    error_count = 0

    for ingested_entity_tree in ingested_entity_trees:
        try:
            match_result = _match_entity_tree(
                ingested_entity_tree=ingested_entity_tree,
                db_entity_trees=db_entity_trees,
                matched_entities_by_db_ids=matched_entities_by_db_id,
                root_entity_cls=root_entity_cls)
        except EntityMatchingError as e:
            if not isinstance(ingested_entity_tree.entity, root_entity_cls):
                # Not at the root layer: let the caller decide how to handle
                # the failure. A bare raise keeps the original traceback.
                raise
            ingested_entity = cast(Entity, ingested_entity_tree.entity)
            logging.exception(
                "Found error while matching ingested %s. \nEntity: %s",
                ingested_entity.get_entity_name(), ingested_entity)
            increment_error(e.entity_name)
            error_count += 1
        else:
            individual_match_results.append(match_result)
            error_count += match_result.error_count

    # Keep track of even unmatched DB entities, as the parent of this entity
    # layer must know about all of its children (even the unmatched ones). If
    # we exclude the unmatched database entities from this list, on write,
    # SQLAlchemy will treat the incomplete child list as an update, and attempt
    # to remove any children with links to the parent in our database but not
    # in the provided list.
    unmatched_db_entities: List[Entity] = [
        db_entity_tree.entity
        for db_entity_tree in db_entity_trees
        if db_entity_tree.entity.get_id() not in matched_entities_by_db_id
    ]

    return MatchResults(individual_match_results, unmatched_db_entities,
                        error_count)