Exemple #1
0
def get_total_entities_of_cls(
        persons: List[schema.StatePerson], cls: Type) -> int:
    """Returns how many entities of type |cls| are found across the entity
    graphs rooted at |persons|.

    Every element of |persons| is first validated to be a schema.StatePerson.
    The result is the size of the collection that get_all_entities_of_cls
    returns for |persons| and |cls|.
    """
    check_all_objs_have_type(persons, schema.StatePerson)
    matching_entities = get_all_entities_of_cls(persons, cls)
    return len(matching_entities)
def read_persons(
        session: Session, region: str,
        ingested_people: List[schema.StatePerson]) -> List[schema.StatePerson]:
    """Fetches the people from the database that entity matching should
    consider for the given |region| and |ingested_people|.

    The lookup is not yet narrowed: the result is whatever dao.read_people
    returns for |session|. |region| is only used in the log line and
    |ingested_people| is only type-checked.
    """
    check_all_objs_have_type(ingested_people, schema.StatePerson)

    # TODO(1868): more specific query
    people_from_db = dao.read_people(session)
    num_read = len(people_from_db)
    logging.info("Read [%d] people from DB in region [%s]", num_read, region)
    return people_from_db
Exemple #3
0
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Collects the external ids of every entity of type |cls| reachable from
    the provided |persons| trees and returns them as a set.

    Raises an EntityMatchingError as soon as an entity of type |cls| is found
    for which get_external_ids_from_entity yields nothing.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    collected: Set[str] = set()
    for found in get_all_entities_of_cls(persons, cls):
        found_ids = get_external_ids_from_entity(found)
        if found_ids:
            collected.update(found_ids)
            continue
        # Every entity of this class is expected to carry an external id.
        raise EntityMatchingError(
            f'Expected all external_ids to be present in cls [{cls.__name__}]',
            cls.__name__)
    return collected
Exemple #4
0
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Gathers, into a single set, the external ids of all entities of type
    |cls| that appear in the provided |persons| trees.

    If any such entity has no external ids, an EntityMatchingError naming the
    offending entity and |cls| is raised.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    def _fail_missing_ids(bad_entity):
        # Build the message first, then look up the entity's class id name,
        # matching the argument evaluation order of a direct raise.
        message = (
            f"Expected external_ids to be non-empty for entity [{bad_entity}] with class [{cls.__name__}]"
        )
        raise EntityMatchingError(message, bad_entity.get_class_id_name())

    result: Set[str] = set()
    for candidate in get_all_entities_of_cls(persons, cls):
        candidate_ids = get_external_ids_from_entity(candidate)
        if not candidate_ids:
            _fail_missing_ids(candidate)
        result.update(candidate_ids)
    return result
Exemple #5
0
def get_root_entity_cls(
        ingested_persons: List[schema.StatePerson]) -> Type[DatabaseEntity]:
    """
    Determines the highest entity class within |ingested_persons| whose
    objects are not placeholders. The class is returned when one is found;
    otherwise an EntityMatchingError is raised (this includes the case where
    |ingested_persons| is empty).

    Note: Only use this with persons ingested directly from a region, never
    with persons that have already been through entity matching. The root
    class is found via DFS, which assumes that a) each passed in StatePerson
    is a tree rather than a DAG (one parent per entity) and b) the passed in
    graph is structurally symmetrical.
    """
    check_all_objs_have_type(ingested_persons, schema.StatePerson)

    # Only the first person is examined.
    if ingested_persons:
        first_person_root = _get_root_entity_helper(ingested_persons[0])
        if first_person_root is not None:
            return first_person_root

    raise EntityMatchingError(
        "Could not find root class for ingested persons", 'state_person')
Exemple #6
0
def _save_record_trees(session: Session,
                       root_people: List[SchemaPersonType],
                       orphaned_entities: List[DatabaseEntity],
                       metadata: IngestMetadata):
    """Persists every record tree rooted at |root_people| and then applies
    the historical snapshot updates needed for the entities in those trees.

    For the COUNTY system level, each root person and each orphaned entity is
    merged into |session| and the session is flushed before snapshots are
    updated. For the STATE system level nothing is merged here, and passing
    any |orphaned_entities| raises a PersistenceError. Any other system level
    also raises a PersistenceError.

    Returns the list of persisted (SchemaPersonType) objects: the merged
    people for COUNTY, or |root_people| itself for STATE.
    """

    # TODO(2382): Once County entity matching is updated to use
    #  DatabaseEntity objects directly, we shouldn't need to add dummy ids / do
    #  a session merge/flush for the county code.
    if metadata.system_level == SystemLevel.COUNTY:
        check_all_objs_have_type(root_people, county_schema.Person)
        _set_dummy_booking_ids(root_people)

        # How many merges happen between two progress log lines.
        progress_interval = 200

        # session.merge recurses through all related entities, so merging the
        # roots persists every master entity in every record tree.
        #
        # The merge followed by a flush guarantees that all master entities,
        # newly created ones included, have primary keys assigned before any
        # historical snapshot work happens.
        num_people = len(root_people)
        logging.info("Starting Session merge of [%s] persons.",
                     str(num_people))

        persisted_people = []
        for done, person in enumerate(root_people, start=1):
            persisted_people.append(session.merge(person))
            if done % progress_interval == 0:
                logging.info("Merged [%s] of [%s] people.",
                             str(done),
                             str(num_people))

        num_orphans = len(orphaned_entities)
        logging.info("Starting Session merge of [%s] orphaned entities.",
                     str(num_orphans))

        persisted_orphans = []
        for done, orphan in enumerate(orphaned_entities, start=1):
            persisted_orphans.append(session.merge(orphan))
            if done % progress_interval == 0:
                logging.info("Merged [%s] of [%s] entities.",
                             str(done),
                             str(num_orphans))

        logging.info("Session flush start.")
        session.flush()
        logging.info("Session flush complete.")

        check_all_objs_have_type(persisted_people, county_schema.Person)
        _overwrite_dummy_booking_ids(persisted_people)

    elif metadata.system_level == SystemLevel.STATE:
        # State trees are handed to the snapshot update as-is.
        if orphaned_entities:
            raise PersistenceError("State doesn't use orphaned entities")
        persisted_people = root_people
        persisted_orphans = []
    else:
        raise PersistenceError(
            f"Unexpected system level [{metadata.system_level}]")

    update_snapshots.update_historical_snapshots(
        session, persisted_people, persisted_orphans, metadata)

    return persisted_people