Example #1
    def is_back_edge(self, from_obj, to_field_name) -> bool:
        """Given an object and a field name on that object, returns whether
        traversing from the obj to an object in that field would be traveling
        along a 'back edge' in the object graph. A back edge is an edge that
        might introduce a cycle in the graph.
        Without back edges, the object graph should have no cycles.

        Args:
            from_obj: An object that is the origin of this edge
            to_field_name: A string field name for the field on from_obj
                containing the destination object of this edge
        Returns:
            True if a graph edge traveling from from_obj to an object in
                to_field_name is a back edge, i.e. it travels in a direction
                opposite to the class hierarchy.
        """
        from_class_name = from_obj.__class__.__name__

        if isinstance(from_obj, DatabaseEntity):
            to_class_name = \
                from_obj.get_relationship_property_class_name(to_field_name)
        elif isinstance(from_obj, Entity):
            to_class_name = get_non_flat_property_class_name(from_obj,
                                                             to_field_name)
        else:
            raise ValueError(f'Unexpected type [{type(from_obj)}]')

        if to_class_name is None:
            return False

        if from_class_name not in self._class_hierarchy_map:
            raise PersistenceError(
                f"Unable to convert: [{from_class_name}] not in the class "
                f"hierarchy map")

        if to_class_name not in self._class_hierarchy_map:
            raise PersistenceError(
                f"Unable to convert: [{to_class_name}] not in the class "
                f"hierarchy map")

        return self._class_hierarchy_map[from_class_name] >= \
            self._class_hierarchy_map[to_class_name]
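
A minimal, self-contained sketch of the rank comparison used above, assuming a toy class-hierarchy map (the class names and rank values below are illustrative, not the real entity hierarchy):

# Illustrative only: a made-up map from class name to its depth in the class
# hierarchy (smaller rank = closer to the root).
_class_hierarchy_map = {
    'Person': 0,
    'Booking': 1,
    'Charge': 2,
}

def is_back_edge_by_name(from_class_name: str, to_class_name: str) -> bool:
    # An edge is a back edge when it points at the same level or back up the
    # hierarchy (e.g. Charge -> Booking), since such edges can form cycles.
    return _class_hierarchy_map[from_class_name] >= \
        _class_hierarchy_map[to_class_name]

assert not is_back_edge_by_name('Person', 'Booking')  # forward edge
assert is_back_edge_by_name('Charge', 'Booking')      # back edge
assert is_back_edge_by_name('Booking', 'Booking')     # same class also counts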
Example #2
def _check_class_hierarchy_includes_all_expected_classes(
        class_hierarchy: List[str], entities_module: ModuleType) -> None:
    expected_class_names = \
        get_all_entity_class_names_in_module(entities_module)

    given_minus_expected = \
        set(class_hierarchy).difference(expected_class_names)
    expected_minus_given = expected_class_names.difference(class_hierarchy)

    if given_minus_expected or expected_minus_given:
        msg = ""
        if given_minus_expected:
            msg += f"Found unexpected class in class hierarchy: " \
                f"[{list(given_minus_expected)[0]}]. "
        if expected_minus_given:
            msg += f"Missing expected class in class hierarchy: " \
                f"[{list(expected_minus_given)[0]}]. "

        raise PersistenceError(msg)
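
A stripped-down sketch of the same symmetric-difference check, using plain name sets in place of an entities module (the class names are made up):

from typing import List, Set

def check_hierarchy_matches(class_hierarchy: List[str],
                            expected_class_names: Set[str]) -> None:
    # Mirrors the check above: anything extra or missing is an error.
    given_minus_expected = set(class_hierarchy) - expected_class_names
    expected_minus_given = expected_class_names - set(class_hierarchy)
    if given_minus_expected or expected_minus_given:
        raise ValueError(
            f"Unexpected in hierarchy: {sorted(given_minus_expected)}; "
            f"missing from hierarchy: {sorted(expected_minus_given)}")

check_hierarchy_matches(['Person', 'Booking'], {'Person', 'Booking'})  # passes
# check_hierarchy_matches(['Person'], {'Person', 'Booking'})  # would raise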
Example #3
def _db_open_booking_matches_ingested_booking(
        *, db_entity: entities.Person,
        ingested_entity: entities.Person) -> bool:
    """Returns True if the external id on the open booking in the database
    matches any of the external ids of the bookings on the ingested person.
    If there is no open booking in the db, return True as well.

    Note: if the same person has been rebooked on subsequent scrapes, and the
    ingested person doesn't have historical bookings, we will not match the
    person entities. This is the same behavior as if the person is rebooked on
    non-consecutive days.
    """
    db_open_bookings = [b for b in db_entity.bookings if is_booking_active(b)]
    if not db_open_bookings:
        return True
    if len(db_open_bookings) > 1:
        raise PersistenceError(
            "db person {} has more than one open booking".format(
                db_entity.person_id))
    return any(db_open_bookings[0].external_id == ingested_booking.external_id
               for ingested_booking in ingested_entity.bookings)
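
A self-contained sketch of this matching rule, using stand-in dataclasses instead of the real entities.Person/Booking classes (the field names here, e.g. release_date as the "open booking" marker, are assumptions):

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class FakeBooking:
    external_id: str
    release_date: Optional[str] = None  # None == still open (an assumption)

@dataclass
class FakePerson:
    bookings: List[FakeBooking] = field(default_factory=list)

def open_booking_matches(db_person: FakePerson,
                         ingested_person: FakePerson) -> bool:
    open_bookings = [b for b in db_person.bookings if b.release_date is None]
    if not open_bookings:
        return True
    if len(open_bookings) > 1:
        raise ValueError("db person has more than one open booking")
    return any(open_bookings[0].external_id == b.external_id
               for b in ingested_person.bookings)

db_person = FakePerson(bookings=[FakeBooking('b1')])
assert open_booking_matches(db_person, FakePerson([FakeBooking('b1')]))
assert not open_booking_matches(db_person, FakePerson([FakeBooking('b2')]))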
Example #4
def _save_record_trees(session: Session,
                       root_people: List[SchemaPersonType],
                       orphaned_entities: List[DatabaseEntity],
                       metadata: IngestMetadata):
    """Persists all record trees rooted at |root_people|. Also performs any
    historical snapshot updates required for any entities in any of these
    record trees. Returns the list of persisted (SchemaPersonType) objects.
    """

    # TODO(2382): Once County entity matching is updated to use
    #  DatabaseEntity objects directly, we shouldn't need to add dummy ids / do
    #  a session merge/flush for the county code.
    if metadata.system_level == SystemLevel.COUNTY:
        check_all_objs_have_type(root_people, county_schema.Person)
        _set_dummy_booking_ids(root_people)

        # Merge is recursive for all related entities, so this persists all
        # master entities in all record trees
        #
        # Merge and flush is required to ensure all master entities, including
        # newly created ones, have primary keys set before performing historical
        # snapshot operations

        logging.info("Starting Session merge of [%s] persons.",
                     str(len(root_people)))

        merged_root_people = []
        for root_person in root_people:
            merged_root_people.append(session.merge(root_person))
            if len(merged_root_people) % 200 == 0:
                logging.info("Merged [%s] of [%s] people.",
                             str(len(merged_root_people)),
                             str(len(root_people)))

        logging.info("Starting Session merge of [%s] orphaned entities.",
                     str(len(orphaned_entities)))
        merged_orphaned_entities = []
        for entity in orphaned_entities:
            merged_orphaned_entities.append(session.merge(entity))
            if len(merged_orphaned_entities) % 200 == 0:
                logging.info("Merged [%s] of [%s] entities.",
                             str(len(merged_orphaned_entities)),
                             str(len(orphaned_entities)))

        logging.info("Session flush start.")
        session.flush()
        logging.info("Session flush complete.")

        check_all_objs_have_type(merged_root_people, county_schema.Person)
        _overwrite_dummy_booking_ids(merged_root_people)

    elif metadata.system_level == SystemLevel.STATE:
        merged_root_people = root_people
        if orphaned_entities:
            raise PersistenceError("State doesn't use orphaned entities")
        merged_orphaned_entities = []
    else:
        raise PersistenceError(
            f"Unexpected system level [{metadata.system_level}]")

    update_snapshots.update_historical_snapshots(
        session, merged_root_people, merged_orphaned_entities, metadata)

    return merged_root_people
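
check_all_objs_have_type is called above but not defined in these examples. A minimal sketch of what such a helper might do, assuming it simply validates isinstance over the list (the real implementation and error type may differ):

from typing import List, Type

def check_all_objs_have_type(objs: List[object], expected_type: Type) -> None:
    """Raises if any object in |objs| is not an instance of |expected_type|."""
    for obj in objs:
        if not isinstance(obj, expected_type):
            # The real helper presumably raises PersistenceError; TypeError is
            # used here only to keep the sketch self-contained.
            raise TypeError(
                f"Expected [{expected_type.__name__}], found "
                f"[{type(obj).__name__}]")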
Example #5
def _save_record_trees(session: Session, root_people: List[SchemaPersonType],
                       orphaned_entities: List[DatabaseEntity],
                       metadata: IngestMetadata):
    """Persists all record trees rooted at |root_people|. Also performs any
    historical snapshot updates required for any entities in any of these
    record trees. Returns the list of persisted (SchemaPersonType) objects.
    """

    if metadata.system_level == SystemLevel.COUNTY:
        if not all(
                isinstance(person, county_schema.Person)
                for person in root_people):
            raise PersistenceError(
                "Not all persons are type county_schema.Person")

        _set_dummy_booking_ids(root_people)

    # Merge is recursive for all related entities, so this persists all master
    # entities in all record trees
    #
    # Merge and flush is required to ensure all master entities, including
    # newly created ones, have primary keys set before performing historical
    # snapshot operations

    logging.info("Starting Session merge of [%s] persons.",
                 str(len(root_people)))

    merged_root_people = []
    for root_person in root_people:
        merged_root_people.append(session.merge(root_person))
        if len(merged_root_people) % 200 == 0:
            logging.info("Merged [%s] of [%s] people.",
                         str(len(merged_root_people)), str(len(root_people)))

    logging.info("Starting Session merge of [%s] orphaned entities.",
                 str(len(orphaned_entities)))
    merged_orphaned_entities = []
    for entity in orphaned_entities:
        merged_orphaned_entities.append(session.merge(entity))
        if len(merged_orphaned_entities) % 200 == 0:
            logging.info("Merged [%s] of [%s] entities.",
                         str(len(merged_orphaned_entities)),
                         str(len(orphaned_entities)))

    logging.info("Session flush start.")
    session.flush()
    logging.info("Session flush complete.")

    if metadata.system_level == SystemLevel.COUNTY:
        if not all(
                isinstance(person, county_schema.Person)
                for person in merged_root_people):
            raise PersistenceError(
                "Not all persons are type county_schema.Person")

        _overwrite_dummy_booking_ids(merged_root_people)

    update_snapshots.update_historical_snapshots(session, merged_root_people,
                                                 merged_orphaned_entities,
                                                 metadata)

    return merged_root_people
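
Both versions of _save_record_trees repeat the same merge-and-log-progress loop for people and for orphaned entities. A hedged sketch of how that loop could be factored into a single helper (the helper name and log wording are illustrative, not from the codebase):

import logging
from typing import List, TypeVar

from sqlalchemy.orm import Session

T = TypeVar('T')

def _merge_all_with_progress(session: Session,
                             objs: List[T],
                             noun: str) -> List[T]:
    """Merges every object into |session|, logging progress every 200 items."""
    logging.info("Starting Session merge of [%s] %s.", len(objs), noun)
    merged: List[T] = []
    for obj in objs:
        merged.append(session.merge(obj))
        if len(merged) % 200 == 0:
            logging.info("Merged [%s] of [%s] %s.",
                         len(merged), len(objs), noun)
    return merged

# e.g. merged_root_people = _merge_all_with_progress(session, root_people,
#                                                    "persons")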
Example #6
def check_not_dirty(session: Session):
    if session.dirty:
        raise PersistenceError(
            "Session unexpectedly dirty - flush before querying the database.")