def is_back_edge(self, from_obj, to_field_name) -> bool:
    """Given an object and a field name on that object, returns whether
    traversing from the object to an object in that field would be traveling
    along a 'back edge' in the object graph. A back edge is an edge that
    might introduce a cycle in the graph. Without back edges, the object
    graph should have no cycles.

    Args:
        from_obj: An object that is the origin of this edge
        to_field_name: A string field name for the field on from_obj
            containing the destination object of this edge
    Returns:
        True if a graph edge traveling from from_obj to an object in
        to_field_name is a back edge, i.e. it travels in a direction opposite
        to the class hierarchy.
    """
    from_class_name = from_obj.__class__.__name__

    if isinstance(from_obj, DatabaseEntity):
        to_class_name = \
            from_obj.get_relationship_property_class_name(to_field_name)
    elif isinstance(from_obj, Entity):
        to_class_name = get_non_flat_property_class_name(from_obj,
                                                         to_field_name)
    else:
        raise ValueError(f'Unexpected type [{type(from_obj)}]')

    if to_class_name is None:
        return False

    if from_class_name not in self._class_hierarchy_map:
        raise PersistenceError(
            f"Unable to convert: [{from_class_name}] not in the class "
            f"hierarchy map")

    if to_class_name not in self._class_hierarchy_map:
        raise PersistenceError(
            f"Unable to convert: [{to_class_name}] not in the class "
            f"hierarchy map")

    return self._class_hierarchy_map[from_class_name] >= \
        self._class_hierarchy_map[to_class_name]
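
# Illustrative sketch (not from this module) of the rank comparison above,
# assuming _class_hierarchy_map assigns smaller ranks to classes nearer the
# root of the hierarchy. The class names below are hypothetical placeholders.
_example_hierarchy_map = {'Person': 0, 'Booking': 1, 'Charge': 2}

def _is_back_edge_by_rank(from_class_name: str, to_class_name: str) -> bool:
    # An edge pointing at a class with an equal or smaller rank runs against
    # the hierarchy, so it may introduce a cycle.
    return (_example_hierarchy_map[from_class_name]
            >= _example_hierarchy_map[to_class_name])

assert not _is_back_edge_by_rank('Person', 'Booking')  # forward edge
assert _is_back_edge_by_rank('Charge', 'Booking')      # back edge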
def _check_class_hierarchy_includes_all_expected_classes(
        class_hierarchy: List[str], entities_module: ModuleType) -> None:
    expected_class_names = \
        get_all_entity_class_names_in_module(entities_module)

    given_minus_expected = \
        set(class_hierarchy).difference(expected_class_names)
    expected_minus_given = expected_class_names.difference(class_hierarchy)

    if given_minus_expected or expected_minus_given:
        msg = ""
        if given_minus_expected:
            msg += f"Found unexpected class in class hierarchy: " \
                   f"[{list(given_minus_expected)[0]}]. "
        if expected_minus_given:
            msg += f"Missing expected class in class hierarchy: " \
                   f"[{list(expected_minus_given)[0]}]. "
        raise PersistenceError(msg)
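
# Hypothetical example of the two set differences computed above: 'Bond' is
# given but not expected, while 'Arrest' is expected but missing, so both
# error fragments would be appended to the message before raising.
_expected = {'Person', 'Booking', 'Arrest'}
_given = ['Person', 'Booking', 'Bond']
assert set(_given).difference(_expected) == {'Bond'}    # unexpected class
assert _expected.difference(_given) == {'Arrest'}       # missing class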
def _db_open_booking_matches_ingested_booking(
        *, db_entity: entities.Person,
        ingested_entity: entities.Person) -> bool:
    """Returns True if the external id on the open booking in the database
    matches any of the external ids of the bookings on the ingested person.
    If there is no open booking in the db, return True as well.

    Note: if the same person has been rebooked on subsequent scrapes, and the
    ingested person doesn't have historical bookings, we will not match the
    person entities. This is the same behavior as if the person is rebooked
    on non-consecutive days.
    """
    db_open_bookings = [b for b in db_entity.bookings if is_booking_active(b)]
    if not db_open_bookings:
        return True
    if len(db_open_bookings) > 1:
        raise PersistenceError(
            "db person {} has more than one open booking".format(
                db_entity.person_id))
    return any(db_open_bookings[0].external_id == ingested_booking.external_id
               for ingested_booking in ingested_entity.bookings)
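
# Self-contained sketch using hypothetical stand-in types (not the real
# entities module) of the matching rule above: the single open booking in
# the database must share an external_id with any ingested booking.
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class _FakeBooking:
    external_id: str
    release_date: Optional[str] = None  # treated as open when None

@dataclass
class _FakePerson:
    bookings: List[_FakeBooking] = field(default_factory=list)

_db_person = _FakePerson(bookings=[_FakeBooking('B-1')])
_ingested = _FakePerson(bookings=[_FakeBooking('B-1'), _FakeBooking('B-0')])

_open = [b for b in _db_person.bookings if b.release_date is None]
assert any(_open[0].external_id == b.external_id for b in _ingested.bookings)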
def _save_record_trees(session: Session,
                       root_people: List[SchemaPersonType],
                       orphaned_entities: List[DatabaseEntity],
                       metadata: IngestMetadata):
    """Persists all record trees rooted at |root_people|. Also performs any
    historical snapshot updates required for any entities in any of these
    record trees. Returns the list of persisted (SchemaPersonType) objects.
    """
    # TODO(2382): Once County entity matching is updated to use
    #  DatabaseEntity objects directly, we shouldn't need to add dummy ids /
    #  do a session merge/flush for the county code.
    if metadata.system_level == SystemLevel.COUNTY:
        check_all_objs_have_type(root_people, county_schema.Person)
        _set_dummy_booking_ids(root_people)

        # Merge is recursive for all related entities, so this persists all
        # master entities in all record trees.
        #
        # Merge and flush is required to ensure all master entities,
        # including newly created ones, have primary keys set before
        # performing historical snapshot operations.
        logging.info("Starting Session merge of [%s] persons.",
                     str(len(root_people)))

        merged_root_people = []
        for root_person in root_people:
            merged_root_people.append(session.merge(root_person))
            if len(merged_root_people) % 200 == 0:
                logging.info("Merged [%s] of [%s] people.",
                             str(len(merged_root_people)),
                             str(len(root_people)))

        logging.info("Starting Session merge of [%s] orphaned entities.",
                     str(len(orphaned_entities)))
        merged_orphaned_entities = []
        for entity in orphaned_entities:
            merged_orphaned_entities.append(session.merge(entity))
            if len(merged_orphaned_entities) % 200 == 0:
                logging.info("Merged [%s] of [%s] entities.",
                             str(len(merged_orphaned_entities)),
                             str(len(orphaned_entities)))

        logging.info("Session flush start.")
        session.flush()
        logging.info("Session flush complete.")

        check_all_objs_have_type(merged_root_people, county_schema.Person)
        _overwrite_dummy_booking_ids(merged_root_people)

    elif metadata.system_level == SystemLevel.STATE:
        merged_root_people = root_people
        if orphaned_entities:
            raise PersistenceError("State doesn't use orphaned entities")
        merged_orphaned_entities = []
    else:
        raise PersistenceError(
            f"Unexpected system level [{metadata.system_level}]")

    update_snapshots.update_historical_snapshots(
        session, merged_root_people, merged_orphaned_entities, metadata)

    return merged_root_people
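
# Minimal, self-contained sketch (hypothetical model, in-memory SQLite,
# SQLAlchemy 1.4+) of the merge-then-flush pattern used above: merge()
# returns a session-attached copy of each object, and flush() issues the
# INSERTs so newly created rows get primary keys without committing.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session as _SketchSession, declarative_base

_Base = declarative_base()

class _Row(_Base):
    __tablename__ = 'row'
    row_id = Column(Integer, primary_key=True)
    name = Column(String)

_engine = create_engine('sqlite://')
_Base.metadata.create_all(_engine)

with _SketchSession(_engine) as _session:
    _merged = _session.merge(_Row(name='new'))  # attached copy, no id yet
    assert _merged.row_id is None
    _session.flush()                            # INSERT issued inside the txn
    assert _merged.row_id is not None           # primary key now available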
def _save_record_trees(session: Session,
                       root_people: List[SchemaPersonType],
                       orphaned_entities: List[DatabaseEntity],
                       metadata: IngestMetadata):
    """Persists all record trees rooted at |root_people|. Also performs any
    historical snapshot updates required for any entities in any of these
    record trees. Returns the list of persisted (SchemaPersonType) objects.
    """
    if metadata.system_level == SystemLevel.COUNTY:
        if not all(isinstance(person, county_schema.Person)
                   for person in root_people):
            raise PersistenceError(
                "Not all persons are type county_schema.Person")
        _set_dummy_booking_ids(root_people)

    # Merge is recursive for all related entities, so this persists all
    # master entities in all record trees.
    #
    # Merge and flush is required to ensure all master entities, including
    # newly created ones, have primary keys set before performing historical
    # snapshot operations.
    logging.info("Starting Session merge of [%s] persons.",
                 str(len(root_people)))
    merged_root_people = []
    for root_person in root_people:
        merged_root_people.append(session.merge(root_person))
        if len(merged_root_people) % 200 == 0:
            logging.info("Merged [%s] of [%s] people.",
                         str(len(merged_root_people)),
                         str(len(root_people)))

    logging.info("Starting Session merge of [%s] orphaned entities.",
                 str(len(orphaned_entities)))
    merged_orphaned_entities = []
    for entity in orphaned_entities:
        merged_orphaned_entities.append(session.merge(entity))
        if len(merged_orphaned_entities) % 200 == 0:
            logging.info("Merged [%s] of [%s] entities.",
                         str(len(merged_orphaned_entities)),
                         str(len(orphaned_entities)))

    logging.info("Session flush start.")
    session.flush()
    logging.info("Session flush complete.")

    if metadata.system_level == SystemLevel.COUNTY:
        if not all(isinstance(person, county_schema.Person)
                   for person in merged_root_people):
            raise PersistenceError(
                "Not all persons are type county_schema.Person")
        _overwrite_dummy_booking_ids(merged_root_people)

    update_snapshots.update_historical_snapshots(session,
                                                 merged_root_people,
                                                 merged_orphaned_entities,
                                                 metadata)
    return merged_root_people
def check_not_dirty(session: Session):
    if session.dirty:
        raise PersistenceError(
            "Session unexpectedly dirty - flush before querying the "
            "database.")
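
# Self-contained sketch (hypothetical model, in-memory SQLite) of when
# session.dirty is non-empty: mutating a persistent object marks it dirty
# until the change is flushed, which is exactly what the guard above rejects.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session as _DirtySession, declarative_base

_DirtyBase = declarative_base()

class _Person(_DirtyBase):
    __tablename__ = 'person'
    person_id = Column(Integer, primary_key=True)
    full_name = Column(String)

_dirty_engine = create_engine('sqlite://')
_DirtyBase.metadata.create_all(_dirty_engine)

with _DirtySession(_dirty_engine) as _session:
    _p = _Person(full_name='A')
    _session.add(_p)
    _session.flush()
    _p.full_name = 'B'         # pending, unflushed modification
    assert _session.dirty      # check_not_dirty(_session) would raise here
    _session.flush()
    assert not _session.dirty  # safe to query now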