def _copy_entity_fields_to_historical_snapshot(
        self, entity: DatabaseEntity,
        historical_snapshot: DatabaseEntity) -> None:
    """Mirrors the column values of |entity| onto |historical_snapshot|.

    Only the properties reported by |_get_shared_column_property_names| for
    the two classes are copied; a property that exists on just one of the
    tables is left untouched.

    The master primary key is the single exception: its destination property
    on the historical class is resolved from the master key's *column* name
    rather than its property name (module assumption #2), and the key value
    is always written.
    """
    historical_class = type(historical_snapshot)

    shared_property_names = self._get_shared_column_property_names(
        type(entity), historical_class)
    for shared_property_name in shared_property_names:
        setattr(historical_snapshot, shared_property_name,
                getattr(entity, shared_property_name))

    # See module assumption #2
    master_key_column = entity.get_primary_key_column_name()  # type: ignore
    master_key_property = historical_class.get_property_name_by_column_name(
        master_key_column)
    master_key_value = entity.get_primary_key()  # type: ignore
    setattr(historical_snapshot, master_key_property, master_key_value)
def get_external_ids_from_entity(entity: DatabaseEntity):
    """Returns a list of the external id values found on |entity|.

    For a schema.StatePerson this is the |external_id| of every truthy item
    in |entity.external_ids|. For any other entity it is a one-element list
    holding |entity.get_external_id()| when that value is truthy, and an
    empty list otherwise.
    """
    if isinstance(entity, schema.StatePerson):
        return [person_external_id.external_id
                for person_external_id in entity.external_ids
                if person_external_id]

    if entity.get_external_id():
        return [entity.get_external_id()]
    return []
def default_merge_flat_fields(
        *, new_entity: DatabaseEntity,
        old_entity: DatabaseEntity) -> DatabaseEntity:
    """Copies every set flat (non-relationship) field of |new_entity| onto
    |old_entity| and returns |old_entity|.

    Two fields are never copied: the class id field of |old_entity|, and
    'status' when |new_entity| reports a default status.
    """
    set_flat_field_names = get_set_entity_field_names(
        new_entity, EntityFieldType.FLAT_FIELD)

    for flat_field_name in set_flat_field_names:
        is_primary_key = flat_field_name == old_entity.get_class_id_name()
        # Do not overwrite with default status
        if is_primary_key or (flat_field_name == 'status'
                              and new_entity.has_default_status()):
            continue

        merged_value = new_entity.get_field(flat_field_name)
        old_entity.set_field(flat_field_name, merged_value)

    return old_entity
def remove_child_from_entity(*, entity: DatabaseEntity,
                             child_field_name: str,
                             child_to_remove: DatabaseEntity):
    """Detaches |child_to_remove| from the |child_field_name| field of
    |entity|, if it is there.

    A list-valued field has the child removed from the list in place; a
    single-entity field equal to the child becomes None. In every case the
    resulting value is written back with |entity.set_field|.
    """
    current_value = entity.get_field(child_field_name)

    if isinstance(current_value, list):
        if child_to_remove in current_value:
            current_value.remove(child_to_remove)
        updated_value = current_value
    elif isinstance(current_value, DatabaseEntity) \
            and current_value == child_to_remove:
        updated_value = None
    else:
        # Nothing to remove; the field is written back unchanged.
        updated_value = current_value

    entity.set_field(child_field_name, updated_value)
def get_external_id_keys_from_multiple_id_entity(
        entity: DatabaseEntity) -> List[str]:
    """Builds one string key per item in the 'external_ids' field of
    |entity|.

    Each key has the form '<id_type>|<external_id>', in the order the items
    are returned by |entity.get_field_as_list|.
    """
    return [
        external_id.id_type + '|' + external_id.external_id
        for external_id in entity.get_field_as_list('external_ids')
    ]
def add_child_to_entity(*, entity: DatabaseEntity, child_field_name: str,
                        child_to_add: DatabaseEntity):
    """Attaches |child_to_add| to the |child_field_name| field of |entity|.

    A list-valued field gets the child appended unless it is already in the
    list. A single-valued field is overwritten with the child.

    Raises an EntityMatchingError if a single-valued field already holds a
    truthy value different from |child_to_add|.
    """
    child_field = entity.get_field(child_field_name)

    if isinstance(child_field, list):
        if child_to_add not in child_field:
            child_field.append(child_to_add)
        entity.set_field(child_field_name, child_field)
        return

    if child_field and child_field != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child {child_to_add} to entity {entity}, "
            f"but {child_field_name} already had different value "
            f"{child_field}", entity.get_entity_name())

    entity.set_field(child_field_name, child_to_add)
def _get_root_entity_helper(
        entity: DatabaseEntity) -> Optional[Type[DatabaseEntity]]:
    """Returns the class of the first non-placeholder entity found at or
    below |entity|, or None if there is none.

    For a placeholder, the search recurses into the first item of each set
    forward-edge field, in the order those field names are returned; other
    items of the same field are not inspected.
    """
    if is_placeholder(entity):
        forward_edge_names = get_set_entity_field_names(
            entity, EntityFieldType.FORWARD_EDGE)
        for forward_edge_name in forward_edge_names:
            first_child = entity.get_field_as_list(forward_edge_name)[0]
            root_class = _get_root_entity_helper(first_child)
            if root_class is not None:
                return root_class
        return None

    return entity.__class__
def get_or_create_placeholder_child(parent_entity: DatabaseEntity,
                                    child_field_name: str,
                                    child_class: Type[DatabaseEntity],
                                    **child_kwargs):
    """Returns a placeholder child from the |child_field_name| field of
    |parent_entity|, creating one if none exists.

    If the field already holds one or more placeholder entities, the first
    of them is returned. Otherwise a new |child_class| instance is built
    from |child_kwargs|, appended to the field's children, written back via
    |set_field_from_list| and returned.

    Raises an EntityMatchingError if the entity built from |child_kwargs| is
    not itself a placeholder.
    """
    children = parent_entity.get_field_as_list(child_field_name)

    existing_placeholders = list(filter(is_placeholder, children))
    if existing_placeholders:
        return existing_placeholders[0]

    logging.info(
        'No placeholder children on entity with id [%s] of type [%s] exist '
        'on field [%s]. Have to create one.',
        parent_entity.get_external_id(),
        parent_entity.get_entity_name(),
        child_field_name)

    created_child = child_class(**child_kwargs)
    if not is_placeholder(created_child):
        raise EntityMatchingError(
            f'Child created with kwargs is not a placeholder [{child_kwargs}]',
            parent_entity.get_entity_name())

    children.append(created_child)
    parent_entity.set_field_from_list(child_field_name, children)
    return created_child
def _get_all_database_entity_field_names(entity: DatabaseEntity,
                                         entity_field_type: EntityFieldType,
                                         direction_checker):
    """Returns the set of field names on |entity| that belong to the given
    |entity_field_type|.

    Relationship properties are split into back edges and forward edges by
    |direction_checker.is_back_edge|. Column properties that are not foreign
    keys count as flat fields. EntityFieldType.ALL yields the union of all
    four groups. This function does not check whether a field currently
    holds a value.

    Raises a ValueError for an unrecognized |entity_field_type|.
    """
    back_edges = set()
    forward_edges = set()
    for relationship_name in entity.get_relationship_property_names():
        is_back_edge = direction_checker.is_back_edge(entity,
                                                      relationship_name)
        target = back_edges if is_back_edge else forward_edges
        target.add(relationship_name)

    foreign_keys = set(entity.get_foreign_key_names())
    flat_fields = {
        column_name
        for column_name in entity.get_column_property_names()
        if column_name not in foreign_keys
    }

    names_by_field_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FOREIGN_KEYS, foreign_keys),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
        (EntityFieldType.ALL,
         flat_fields | foreign_keys | forward_edges | back_edges),
    )
    for known_field_type, field_names in names_by_field_type:
        if entity_field_type is known_field_type:
            return field_names

    raise ValueError(
        f"Unrecognized EntityFieldType [{entity_field_type}] on entity [{entity}]"
    )
def register_entity(self, schema_object: DatabaseEntity) -> None:
    """Creates a (_SnapshotContext) for |schema_object| and stores it in
    the registry, keyed by class name and then by primary key.

    Raises (ValueError) if an entity with the same class name and primary
    key has already been registered.
    """
    type_name = type(schema_object).__name__
    contexts_for_type = self.snapshot_contexts.setdefault(type_name, {})

    primary_key = schema_object.get_primary_key()
    if primary_key in contexts_for_type:
        raise ValueError(
            "Entity already registered with type {type} and primary key "
            "{primary_key}".format(type=type_name, primary_key=primary_key))

    contexts_for_type[primary_key] = _SnapshotContext(
        schema_object=schema_object)
def get_all_db_objs_from_tree(db_obj: DatabaseEntity,
                              result=None) -> Set[DatabaseEntity]:
    """Collects |db_obj| and everything reachable from it through set
    forward-edge fields into a set, which is returned.

    |result| is the accumulator shared across the recursion: when provided
    it is mutated and returned; when omitted a new set is created. An object
    already present in the accumulator is not traversed again.
    """
    visited = set() if result is None else result

    if db_obj not in visited:
        visited.add(db_obj)
        forward_edge_names = get_set_entity_field_names(
            db_obj, EntityFieldType.FORWARD_EDGE)
        for forward_edge_name in forward_edge_names:
            children = db_obj.get_field_as_list(forward_edge_name)
            get_all_db_objs_from_trees(children, visited)

    return visited
def _base_entity_match(
        a: DatabaseEntity,
        b: DatabaseEntity,
        skip_fields: Set[str],
        allow_null_mismatch: bool = False
) -> bool:
    """Returns whether two objects of the same type are an entity match.

    A placeholder never matches anything. If either entity has an external
    id, the match is decided by external id equality alone. Otherwise every
    flat field that is set on either entity must be equal on both.

    Args:
        a: The first entity to match
        b: The second entity to match
        skip_fields: A set of names of fields that should be ignored when
            comparing flat fields. The class id field of |a| is always
            ignored as well.
        allow_null_mismatch: Allow for two objects to still match if one has
            a null value in a field where the other's is nonnull.
    """
    # Placeholders never match
    if is_placeholder(a) or is_placeholder(b):
        return False

    # Compare external ids if one is present
    if a.get_external_id() or b.get_external_id():
        return a.get_external_id() == b.get_external_id()

    # Compare all flat fields of the two entities
    flat_field_names = get_set_entity_field_names(
        a, EntityFieldType.FLAT_FIELD).union(
            get_set_entity_field_names(b, EntityFieldType.FLAT_FIELD))

    for flat_field_name in flat_field_names:
        # Skip primary key and explicitly ignored fields
        if flat_field_name == a.get_class_id_name() \
                or flat_field_name in skip_fields:
            continue

        a_value = a.get_field(flat_field_name)
        b_value = b.get_field(flat_field_name)

        # Do not disqualify a match if one of the fields is null
        null_is_tolerated = allow_null_mismatch and (a_value is None
                                                     or b_value is None)
        if not null_is_tolerated and a_value != b_value:
            return False

    return True
def _get_related_entities(self,
                          entity: DatabaseEntity) -> List[DatabaseEntity]:
    """Returns list of all entities related to |entity|

    Every relationship property of |entity| is read; list-valued
    relationships contribute all of their items and single-valued ones
    contribute their value when it is not None. When the system level is
    SystemLevel.STATE, relationships that the state direction checker
    classifies as back edges are skipped.
    """
    # TODO(#1145): For County schema, fix direction checker to gracefully
    # handle the fact that SentenceRelationship exists in the schema
    # but not in the entity layer.
    # The system level and the direction checker do not depend on the
    # relationship being inspected, so resolve them once up front instead of
    # rebuilding the checker on every loop iteration.
    direction_checker = None
    if self.get_system_level() == SystemLevel.STATE:
        direction_checker = \
            SchemaEdgeDirectionChecker.state_direction_checker()

    related_entities = []
    for relationship_name in entity.get_relationship_property_names():
        # Skip back edges
        if direction_checker is not None \
                and direction_checker.is_back_edge(entity,
                                                   relationship_name):
            continue

        related = getattr(entity, relationship_name)

        # Relationship can return either a list or a single item
        if isinstance(related, list):
            related_entities.extend(related)
        elif related is not None:
            related_entities.append(related)

    return related_entities
def _base_entity_match(a: DatabaseEntity, b: DatabaseEntity) -> bool:
    """Returns whether |a| and |b| are an entity match.

    A placeholder never matches anything. If either entity has an external
    id, the match is decided by external id equality alone. Otherwise every
    flat field set on either entity, other than the class id field of |a|,
    must be equal on both.
    """
    # Placeholders never match
    if is_placeholder(a) or is_placeholder(b):
        return False

    # Compare external ids if one is present
    if a.get_external_id() or b.get_external_id():
        return a.get_external_id() == b.get_external_id()

    # Compare all flat fields of the two entities
    flat_names_on_a = get_set_entity_field_names(a,
                                                 EntityFieldType.FLAT_FIELD)
    flat_names_on_b = get_set_entity_field_names(b,
                                                 EntityFieldType.FLAT_FIELD)

    for flat_field_name in flat_names_on_a | flat_names_on_b:
        # The primary key is never part of the comparison
        if flat_field_name != a.get_class_id_name() \
                and a.get_field(flat_field_name) != b.get_field(
                    flat_field_name):
            return False

    return True
def snapshot_context(self, entity: DatabaseEntity) -> _SnapshotContext:
    """Returns the (_SnapshotContext) stored for |entity|, looked up by the
    entity's class name and then by its primary key.
    """
    return self.snapshot_contexts[type(entity).__name__][
        entity.get_primary_key()]
def convert_to_placeholder(entity: DatabaseEntity):
    """Resets the set flat fields of |entity| in place.

    The class id field and 'state_code' are left as they are. 'status',
    'incarceration_type', 'court_type' and 'agent_type' are overwritten with
    fixed values instead of being cleared. Every other set flat field is
    cleared.
    """
    set_flat_field_names = get_set_entity_field_names(
        entity, EntityFieldType.FLAT_FIELD)

    # Fields that receive a fixed value rather than being cleared.
    # NOTE(review): presumably these are the values is_placeholder treats as
    # "unset" for these columns - confirm against is_placeholder.
    fixed_values = {
        'status': enum_canonical_strings.present_without_info,
        'incarceration_type': StateIncarcerationType.STATE_PRISON.value,
        'court_type': StateCourtType.PRESENT_WITHOUT_INFO.value,
        'agent_type': StateAgentType.PRESENT_WITHOUT_INFO.value,
    }

    for flat_field_name in set_flat_field_names:
        if flat_field_name == entity.get_class_id_name() \
                or flat_field_name == 'state_code':
            continue

        if flat_field_name in fixed_values:
            entity.set_field(flat_field_name, fixed_values[flat_field_name])
        else:
            entity.clear_field(flat_field_name)
def _is_match(*, ingested_entity: DatabaseEntity,
              db_entity: DatabaseEntity) -> bool:
    """Returns true if the provided |ingested_entity| matches the provided
    |db_entity|. Otherwise returns False.

    The checks run in this order:
      - if either argument is falsy, the result is plain equality;
      - two StatePerson objects match when any pair of their external ids
        matches;
      - otherwise differing 'state_code' values mean no match;
      - StatePersonExternalId, StatePersonAlias, StatePersonRace and
        StatePersonEthnicity compare on specific attributes;
      - the violation / case type entry classes defer to
        |_base_entity_match|;
      - everything else compares external ids, and two entities without an
        external id match only if both are placeholders.

    Raises an EntityMatchingError if the two entities are of different
    classes or are not DatabaseEntity instances.
    """
    if not ingested_entity or not db_entity:
        return ingested_entity == db_entity

    if ingested_entity.__class__ != db_entity.__class__:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if not isinstance(ingested_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for ingested entity[{type(ingested_entity)}]",
            'unknown')
    if not isinstance(db_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for db entity[{type(db_entity)}]",
            'unknown')

    if isinstance(ingested_entity, schema.StatePerson):
        db_person = cast(schema.StatePerson, db_entity)
        # People match as soon as any pair of external ids matches.
        return any(
            _is_match(ingested_entity=ingested_external_id,
                      db_entity=db_external_id)
            for ingested_external_id in ingested_entity.external_ids
            for db_external_id in db_person.external_ids)

    # Aside from people, all entities are state specific.
    if ingested_entity.get_field('state_code') \
            != db_entity.get_field('state_code'):
        return False

    # TODO(2671): Update all person attributes below to use complete entity
    # equality instead of just comparing individual fields.
    if isinstance(ingested_entity, schema.StatePersonExternalId):
        db_external_id_entity = cast(schema.StatePersonExternalId, db_entity)
        return ingested_entity.external_id \
            == db_external_id_entity.external_id \
            and ingested_entity.id_type == db_external_id_entity.id_type

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if specific attributes match.
    person_attribute_keys = (
        (schema.StatePersonAlias, 'full_name'),
        (schema.StatePersonRace, 'race'),
        (schema.StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_class, key_name in person_attribute_keys:
        if isinstance(ingested_entity, attribute_class):
            return getattr(ingested_entity, key_name) \
                == getattr(db_entity, key_name)

    base_match_classes = (
        schema.StateSupervisionViolationResponseDecisionEntry,
        schema.StateSupervisionViolatedConditionEntry,
        schema.StateSupervisionViolationTypeEntry,
        schema.StateSupervisionCaseTypeEntry,
    )
    if isinstance(ingested_entity, base_match_classes):
        return _base_entity_match(ingested_entity, db_entity)

    both_lack_external_id = (ingested_entity.get_external_id() is None
                             and db_entity.get_external_id() is None)
    if not both_lack_external_id:
        return ingested_entity.get_external_id() \
            == db_entity.get_external_id()

    # Placeholders entities are considered equal
    return is_placeholder(ingested_entity) and is_placeholder(db_entity)
def db_id_or_object_id(entity: DatabaseEntity) -> int:
    """Returns |entity.get_id()| when it is truthy; otherwise falls back to
    the Python object id of |entity|.
    """
    primary_key = entity.get_id()
    if primary_key:
        return primary_key
    return id(entity)