def get_all_entities_from_tree(
        entity: Entity,
        result: Optional[List[Entity]] = None,
        seen_ids: Optional[Set[int]] = None) -> List[Entity]:
    """Collects |entity| and every entity reachable below it into one list.

    The walk is depth-first and pre-order: |entity| is appended first, then
    the subtree under each field name that get_all_core_entity_field_names
    reports for EntityFieldType.FORWARD_EDGE. Objects are deduplicated by
    Python object id, not by equality, so each object appears at most once.

    Args:
        entity: The root of the tree to walk.
        result: Accumulator that is appended to in place. A new list is
            created when omitted.
        seen_ids: Object ids that have already been visited. A new set is
            created when omitted.

    Returns:
        The accumulator list (the same object as |result| when provided).
    """
    collected = [] if result is None else result
    visited = set() if seen_ids is None else seen_ids

    entity_key = id(entity)
    if entity_key in visited:
        # Already recorded through another path; nothing more to add.
        return collected

    collected.append(entity)
    visited.add(entity_key)

    forward_edge_names = get_all_core_entity_field_names(
        entity, EntityFieldType.FORWARD_EDGE)
    for field_name in forward_edge_names:
        child = entity.get_field(field_name)
        if child is None:
            continue

        # Treat a single child like a one-element list so both shapes share
        # the same recursion below.
        children = child if isinstance(child, list) else [child]
        for descendant in children:
            get_all_entities_from_tree(descendant, collected, visited)

    return collected
def __init__(self, database_entity: Entity,
             ingested_entities: Sequence[Entity]):
    """Builds the error for one database entity matching several ingested
    entities.

    The message contains the database entity followed by each ingested
    entity on its own line. The message and the database entity's name are
    passed to the parent initializer.
    """
    ingested_listing = '\n'.join(
        str(ingested) for ingested in ingested_entities)
    msg = (
        "Matched one database entity to multiple ingested entities."
        "\nDatabase entity: {}"
        "\nIngested entities: {}").format(database_entity, ingested_listing)
    entity_name = database_entity.get_entity_name()
    super(MatchedMultipleIngestedEntitiesError, self).__init__(
        msg, entity_name)
def _is_match(*, ingested_entity: Entity, db_entity: Entity) -> bool:
    """Decides whether |ingested_entity| and |db_entity| are a match.

    Falsy inputs are compared with plain equality. Otherwise both entities
    must be of the same class, and the comparison rule depends on that class:
      - StatePerson: any pair of external ids (one from each side) matches.
      - StatePersonExternalId: state_code, external_id and id_type are equal.
      - StatePersonAlias / StatePersonRace / StatePersonEthnicity: state_code
        and the full_name / race / ethnicity value are equal.
      - Anything else: external_ids are equal; when both are None the
        entities match only if both are placeholders.

    Raises:
        EntityMatchingError: If the two entities are of different classes.
    """
    if not ingested_entity or not db_entity:
        return ingested_entity == db_entity

    if ingested_entity.__class__ != db_entity.__class__:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if isinstance(ingested_entity, StatePerson):
        db_entity = cast(StatePerson, db_entity)
        # People match as soon as any one of their external ids matches.
        return any(
            _is_match(ingested_entity=ingested_external_id,
                      db_entity=db_external_id)
            for ingested_external_id in ingested_entity.external_ids
            for db_external_id in db_entity.external_ids)

    if isinstance(ingested_entity, StatePersonExternalId):
        db_entity = cast(StatePersonExternalId, db_entity)
        return (ingested_entity.state_code == db_entity.state_code
                and ingested_entity.external_id == db_entity.external_id
                and ingested_entity.id_type == db_entity.id_type)

    # The person has already been matched by this point, so the 'person
    # attribute' entities below are compared on state_code plus the single
    # value each of them carries.
    if isinstance(ingested_entity, StatePersonAlias):
        db_entity = cast(StatePersonAlias, db_entity)
        return (ingested_entity.state_code == db_entity.state_code
                and ingested_entity.full_name == db_entity.full_name)

    if isinstance(ingested_entity, StatePersonRace):
        db_entity = cast(StatePersonRace, db_entity)
        return (ingested_entity.state_code == db_entity.state_code
                and ingested_entity.race == db_entity.race)

    if isinstance(ingested_entity, StatePersonEthnicity):
        db_entity = cast(StatePersonEthnicity, db_entity)
        return (ingested_entity.state_code == db_entity.state_code
                and ingested_entity.ethnicity == db_entity.ethnicity)

    # Every remaining entity type is compared by its external_id.
    db_entity = cast(ExternalIdEntity, db_entity)
    ingested_entity = cast(ExternalIdEntity, ingested_entity)

    ingested_external_id = ingested_entity.external_id
    db_external_id = db_entity.external_id

    # Two entities without external ids are equal only as placeholders.
    if ingested_external_id is None and db_external_id is None:
        return is_placeholder(ingested_entity) and is_placeholder(db_entity)

    return ingested_external_id == db_external_id
def get_set_entity_field_names(
        entity: Entity, entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the names of the populated fields on |entity| that belong to
    the requested |entity_field_type|.

    A field counts as populated when its value is neither None nor an empty
    list. Populated fields holding an Entity or a list are relationship
    fields and are classified as back or forward edges by the schema
    direction checker; every other populated field is a flat field.

    Raises:
        EntityMatchingError: If |entity_field_type| is not FLAT_FIELD,
            FORWARD_EDGE or BACK_EDGE.
    """
    # Entity names starting with 'state_' use the state checker; all others
    # use the county checker.
    uses_state_schema = entity.get_entity_name().startswith('state_')
    if uses_state_schema:
        direction_checker = \
            SchemaEdgeDirectionChecker.state_direction_checker()
    else:
        direction_checker = \
            SchemaEdgeDirectionChecker.county_direction_checker()

    back_edges = set()
    forward_edges = set()
    flat_fields = set()

    for field in attr.fields_dict(entity.__class__):
        value = getattr(entity, field)
        if value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        holds_entity = issubclass(type(value), Entity)
        if not holds_entity and not isinstance(value, list):
            flat_fields.add(field)
            continue

        # Disregard empty lists
        if not holds_entity and not value:
            continue

        if direction_checker.is_back_edge(entity, field):
            back_edges.add(field)
        else:
            forward_edges.add(field)

    names_by_field_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
    )
    for field_type, field_names in names_by_field_type:
        if entity_field_type is field_type:
            return field_names

    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
def _is_match_with_relationships(*, db_entity: Entity,
                                 ingested_entity: Entity) -> bool:
    """Returns whether the two entities match both as objects and in their
    parent relationships.

    NOTE(review): |name|, |db_relationship_map| and |ing_relationship_map|
    are not defined in this function; they are presumably bound in an
    enclosing scope that is not visible here.
    """
    ingested_key = generate_id_from_obj(ingested_entity)
    db_key = db_entity.get_id()

    # Look up the type-specific matcher on county_matching_utils by name.
    matcher_name = "is_{}_match".format(name)
    object_matcher = getattr(county_matching_utils, matcher_name)
    objects_match = object_matcher(
        db_entity=db_entity, ingested_entity=ingested_entity)

    # Relationships still "match" when the ingested entity has gained new
    # parents since the last scrape, but not when it has lost any: every
    # parent of the db entity must also be a parent of the ingested entity.
    db_parents = db_relationship_map[db_key]
    ingested_parents = ing_relationship_map[ingested_key]
    parents_preserved = db_parents.issubset(ingested_parents)

    return objects_match and parents_preserved
def set_field_from_list(entity: Entity, field_name: str, value: List):
    """Stores |value| on the |field_name| field of |entity|, adapting it to
    the field's current shape.

    If the field currently holds a list, |value| is stored as is. Otherwise
    the field is treated as singular: an empty |value| stores None and a
    one-element |value| stores that element.

    Raises:
        EntityMatchingError: If the field is singular and |value| has more
            than one element.
    """
    current = get_field(entity, field_name)

    if isinstance(current, list):
        set_field(entity, field_name, value)
        return

    # Singular field from here on.
    if not value:
        set_field(entity, field_name, None)
        return

    if len(value) != 1:
        raise EntityMatchingError(
            f"Attempting to set singular field: {field_name} on entity: "
            f"{entity.get_entity_name()}, but got multiple values: "
            f"{value}.", entity.get_entity_name())

    set_field(entity, field_name, value[0])
def add_child_to_entity(*,
                        entity: Entity,
                        child_field_name: str,
                        child_to_add: Entity):
    """Attaches |child_to_add| to |entity| under |child_field_name|.

    If the field holds a list, the child is appended unless the list already
    contains it. Otherwise the field is singular and is set to the child.

    Raises:
        EntityMatchingError: If the singular field already holds a truthy
            value that differs from |child_to_add|.
    """
    child_field = get_field(entity, child_field_name)

    if isinstance(child_field, list):
        # The list is mutated in place, so no set_field call is needed.
        already_present = child_to_add in child_field
        if not already_present:
            child_field.append(child_to_add)
        return

    if child_field and child_field != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child {child_to_add} to entity {entity}, "
            f"but {child_field_name} already had different value "
            f"{child_field}", entity.get_entity_name())

    set_field(entity, child_field_name, child_to_add)
def _add_match_to_matched_entities_cache(
        *,
        db_entity_match: Entity,
        ingested_entity: Entity,
        matched_entities_by_db_ids: Dict[int, Entity]):
    """Records that |ingested_entity| matched |db_entity_match|.

    The cache is keyed by the DB entity's id. The first match for a DB
    entity is stored. A later match leaves the cache untouched and is
    accepted only if it is the same ingested entity or if both ingested
    entities are placeholders.

    Raises:
        MatchedMultipleIngestedEntitiesError: If the DB entity was already
            matched to a different ingested entity and at least one of the
            two is not a placeholder.
    """
    matched_db_id = db_entity_match.get_id()

    if matched_db_id not in matched_entities_by_db_ids:
        matched_entities_by_db_ids[matched_db_id] = ingested_entity
        return

    previously_matched = matched_entities_by_db_ids[matched_db_id]
    if ingested_entity != previously_matched:
        # It's ok for a DB object to match multiple ingested placeholders.
        both_placeholders = (is_placeholder(ingested_entity)
                             and is_placeholder(previously_matched))
        if not both_placeholders:
            raise MatchedMultipleIngestedEntitiesError(
                db_entity_match, [ingested_entity, previously_matched])
def set_field(entity: Entity, field_name: str, value: Any):
    """Sets |value| on the |field_name| attribute of |entity|.

    Raises:
        EntityMatchingError: If |entity| has no attribute named
            |field_name|.
    """
    if hasattr(entity, field_name):
        return setattr(entity, field_name, value)

    raise EntityMatchingError(
        f"Expected entity {entity} to have field {field_name}, but it did "
        f"not.", entity.get_entity_name())