コード例 #1
0
def get_all_entities_from_tree(
        entity: Entity,
        result: Optional[List[Entity]] = None,
        seen_ids: Optional[Set[int]] = None) -> List[Entity]:
    """Collects |entity| and everything reachable from it through its
    forward-edge fields, depth first, in first-encounter order.

    Objects are deduplicated by Python object id, so an object reachable via
    several paths is listed once and is not walked a second time.

    Args:
        entity: The root of the tree to walk.
        result: Accumulator the entities are appended to. A new list is
            created when this is None.
        seen_ids: Object ids that have already been collected. A new set is
            created when this is None.

    Returns:
        The accumulator list (the very object passed as |result|, if any).
    """
    collected = [] if result is None else result
    visited = set() if seen_ids is None else seen_ids

    entity_key = id(entity)
    if entity_key not in visited:
        collected.append(entity)
        visited.add(entity_key)

        forward_fields = get_all_core_entity_field_names(
            entity, EntityFieldType.FORWARD_EDGE)
        for field_name in forward_fields:
            value = entity.get_field(field_name)
            if value is None:
                continue

            # A forward edge holds either one child or a list of children;
            # treat both uniformly.
            children = value if isinstance(value, list) else [value]
            for descendant in children:
                get_all_entities_from_tree(descendant, collected, visited)

    return collected
コード例 #2
0
 def __init__(self, database_entity: Entity,
              ingested_entities: Sequence[Entity]):
     """Builds the message for a database entity that was matched to more
     than one ingested entity.

     The message shows |database_entity| followed by each of the
     |ingested_entities| on its own line. The database entity's name is
     handed to the parent initializer as its second argument.
     """
     ingested_listing = '\n'.join(
         str(ingested) for ingested in ingested_entities)
     msg = (
         "Matched one database entity to multiple ingested entities."
         "\nDatabase entity: {}"
         "\nIngested entities: {}"
     ).format(database_entity, ingested_listing)
     entity_name = database_entity.get_entity_name()
     super(MatchedMultipleIngestedEntitiesError, self).__init__(
         msg, entity_name)
コード例 #3
0
def _is_match(*, ingested_entity: Entity, db_entity: Entity) -> bool:
    """Decides whether |ingested_entity| and |db_entity| match.

    The comparison depends on the entity class:
      - StatePerson: any pair of their external ids matches.
      - StatePersonExternalId: state_code, external_id and id_type are equal.
      - StatePersonAlias / StatePersonRace / StatePersonEthnicity: state_code
        and the class's distinguishing attribute are equal.
      - Anything else: the external_ids are equal; when both are None the
        entities match only if both are placeholders.

    If either argument is falsy, the result is their plain equality.

    Raises:
        EntityMatchingError: if the two entities are of different classes.
    """
    if not (ingested_entity and db_entity):
        return ingested_entity == db_entity

    ingested_cls = ingested_entity.__class__
    db_cls = db_entity.__class__
    if ingested_cls != db_cls:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if isinstance(ingested_entity, StatePerson):
        db_person = cast(StatePerson, db_entity)
        # People match as soon as one ingested id matches one database id.
        return any(
            _is_match(ingested_entity=ingested_id, db_entity=db_id)
            for ingested_id in ingested_entity.external_ids
            for db_id in db_person.external_ids)

    if isinstance(ingested_entity, StatePersonExternalId):
        db_person_external_id = cast(StatePersonExternalId, db_entity)
        return (
            ingested_entity.state_code == db_person_external_id.state_code
            and ingested_entity.external_id
            == db_person_external_id.external_id
            and ingested_entity.id_type == db_person_external_id.id_type)

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if their state_codes align and the
    # attribute that distinguishes them is the same.
    person_attribute_classes = (
        (StatePersonAlias, 'full_name'),
        (StatePersonRace, 'race'),
        (StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_cls, attribute_name in person_attribute_classes:
        if isinstance(ingested_entity, attribute_cls):
            return (
                ingested_entity.state_code == db_entity.state_code
                and getattr(ingested_entity, attribute_name)
                == getattr(db_entity, attribute_name))

    ingested_with_id = cast(ExternalIdEntity, ingested_entity)
    db_with_id = cast(ExternalIdEntity, db_entity)
    ingested_external_id = ingested_with_id.external_id
    db_external_id = db_with_id.external_id

    # Placeholders entities are considered equal
    if ingested_external_id is None and db_external_id is None:
        return (is_placeholder(ingested_with_id)
                and is_placeholder(db_with_id))
    return ingested_external_id == db_external_id
コード例 #4
0
ファイル: entity_utils.py プロジェクト: dxy/pulse-data
def get_set_entity_field_names(
        entity: Entity,
        entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the names of the fields on |entity| that are set and are of
    the requested |entity_field_type|.

    A field counts as set when its value is not None and, for list values,
    when the list is not empty. Entity-valued and list-valued fields are
    classified as back or forward edges by the schema direction checker;
    every other set field is a flat field.

    Raises:
        EntityMatchingError: if |entity_field_type| is not one of the
            FLAT_FIELD, FORWARD_EDGE or BACK_EDGE members.
    """
    # The entity name prefix decides which schema's direction checker applies.
    checker_factory = (
        SchemaEdgeDirectionChecker.state_direction_checker
        if entity.get_entity_name().startswith('state_')
        else SchemaEdgeDirectionChecker.county_direction_checker)
    direction_checker = checker_factory()

    flat_names, forward_names, back_names = set(), set(), set()
    for field_name in attr.fields_dict(entity.__class__):
        value = getattr(entity, field_name)
        if value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        is_entity = issubclass(type(value), Entity)
        if not (is_entity or isinstance(value, list)):
            flat_names.add(field_name)
            continue

        # Disregard empty lists
        if not is_entity and not value:
            continue

        if direction_checker.is_back_edge(entity, field_name):
            back_names.add(field_name)
        else:
            forward_names.add(field_name)

    names_by_field_type = (
        (EntityFieldType.FLAT_FIELD, flat_names),
        (EntityFieldType.FORWARD_EDGE, forward_names),
        (EntityFieldType.BACK_EDGE, back_names),
    )
    for field_type, names in names_by_field_type:
        if entity_field_type is field_type:
            return names

    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
コード例 #5
0
    def _is_match_with_relationships(*, db_entity: Entity,
                                     ingested_entity: Entity) -> bool:
        """Checks that the two entities match both on their own and in terms
        of their recorded parents.

        The per-object comparison is delegated to the `is_<name>_match`
        function of county_matching_utils, where `name` comes from the
        enclosing scope. The parent sets are read from the enclosing scope's
        db_relationship_map and ing_relationship_map.
        """
        ingested_key = generate_id_from_obj(ingested_entity)
        db_key = db_entity.get_id()

        object_matcher = getattr(county_matching_utils,
                                 "is_{}_match".format(name))
        objects_match = object_matcher(db_entity=db_entity,
                                       ingested_entity=ingested_entity)

        # Relationships may have been added since the last scrape, but none
        # may have been removed: every parent known for the database entity
        # must still be a parent of the ingested entity.
        db_parents = db_relationship_map[db_key]
        ingested_parents = ing_relationship_map[ingested_key]
        parents_preserved = db_parents.issubset(ingested_parents)

        if not objects_match:
            return objects_match
        return parents_preserved
コード例 #6
0
ファイル: entity_utils.py プロジェクト: dxy/pulse-data
def set_field_from_list(entity: Entity, field_name: str, value: List):
    """Stores |value| on the |field_name| field of |entity|, adapting it to
    the field's current shape.

    When the field currently holds a list, |value| is stored as is. Otherwise
    the field is treated as singular: an empty |value| stores None and a
    one-element |value| stores that element.

    Raises:
        EntityMatchingError: if the field is singular and |value| has more
            than one element.
    """
    current = get_field(entity, field_name)

    if isinstance(current, list):
        new_value = value
    elif not value:
        new_value = None
    elif len(value) == 1:
        new_value = value[0]
    else:
        raise EntityMatchingError(
            f"Attempting to set singular field: {field_name} on entity: "
            f"{entity.get_entity_name()}, but got multiple values: "
            f"{value}.", entity.get_entity_name())

    set_field(entity, field_name, new_value)
コード例 #7
0
def add_child_to_entity(*, entity: Entity, child_field_name: str,
                        child_to_add: Entity):
    """Attaches |child_to_add| to the |child_field_name| field of |entity|.

    For a list field the child is appended unless it is already in the list.
    For a singular field the child replaces the current value, which must be
    empty or equal to the child.

    Raises:
        EntityMatchingError: if a singular field already holds a different
            value.
    """
    child_field = get_field(entity, child_field_name)

    if not isinstance(child_field, list):
        # Singular field: refuse to overwrite a different existing child.
        if child_field and child_field != child_to_add:
            raise EntityMatchingError(
                f"Attempting to add child {child_to_add} to entity {entity}, "
                f"but {child_field_name} already had different value "
                f"{child_field}", entity.get_entity_name())
        set_field(entity, child_field_name, child_to_add)
        return

    if child_to_add not in child_field:
        child_field.append(child_to_add)
    set_field(entity, child_field_name, child_field)
コード例 #8
0
def _add_match_to_matched_entities_cache(
        *, db_entity_match: Entity, ingested_entity: Entity,
        matched_entities_by_db_ids: Dict[int, Entity]):
    """Remembers that |ingested_entity| was matched to |db_entity_match|.

    The cache is keyed by the database entity's id. The first ingested entity
    matched to a given database entity is the one kept in the cache.

    Raises:
        MatchedMultipleIngestedEntitiesError: if the database entity was
            already matched to a different ingested entity, unless both
            ingested entities are placeholders.
    """
    db_id = db_entity_match.get_id()

    if db_id not in matched_entities_by_db_ids:
        matched_entities_by_db_ids[db_id] = ingested_entity
        return

    previously_matched = matched_entities_by_db_ids[db_id]
    if ingested_entity != previously_matched:
        candidates = [ingested_entity, previously_matched]
        # It's ok for a DB object to match multiple ingested placeholders.
        if all(is_placeholder(candidate) for candidate in candidates):
            return
        raise MatchedMultipleIngestedEntitiesError(db_entity_match,
                                                   candidates)
コード例 #9
0
ファイル: entity_utils.py プロジェクト: dxy/pulse-data
def set_field(entity: Entity, field_name: str, value: Any):
    """Assigns |value| to the existing |field_name| attribute of |entity|.

    Raises:
        EntityMatchingError: if |entity| has no attribute named |field_name|.
    """
    if hasattr(entity, field_name):
        return setattr(entity, field_name, value)

    raise EntityMatchingError(
        f"Expected entity {entity} to have field {field_name}, but it did "
        f"not.", entity.get_entity_name())