Beispiel #1
0
    def _copy_entity_fields_to_historical_snapshot(
            self, entity: DatabaseEntity,
            historical_snapshot: DatabaseEntity) -> None:
        """
        Mirrors the column values of |entity| onto |historical_snapshot|.

        NOTE: Only properties that exist on *both* the master and the
        historical table are copied; a property that lives on just one of the
        two tables is skipped. The master key column is the single exception:
        its property on the historical table is looked up by *column* name
        (not property name) and it is always copied, following module
        assumption #2.
        """
        historical_type = type(historical_snapshot)

        shared_property_names = self._get_shared_column_property_names(
            type(entity), historical_type)
        for property_name in shared_property_names:
            setattr(historical_snapshot, property_name,
                    getattr(entity, property_name))

        # See module assumption #2
        master_key_column = entity.get_primary_key_column_name()  # type: ignore
        snapshot_key_property = \
            historical_type.get_property_name_by_column_name(master_key_column)
        setattr(historical_snapshot, snapshot_key_property,
                entity.get_primary_key())  # type: ignore
Beispiel #2
0
def get_external_ids_from_entity(entity: DatabaseEntity):
    """Collects the external id values carried by |entity| into a list.

    A StatePerson contributes the |external_id| of every truthy entry in its
    |external_ids|. Any other entity contributes its own external id when
    that id is truthy, and nothing otherwise.
    """
    if isinstance(entity, schema.StatePerson):
        return [
            person_external_id.external_id
            for person_external_id in entity.external_ids
            if person_external_id
        ]

    own_external_id = entity.get_external_id()
    return [own_external_id] if own_external_id else []
Beispiel #3
0
def default_merge_flat_fields(
        *, new_entity: DatabaseEntity, old_entity: DatabaseEntity) -> DatabaseEntity:
    """Copies every set flat (non-relationship) field of |new_entity| onto |old_entity| and returns |old_entity|.

    Two fields are never copied: the class id field of |old_entity|, and 'status' whenever |new_entity| reports a
    default status."""
    flat_field_names = get_set_entity_field_names(new_entity, EntityFieldType.FLAT_FIELD)

    for field_name in flat_field_names:
        # Leave the id alone, and do not overwrite with default status
        if field_name == old_entity.get_class_id_name() or (
                field_name == 'status' and new_entity.has_default_status()):
            continue

        incoming_value = new_entity.get_field(field_name)
        old_entity.set_field(field_name, incoming_value)

    return old_entity
def remove_child_from_entity(*, entity: DatabaseEntity, child_field_name: str,
                             child_to_remove: DatabaseEntity):
    """Detaches |child_to_remove| from the |child_field_name| field of
    |entity|, when it is there.

    A list-valued field has the child removed from the list in place; a
    single-entity field equal to the child is replaced by None. In every case
    the resulting value is written back onto |entity|.
    """
    current_value = entity.get_field(child_field_name)
    updated_value = current_value

    if isinstance(current_value, list):
        if child_to_remove in current_value:
            current_value.remove(child_to_remove)
    elif isinstance(current_value, DatabaseEntity) \
            and current_value == child_to_remove:
        updated_value = None

    entity.set_field(child_field_name, updated_value)
Beispiel #5
0
def get_external_id_keys_from_multiple_id_entity(
        entity: DatabaseEntity) -> List[str]:
    """Builds one '<id_type>|<external_id>' string for each entry in the
    'external_ids' field of |entity| and returns them as a list.
    """
    return [
        external_id.id_type + '|' + external_id.external_id
        for external_id in entity.get_field_as_list('external_ids')
    ]
def add_child_to_entity(*, entity: DatabaseEntity, child_field_name: str,
                        child_to_add: DatabaseEntity):
    """Attaches |child_to_add| to the |child_field_name| field of |entity|.

    A list-valued field gets the child appended in place unless it is already
    in the list. A single-valued field is set to the child; if it already
    holds a truthy value different from the child, an EntityMatchingError is
    raised instead.
    """
    child_field = entity.get_field(child_field_name)

    if isinstance(child_field, list):
        # The list is mutated in place, so nothing has to be written back.
        if child_to_add not in child_field:
            child_field.append(child_to_add)
        return

    if child_field and child_field != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child {child_to_add} to entity {entity}, "
            f"but {child_field_name} already had different value "
            f"{child_field}", entity.get_entity_name())

    entity.set_field(child_field_name, child_to_add)
Beispiel #7
0
def _get_root_entity_helper(
        entity: DatabaseEntity) -> Optional[Type[DatabaseEntity]]:
    """Returns the class of the first non-placeholder entity found at or
    below |entity|, or None if only placeholders are encountered.

    The search is depth-first over the set forward-edge fields and follows
    only the first element of each such field.
    """
    if is_placeholder(entity):
        forward_edge_names = get_set_entity_field_names(
            entity, EntityFieldType.FORWARD_EDGE)
        for edge_name in forward_edge_names:
            first_child = entity.get_field_as_list(edge_name)[0]
            found_class = _get_root_entity_helper(first_child)
            if found_class is not None:
                return found_class
        return None

    return entity.__class__
def get_or_create_placeholder_child(parent_entity: DatabaseEntity,
                                    child_field_name: str,
                                    child_class: Type[DatabaseEntity],
                                    **child_kwargs):
    """Returns a placeholder child from the |child_field_name| field of |parent_entity|, creating one if necessary.

    If the field already contains placeholder entities, the first of them is returned. Otherwise a new |child_class|
    is built from |child_kwargs|, appended to the field and returned. Raises EntityMatchingError when the entity built
    from |child_kwargs| is not itself a placeholder.
    """
    siblings = parent_entity.get_field_as_list(child_field_name)
    existing_placeholders = list(filter(is_placeholder, siblings))
    if existing_placeholders:
        return existing_placeholders[0]

    logging.info(
        'No placeholder children on entity with id [%s] of type [%s] exist on field [%s]. Have to create one.',
        parent_entity.get_external_id(),
        parent_entity.get_entity_name(),
        child_field_name)

    created_child = child_class(**child_kwargs)
    if is_placeholder(created_child):
        siblings.append(created_child)
        parent_entity.set_field_from_list(child_field_name, siblings)
        return created_child

    raise EntityMatchingError(
        f'Child created with kwargs is not a placeholder [{child_kwargs}]',
        parent_entity.get_entity_name())
Beispiel #9
0
def _get_all_database_entity_field_names(entity: DatabaseEntity,
                                         entity_field_type: EntityFieldType,
                                         direction_checker):
    """Returns the set of field names on the provided DatabaseEntity |entity|
    that belong to the category named by |entity_field_type|.

    Relationship properties are classified as back edges or forward edges by
    asking |direction_checker|. Column properties are classified as foreign
    keys or, when they are not foreign keys, as flat fields.
    EntityFieldType.ALL yields the union of all four categories.

    Raises ValueError if |entity_field_type| is none of the handled types.
    """
    back_edges = set()
    forward_edges = set()
    for relationship_name in entity.get_relationship_property_names():
        is_back_edge = direction_checker.is_back_edge(entity, relationship_name)
        bucket = back_edges if is_back_edge else forward_edges
        bucket.add(relationship_name)

    foreign_keys = set(entity.get_foreign_key_names())
    flat_fields = {
        column_name
        for column_name in entity.get_column_property_names()
        if column_name not in foreign_keys
    }

    single_category_results = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FOREIGN_KEYS, foreign_keys),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
    )
    for category, field_names in single_category_results:
        if entity_field_type is category:
            return field_names

    if entity_field_type is EntityFieldType.ALL:
        return flat_fields | foreign_keys | forward_edges | back_edges

    raise ValueError(
        f"Unrecognized EntityFieldType [{entity_field_type}] on entity [{entity}]"
    )
Beispiel #10
0
    def register_entity(self, schema_object: DatabaseEntity) -> None:
        """Creates a (_SnapshotContext) for |schema_object| and stores it in
        the registry under its type name and primary key

        Raises (ValueError) if an object with the same type name and primary
        key has already been registered
        """
        type_name = type(schema_object).__name__
        contexts_for_type = self.snapshot_contexts.setdefault(type_name, {})

        primary_key = schema_object.get_primary_key()
        if primary_key in contexts_for_type:
            raise ValueError(
                "Entity already registered with type {type} and primary key "
                "{primary_key}".format(type=type_name, primary_key=primary_key))

        contexts_for_type[primary_key] = _SnapshotContext(
            schema_object=schema_object)
Beispiel #11
0
def get_all_db_objs_from_tree(db_obj: DatabaseEntity,
                              result=None) -> Set[DatabaseEntity]:
    """Adds |db_obj| and the objects found under its set forward-edge fields
    to a set, and returns that set.

    |result| is the accumulator: when given, it is mutated in place and
    returned; when omitted, a fresh set is created. An object that is already
    in the accumulator is not expanded again.
    """
    collected = set() if result is None else result

    if db_obj not in collected:
        collected.add(db_obj)

        forward_edge_names = get_set_entity_field_names(
            db_obj, EntityFieldType.FORWARD_EDGE)
        for edge_name in forward_edge_names:
            children = db_obj.get_field_as_list(edge_name)
            # NOTE(review): the plural helper is defined outside this
            # snippet; presumably it walks every entity in the list and
            # collects into the same accumulator - confirm.
            get_all_db_objs_from_trees(children, collected)

    return collected
Beispiel #12
0
def _base_entity_match(
        a: DatabaseEntity,
        b: DatabaseEntity,
        skip_fields: Set[str],
        allow_null_mismatch: bool = False
) -> bool:
    """Decides whether two objects of the same type are an entity match.

    Placeholders never match. If either entity has an external id, the result is simply whether the two external ids
    are equal. Otherwise the entities match when every compared flat field is equal.

    Args:
        a: The first entity to match
        b: The second entity to match
        skip_fields: Names of flat fields that are left out of the comparison (the primary key is always left out).
        allow_null_mismatch: When True, a field that is None on either entity does not disqualify the match.
    """
    if is_placeholder(a) or is_placeholder(b):
        return False

    # External ids, when available, decide the match on their own
    a_external_id = a.get_external_id()
    b_external_id = b.get_external_id()
    if a_external_id or b_external_id:
        return a_external_id == b_external_id

    # Fall back to comparing every flat field set on either entity
    flat_field_names = (
        get_set_entity_field_names(a, EntityFieldType.FLAT_FIELD)
        | get_set_entity_field_names(b, EntityFieldType.FLAT_FIELD)
    )
    for field_name in flat_field_names:
        # The primary key and explicitly skipped fields take no part
        if field_name == a.get_class_id_name() or field_name in skip_fields:
            continue

        a_value = a.get_field(field_name)
        b_value = b.get_field(field_name)

        # A null on either side is tolerated when the caller asked for it
        if allow_null_mismatch and (a_value is None or b_value is None):
            continue

        if a_value != b_value:
            return False

    return True
Beispiel #13
0
    def _get_related_entities(self,
                              entity: DatabaseEntity) -> List[DatabaseEntity]:
        """Returns list of all entities related to |entity|

        Every relationship property of |entity| is read; list-valued
        relationships contribute all their items and single-valued ones
        contribute their item unless it is None. When the system level is
        STATE, relationships that the state direction checker reports as back
        edges are skipped.
        """
        # TODO(#1145): For County schema, fix direction checker to gracefully
        # handle the fact that SentenceRelationship exists in the schema
        # but not in the entity layer.
        #
        # Neither the system level nor the direction checker depends on the
        # relationship being inspected, so resolve them once up front instead
        # of on every loop iteration.
        direction_checker = None
        if self.get_system_level() == SystemLevel.STATE:
            direction_checker = \
                SchemaEdgeDirectionChecker.state_direction_checker()

        related_entities: List[DatabaseEntity] = []
        for relationship_name in entity.get_relationship_property_names():
            # Skip back edges (only checked for STATE, see above)
            if direction_checker is not None and \
                    direction_checker.is_back_edge(entity, relationship_name):
                continue

            related = getattr(entity, relationship_name)

            # Relationship can return either a list or a single item
            if isinstance(related, list):
                related_entities.extend(related)
            elif related is not None:
                related_entities.append(related)
        return related_entities
Beispiel #14
0
def _base_entity_match(a: DatabaseEntity, b: DatabaseEntity) -> bool:
    """Decides whether |a| and |b| are an entity match.

    Placeholders never match. If either entity has an external id, the result
    is whether the two external ids are equal. Otherwise the entities match
    when every flat field set on either of them, primary key excluded, holds
    the same value on both.
    """
    if is_placeholder(a) or is_placeholder(b):
        return False

    # External ids, when available, decide the match on their own
    a_external_id = a.get_external_id()
    b_external_id = b.get_external_id()
    if a_external_id or b_external_id:
        return a_external_id == b_external_id

    # Fall back to comparing every flat field set on either entity
    flat_field_names = (
        get_set_entity_field_names(a, EntityFieldType.FLAT_FIELD)
        | get_set_entity_field_names(b, EntityFieldType.FLAT_FIELD)
    )
    for field_name in flat_field_names:
        # The primary key takes no part in the comparison
        if field_name == a.get_class_id_name():
            continue
        if a.get_field(field_name) != b.get_field(field_name):
            return False

    return True
Beispiel #15
0
 def snapshot_context(self, entity: DatabaseEntity) -> _SnapshotContext:
     """Looks up the (_SnapshotContext) stored for |entity|, keyed first by
     its type name and then by its primary key"""
     type_name = type(entity).__name__
     return self.snapshot_contexts[type_name][entity.get_primary_key()]
Beispiel #16
0
def convert_to_placeholder(entity: DatabaseEntity):
    """Resets the set flat fields of |entity| in place (per the function name, to turn it into a placeholder).

    The class id field and 'state_code' are left untouched. 'status', 'incarceration_type', 'court_type' and
    'agent_type' are overwritten with fixed replacement values. Every other set flat field is cleared.
    """
    # Fields that receive a fixed value instead of being cleared
    replacement_values = {
        'status': enum_canonical_strings.present_without_info,
        'incarceration_type': StateIncarcerationType.STATE_PRISON.value,
        'court_type': StateCourtType.PRESENT_WITHOUT_INFO.value,
        'agent_type': StateAgentType.PRESENT_WITHOUT_INFO.value,
    }

    for field_name in get_set_entity_field_names(entity, EntityFieldType.FLAT_FIELD):
        # The id and the state code survive the conversion unchanged
        if field_name == entity.get_class_id_name() or field_name == 'state_code':
            continue

        if field_name in replacement_values:
            entity.set_field(field_name, replacement_values[field_name])
        else:
            entity.clear_field(field_name)
Beispiel #17
0
def _is_match(*,
              ingested_entity: DatabaseEntity,
              db_entity: DatabaseEntity) -> bool:
    """Decides whether the provided |ingested_entity| matches the provided
    |db_entity|.

    If either argument is falsy, the result is plain equality of the two.
    StatePerson objects match when any pair of their external ids matches.
    Every other entity must first agree on 'state_code'; after that the
    comparison depends on the entity class (see the inline comments).

    Raises EntityMatchingError when the two entities are of different classes
    or when either of them is not a DatabaseEntity.
    """
    if not ingested_entity or not db_entity:
        return ingested_entity == db_entity

    if ingested_entity.__class__ != db_entity.__class__:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if not isinstance(ingested_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for ingested entity[{type(ingested_entity)}]",
            'unknown')
    if not isinstance(db_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for db entity[{type(db_entity)}]",
            'unknown')

    # People match as soon as one ingested external id matches one db
    # external id.
    if isinstance(ingested_entity, schema.StatePerson):
        db_entity = cast(schema.StatePerson, db_entity)
        return any(
            _is_match(ingested_entity=ingested_external_id,
                      db_entity=db_external_id)
            for ingested_external_id in ingested_entity.external_ids
            for db_external_id in db_entity.external_ids)

    # Aside from people, all entities are state specific.
    ingested_state_code = ingested_entity.get_field('state_code')
    if ingested_state_code != db_entity.get_field('state_code'):
        return False

    # TODO(2671): Update all person attributes below to use complete entity
    # equality instead of just comparing individual fields.
    if isinstance(ingested_entity, schema.StatePersonExternalId):
        db_entity = cast(schema.StatePersonExternalId, db_entity)
        return ingested_entity.external_id == db_entity.external_id \
            and ingested_entity.id_type == db_entity.id_type

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if the one listed attribute matches.
    person_attribute_fields = (
        (schema.StatePersonAlias, 'full_name'),
        (schema.StatePersonRace, 'race'),
        (schema.StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_class, compared_field in person_attribute_fields:
        if isinstance(ingested_entity, attribute_class):
            return getattr(ingested_entity, compared_field) \
                == getattr(db_entity, compared_field)

    # These entry-style entities are compared with the base entity match.
    base_matched_classes = (
        schema.StateSupervisionViolationResponseDecisionEntry,
        schema.StateSupervisionViolatedConditionEntry,
        schema.StateSupervisionViolationTypeEntry,
        schema.StateSupervisionCaseTypeEntry,
    )
    if isinstance(ingested_entity, base_matched_classes):
        return _base_entity_match(ingested_entity, db_entity)

    ingested_external_id = ingested_entity.get_external_id()
    db_external_id = db_entity.get_external_id()

    # Placeholders entities are considered equal
    if ingested_external_id is None and db_external_id is None:
        return is_placeholder(ingested_entity) and is_placeholder(db_entity)
    return ingested_external_id == db_external_id
Beispiel #18
0
def db_id_or_object_id(entity: DatabaseEntity) -> int:
    """Returns the id reported by |entity| when that id is truthy; otherwise
    falls back to the Python object id of |entity|.
    """
    return entity.get_id() or id(entity)