コード例 #1
0
def base_entity_match(ingested_entity: EntityTree,
                      db_entity: EntityTree) -> bool:
    """
    Decides whether the entities wrapped by two entity trees are a match.

    Meant for entity types that may arrive without external ids in the
    ingested state data. The checks are applied in this order:

    1. If either entity is a placeholder, there is no match.
    2. If either entity has a truthy external_id, the entities match exactly
       when their external ids are equal.
    3. Otherwise the entities match when every flat field that is set on
       either of them (the primary key excluded) holds equal values.
    """
    ingested = cast(ExternalIdEntity, ingested_entity.entity)
    existing = cast(ExternalIdEntity, db_entity.entity)

    if is_placeholder(ingested) or is_placeholder(existing):
        return False

    if ingested.external_id or existing.external_id:
        return ingested.external_id == existing.external_id

    # Union of the flat fields populated on either side.
    candidate_names = (
        get_set_entity_field_names(ingested, EntityFieldType.FLAT_FIELD)
        | get_set_entity_field_names(existing, EntityFieldType.FLAT_FIELD))

    # A single differing value (outside the primary key) rules out a match.
    return not any(
        get_field(ingested, name) != get_field(existing, name)
        for name in candidate_names
        if name != ingested.get_class_id_name())
コード例 #2
0
def _is_subset(entity: Entity, subset: Entity) -> bool:
    """Returns True when everything set on |subset| is also found on |entity|.

    Flat fields set on |subset| must compare equal to the same field on
    |entity|, and every child reachable through a set forward-edge field of
    |subset| must be contained in the corresponding field of |entity|.
    """
    flat_names = get_set_entity_field_names(subset,
                                            EntityFieldType.FLAT_FIELD)
    if any(get_field(entity, name) != get_field(subset, name)
           for name in flat_names):
        return False

    edge_names = get_set_entity_field_names(subset,
                                            EntityFieldType.FORWARD_EDGE)
    return not any(
        child not in get_field_as_list(entity, name)
        for name in edge_names
        for child in get_field_as_list(subset, name))
コード例 #3
0
ファイル: entity_utils_test.py プロジェクト: dxy/pulse-data
 def test_getEntityRelationshipFieldNames_flatFields(self):
     # Of the three populated fields, only `sentence_group_id` is expected to
     # be reported for EntityFieldType.FLAT_FIELD.
     sentence_group = StateSentenceGroup.new_with_defaults(
         fines=[StateFine.new_with_defaults()],
         person=[StatePerson.new_with_defaults()],
         sentence_group_id=_ID)
     expected_names = {'sentence_group_id'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FLAT_FIELD)
     self.assertEqual(expected_names, actual_names)
コード例 #4
0
def _get_all_entity_trees_of_cls_helper(
        tree: EntityTree,
        cls: Type[DatabaseEntity],
        seen_ids: Set[int],
        seen_trees: List[EntityTree],
        direction_checker: SchemaEdgeDirectionChecker):
    """
    Recursively collects the entity trees under |tree| whose entity has
    exactly the type |cls|.

    Each newly found entity has its id() added to |seen_ids| and its tree
    appended to |seen_trees|; the search does not descend below a newly
    recorded entity. An entity of type |cls| that was already recorded is not
    recorded again, but its forward edges are still walked.
    """
    current = tree.entity
    current_cls = current.__class__

    # When |cls| outranks the current entity's class, no object of type |cls|
    # can be reached from here, so stop descending.
    if direction_checker.is_higher_ranked(cls, current_cls):
        return

    is_new_match = current_cls == cls and id(current) not in seen_ids
    if is_new_match:
        seen_ids.add(id(current))
        seen_trees.append(tree)
        return

    forward_edge_names = get_set_entity_field_names(
        current, EntityFieldType.FORWARD_EDGE)
    for edge_name in forward_edge_names:
        children = current.get_field_as_list(edge_name)
        for subtree in tree.generate_child_trees(children):
            _get_all_entity_trees_of_cls_helper(
                subtree, cls, seen_ids, seen_trees, direction_checker)
コード例 #5
0
def _get_match_results_for_all_children(
        ingested_entity_tree: EntityTree, db_entity_trees: List[EntityTree],
        root_entity_cls) \
        -> List[Tuple[str, MatchResults]]:
    """Matches every set child field of the |ingested_entity_tree| against the
    children of the |db_entity_trees|.

    Each child field is matched on its own, so children of the same ingested
    entity may end up matched to different DB parents.

    Returns one tuple per set forward-edge field of the ingested entity:
    - str: the name of the child field
    - MatchResults: the outcome of matching the children in that field to the
        children found under the same field of the |db_entity_trees|
    """
    parent = ingested_entity_tree.entity
    results = []

    for edge_name in get_set_entity_field_names(
            parent, EntityFieldType.FORWARD_EDGE):
        raw_children = get_field(parent, edge_name)
        db_children = generate_child_entity_trees(edge_name, db_entity_trees)

        # A single-valued relationship is wrapped so that both shapes can be
        # handled as a list.
        children = raw_children if isinstance(raw_children, list) \
            else [raw_children]

        child_match_results = _match_entity_trees(
            ingested_entity_trees=ingested_entity_tree.generate_child_trees(
                children),
            db_entity_trees=db_children,
            root_entity_cls=root_entity_cls)
        results.append((edge_name, child_match_results))

    return results
コード例 #6
0
ファイル: entity_utils_test.py プロジェクト: dxy/pulse-data
 def test_getEntityRelationshipFieldNames_backedges(self):
     # Of the three populated fields, only `person` is expected to be
     # reported for EntityFieldType.BACK_EDGE.
     sentence_group = StateSentenceGroup.new_with_defaults(
         fines=[StateFine.new_with_defaults()],
         person=[StatePerson.new_with_defaults()],
         sentence_group_id=_ID)
     expected_names = {'person'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.BACK_EDGE)
     self.assertEqual(expected_names, actual_names)
コード例 #7
0
 def test_getEntityRelationshipFieldNames_all(self):
     # EntityFieldType.ALL is expected to report every field populated in
     # the constructor call below.
     sentence_group = schema.StateSentenceGroup(
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID)
     expected_names = {'fines', 'person', 'person_id', 'sentence_group_id'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.ALL)
     self.assertEqual(expected_names, actual_names)
コード例 #8
0
 def test_getEntityRelationshipFieldNames_foreignKeys(self):
     # Of the four populated fields, only `person_id` is expected to be
     # reported for EntityFieldType.FOREIGN_KEYS.
     sentence_group = schema.StateSentenceGroup(
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID)
     expected_names = {'person_id'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FOREIGN_KEYS)
     self.assertEqual(expected_names, actual_names)
コード例 #9
0
 def test_getEntityRelationshipFieldNames_backedges(self):
     # Of the four populated fields, only `person` is expected to be
     # reported for EntityFieldType.BACK_EDGE.
     sentence_group = schema.StateSentenceGroup(
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID)
     expected_names = {'person'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.BACK_EDGE)
     self.assertEqual(expected_names, actual_names)
コード例 #10
0
 def test_getDbEntityRelationshipFieldNames_children(self):
     # Of the four populated fields, only `fines` is expected to be reported
     # for EntityFieldType.FORWARD_EDGE.
     sentence_group = schema.StateSentenceGroup(
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID)
     expected_names = {'fines'}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FORWARD_EDGE)
     self.assertEqual(expected_names, actual_names)
コード例 #11
0
def merge_incomplete_periods(
    new_entity: schema.StateIncarcerationPeriod,
    old_entity: schema.StateIncarcerationPeriod,
) -> schema.StateIncarcerationPeriod:
    """Folds two incarceration periods, carrying admission and release
    information, into a single period stored on |old_entity|.

    The status reported alongside the release event is treated as the most
    relevant, up-to-date status.

    Args:
        new_entity: The out-of-session period (i.e. new to this ingest run).
        old_entity: The in-session period (i.e. pulled out of the DB). It is
                    updated in place.

    Returns:
        The updated |old_entity|.
    """

    if new_entity.external_id == old_entity.external_id:
        # Both sides describe the same period: a regular merge is enough.
        default_merge_flat_fields(new_entity=new_entity, old_entity=old_entity)
        return old_entity

    # Work out the external id of the merged period. All values derived from
    # |old_entity| are captured here, before its fields get overwritten below.
    new_is_complete = is_incarceration_period_complete(new_entity)
    old_is_complete = is_incarceration_period_complete(old_entity)
    if new_is_complete != old_is_complete:
        # Exactly one period is complete; its external id wins.
        complete_period = new_entity if new_is_complete else old_entity
        merged_external_id = complete_period.external_id
    elif new_entity.admission_date:
        # The new period is the admission half, the old one the release half.
        merged_external_id = (
            new_entity.external_id
            + _INCARCERATION_PERIOD_ID_DELIMITER
            + old_entity.external_id
        )
    else:
        merged_external_id = (
            old_entity.external_id
            + _INCARCERATION_PERIOD_ID_DELIMITER
            + new_entity.external_id
        )

    # The new period's status is kept only when it carries a release date.
    status_source = new_entity if new_entity.release_date else old_entity
    merged_status = status_source.status
    merged_status_raw_text = status_source.status_raw_text

    # Overwrite the old period with every flat field set on the new one.
    for flat_field_name in get_set_entity_field_names(
        new_entity, EntityFieldType.FLAT_FIELD
    ):
        old_entity.set_field(flat_field_name, new_entity.get_field(flat_field_name))

    # The external id and status always come from the values computed above.
    old_entity.external_id = merged_external_id
    old_entity.status = merged_status
    old_entity.status_raw_text = merged_status_raw_text

    return old_entity
コード例 #12
0
def clear_db_ids(db_entities: Sequence[CoreEntity]):
    """Recursively calls clear_id() on each of the |db_entities| and on every
    entity reachable from them through set forward-edge fields, removing the
    primary keys from the whole graph.
    """
    for db_entity in db_entities:
        db_entity.clear_id()
        child_field_names = get_set_entity_field_names(
            db_entity, EntityFieldType.FORWARD_EDGE)
        for child_field_name in child_field_names:
            children = db_entity.get_field_as_list(child_field_name)
            clear_db_ids(children)
コード例 #13
0
def _base_entity_match(
        a: DatabaseEntity,
        b: DatabaseEntity,
        skip_fields: Set[str],
        allow_null_mismatch: bool = False
) -> bool:
    """Determines whether two objects of the same type are an entity match.

    Placeholders never match. When either object has an external id, the
    decision is made on external id equality alone. Otherwise every flat field
    set on either object is compared.

    Args:
        a: The first entity to match
        b: The second entity to match
        skip_fields: A set of names of fields that are ignored during the flat field comparison. The primary key
            is always ignored.
        allow_null_mismatch: When True, a field that is None on one (or both) of the objects does not disqualify
            the match.

    Returns:
        True if the two objects are considered a match, False otherwise.
    """

    if any(is_placeholder(candidate) for candidate in (a, b)):
        return False

    if a.get_external_id() or b.get_external_id():
        return a.get_external_id() == b.get_external_id()

    # Every flat field that is populated on at least one of the two objects.
    flat_names_a = get_set_entity_field_names(a, EntityFieldType.FLAT_FIELD)
    flat_names_b = get_set_entity_field_names(b, EntityFieldType.FLAT_FIELD)

    for name in flat_names_a | flat_names_b:
        if name == a.get_class_id_name() or name in skip_fields:
            continue

        a_value = a.get_field(name)
        b_value = b.get_field(name)

        # A null on either side is forgiven when null mismatches are allowed.
        null_tolerated = allow_null_mismatch and (a_value is None or b_value is None)
        if not null_tolerated and a_value != b_value:
            return False

    return True
コード例 #14
0
def _get_root_entity_helper(entity: Entity) -> Optional[Type]:
    """Returns the class of the first non-placeholder entity found at or below
    |entity|, or None when there is none.

    Only the first child of each set forward-edge field is followed.
    """
    if not is_placeholder(entity):
        return entity.__class__

    # Lazily evaluated, so the search stops at the first field that yields a
    # class.
    candidates = (
        _get_root_entity_helper(get_field_as_list(entity, edge_name)[0])
        for edge_name in get_set_entity_field_names(
            entity, EntityFieldType.FORWARD_EDGE))
    return next(
        (found for found in candidates if found is not None), None)
コード例 #15
0
 def test_getEntityRelationshipFieldNames_backedges(self) -> None:
     # Of the five populated fields, only `person` is expected to be reported
     # for EntityFieldType.BACK_EDGE.
     sentence_group = schema.StateSentenceGroup(
         state_code="US_XX",
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID,
     )
     expected_names = {"person"}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.BACK_EDGE
     )
     self.assertEqual(expected_names, actual_names)
コード例 #16
0
 def test_getDbEntityRelationshipFieldNames_children(self) -> None:
     # Of the five populated fields, only `fines` is expected to be reported
     # for EntityFieldType.FORWARD_EDGE.
     sentence_group = schema.StateSentenceGroup(
         state_code="US_XX",
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID,
     )
     expected_names = {"fines"}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FORWARD_EDGE
     )
     self.assertEqual(expected_names, actual_names)
コード例 #17
0
def _get_all_entities_of_cls_helper(entity: Entity, cls: Type,
                                    seen_ids: Set[int],
                                    seen_entities: List[Entity]):
    """Recursively collects the instances of |cls| at or below |entity|.

    A newly found instance has its id() added to |seen_ids| and is appended to
    |seen_entities|; the search does not descend below it. Any other entity,
    including an instance of |cls| that was already recorded, has its set
    forward-edge fields walked.
    """
    entity_key = id(entity)
    if isinstance(entity, cls) and entity_key not in seen_ids:
        seen_ids.add(entity_key)
        seen_entities.append(entity)
        return

    forward_edge_names = get_set_entity_field_names(
        entity, EntityFieldType.FORWARD_EDGE)
    for edge_name in forward_edge_names:
        children = get_field_as_list(entity, edge_name)
        for child in children:
            _get_all_entities_of_cls_helper(child, cls, seen_ids,
                                            seen_entities)
コード例 #18
0
 def test_getEntityRelationshipFieldNames_all(self) -> None:
     # EntityFieldType.ALL is expected to report every field populated in
     # the constructor call below.
     sentence_group = schema.StateSentenceGroup(
         state_code="US_XX",
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID,
     )
     expected_names = {
         "state_code",
         "fines",
         "person",
         "person_id",
         "sentence_group_id",
     }
     actual_names = get_set_entity_field_names(sentence_group, EntityFieldType.ALL)
     self.assertEqual(expected_names, actual_names)
コード例 #19
0
 def test_getEntityRelationshipFieldNames_foreignKeys(self) -> None:
     # Of the five populated fields, only `person_id` is expected to be
     # reported for EntityFieldType.FOREIGN_KEYS.
     sentence_group = schema.StateSentenceGroup(
         state_code="US_XX",
         fines=[schema.StateFine()],
         person=schema.StatePerson(),
         person_id=_ID,
         sentence_group_id=_ID,
     )
     expected_names = {"person_id"}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FOREIGN_KEYS
     )
     self.assertEqual(expected_names, actual_names)
コード例 #20
0
def _get_all_entities_of_type_helper(root: Entity, cls: Type, seen: Set[int],
                                     entities_of_type: List[Entity]):
    """Recursively collects the instances of |cls| at or below |root|.

    Args:
        root: The entity at which the search starts.
        cls: The type of object to collect.
        seen: The id()s of the objects collected so far. Updated in place so
            that an object reachable through several paths is collected only
            once.
        entities_of_type: The collected objects, in discovery order. Updated
            in place.

    The search never descends below an instance of |cls|; for every other
    entity, all children in its set forward-edge fields are visited.
    """
    if isinstance(root, cls):
        root_id = id(root)
        if root_id not in seen:
            # Record the object so that later encounters are skipped; without
            # this the `seen` guard above could never trigger.
            seen.add(root_id)
            entities_of_type.append(root)
        return

    for field_name in get_set_entity_field_names(root,
                                                 EntityFieldType.FORWARD_EDGE):
        for field in get_field_as_list(root, field_name):
            _get_all_entities_of_type_helper(field, cls, seen,
                                             entities_of_type)
コード例 #21
0
def default_merge_flat_fields(
        *, new_entity: DatabaseEntity, old_entity: DatabaseEntity) -> DatabaseEntity:
    """Copies every set non-relationship field of the |new_entity| onto the |old_entity| and returns the
    |old_entity|.

    The primary key is never copied, and a `status` field is left untouched when the |new_entity| only carries a
    default status.
    """
    set_flat_names = get_set_entity_field_names(new_entity, EntityFieldType.FLAT_FIELD)
    for flat_name in set_flat_names:
        if flat_name == old_entity.get_class_id_name():
            continue

        # A default status on the new entity must not overwrite the old one.
        if flat_name != 'status' or not new_entity.has_default_status():
            old_entity.set_field(flat_name, new_entity.get_field(flat_name))

    return old_entity
コード例 #22
0
def _base_entity_match(a: DatabaseEntity, b: DatabaseEntity) -> bool:
    """Returns whether the entities |a| and |b| are a match.

    Placeholders never match. When either entity has an external id, the
    entities match exactly when their external ids are equal. Otherwise they
    match when every flat field set on either entity, the primary key aside,
    holds equal values on both.
    """
    if is_placeholder(a) or is_placeholder(b):
        return False

    if a.get_external_id() or b.get_external_id():
        return a.get_external_id() == b.get_external_id()

    flat_names_a = get_set_entity_field_names(a, EntityFieldType.FLAT_FIELD)
    flat_names_b = get_set_entity_field_names(b, EntityFieldType.FLAT_FIELD)
    for name in flat_names_a | flat_names_b:
        # The primary key takes no part in the comparison.
        if name != a.get_class_id_name() \
                and a.get_field(name) != b.get_field(name):
            return False

    return True
コード例 #23
0
def _populate_multiparent_map(entity: Entity, entity_cls: Type,
                              multiparent_map: Dict[str,
                                                    List[_EntityWithParents]]):
    """Walks every child below the provided |entity| and registers those of
    type |entity_cls| in the provided |multiparent_map|, keyed by external_id.

    Each map entry is a list of _EntityWithParents, one per distinct child
    object with that external_id, holding the (parent, field name) links
    through which the child was reached. Children without an external_id are
    not registered.
    """
    for edge_name in get_set_entity_field_names(
            entity, EntityFieldType.FORWARD_EDGE):
        linked_parent = _LinkedParents(entity, edge_name)
        for child in get_field_as_list(entity, edge_name):
            # Descend first so that deeper children are registered as well.
            _populate_multiparent_map(child, entity_cls, multiparent_map)

            if not isinstance(child, entity_cls):
                continue

            # All persistence entities are ExternalIdEntities
            typed_child = cast(ExternalIdEntity, child)
            external_id = typed_child.external_id

            # Entities are only grouped when they share an external_id, so
            # there is nothing to register without one.
            if not external_id:
                continue

            # First sighting of this external_id: start a new entry.
            if external_id not in multiparent_map:
                multiparent_map[external_id] = [
                    _EntityWithParents(typed_child, [linked_parent])]
                continue

            known_entities = multiparent_map[external_id]

            # If this very object is already tracked, just record the
            # additional parent link on it.
            already_tracked = False
            for tracked in known_entities:
                if tracked.entity is typed_child:
                    already_tracked = True
                    tracked.linked_parents.append(linked_parent)

            # Otherwise it is a different object sharing the external_id.
            if not already_tracked:
                known_entities.append(
                    _EntityWithParents(typed_child, [linked_parent]))
コード例 #24
0
 def test_getEntityRelationshipFieldNames_children(self) -> None:
     # Only `fines` is expected to be reported for
     # EntityFieldType.FORWARD_EDGE, although flat fields and `person` are
     # populated too.
     sentence_group = StateSentenceGroup.new_with_defaults(
         state_code="US_XX",
         status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
         fines=[
             StateFine.new_with_defaults(
                 state_code="US_XX", status=StateFineStatus.PRESENT_WITHOUT_INFO
             )
         ],
         person=[StatePerson.new_with_defaults(state_code="US_XX")],
         sentence_group_id=_ID,
     )
     expected_names = {"fines"}
     actual_names = get_set_entity_field_names(
         sentence_group, EntityFieldType.FORWARD_EDGE
     )
     self.assertEqual(expected_names, actual_names)
コード例 #25
0
def _default_merge_flat_fields(*, new_entity: Entity, old_entity: Entity) \
        -> Entity:
    """Copies every set non-relationship field of the |new_entity| onto the
    |old_entity| and returns the |old_entity|.

    A `status` field is left untouched when the |new_entity| only carries a
    default status.
    """
    set_flat_names = get_set_entity_field_names(
        new_entity, EntityFieldType.FLAT_FIELD)
    for flat_name in set_flat_names:
        # A default status on the new entity must not overwrite the old one.
        keeps_old_status = \
            flat_name == 'status' and has_default_status(new_entity)
        if not keeps_old_status:
            set_field(old_entity, flat_name,
                      get_field(new_entity, flat_name))

    return old_entity
コード例 #26
0
def convert_to_placeholder(entity: DatabaseEntity):
    """Strips the flat fields of |entity| in place.

    The primary key and `state_code` are left as they are. `status`, `incarceration_type`, `court_type` and
    `agent_type` are overwritten with fixed values; every other set flat field is cleared.
    """
    for flat_name in get_set_entity_field_names(entity, EntityFieldType.FLAT_FIELD):
        # These two fields survive the conversion untouched.
        if flat_name in (entity.get_class_id_name(), 'state_code'):
            continue

        if flat_name == 'status':
            entity.set_field(flat_name, enum_canonical_strings.present_without_info)
        elif flat_name == 'incarceration_type':
            entity.set_field(flat_name, StateIncarcerationType.STATE_PRISON.value)
        elif flat_name == 'court_type':
            entity.set_field(flat_name, StateCourtType.PRESENT_WITHOUT_INFO.value)
        elif flat_name == 'agent_type':
            entity.set_field(flat_name, StateAgentType.PRESENT_WITHOUT_INFO.value)
        else:
            entity.clear_field(flat_name)
コード例 #27
0
def _match_placeholder_tree(
        *, ingested_placeholder_tree: EntityTree,
        db_entity_trees: List[EntityTree],
        matched_entities_by_db_ids: Dict[int, Entity],
        root_entity_cls) \
        -> IndividualMatchResult:
    """Attempts to match the provided |ingested_placeholder_tree| to entities in
    the provided |db_entity_trees| based off any child matches. When such a
    match is found, the child is moved off of the ingested entity and onto the
    matched db entity.

    Args:
        ingested_placeholder_tree: The tree whose entity's children are
            matched. Its child fields are rewritten in place so that they only
            hold the children that stayed on the placeholder.
        db_entity_trees: The trees whose children are match candidates.
        matched_entities_by_db_ids: Passed through to
            _add_match_to_matched_entities_cache for every DB parent that
            receives a child (presumably a cache keyed by DB id - confirm
            against that helper).
        root_entity_cls: Forwarded unchanged to the child matching.

    Returns the results of matching as an IndividualMatchResult. Its
    merged_entity_trees holds the parent trees that received a merged child
    and, if any did and the placeholder still has children, the placeholder
    tree itself. Its error_count is the sum of the error counts of all child
    matches.
    """
    updated_entity_trees: List[EntityTree] = []
    error_count = 0
    match_results_by_child = _get_match_results_for_all_children(
        ingested_entity_tree=ingested_placeholder_tree,
        db_entity_trees=db_entity_trees,
        root_entity_cls=root_entity_cls)

    # Initialize so pylint doesn't yell. These three names are read by the
    # closure below and are (re)bound by the loops further down before every
    # call to it.
    child_field_name = None
    child_match_result = None
    placeholder_children: List[Entity] = []

    def resolve_child_match_result():
        """Resolves any child matches by removing the child from the ingested
        placeholder entity and adding the child onto the corresponding DB
        entity.

        Operates on the current values of |child_field_name|,
        |child_match_result| and |placeholder_children| from the enclosing
        scope, and raises an EntityMatchingError if either of the first two is
        not set.
        """

        if not child_field_name or not child_match_result:
            raise EntityMatchingError(
                f"Expected child_field_name and child_match_result to be set, "
                f"but instead got {child_field_name} and {child_match_result} "
                f"respectively.",
                ingested_placeholder_tree.entity.get_entity_name())

        # If the child wasn't matched, leave it on the placeholder object.
        if not child_match_result.merged_entity_trees:
            placeholder_children.append(
                child_match_result.ingested_entity_tree.entity)
            return

        # Ensure the merged children are on the correct entity
        for merged_child_tree in child_match_result.merged_entity_trees:
            merged_parent_tree = merged_child_tree.generate_parent_tree()

            # If one of the merged parents is the ingested placeholder entity,
            # simply keep track of the child in placeholder_children.
            if merged_parent_tree.entity == ingested_placeholder_tree.entity:
                placeholder_children.append(
                    child_match_result.ingested_entity_tree.entity)
                continue

            # Otherwise attach the merged child to the parent it matched to.
            add_child_to_entity(entity=merged_parent_tree.entity,
                                child_field_name=child_field_name,
                                child_to_add=merged_child_tree.entity)

            # Keep track of all db parents of the merged children. Each parent
            # is registered (and added to the result) only once.
            updated_entities = [m.entity for m in updated_entity_trees]
            if merged_parent_tree.entity not in updated_entities:
                _add_match_to_matched_entities_cache(
                    db_entity_match=merged_parent_tree.entity,
                    ingested_entity=ingested_placeholder_tree.entity,
                    matched_entities_by_db_ids=matched_entities_by_db_ids)
                updated_entity_trees.append(merged_parent_tree)

    for child_field_name, match_results in match_results_by_child:
        # Start a fresh list for this field; the closure appends to it every
        # child that stays on the placeholder.
        placeholder_children = []
        error_count += match_results.error_count
        for child_match_result in match_results.individual_match_results:
            resolve_child_match_result()
        # Replace the field's contents with only the children that remain on
        # the placeholder.
        set_field_from_list(ingested_placeholder_tree.entity, child_field_name,
                            placeholder_children)

    # If we updated any of the entity trees, check to see if the placeholder
    # tree still has any children. If it doesn't have any children, it doesn't
    # need to be committed into our DB.
    if updated_entity_trees:
        set_child_fields = get_set_entity_field_names(
            ingested_placeholder_tree.entity,
            entity_field_type=EntityFieldType.FORWARD_EDGE)
        if set_child_fields:
            updated_entity_trees.append(ingested_placeholder_tree)

    return IndividualMatchResult(
        ingested_entity_tree=ingested_placeholder_tree,
        merged_entity_trees=updated_entity_trees,
        error_count=error_count)