コード例 #1
0
def _merge_matches_across_cycles(matching_views, org_id, given_state_id,
                                 StateClass):
    """
    Helper for match_merge_link(): merge matching -States Cycle by Cycle.

    The matching -Views are grouped by Cycle. For every Cycle holding more
    than one matching -View, the associated -States are merged in ascending
    order of their `updated` field.

    When the given -State is part of a group, it is moved to the end of the
    merge order (the remaining IDs keep their `updated` ordering) so that it
    is given precedence, and the ID of the resulting merged -State becomes
    the returned target ID.
    (presumably merge_states_with_views favors later IDs - verify there)

    :param matching_views: QuerySet of matching -Views
    :param org_id: organization ID forwarded to merge_states_with_views
    :param given_state_id: ID of the -State that should take precedence
    :param StateClass: model class of the -States being merged
    :return: (number of -States passed to merges, target -State ID). The
        target is the given -State ID unless it was involved in a merge, in
        which case it is the ID of that merged -State.
    """
    # One row of state IDs per Cycle; Cycles with a single match need no merge.
    per_cycle_state_ids = (
        matching_views
        .values('cycle_id')
        .annotate(state_ids=ArrayAgg('state_id'), match_count=Count('id'))
        .filter(match_count__gt=1)
        .values_list('state_ids', flat=True)
    )

    merged_total = 0
    resulting_state_id = given_state_id

    for cycle_state_ids in per_cycle_state_ids:
        merge_order = list(
            StateClass.objects
            .filter(id__in=cycle_state_ids)
            .order_by('updated')
            .values_list('id', flat=True)
        )

        includes_given = given_state_id in merge_order
        if includes_given:
            # Move the given -State to the end so it takes precedence.
            merge_order.remove(given_state_id)
            merge_order.append(given_state_id)

        merged_state = merge_states_with_views(merge_order, org_id,
                                               'System Match', StateClass)

        if includes_given:
            # Track where the given -State ended up after merging.
            resulting_state_id = merged_state.id

        merged_total += len(merge_order)

    return merged_total, resulting_state_id
コード例 #2
0
def whole_org_match_merge(org_id):
    """
    Match and merge every PropertyView and TaxLotView -State of an Org.

    PropertyStates are processed first, then TaxLotStates. For each Cycle of
    the organization:
        - Look at the -States attached to the -Views of that Cycle.
        - Skip -States matched by empty_criteria_filter() (per the original
          notes: those where all matching criteria are None).
        - Group the IDs of -States sharing the same matching column values.
        - For each group with more than one member, merge the -States via
          merge_states_with_views() with the 'System Match' label.

    :param org_id: ID of the organization to process
    :return: dict keyed by 'PropertyState' / 'TaxLotState', each holding
        'merged_count' (total number of -States passed to merges) and
        'new_merged_state_ids' (IDs of the resulting merged -States)
    """
    summary = {
        class_name: {'merged_count': 0, 'new_merged_state_ids': []}
        for class_name in ('PropertyState', 'TaxLotState')
    }

    class_pairs = (
        (PropertyState, PropertyView),
        (TaxLotState, TaxLotView),
    )

    for StateClass, ViewClass in class_pairs:
        stats = summary[StateClass.__name__]
        column_names = matching_criteria_column_names(org_id, StateClass.__name__)
        cycle_ids = Cycle.objects.filter(organization_id=org_id).values_list('id', flat=True)

        for cycle_id in cycle_ids:
            existing_cycle_views = ViewClass.objects.filter(cycle_id=cycle_id)

            # Group -States in this Cycle by their matching column values and
            # keep only the groups that actually contain a match.
            matched_id_groups = (
                StateClass.objects
                .filter(id__in=Subquery(existing_cycle_views.values('state_id')))
                .exclude(**empty_criteria_filter(org_id, StateClass))
                .values(*column_names)
                .annotate(matched_ids=ArrayAgg('id'), matched_count=Count('id'))
                .values_list('matched_ids', flat=True)
                .filter(matched_count__gt=1)
            )

            for state_ids in matched_id_groups:
                # Ascending ID order; intended to give priority to the most
                # recently uploaded record (assumes higher ID == newer).
                state_ids.sort()
                merged_state = merge_states_with_views(state_ids, org_id, 'System Match', StateClass)

                stats['merged_count'] += len(state_ids)
                stats['new_merged_state_ids'].append(merged_state.id)

    return summary
コード例 #3
0
def match_merge_in_cycle(view_id, StateClassName):
    """
    Given a -View ID, match and merge the related -State within its Cycle.

    Match-eligible -States are scoped to those associated with -Views within
    the same Cycle as the given -View.

    If the -State associated with the -View doesn't have any matching criteria
    values populated, the -State is not eligible for a match merge.

    :param view_id: primary key of the PropertyView / TaxLotView
    :param StateClassName: 'PropertyState' or 'TaxLotState'
    :return: (count, view_id) where count is the number of -States merged
        (including the given one) and view_id is the ID of the -View now
        attached to the merged -State; (0, None) when nothing was merged
    :raises ValueError: if StateClassName is not a supported class name
    """
    if StateClassName == 'PropertyState':
        StateClass = PropertyState
        ViewClass = PropertyView
    elif StateClassName == 'TaxLotState':
        StateClass = TaxLotState
        ViewClass = TaxLotView
    else:
        # Fail fast with a clear message instead of hitting an unbound local
        # further down.
        raise ValueError(
            "StateClassName must be 'PropertyState' or 'TaxLotState', got %r"
            % (StateClassName,))

    view = ViewClass.objects.get(pk=view_id)
    org_id = view.state.organization_id

    # Check if associated -State has empty matching criteria.
    if StateClass.objects.filter(pk=view.state_id, **empty_criteria_filter(org_id, StateClass)).exists():
        return 0, None

    matching_criteria = matching_filter_criteria(org_id, StateClassName, view.state)
    views_in_cycle = ViewClass.objects.filter(cycle_id=view.cycle_id)
    state_matches = StateClass.objects.filter(
        pk__in=Subquery(views_in_cycle.values('state_id')),
        **matching_criteria
    ).exclude(pk=view.state_id)

    # Other matches are ordered by their `updated` field (ascending).
    state_ids = list(
        state_matches.order_by('updated').values_list('id', flat=True)
    )
    state_ids.append(view.state_id)  # Excluded above and appended to give merge precedence
    count = len(state_ids)

    # Only the given -State is present, so there is nothing to merge.
    if count == 1:
        return 0, None

    # The original comment states that this merge action ignores merge
    # protection; that behavior lives in merge_states_with_views.
    merged_state = merge_states_with_views(state_ids, org_id, 'System Match', StateClass)
    merged_view_id = ViewClass.objects.get(state_id=merged_state.id).id
    return count, merged_view_id
コード例 #4
0
def states_to_views(unmatched_state_ids, org, cycle, StateClass):
    """
    The purpose of this method is to take incoming -States and apply them to a
    -View. In the process of doing so, -States could be flagged for "deletion"
    (and not applied to a -View), merged with existing -States, or found to be
    brand new. Regardless, the goal is to ultimately associate -States to -Views.

    For incoming -States needing to be matched to an existing -State, merge
    them and take the existing -State's -View to be the -View for the new merged
    state.

    For directly promote-able -States, a new -View is created via
    state.promote(cycle) (per the original notes this also creates the
    canonical Property or TaxLot object).

    :param unmatched_state_ids: IDs of the incoming -States to process
    :param org: Organization object
    :param cycle: Cycle object
    :param StateClass: PropertyState or TaxLotState
    :return: tuple of (de-duplicated list of processed -Views,
        duplicate_count, new_count, matched_count,
        merged_between_existing_count)
    :raises IntegrityError: re-raised with an explanatory message when the
        merge/promote transaction fails with an IntegrityError
    """
    table_name = StateClass.__name__

    # NOTE(review): ViewClass stays unbound if StateClass is neither of these
    # two classes.
    if table_name == 'PropertyState':
        ViewClass = PropertyView
    elif table_name == 'TaxLotState':
        ViewClass = TaxLotView

    # Identify existing used -States
    # (lazy QuerySets; they are embedded as subqueries below)
    existing_cycle_views = ViewClass.objects.filter(cycle_id=cycle)
    existing_states = StateClass.objects.filter(
        pk__in=Subquery(existing_cycle_views.values('state_id')))

    # Apply DATA_STATE_DELETE to incoming duplicate -States of existing -States in Cycle
    # Duplicates are detected by equal hash_object; update() returns the
    # number of rows it changed, which is reported as duplicate_count.
    duplicate_states = StateClass.objects.filter(
        pk__in=unmatched_state_ids,
        hash_object__in=Subquery(existing_states.values('hash_object')))
    duplicate_count = duplicate_states.update(data_state=DATA_STATE_DELETE)

    column_names = matching_criteria_column_names(org.id, table_name)

    # For the remaining incoming -States (filtering those duplicates), identify
    # -States with all matching criteria being None. These aren't eligible for matching.
    empty_matching_criteria = empty_criteria_filter(StateClass, column_names)
    promote_states = StateClass.objects.filter(
        pk__in=unmatched_state_ids, **empty_matching_criteria).exclude(
            pk__in=Subquery(duplicate_states.values('id')))

    # Identify and filter out -States that have been "handled".
    handled_states = promote_states | duplicate_states
    unmatched_states = StateClass.objects.filter(
        pk__in=unmatched_state_ids).exclude(
            pk__in=Subquery(handled_states.values('id')))

    # For the remaining -States, search for a match within the -States that are attached to -Views.
    # If one match is found, pass that along.
    # If multiple matches are found, merge them together, pass along the resulting record.
    # Otherwise, add current -State to be promoted as is.
    merged_between_existing_count = 0
    # Pairs of (existing or merged-existing -State, incoming -State)
    merge_state_pairs = []
    for state in unmatched_states:
        matching_criteria = matching_filter_criteria(state, column_names)
        existing_state_matches = StateClass.objects.filter(pk__in=Subquery(
            existing_cycle_views.values('state_id')),
                                                           **matching_criteria)
        count = existing_state_matches.count()

        if count > 1:
            merged_between_existing_count += count
            # Existing matches are merged in ascending order of `updated`.
            existing_state_ids = list(
                existing_state_matches.order_by('updated').values_list(
                    'id', flat=True))
            # Per the original note, the following merge action ignores merge protection
            # (behavior of merge_states_with_views - verify there).
            merged_state = merge_states_with_views(existing_state_ids, org.id,
                                                   'System Match', StateClass)
            merge_state_pairs.append((merged_state, state))
        elif count == 1:
            merge_state_pairs.append((existing_state_matches.first(), state))
        else:
            # No existing match: promote this -State directly.
            promote_states = promote_states | StateClass.objects.filter(
                pk=state.id)

    # Process -States into -Views either directly (promoted_ids) or post-merge (merge_state_pairs).
    _log.debug("There are %s merge_state_pairs and %s promote_states" %
               (len(merge_state_pairs), promote_states.count()))
    priorities = Column.retrieve_priorities(org.pk)
    processed_views = []
    promoted_ids = []
    merged_state_ids = []
    try:
        # All -View reassignments and promotions happen in one transaction.
        with transaction.atomic():
            for state_pair in merge_state_pairs:
                existing_state, newer_state = state_pair
                existing_view = ViewClass.objects.get(
                    state_id=existing_state.id)

                # Merge -States and assign new/merged -State to existing -View
                merged_state = save_state_match(existing_state, newer_state,
                                                priorities)
                existing_view.state = merged_state
                existing_view.save()

                processed_views.append(existing_view)
                merged_state_ids.append(merged_state.id)

            for state in promote_states:
                promoted_ids.append(state.id)
                created_view = state.promote(cycle)
                processed_views.append(created_view)
    except IntegrityError as e:
        raise IntegrityError("Could not merge results with error: %s" % (e))

    new_count = len(promoted_ids)
    # update merge_state while excluding any states that were a product of a previous, file-inclusive merge
    StateClass.objects.filter(pk__in=promoted_ids).exclude(
        merge_state=MERGE_STATE_MERGED).update(merge_state=MERGE_STATE_NEW)
    # matched_count is the number of rows changed by this update().
    matched_count = StateClass.objects.filter(pk__in=merged_state_ids).update(
        data_state=DATA_STATE_MATCHING, merge_state=MERGE_STATE_MERGED)

    # set() removes -Views that were processed more than once.
    return list(
        set(processed_views)
    ), duplicate_count, new_count, matched_count, merged_between_existing_count
コード例 #5
0
def whole_org_match_merge_link(org_id, state_class_name, proposed_columns=None):
    """
    For a given organization, run a match merge round for each cycle in
    isolation. Afterwards, run a match link round across all cycles at once.

    In this context, a Property/TaxLot Set refers to the -State, canonical
    record, and -View records associated by the -View.

    Algorithm - Run for either Property Sets or for TaxLot Sets:
        For each Cycle, run match and merges.
            - Focus on -States associated with -Views in this Cycle.
            - Ignore -States where all matching criteria is None.
            - Group -State IDs by whether they match each other.
            - Ignore each group of size 1 (not matched).
            - For each remaining group, run merge logic so that there's only one
            Set left. Any labels, notes, pairings, and meters are transferred to
            and persisted in this Set.

        Across all Cycles, run match and links.
            - Focus on all -States and canonical records associated to -Views
            in this organization.
            - Identify canonical records that currently have no links. These are
            unaffected during this process if the record remains unlinked. Also,
            these are canonical records that can potentially be reused.
            - Scope the next steps to ignore -Views with -States where all
            matching criteria is None.
            - Create link groups of canonical IDs and -View IDs according to
            whether their associated -States match each other.
            - Ignore groups of size 1 where the single member was previously
            unlinked as well.
            - For each remaining group, apply a new canonical record to
            each of -Views in this group. Any meters are transferred to this
            new canonical record.
            - For any records that had empty (all None) matching criteria
            values, disassociate any previous links by applying a new canonical
            record to each.
            - Delete any unused canonical records.

    :param org_id: ID of the organization to process
    :param state_class_name: 'PropertyState' or 'TaxLotState'
    :param proposed_columns: optional iterable of column names to match on.
        When non-empty, the run is a preview: 'address_line_1' is replaced
        by 'normalized_address', the result of properties_across_cycles() /
        taxlots_across_cycles() is returned, and the transaction is rolled
        back. When empty or None, the organization's configured matching
        criteria columns are used and changes are kept.
    :return: for a regular run, a dict keyed by 'PropertyState' /
        'TaxLotState' holding 'merged_count' and 'linked_sets_count'; for a
        preview run, whatever the *_across_cycles helper returns
    :raises ValueError: if state_class_name is not a supported class name
    """
    # Resolve the model classes first so an unsupported name fails fast with
    # a clear error instead of an unbound local later on.
    if state_class_name == 'PropertyState':
        StateClass = PropertyState
        ViewClass = PropertyView
        CanonicalClass = Property
    elif state_class_name == 'TaxLotState':
        StateClass = TaxLotState
        ViewClass = TaxLotView
        CanonicalClass = TaxLot
    else:
        raise ValueError(
            "state_class_name must be 'PropertyState' or 'TaxLotState', got %r"
            % (state_class_name,))

    summary = {
        'PropertyState': {
            'merged_count': 0,
            'linked_sets_count': 0,
        },
        'TaxLotState': {
            'merged_count': 0,
            'linked_sets_count': 0,
        },
    }

    cycle_ids = Cycle.objects.filter(organization_id=org_id).values_list(
        'id', flat=True)

    if proposed_columns:
        # Use column names as given (replacing address_line_1 with normalized_address)
        column_names = [
            column_name
            if column_name != 'address_line_1' else 'normalized_address'
            for column_name in proposed_columns
        ]
        preview_run = True
    else:
        column_names = matching_criteria_column_names(org_id, state_class_name)
        preview_run = False

    empty_matching_criteria = empty_criteria_filter(StateClass, column_names)

    with transaction.atomic():
        # Match merge within each Cycle
        for cycle_id in cycle_ids:
            view_in_cycle = ViewClass.objects.filter(cycle_id=cycle_id)

            matched_id_groups = StateClass.objects.\
                filter(id__in=Subquery(view_in_cycle.values('state_id'))).\
                exclude(**empty_matching_criteria).\
                values(*column_names).\
                annotate(matched_ids=ArrayAgg('id'), matched_count=Count('id')).\
                values_list('matched_ids', flat=True).\
                filter(matched_count__gt=1)

            for state_ids in matched_id_groups:
                # Merge order within a group is ascending by `updated`.
                ordered_ids = list(
                    StateClass.objects.filter(
                        id__in=state_ids).order_by('updated').values_list(
                            'id', flat=True))

                merge_states_with_views(ordered_ids, org_id, 'System Match',
                                        StateClass)

                summary[StateClass.__name__]['merged_count'] += len(state_ids)

        # Match link across the whole Organization
        # Prepend 'state__' to names used for filtering so that filtering can be done across associations
        state_appended_col_names = {
            'state__' + col_name
            for col_name in column_names
        }
        state_appended_empty_matching_criteria = {
            'state__' + col_name: v
            for col_name, v in empty_matching_criteria.items()
        }

        canonical_id_col = 'property_id' if StateClass == PropertyState else 'taxlot_id'

        # Looking at all -Views in Org across Cycles
        org_views = ViewClass.objects.\
            filter(cycle_id__in=cycle_ids).\
            select_related('state')

        # Identify all canonical_ids that are currently used once and are potentially reusable
        # NOTE(review): this QuerySet is lazy. It is first evaluated by the
        # membership test inside the loop below (possibly after earlier
        # iterations already re-pointed some -Views) and is used again as a
        # subquery further down - confirm that this timing is intended.
        reusable_canonical_ids = org_views.\
            values(canonical_id_col).\
            annotate(use_count=Count(canonical_id_col)).\
            values_list(canonical_id_col, flat=True).\
            filter(use_count=1)

        # Ignoring -Views associated to -States with empty matching criteria, group by columns
        link_groups = org_views.\
            exclude(**state_appended_empty_matching_criteria).\
            values(*state_appended_col_names).\
            annotate(
                canonical_ids=ArrayAgg(canonical_id_col),
                view_ids=ArrayAgg('id'),
                link_count=Count('id')
            ).\
            values_list('canonical_ids', 'view_ids', 'link_count')

        unused_canonical_ids = []
        for canonical_ids, view_ids, link_count in link_groups:
            # If the canonical record was unlinked and is still unlinked, do nothing
            if link_count == 1 and canonical_ids[0] in reusable_canonical_ids:
                continue

            # Otherwise, create a new canonical record, copy meters if applicable, and apply the new record to old -Views
            new_record = CanonicalClass.objects.create(organization_id=org_id)

            if CanonicalClass == Property:
                canonical_ids.sort(
                    reverse=True
                )  # Ensures priority given by most recently created canonical record
                for canonical_id in canonical_ids:
                    new_record.copy_meters(canonical_id, source_persists=True)

            ViewClass.objects.filter(id__in=view_ids).update(
                **{canonical_id_col: new_record.id})

            summary[StateClass.__name__]['linked_sets_count'] += 1

            # The replaced canonical records are queued for deletion below.
            unused_canonical_ids += canonical_ids

        # For records with empty criteria and without reusable canonical IDs, apply a new ID.
        empty_criteria_views = ViewClass.objects.\
            select_related('state').\
            filter(cycle_id__in=cycle_ids, **state_appended_empty_matching_criteria).\
            exclude(**{canonical_id_col + "__in": reusable_canonical_ids})

        for view in empty_criteria_views:
            # Create a new canonical record, copy meters if applicable, and apply the new record to old -Views
            new_record = CanonicalClass.objects.create(organization_id=org_id)

            if CanonicalClass == Property:
                new_record.copy_meters(getattr(view, canonical_id_col),
                                       source_persists=False)

            setattr(view, canonical_id_col, new_record.id)
            view.save()

        # Also delete these unusable canonical records
        # NOTE(review): values_list() builds a fresh query that runs after the
        # saves above, so it may observe the newly assigned canonical IDs
        # rather than the replaced ones - confirm this collects what is
        # intended.
        unused_canonical_ids += empty_criteria_views.values_list(
            canonical_id_col, flat=True)

        # Delete canonical records that are no longer used.
        CanonicalClass.objects.filter(id__in=unused_canonical_ids).delete()

        # If this was a preview run, capture results here and rollback.
        if preview_run:
            if state_class_name == 'PropertyState':
                summary = properties_across_cycles(org_id, -1, cycle_ids)
            else:
                summary = taxlots_across_cycles(org_id, -1, cycle_ids)

            transaction.set_rollback(True)

    return summary