Example #1
    def fail(self, message, logger, progress_data_key=None):
        """Fails the analysis. Creates an AnalysisMessage and logs it

        :param message: str, message to create an AnalysisMessage with
        :param logger: logging.Logger
        :param progress_data_key: str, fails the progress data if this key is provided
        """
        with transaction.atomic():
            locked_analysis = Analysis.objects.select_for_update().get(
                id=self._analysis_id)

            if progress_data_key is not None:
                progress_data = ProgressData.from_key(progress_data_key)
                progress_data.finish_with_error(message)

            if locked_analysis.in_terminal_state():
                raise AnalysisPipelineException(
                    f'Analysis is already in a terminal state: status {locked_analysis.status}'
                )

            locked_analysis.status = Analysis.FAILED
            locked_analysis.end_time = tz.now()
            locked_analysis.save()

            AnalysisMessage.log_and_create(
                logger=logger,
                type_=AnalysisMessage.ERROR,
                user_message=message,
                debug_message='',
                analysis_id=self._analysis_id,
            )
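For context, a minimal sketch of how a task might report a failure through this method. The task name, the AnalysisPipeline.factory lookup, and the _run_analysis helper are assumptions for illustration, not part of the example above.

import logging

logger = logging.getLogger(__name__)

def task_run_analysis(analysis_id, progress_data_key):
    # Hypothetical caller: on any error, fail() marks the analysis FAILED,
    # finishes the progress data with the error message, and logs an AnalysisMessage
    pipeline = AnalysisPipeline.factory(Analysis.objects.get(id=analysis_id))  # assumed lookup
    try:
        _run_analysis(analysis_id)  # assumed worker function
    except Exception as e:
        pipeline.fail(str(e), logger, progress_data_key=progress_data_key)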
Example #2
    def test_init_by_data(self):
        pd = ProgressData(func_name='test_func_3', unique_id='ghi789')
        pd.total = 100
        pd.save()  # persist the updated total so from_key reads the same data from the cache
        self.assertEqual(pd.key, ':1:SEED:test_func_3:PROG:ghi789')

        pd2 = ProgressData.from_key(pd.key)
        self.assertDictEqual(pd.data, pd2.data)
Example #3
    def test_whole_org_match_merge_link_endpoint_taxlots(self):
        url = reverse('api:v3:organizations-match-merge-link', args=[self.org.id])
        post_params = json.dumps({"inventory_type": "taxlots"})
        raw_result = self.client.post(url, post_params, content_type='application/json')

        self.assertEqual(200, raw_result.status_code)

        raw_content = json.loads(raw_result.content)

        identifier = ProgressData.from_key(raw_content['progress_key']).data['unique_id']
        result_key = "org_match_merge_link_result__%s" % identifier
        summary = get_cache_raw(result_key)

        summary_keys = list(summary.keys())

        self.assertCountEqual(['PropertyState', 'TaxLotState'], summary_keys)

        # try to get result using results endpoint
        get_result_url = reverse('api:v3:organizations-match-merge-link-result', args=[self.org.id]) + '?match_merge_link_id=' + str(identifier)

        get_result_raw_response = self.client.get(get_result_url)
        summary = json.loads(get_result_raw_response.content)

        summary_keys = list(summary.keys())

        self.assertCountEqual(['PropertyState', 'TaxLotState'], summary_keys)
Example #4
def task_create_analysis_property_views(analysis_id,
                                        property_view_ids,
                                        progress_data_key=None):
    """A celery task which batch creates the AnalysisPropertyViews for the analysis.
    It will create AnalysisMessages for any property view IDs that couldn't be
    used to create an AnalysisPropertyView.

    :param analysis_id: int
    :param property_view_ids: list[int]
    :param progress_data_key: str, optional
    :returns: list[int], IDs of the successfully created AnalysisPropertyViews
    """
    if progress_data_key is not None:
        progress_data = ProgressData.from_key(progress_data_key)
        progress_data.step('Copying property data')
    analysis_view_ids, failures = AnalysisPropertyView.batch_create(
        analysis_id, property_view_ids)
    for failure in failures:
        truncated_user_message = f'Failed to copy property data for PropertyView ID {failure.property_view_id}: {failure.message}'
        if len(truncated_user_message) > 255:
            truncated_user_message = truncated_user_message[:252] + '...'
        AnalysisMessage.objects.create(
            analysis_id=analysis_id,
            type=AnalysisMessage.DEFAULT,
            user_message=truncated_user_message,
        )
    return analysis_view_ids
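A sketch of how this task might be wired up, assuming it is registered as a celery task; the start_preparation wrapper and the _finish_preparation follow-up task are hypothetical names.

from celery import chain

def start_preparation(analysis_id, property_view_ids):
    # Create the progress data up front and hand its key to the task so it can step
    progress_data = ProgressData(func_name='prepare_analysis', unique_id=analysis_id)
    progress_data.total = 2  # assumed two steps for this sketch
    progress_data.save()

    # .si ignores the previous task's result; .s would prepend the returned
    # analysis_view_ids to the next task's arguments
    chain(
        task_create_analysis_property_views.si(analysis_id, property_view_ids, progress_data.key),
        _finish_preparation.si(analysis_id, progress_data.key),  # assumed follow-up task
    ).apply_async()

    return progress_data.result()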
Example #5
def delete_organization_inventory(org_pk,
                                  prog_key=None,
                                  chunk_size=100,
                                  *args,
                                  **kwargs):
    """Deletes all properties & taxlots within an organization."""
    sys.setrecursionlimit(5000)  # default is 1000

    progress_data = ProgressData.from_key(
        prog_key) if prog_key else ProgressData(
            func_name='delete_organization_inventory', unique_id=org_pk)

    property_ids = list(
        Property.objects.filter(organization_id=org_pk).values_list('id',
                                                                    flat=True))
    property_state_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list(
            'id', flat=True))
    taxlot_ids = list(
        TaxLot.objects.filter(organization_id=org_pk).values_list('id',
                                                                  flat=True))
    taxlot_state_ids = list(
        TaxLotState.objects.filter(organization_id=org_pk).values_list(
            'id', flat=True))

    total = len(property_ids) + len(property_state_ids) + len(
        taxlot_ids) + len(taxlot_state_ids)

    if total == 0:
        return progress_data.finish_with_success(
            'No inventory data to remove for organization')

    # total steps is the total number of inventory records divided by the chunk size
    progress_data.total = total / float(chunk_size)
    progress_data.save()

    tasks = []
    # we could also use .s instead of .subtask and avoid wrapping the args in a tuple
    for del_ids in batch(property_ids, chunk_size):
        tasks.append(
            _delete_organization_property_chunk.subtask(
                (del_ids, progress_data.key, org_pk)))
    for del_ids in batch(property_state_ids, chunk_size):
        tasks.append(
            _delete_organization_property_state_chunk.subtask(
                (del_ids, progress_data.key, org_pk)))
    for del_ids in batch(taxlot_ids, chunk_size):
        tasks.append(
            _delete_organization_taxlot_chunk.subtask(
                (del_ids, progress_data.key, org_pk)))
    for del_ids in batch(taxlot_state_ids, chunk_size):
        tasks.append(
            _delete_organization_taxlot_state_chunk.subtask(
                (del_ids, progress_data.key, org_pk)))
    chord(tasks,
          interval=15)(_finish_delete.subtask([org_pk, progress_data.key]))

    return progress_data.result()
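A short sketch of how a caller holding the key returned above might poll the deletion's progress; the helper name is hypothetical, and the fields returned are simply whatever ProgressData stores in the cache.

def check_delete_progress(prog_key):
    # Re-load the progress data from the cache; raises if the key is unknown
    progress_data = ProgressData.from_key(prog_key)
    return progress_data.data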
Example #6
def _delete_organization_related_data(org_pk, prog_key):
    # Get all org users
    user_ids = OrganizationUser.objects.filter(
        organization_id=org_pk).values_list('user_id', flat=True)
    users = list(User.objects.filter(pk__in=user_ids))

    Organization.objects.get(pk=org_pk).delete()

    # TODO: Delete measures in BRICR branch

    # Delete any abandoned users.
    for user in users:
        if not OrganizationUser.objects.filter(user_id=user.pk).exists():
            user.delete()

    progress_data = ProgressData.from_key(prog_key)
    return progress_data.result()
Example #7
    def test_key_missing(self):
        with self.assertRaises(Exception) as exc:
            ProgressData.from_key('some_random_key')
        self.assertEqual(str(exc.exception),
                         'Could not find key some_random_key in cache')
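Because from_key raises when the key is not in the cache, callers that may start without a key typically guard the lookup, as in the delete_organization_inventory example above. A minimal sketch of that pattern:

def get_or_create_progress(prog_key, func_name, unique_id):
    # from_key raises if the key was never saved (or has expired from the cache)
    if prog_key:
        return ProgressData.from_key(prog_key)
    return ProgressData(func_name=func_name, unique_id=unique_id)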
Example #8
    def test_whole_org_match_merge_link_preview_endpoint_taxlots(self):
        # Cycle 1 / ImportFile 1 - Create 1 taxlot
        base_taxlot_details = {
            'jurisdiction_tax_lot_id': '1st Non-Match Set',
            'city': 'City 1',
            'district': 'Match Set',
            'import_file_id': self.import_file_1.id,
            'data_state': DATA_STATE_MAPPING,
            'no_default_data': False,
        }

        tls_1 = self.taxlot_state_factory.get_taxlot_state(**base_taxlot_details)

        self.import_file_1.mapping_done = True
        self.import_file_1.save()
        match_buildings(self.import_file_1.id)

        # Cycle 2 / ImportFile 2 - Create 1 unlinked taxlot
        base_taxlot_details['jurisdiction_tax_lot_id'] = '2nd Non-Match Set'
        base_taxlot_details['district'] = 'Match Set'
        base_taxlot_details['import_file_id'] = self.import_file_2.id
        tls_2 = self.taxlot_state_factory.get_taxlot_state(**base_taxlot_details)

        self.import_file_2.mapping_done = True
        self.import_file_2.save()
        match_buildings(self.import_file_2.id)

        # Check that no links exist yet
        self.assertNotEqual(tls_1.taxlotview_set.first().taxlot_id, tls_2.taxlotview_set.first().taxlot_id)

        url = reverse('api:v3:organizations-match-merge-link-preview', args=[self.org.id])
        post_params = json.dumps({
            "inventory_type": "taxlots",
            "add": ['district'],
            "remove": ['jurisdiction_tax_lot_id']
        })
        raw_result = self.client.post(url, post_params, content_type='application/json')

        # Check that there are *still* no links
        self.assertNotEqual(tls_1.taxlotview_set.first().taxlot_id, tls_2.taxlotview_set.first().taxlot_id)

        self.assertEqual(200, raw_result.status_code)

        raw_content = json.loads(raw_result.content)

        identifier = ProgressData.from_key(raw_content['progress_key']).data['unique_id']
        result_key = "org_match_merge_link_result__%s" % identifier
        raw_summary = get_cache_raw(result_key)

        summary = {str(k): v for k, v in raw_summary.items() if v}  # ignore empty cycles

        # Check format of summary
        self.assertCountEqual([str(self.cycle_1.id), str(self.cycle_2.id)], summary.keys())

        # Check that preview shows links would be created
        self.assertEqual(summary[str(self.cycle_1.id)][0]['id'], summary[str(self.cycle_2.id)][0]['id'])

        # try to get result using results endpoint
        get_result_url = reverse('api:v3:organizations-match-merge-link-result', args=[self.org.id]) + '?match_merge_link_id=' + str(identifier)

        get_result_raw_response = self.client.get(get_result_url)
        raw_summary = json.loads(get_result_raw_response.content)

        summary = {str(k): v for k, v in raw_summary.items() if v}  # ignore empty cycles

        # Check format of summary
        self.assertCountEqual([str(self.cycle_1.id), str(self.cycle_2.id)], summary.keys())

        # Check that preview shows links would be created
        self.assertEqual(summary[str(self.cycle_1.id)][0]['id'], summary[str(self.cycle_2.id)][0]['id'])
Example #9
def cache_match_merge_link_result(summary, identifier, progress_key):
    result_key = _get_match_merge_link_key(identifier)
    set_cache_raw(result_key, summary)

    progress_data = ProgressData.from_key(progress_key)
    progress_data.finish_with_success()
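The cached summary is what the results endpoint later returns; a minimal retrieval sketch mirroring the tests above, assuming _get_match_merge_link_key builds the same "org_match_merge_link_result__<identifier>" key used there.

def get_match_merge_link_result(identifier):
    # Look up the summary cached by cache_match_merge_link_result
    result_key = _get_match_merge_link_key(identifier)
    return get_cache_raw(result_key)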
Example #10
def match_and_link_incoming_properties_and_taxlots(file_pk, progress_key):
    """
    Match the incoming properties and taxlots. Then, search for links for them.

    The process starts by identifying the incoming PropertyStates
    then TaxLotStates of an ImportFile. The steps are exactly the same for each:
        - Remove duplicates amongst the -States within the ImportFile.
        - Merge together any matches amongst the -States within the ImportFile.
        - Parse through the remaining -States to ultimately associate them
          to -Views of the current Cycle.
            - Filter duplicates of existing -States.
            - Merge incoming -States into existing -States if they match,
              keeping the existing -View.
        - For these -Views, search for matches across Cycles for linking.

    Throughout the process, the results are captured and a summary of this is
    returned as a dict.

    :param file_pk: ImportFile Primary Key
    :param progress_key: str, ProgressData key used to report progress
    :return results: dict
    """
    from seed.data_importer.tasks import pair_new_states

    import_file = ImportFile.objects.get(pk=file_pk)
    progress_data = ProgressData.from_key(progress_key)

    # Don't query the org table here, just get the organization from the import_record
    org = import_file.import_record.super_organization

    # Set the progress to started - 33%
    progress_data.step('Matching data')

    # Set defaults
    property_duplicates_against_existing_count = 0
    property_duplicates_within_file_count = 0
    property_merges_against_existing_count = 0
    property_merges_between_existing_count = 0
    property_merges_within_file_count = 0
    property_new_count = 0

    tax_lot_duplicates_against_existing_count = 0
    tax_lot_duplicates_within_file_count = 0
    tax_lot_merges_against_existing_count = 0
    tax_lot_merges_between_existing_count = 0
    tax_lot_merges_within_file_count = 0
    tax_lot_new_count = 0

    merged_linked_property_views = []
    merged_linked_taxlot_views = []

    # Get lists and counts of all the properties and tax lots based on the import file.
    incoming_properties = import_file.find_unmatched_property_states()
    property_initial_incoming_count = incoming_properties.count()
    incoming_tax_lots = import_file.find_unmatched_tax_lot_states()
    tax_lot_initial_incoming_count = incoming_tax_lots.count()

    if incoming_properties.exists():
        # Within the ImportFile, filter out the duplicates.
        log_debug("Start Properties filter_duplicate_states")
        promoted_property_ids, property_duplicates_within_file_count = filter_duplicate_states(
            incoming_properties)

        # Within the ImportFile, merge -States together based on user-defined matching_criteria
        log_debug('Start Properties inclusive_match_and_merge')
        promoted_property_ids, property_merges_within_file_count = inclusive_match_and_merge(
            promoted_property_ids, org, PropertyState)

        # Filter Cycle-wide duplicates then merge and/or assign -States to -Views
        log_debug('Start Properties states_to_views')
        merged_property_views, property_duplicates_against_existing_count, property_new_count, property_merges_against_existing_count, property_merges_between_existing_count = states_to_views(
            promoted_property_ids, org, import_file.cycle, PropertyState)

        # Look for links across Cycles
        log_debug('Start Properties link_views')
        merged_linked_property_views = link_views(merged_property_views,
                                                  PropertyView)

    if incoming_tax_lots.exists():
        # Within the ImportFile, filter out the duplicates.
        log_debug("Start TaxLots filter_duplicate_states")
        promoted_tax_lot_ids, tax_lot_duplicates_within_file_count = filter_duplicate_states(
            incoming_tax_lots)

        # Within the ImportFile, merge -States together based on user-defined matching_criteria
        log_debug('Start TaxLots inclusive_match_and_merge')
        promoted_tax_lot_ids, tax_lot_merges_within_file_count = inclusive_match_and_merge(
            promoted_tax_lot_ids, org, TaxLotState)

        # Filter Cycle-wide duplicates then merge and/or assign -States to -Views
        log_debug('Start TaxLots states_to_views')
        merged_linked_taxlot_views, tax_lot_duplicates_against_existing_count, tax_lot_new_count, tax_lot_merges_against_existing_count, tax_lot_merges_between_existing_count = states_to_views(
            promoted_tax_lot_ids, org, import_file.cycle, TaxLotState)

        # Look for links across Cycles
        log_debug('Start TaxLots link_views')
        merged_linked_taxlot_views = link_views(merged_linked_taxlot_views,
                                                TaxLotView)

    log_debug('Start pair_new_states')
    progress_data.step('Pairing data')
    pair_new_states(merged_linked_property_views, merged_linked_taxlot_views)
    log_debug('End pair_new_states')

    return {
        'import_file_records': import_file.num_rows,
        'property_initial_incoming': property_initial_incoming_count,
        'property_duplicates_against_existing': property_duplicates_against_existing_count,
        'property_duplicates_within_file': property_duplicates_within_file_count,
        'property_merges_against_existing': property_merges_against_existing_count,
        'property_merges_between_existing': property_merges_between_existing_count,
        'property_merges_within_file': property_merges_within_file_count,
        'property_new': property_new_count,
        'tax_lot_initial_incoming': tax_lot_initial_incoming_count,
        'tax_lot_duplicates_against_existing': tax_lot_duplicates_against_existing_count,
        'tax_lot_duplicates_within_file': tax_lot_duplicates_within_file_count,
        'tax_lot_merges_against_existing': tax_lot_merges_against_existing_count,
        'tax_lot_merges_between_existing': tax_lot_merges_between_existing_count,
        'tax_lot_merges_within_file': tax_lot_merges_within_file_count,
        'tax_lot_new': tax_lot_new_count,
    }
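A sketch of how a caller might set up the ProgressData handed to this function and finish it once the summary is returned; the wrapper name, the func_name, and the total of three steps are assumptions (the function itself steps twice, for matching and pairing, and its "33%" comment implies a third, final step).

def start_matching(file_pk):
    progress_data = ProgressData(func_name='match_data', unique_id=file_pk)  # assumed func_name
    progress_data.total = 3
    progress_data.save()

    summary = match_and_link_incoming_properties_and_taxlots(file_pk, progress_data.key)
    progress_data.finish_with_success()
    return summary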
Example #11
def _delete_organization_taxlot_state_chunk(del_ids, prog_key, org_pk, *args,
                                            **kwargs):
    """deletes a list of ``del_ids`` and increments the cache"""
    TaxLotState.objects.filter(organization_id=org_pk, pk__in=del_ids).delete()
    progress_data = ProgressData.from_key(prog_key)
    progress_data.step()
Example #12
def _finish_delete(results, org_pk, prog_key):
    sys.setrecursionlimit(1000)  # restore the default (raised to 5000 in delete_organization_inventory)

    progress_data = ProgressData.from_key(prog_key)
    return progress_data.finish_with_success()