def fail(self, message, logger, progress_data_key=None):
    """Fails the analysis. Creates an AnalysisMessage and logs it

    :param message: str, message to create an AnalysisMessage with
    :param logger: logging.Logger
    :param progress_data_key: str, fails the progress data if this key is provided
    :raises AnalysisPipelineException: if the analysis is already in a terminal state
    """
    with transaction.atomic():
        # Row-lock the analysis so concurrent status transitions serialize
        # on the same record for the duration of this transaction.
        locked_analysis = Analysis.objects.select_for_update().get(
            id=self._analysis_id)

        if progress_data_key is not None:
            # NOTE(review): the progress data is marked failed *before* the
            # terminal-state check below, so it is failed even when the
            # exception fires — confirm this ordering is intentional.
            progress_data = ProgressData.from_key(progress_data_key)
            progress_data.finish_with_error(message)

        if locked_analysis.in_terminal_state():
            raise AnalysisPipelineException(
                f'Analysis is already in a terminal state: status {locked_analysis.status}'
            )

        # Transition to FAILED and stamp the end time.
        locked_analysis.status = Analysis.FAILED
        locked_analysis.end_time = tz.now()
        locked_analysis.save()

        # Record the failure as an ERROR message attached to the analysis.
        AnalysisMessage.log_and_create(
            logger=logger,
            type_=AnalysisMessage.ERROR,
            user_message=message,
            debug_message='',
            analysis_id=self._analysis_id,
        )
def test_init_by_data(self):
    """A ProgressData instance can be round-tripped through its cache key."""
    original = ProgressData(func_name='test_func_3', unique_id='ghi789')
    original.total = 100

    # The key is derived from the function name and unique id.
    self.assertEqual(original.key, ':1:SEED:test_func_3:PROG:ghi789')

    # Rehydrating from the key yields identical data.
    restored = ProgressData.from_key(original.key)
    self.assertDictEqual(original.data, restored.data)
def test_whole_org_match_merge_link_endpoint_taxlots(self):
    """Kick off an org-wide taxlot match-merge-link and verify the cached summary
    is reachable both directly from the cache and via the results endpoint."""
    url = reverse('api:v3:organizations-match-merge-link', args=[self.org.id])
    response = self.client.post(
        url,
        json.dumps({"inventory_type": "taxlots"}),
        content_type='application/json',
    )
    self.assertEqual(200, response.status_code)

    content = json.loads(response.content)
    identifier = ProgressData.from_key(content['progress_key']).data['unique_id']

    # The summary is cached under a key derived from the progress unique_id.
    result_key = "org_match_merge_link_result__%s" % identifier
    summary = get_cache_raw(result_key)
    self.assertCountEqual(['PropertyState', 'TaxLotState'], list(summary.keys()))

    # try to get result using results endpoint
    get_result_url = (
        reverse('api:v3:organizations-match-merge-link-result', args=[self.org.id])
        + '?match_merge_link_id=' + str(identifier)
    )
    get_result_raw_response = self.client.get(get_result_url)
    summary = json.loads(get_result_raw_response.content)
    self.assertCountEqual(['PropertyState', 'TaxLotState'], list(summary.keys()))
def task_create_analysis_property_views(analysis_id, property_view_ids, progress_data_key=None):
    """A celery task which batch creates the AnalysisPropertyViews for the analysis.
    It will create AnalysisMessages for any property view IDs that couldn't be used
    to create an AnalysisPropertyView.

    :param analysis_id: int
    :param property_view_ids: list[int]
    :param progress_data_key: str, optional
    :returns: list[int], IDs of the successfully created AnalysisPropertyViews
    """
    if progress_data_key is not None:
        ProgressData.from_key(progress_data_key).step('Copying property data')

    created_view_ids, failures = AnalysisPropertyView.batch_create(
        analysis_id, property_view_ids)

    # Record a message per failed property view, truncated to fit the
    # 255-character user_message column.
    for failure in failures:
        truncated_user_message = f'Failed to copy property data for PropertyView ID {failure.property_view_id}: {failure.message}'
        if len(truncated_user_message) > 255:
            truncated_user_message = truncated_user_message[:252] + '...'
        AnalysisMessage.objects.create(
            analysis_id=analysis_id,
            type=AnalysisMessage.DEFAULT,
            user_message=truncated_user_message,
        )

    return created_view_ids
def delete_organization_inventory(org_pk, prog_key=None, chunk_size=100, *args, **kwargs):
    """Deletes all properties & taxlots within an organization.

    The deletions are fanned out to celery chunk tasks; a final chord callback
    restores state and marks the progress data finished.
    """
    sys.setrecursionlimit(5000)  # default is 1000

    if prog_key:
        progress_data = ProgressData.from_key(prog_key)
    else:
        progress_data = ProgressData(
            func_name='delete_organization_inventory', unique_id=org_pk)

    # Collect the ids of everything to delete, per model.
    property_ids = list(
        Property.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    property_state_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_ids = list(
        TaxLot.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_state_ids = list(
        TaxLotState.objects.filter(organization_id=org_pk).values_list('id', flat=True))

    total = (len(property_ids) + len(property_state_ids)
             + len(taxlot_ids) + len(taxlot_state_ids))

    if total == 0:
        return progress_data.finish_with_success(
            'No inventory data to remove for organization')

    # total steps is the total number of properties divided by the chunk size
    progress_data.total = total / float(chunk_size)
    progress_data.save()

    # we could also use .s instead of .subtask and not wrap the *args
    chunk_plan = (
        (property_ids, _delete_organization_property_chunk),
        (property_state_ids, _delete_organization_property_state_chunk),
        (taxlot_ids, _delete_organization_taxlot_chunk),
        (taxlot_state_ids, _delete_organization_taxlot_state_chunk),
    )
    tasks = []
    for ids, chunk_task in chunk_plan:
        for del_ids in batch(ids, chunk_size):
            tasks.append(chunk_task.subtask((del_ids, progress_data.key, org_pk)))

    chord(tasks, interval=15)(_finish_delete.subtask([org_pk, progress_data.key]))

    return progress_data.result()
def _delete_organization_related_data(org_pk, prog_key):
    """Delete the organization itself plus any users left with no org membership."""
    # Get all org users
    member_ids = OrganizationUser.objects.filter(
        organization_id=org_pk).values_list('user_id', flat=True)
    members = list(User.objects.filter(pk__in=member_ids))

    Organization.objects.get(pk=org_pk).delete()

    # TODO: Delete measures in BRICR branch

    # Delete any abandoned users.
    for member in members:
        if not OrganizationUser.objects.filter(user_id=member.pk).exists():
            member.delete()

    return ProgressData.from_key(prog_key).result()
def test_key_missing(self):
    """Looking up a key that was never cached raises with a descriptive message."""
    with self.assertRaises(Exception) as ctx:
        ProgressData.from_key('some_random_key')

    self.assertEqual(
        'Could not find key some_random_key in cache', str(ctx.exception))
def test_whole_org_match_merge_link_preview_endpoint_taxlots(self):
    """The preview endpoint reports the links that WOULD be created when the
    matching criteria change, without actually linking anything."""
    # Cycle 1 / ImportFile 1 - Create 1 taxlot
    base_taxlot_details = {
        'jurisdiction_tax_lot_id': '1st Non-Match Set',
        'city': 'City 1',
        'district': 'Match Set',
        'import_file_id': self.import_file_1.id,
        'data_state': DATA_STATE_MAPPING,
        'no_default_data': False,
    }
    tls_1 = self.taxlot_state_factory.get_taxlot_state(**base_taxlot_details)
    self.import_file_1.mapping_done = True
    self.import_file_1.save()
    match_buildings(self.import_file_1.id)

    # Cycle 2 / ImportFile 2 - Create 1 unlinked taxlot
    base_taxlot_details['jurisdiction_tax_lot_id'] = '2nd Non-Match Set'
    base_taxlot_details['district'] = 'Match Set'
    base_taxlot_details['import_file_id'] = self.import_file_2.id
    tls_2 = self.taxlot_state_factory.get_taxlot_state(**base_taxlot_details)
    self.import_file_2.mapping_done = True
    self.import_file_2.save()
    match_buildings(self.import_file_2.id)

    # Check there doesn't exist links
    self.assertNotEqual(tls_1.taxlotview_set.first().taxlot_id,
                        tls_2.taxlotview_set.first().taxlot_id)

    url = reverse('api:v3:organizations-match-merge-link-preview', args=[self.org.id])
    response = self.client.post(
        url,
        json.dumps({
            "inventory_type": "taxlots",
            "add": ['district'],
            "remove": ['jurisdiction_tax_lot_id']
        }),
        content_type='application/json',
    )

    # Check there *still* doesn't exist links
    self.assertNotEqual(tls_1.taxlotview_set.first().taxlot_id,
                        tls_2.taxlotview_set.first().taxlot_id)
    self.assertEqual(200, response.status_code)

    content = json.loads(response.content)
    identifier = ProgressData.from_key(content['progress_key']).data['unique_id']
    result_key = "org_match_merge_link_result__%s" % identifier
    raw_summary = get_cache_raw(result_key)
    summary = {str(k): v for k, v in raw_summary.items() if v}  # ignore empty cycles

    # Check format of summary
    self.assertCountEqual([str(self.cycle_1.id), str(self.cycle_2.id)], summary.keys())
    # Check that preview shows links would be created
    self.assertEqual(summary[str(self.cycle_1.id)][0]['id'],
                     summary[str(self.cycle_2.id)][0]['id'])

    # try to get result using results endpoint
    get_result_url = (
        reverse('api:v3:organizations-match-merge-link-result', args=[self.org.id])
        + '?match_merge_link_id=' + str(identifier)
    )
    get_result_raw_response = self.client.get(get_result_url)
    raw_summary = json.loads(get_result_raw_response.content)
    summary = {str(k): v for k, v in raw_summary.items() if v}  # ignore empty cycles

    # Check format of summary
    self.assertCountEqual([str(self.cycle_1.id), str(self.cycle_2.id)], summary.keys())
    # Check that preview shows links would be created
    self.assertEqual(summary[str(self.cycle_1.id)][0]['id'],
                     summary[str(self.cycle_2.id)][0]['id'])
def cache_match_merge_link_result(summary, identifier, progress_key):
    """Stash the match/merge/link summary in the cache and mark the progress
    data as successfully finished."""
    set_cache_raw(_get_match_merge_link_key(identifier), summary)
    ProgressData.from_key(progress_key).finish_with_success()
def match_and_link_incoming_properties_and_taxlots(file_pk, progress_key):
    """
    Match incoming the properties and taxlots. Then, search for links for them.

    The process starts by identifying the incoming PropertyStates then
    TaxLotStates of an ImportFile. The steps are exactly the same for each:
        - Remove duplicates amongst the -States within the ImportFile.
        - Merge together any matches amongst the -States within the ImportFile.
        - Parse through the remaining -States to ultimately associate them to
          -Views of the current Cycle.
            - Filter duplicates of existing -States.
            - Merge incoming -States into existing -States if they match,
              keeping the existing -View.
        - For these -Views, search for matches across Cycles for linking.

    Throughout the process, the results are captured and a summary of this is
    returned as a dict.

    :param file_pk: ImportFile Primary Key
    :param progress_key: str, key used to rehydrate the ProgressData tracker
    :return results: dict
    """
    # Imported here (rather than at module level) — presumably to avoid a
    # circular import with seed.data_importer.tasks; confirm before moving.
    from seed.data_importer.tasks import pair_new_states

    import_file = ImportFile.objects.get(pk=file_pk)
    progress_data = ProgressData.from_key(progress_key)

    # Don't query the org table here, just get the organization from the import_record
    org = import_file.import_record.super_organization

    # Set the progress to started - 33%
    progress_data.step('Matching data')

    # Set defaults for every counter reported in the summary, so the return
    # dict is complete even when one (or both) of the branches below is skipped.
    property_duplicates_against_existing_count = 0
    property_duplicates_within_file_count = 0
    property_merges_against_existing_count = 0
    property_merges_between_existing_count = 0
    property_merges_within_file_count = 0
    property_new_count = 0
    tax_lot_duplicates_against_existing_count = 0
    tax_lot_duplicates_within_file_count = 0
    tax_lot_merges_against_existing_count = 0
    tax_lot_merges_between_existing_count = 0
    tax_lot_merges_within_file_count = 0
    tax_lot_new_count = 0
    merged_linked_property_views = []
    merged_linked_taxlot_views = []

    # Get lists and counts of all the properties and tax lots based on the import file.
    incoming_properties = import_file.find_unmatched_property_states()
    property_initial_incoming_count = incoming_properties.count()
    incoming_tax_lots = import_file.find_unmatched_tax_lot_states()
    tax_lot_initial_incoming_count = incoming_tax_lots.count()

    if incoming_properties.exists():
        # Within the ImportFile, filter out the duplicates.
        log_debug("Start Properties filter_duplicate_states")
        promoted_property_ids, property_duplicates_within_file_count = filter_duplicate_states(
            incoming_properties)

        # Within the ImportFile, merge -States together based on user defined matching_criteria
        log_debug('Start Properties inclusive_match_and_merge')
        promoted_property_ids, property_merges_within_file_count = inclusive_match_and_merge(
            promoted_property_ids, org, PropertyState)

        # Filter Cycle-wide duplicates then merge and/or assign -States to -Views
        log_debug('Start Properties states_to_views')
        merged_property_views, property_duplicates_against_existing_count, property_new_count, property_merges_against_existing_count, property_merges_between_existing_count = states_to_views(
            promoted_property_ids, org, import_file.cycle, PropertyState)

        # Look for links across Cycles
        log_debug('Start Properties link_views')
        merged_linked_property_views = link_views(merged_property_views, PropertyView)

    if incoming_tax_lots.exists():
        # Within the ImportFile, filter out the duplicates.
        log_debug("Start TaxLots filter_duplicate_states")
        promoted_tax_lot_ids, tax_lot_duplicates_within_file_count = filter_duplicate_states(
            incoming_tax_lots)

        # Within the ImportFile, merge -States together based on user defined matching_criteria
        log_debug('Start TaxLots inclusive_match_and_merge')
        promoted_tax_lot_ids, tax_lot_merges_within_file_count = inclusive_match_and_merge(
            promoted_tax_lot_ids, org, TaxLotState)

        # Filter Cycle-wide duplicates then merge and/or assign -States to -Views
        log_debug('Start TaxLots states_to_views')
        merged_linked_taxlot_views, tax_lot_duplicates_against_existing_count, tax_lot_new_count, tax_lot_merges_against_existing_count, tax_lot_merges_between_existing_count = states_to_views(
            promoted_tax_lot_ids, org, import_file.cycle, TaxLotState)

        # Look for links across Cycles
        log_debug('Start TaxLots link_views')
        merged_linked_taxlot_views = link_views(merged_linked_taxlot_views, TaxLotView)

    # Pair up the freshly matched/linked property and taxlot views.
    log_debug('Start pair_new_states')
    progress_data.step('Pairing data')
    pair_new_states(merged_linked_property_views, merged_linked_taxlot_views)
    log_debug('End pair_new_states')

    return {
        'import_file_records': import_file.num_rows,
        'property_initial_incoming': property_initial_incoming_count,
        'property_duplicates_against_existing': property_duplicates_against_existing_count,
        'property_duplicates_within_file': property_duplicates_within_file_count,
        'property_merges_against_existing': property_merges_against_existing_count,
        'property_merges_between_existing': property_merges_between_existing_count,
        'property_merges_within_file': property_merges_within_file_count,
        'property_new': property_new_count,
        'tax_lot_initial_incoming': tax_lot_initial_incoming_count,
        'tax_lot_duplicates_against_existing': tax_lot_duplicates_against_existing_count,
        'tax_lot_duplicates_within_file': tax_lot_duplicates_within_file_count,
        'tax_lot_merges_against_existing': tax_lot_merges_against_existing_count,
        'tax_lot_merges_between_existing': tax_lot_merges_between_existing_count,
        'tax_lot_merges_within_file': tax_lot_merges_within_file_count,
        'tax_lot_new': tax_lot_new_count,
    }
def _delete_organization_taxlot_state_chunk(del_ids, prog_key, org_pk, *args, **kwargs):
    """deletes a list of ``del_ids`` and increments the cache"""
    TaxLotState.objects.filter(organization_id=org_pk, pk__in=del_ids).delete()
    ProgressData.from_key(prog_key).step()
def _finish_delete(results, org_pk, prog_key):
    """Chord callback: restore the default recursion limit and mark the
    deletion progress data as successfully finished."""
    sys.setrecursionlimit(1000)
    return ProgressData.from_key(prog_key).finish_with_success()