def delete_organization_inventory(org_pk, deleting_cache_key, chunk_size=100, *args, **kwargs):
    """Deletes all properties & taxlots within an organization.

    Collects the ids of every Property, PropertyState, TaxLot and TaxLotState
    owned by the organization, then fans the deletes out to celery chunk tasks
    joined by a chord that finalizes the progress record.

    :param org_pk: int, organization primary key
    :param deleting_cache_key: str, progress-cache key updated as chunks finish
    :param chunk_size: int, number of rows handled per chunk task
    """
    sys.setrecursionlimit(5000)  # default is 1000

    result = {
        'status': 'success',
        'progress_key': deleting_cache_key,
        'progress': 0
    }

    property_ids = list(
        Property.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    property_state_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_ids = list(
        TaxLot.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_state_ids = list(
        TaxLotState.objects.filter(organization_id=org_pk).values_list('id', flat=True))

    total = len(property_ids) + len(property_state_ids) + len(taxlot_ids) + len(taxlot_state_ids)

    if total == 0:
        # Nothing to delete: publish 100% and stop. (The original tested
        # ``total == 0`` twice — once to set progress, once to return —
        # collapsed here into a single early-exit branch.)
        result['progress'] = 100
        set_cache(deleting_cache_key, result['status'], result)
        return

    # Publish the initial 0% progress before queuing work.
    set_cache(deleting_cache_key, result['status'], result)

    step = float(chunk_size) / total
    tasks = []
    # we could also use .s instead of .subtask and not wrap the *args
    for del_ids in batch(property_ids, chunk_size):
        tasks.append(
            _delete_organization_property_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)))
    for del_ids in batch(property_state_ids, chunk_size):
        tasks.append(
            _delete_organization_property_state_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)))
    for del_ids in batch(taxlot_ids, chunk_size):
        tasks.append(
            _delete_organization_taxlot_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)))
    for del_ids in batch(taxlot_state_ids, chunk_size):
        tasks.append(
            _delete_organization_taxlot_state_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)))

    # The chord runs _finish_delete once every chunk task has completed.
    chord(tasks, interval=15)(_finish_delete.subtask([org_pk, deleting_cache_key]))
def delete_organization_inventory(org_pk, prog_key=None, chunk_size=100, *args, **kwargs):
    """Deletes all properties & taxlots within an organization."""
    sys.setrecursionlimit(5000)  # default is 1000

    # Reuse the caller-supplied progress record when given, otherwise open one.
    if prog_key:
        progress_data = ProgressData.from_key(prog_key)
    else:
        progress_data = ProgressData(
            func_name='delete_organization_inventory', unique_id=org_pk)

    property_ids = list(
        Property.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    property_state_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_ids = list(
        TaxLot.objects.filter(organization_id=org_pk).values_list('id', flat=True))
    taxlot_state_ids = list(
        TaxLotState.objects.filter(organization_id=org_pk).values_list('id', flat=True))

    total = (len(property_ids) + len(property_state_ids) +
             len(taxlot_ids) + len(taxlot_state_ids))
    if total == 0:
        return progress_data.finish_with_success(
            'No inventory data to remove for organization')

    # total steps is the total number of properties divided by the chunk size
    progress_data.total = total / float(chunk_size)
    progress_data.save()

    # we could also use .s instead of .subtask and not wrap the *args
    deletion_plan = (
        (property_ids, _delete_organization_property_chunk),
        (property_state_ids, _delete_organization_property_state_chunk),
        (taxlot_ids, _delete_organization_taxlot_chunk),
        (taxlot_state_ids, _delete_organization_taxlot_state_chunk),
    )
    tasks = [
        chunk_task.subtask((id_chunk, progress_data.key, org_pk))
        for id_list, chunk_task in deletion_plan
        for id_chunk in batch(id_list, chunk_size)
    ]
    chord(tasks, interval=15)(_finish_delete.subtask([org_pk, progress_data.key]))

    return progress_data.result()
def delete_organization(org_pk, deleting_cache_key, chunk_size=100, *args, **kwargs):
    """Kick off deletion of an organization's buildings and related data.

    Publishes an initial progress record, queues canonical-building removal,
    then either chords the snapshot deletes into chunk tasks or — when the
    organization has no snapshots — deletes the related data directly.
    """
    result = {
        'status': 'success',
        'progress': 0,
        'progress_key': deleting_cache_key
    }
    set_cache(deleting_cache_key, result['status'], result)

    if CanonicalBuilding.objects.filter(
            canonical_snapshot__super_organization=org_pk).exists():
        _delete_canonical_buildings.delay(org_pk)

    snapshots = BuildingSnapshot.objects.filter(super_organization=org_pk)
    if snapshots.exists():
        snapshot_ids = list(snapshots.values_list('id', flat=True))
        step = float(chunk_size) / len(snapshot_ids)
        # we could also use .s instead of .subtask and not wrap the *args
        tasks = [
            _delete_organization_buildings_chunk.subtask(
                (id_chunk, deleting_cache_key, step, org_pk))
            for id_chunk in batch(snapshot_ids, chunk_size)
        ]
        chord(tasks, interval=15)(_delete_organization_related_data.subtask(
            [org_pk, deleting_cache_key]))
    else:
        _delete_organization_related_data(None, org_pk, deleting_cache_key)
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    snapshot_ids = BuildingSnapshot.objects.filter(
        super_organization=org_pk).values_list('id', flat=True)
    deleting_cache_key = get_prog_key('delete_organization_buildings', org_pk)
    if not snapshot_ids:
        # Nothing to do; report immediate completion.
        set_cache(deleting_cache_key, 'success', 100)
        return

    # delete the canonical buildings
    canonical_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True)
    _delete_canonical_buildings.delay(canonical_ids)

    step = float(chunk_size) / len(snapshot_ids)
    set_cache(deleting_cache_key, 'success', 0)
    # we could also use .s instead of .subtask and not wrap the *args
    tasks = [
        _delete_organization_buildings_chunk.subtask(
            (id_chunk, deleting_cache_key, step, org_pk))
        for id_chunk in batch(snapshot_ids, chunk_size)
    ]
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    progress_key = get_prog_key('delete_organization_buildings', org_pk)
    building_ids = BuildingSnapshot.objects.filter(
        super_organization=org_pk).values_list('id', flat=True)
    if not building_ids:
        # Nothing to do; report immediate completion.
        cache.set(progress_key, 100)
        return

    # delete the canonical buildings
    canonical_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk).values_list('id', flat=True)
    _delete_canonical_buildings.delay(canonical_ids)

    cache.set(progress_key, 0)
    step = float(chunk_size) / len(building_ids)
    tasks = []
    for id_chunk in batch(building_ids, chunk_size):
        # we could also use .s instead of .subtask and not wrap the *args
        tasks.append(
            _delete_organization_buildings_chunk.subtask(
                (id_chunk, progress_key, step, org_pk)))
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
def split_rows(self, chunk_size, callback, *args, **kwargs):
    """Break up the CSV into smaller pieces for parallel processing.

    Invokes ``callback`` once per chunk (forwarding any extra arguments)
    and returns the total number of rows handed off.
    """
    total_rows = 0
    for chunk in utils.batch(self.next(), chunk_size):
        callback(chunk, *args, **kwargs)
        total_rows += len(chunk)
    return total_rows
def _delete_canonical_buildings(ids, chunk_size=300):
    """Delete CanonicalBuilding rows in batches.

    :param ids: list of CanonicalBuilding ids to delete
    :param chunk_size: number of instances removed per bulk delete
    """
    for id_chunk in batch(ids, chunk_size):
        CanonicalBuilding.objects.filter(pk__in=id_chunk).delete()
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw.

    :param file_pk: int, primary key of the ImportFile to ingest.
    :returns: dict with ``status`` ('success'/'warning'/'error') plus, on
        failure, ``message`` and ``stacktrace``; the same dict is written to
        the progress cache under ``prog_key``.
    """
    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        # Idempotence guard: never re-save a file whose raw rows are already in.
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            cache.set(prog_key, result)
            return result
        # Green Button files are handled by a dedicated ingestion path.
        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)
        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0
        # Fan the rows out to chunk tasks of 100 rows each, counting rows as we go.
        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(
                _save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))
        tasks = add_cache_increment_parameter(tasks)
        import_file.num_columns = parser.num_columns()
        import_file.save()
        if tasks:
            # finish_raw_save runs once all chunk tasks complete.
            chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
        else:
            # NOTE(review): celery tasks expose .s/.subtask/.delay but not
            # .task — this empty-file branch looks like it would raise
            # AttributeError instead of finalizing the save; confirm intent.
            finish_raw_save.task(file_pk)
    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        # ``Error`` is presumably the file-reader's parse error type — confirm
        # which module it is imported from.
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        # Catch-all so the progress cache always ends in a terminal state.
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    cache.set(prog_key, result)
    return result
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV or XLSX file and save the raw data into the DB BuildingSnapshot table.

    :param file_pk: int, primary key of the ImportFile to ingest.
    :returns: dict with ``status`` ('success'/'warning'/'error') plus, on
        failure, ``message`` and ``stacktrace``; the same dict is also written
        to the progress cache under ``prog_key``.
    """
    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        # Idempotence guard: never re-save a file whose raw rows are already in.
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            set_cache(prog_key, result['status'], result)
            return result
        # Green Button files are handled by a dedicated ingestion path.
        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)
        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0
        import_file.num_columns = parser.num_columns()
        # Why are we setting the num_rows to the number of chunks?
        # (The loop below actually accumulates len(chunk), i.e. row counts.)
        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(_save_raw_data_chunk.s(chunk, file_pk, prog_key))
        import_file.save()
        # need to rework how the progress keys are implemented here
        tasks = add_cache_increment_parameter(tasks)
        if tasks:
            # finish_raw_save runs once all chunk tasks complete.
            chord(tasks, interval=15)(finish_raw_save.s(file_pk))
        else:
            # NOTE(review): .s(...) only builds a signature and never applies
            # it — this empty-file branch appears to be a no-op rather than
            # finalizing the save; confirm intent.
            finish_raw_save.s(file_pk)
    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        # ``Error`` is presumably the file-reader's parse error type — confirm
        # which module it is imported from.
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        # Catch-all so the progress cache always ends in a terminal state.
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + str(e.message)
        result['stacktrace'] = traceback.format_exc()
    set_cache(prog_key, result['status'], result)
    return result
def _delete_canonical_buildings(org_pk, chunk_size=300):
    """Delete every CanonicalBuilding belonging to an organization, in batches.

    (Docstring previously documented ``org_id``; the parameter is ``org_pk``.)

    :param org_pk: organization id whose canonical buildings are removed
    :param chunk_size: number of CanonicalBuilding instances to delete per iteration
    """
    ids = list(CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True))
    for del_ids in batch(ids, chunk_size):
        CanonicalBuilding.objects.filter(pk__in=del_ids).delete()
def log_deleted_buildings(ids, user_pk, chunk_size=300):
    """
    Write an AuditLog delete entry for each BuildingSnapshot in ``ids``,
    attributed to ``user_pk`` and attached to the snapshot's canonical
    building and organization.
    """
    for id_chunk in batch(ids, chunk_size):
        snapshots = BuildingSnapshot.objects.filter(pk__in=id_chunk)
        for snapshot in snapshots:
            AuditLog.objects.create(
                user_id=user_pk,
                content_object=snapshot.canonical_building,
                organization=snapshot.super_organization,
                action='delete_building',
                action_note='Deleted building.',
            )
def _map_data(file_pk, *args, **kwargs):
    """Get all of the raw data and process it using appropriate mapping.
    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.
    :returns: dict with ``status`` and, for warning/error cases, a ``message``.
    """
    import_file = ImportFile.objects.get(pk=file_pk)
    # Don't perform this task if it's already been completed.
    if import_file.mapping_done:
        prog_key = get_prog_key('map_data', file_pk)
        result = {
            'status': 'warning',
            'progress': 100,
            'message': 'mapping already complete'
        }
        set_cache(prog_key, result['status'], result)
        return result

    # If we haven't finished saving, we shouldn't proceed with mapping
    # Re-queue this task.
    if not import_file.raw_save_done:
        map_data.apply_async(args=[file_pk], countdown=60, expires=120)
        return {'status': 'error', 'message': 'waiting for raw data save.'}

    # Map the human-readable source type onto its constant; anything
    # unrecognized falls back to ASSESSED_RAW.
    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_RAW,
        'Assessed Raw': ASSESSED_RAW,
        'Green Button Raw': GREEN_BUTTON_RAW,
    }
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_RAW)

    # iterator() streams snapshots instead of caching the whole queryset.
    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    prog_key = get_prog_key('map_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        serialized_data = [obj.extra_data for obj in chunk]
        tasks.append(map_row_chunk.s(serialized_data, file_pk, source_type, prog_key))

    # need to rework how the progress keys are implemented here, but at least the method gets called above for cleansing
    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        # finish_mapping runs once every chunk task has completed.
        chord(tasks, interval=15)(finish_mapping.subtask([file_pk]))
    else:
        # NOTE(review): .subtask(...) only builds a signature and never
        # applies it — this empty-queryset branch appears to be a no-op
        # rather than finalizing the mapping; confirm intent.
        finish_mapping.subtask(file_pk)

    return {'status': 'success'}
def log_deleted_buildings(ids, user_pk, chunk_size=300):
    """
    Record an AuditLog 'delete_building' entry for every BuildingSnapshot in
    ``ids``, attributed to ``user_pk``.
    """
    for pk_batch in batch(ids, chunk_size):
        for building in BuildingSnapshot.objects.filter(pk__in=pk_batch):
            AuditLog.objects.create(
                user_id=user_pk,
                content_object=building.canonical_building,
                organization=building.super_organization,
                action='delete_building',
                action_note='Deleted building.')
def _map_data(file_pk, *args, **kwargs):
    """Get all of the raw data and process it using appropriate mapping.
    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.
    :returns: dict with ``status`` and, for warning/error cases, a ``message``.
    """
    import_file = ImportFile.objects.get(pk=file_pk)
    # Don't perform this task if it's already been completed.
    if import_file.mapping_done:
        prog_key = get_prog_key('map_data', file_pk)
        cache.set(prog_key, 100)
        return {'status': 'warning', 'message': 'mapping already complete'}

    # If we haven't finished saving, we shouldn't proceed with mapping
    # Re-queue this task.
    if not import_file.raw_save_done:
        map_data.apply_async(args=[file_pk], countdown=60, expires=120)
        return {'status': 'error', 'message': 'waiting for raw data save.'}

    # Map the human-readable source type onto its constant; anything
    # unrecognized falls back to ASSESSED_RAW.
    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_RAW,
        'Assessed Raw': ASSESSED_RAW,
        'Green Button Raw': GREEN_BUTTON_RAW,
    }
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_RAW)

    # iterator() streams snapshots instead of caching the whole queryset.
    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    prog_key = get_prog_key('map_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        serialized_data = [obj.extra_data for obj in chunk]
        tasks.append(
            map_row_chunk.subtask(
                (serialized_data, file_pk, source_type, prog_key)))

    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        # finish_mapping runs once every chunk task has completed.
        chord(tasks, interval=15)(finish_mapping.subtask([file_pk]))
    else:
        # NOTE(review): celery tasks expose .s/.subtask/.delay but not .task —
        # this empty-queryset branch looks like it would raise AttributeError
        # rather than finalize the mapping; confirm intent.
        finish_mapping.task(file_pk)

    return {'status': 'success'}
def _cleanse_data(file_pk):
    """
    Get the mapped data and run the cleansing class against it in chunks. The
    mapped data are pulled from the BuildingSnapshot table.

    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.
    :returns: dict with ``status`` ('success').
    """
    import_file = ImportFile.objects.get(pk=file_pk)

    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_BS,
        'Assessed Raw': ASSESSED_BS,
        'Green Button Raw': GREEN_BUTTON_BS,
    }
    # This is non-ideal, but the source type of the input file is never updated, but the data are stages as if it were.
    # After the mapping stage occurs, the data end up in the BuildingSnapshot table under the *_BS value.
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_BS)

    # iterator() streams snapshots instead of caching the whole queryset.
    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    # initialize the cache for the cleansing results using the cleansing static method
    Cleansing.initialize_cache(file_pk)

    prog_key = get_prog_key('cleanse_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        # Only the ids are shipped to the chunk task; it re-fetches the rows.
        ids = [obj.id for obj in chunk]
        tasks.append(cleanse_data_chunk.s(ids, file_pk))  # note that increment will be added to end

    # need to rework how the progress keys are implemented here, but at least the method gets called above for cleansing
    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        # finish_cleansing runs once every chunk task has completed.
        chord(tasks, interval=15)(finish_cleansing.subtask([file_pk]))
    else:
        # NOTE(review): .subtask(...) only builds a signature and never
        # applies it — this empty-queryset branch appears to be a no-op
        # rather than finalizing the cleansing; confirm intent.
        finish_cleansing.subtask(file_pk)

    return {'status': 'success'}
def delete_organization_buildings(org_pk, deleting_cache_key, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization."""
    result = {
        'status': 'success',
        'progress_key': deleting_cache_key
    }
    has_buildings = BuildingSnapshot.objects.filter(
        super_organization=org_pk).exists()
    # Report immediate completion when there is nothing to delete.
    result['progress'] = 0 if has_buildings else 100
    set_cache(deleting_cache_key, result['status'], result)
    if not has_buildings:
        return

    _delete_canonical_buildings.delay(org_pk)

    snapshot_ids = list(
        BuildingSnapshot.objects.filter(super_organization=org_pk).values_list(
            'id', flat=True))
    step = float(chunk_size) / len(snapshot_ids)
    # we could also use .s instead of .subtask and not wrap the *args
    tasks = [
        _delete_organization_buildings_chunk.subtask(
            (id_chunk, deleting_cache_key, step, org_pk))
        for id_chunk in batch(snapshot_ids, chunk_size)
    ]
    chord(tasks, interval=15)(
        _finish_delete.subtask([org_pk, deleting_cache_key]))
def delete_organization_inventory(org_pk, deleting_cache_key, chunk_size=100, *args, **kwargs):
    """Deletes all properties & taxlots within an organization.

    Collects the ids of every Property, PropertyState, TaxLot and TaxLotState
    owned by the organization, then fans the deletes out to celery chunk tasks
    joined by a chord that finalizes the progress record.

    :param org_pk: int, organization primary key
    :param deleting_cache_key: str, progress-cache key updated as chunks finish
    :param chunk_size: int, number of rows handled per chunk task
    """
    result = {
        'status': 'success',
        'progress_key': deleting_cache_key,
        'progress': 0
    }

    property_ids = list(
        Property.objects.filter(organization_id=org_pk).values_list('id', flat=True)
    )
    property_state_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list('id', flat=True)
    )
    taxlot_ids = list(
        TaxLot.objects.filter(organization_id=org_pk).values_list('id', flat=True)
    )
    taxlot_state_ids = list(
        TaxLotState.objects.filter(organization_id=org_pk).values_list('id', flat=True)
    )

    total = len(property_ids) + len(property_state_ids) + len(taxlot_ids) + len(taxlot_state_ids)

    if total == 0:
        # Nothing to delete: publish 100% and stop. (The original tested
        # ``total == 0`` twice — once to set progress, once to return —
        # collapsed here into a single early-exit branch.)
        result['progress'] = 100
        set_cache(deleting_cache_key, result['status'], result)
        return

    # Publish the initial 0% progress before queuing work.
    set_cache(deleting_cache_key, result['status'], result)

    step = float(chunk_size) / total
    chunk_task_plan = (
        (property_ids, _delete_organization_property_chunk),
        (property_state_ids, _delete_organization_property_state_chunk),
        (taxlot_ids, _delete_organization_taxlot_chunk),
        (taxlot_state_ids, _delete_organization_taxlot_state_chunk),
    )
    tasks = []
    for id_list, chunk_task in chunk_task_plan:
        for del_ids in batch(id_list, chunk_size):
            # we could also use .s instead of .subtask and not wrap the *args
            tasks.append(
                chunk_task.subtask((del_ids, deleting_cache_key, step, org_pk))
            )

    # The chord runs _finish_delete once every chunk task has completed.
    chord(tasks, interval=15)(
        _finish_delete.subtask([org_pk, deleting_cache_key]))