def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    import_file = ImportFile.objects.get(pk=file_pk)

    if import_file.raw_save_done:
        return {'status': 'warning', 'message': 'raw data already saved'}

    if import_file.source_type == "Green Button Raw":
        return _save_raw_green_button_data(file_pk, *args, **kwargs)

    parser = reader.MCMParser(import_file.local_file)
    cache_first_rows(import_file, parser)
    rows = parser.next()
    import_file.num_rows = 0

    prog_key = get_prog_key('save_raw_data', file_pk)

    tasks = []
    for chunk in batch(rows, 100):
        import_file.num_rows += len(chunk)
        tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

    tasks = add_cache_increment_parameter(tasks)
    import_file.num_columns = parser.num_columns()
    import_file.save()

    if tasks:
        chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
    else:
        finish_raw_save.task(file_pk)

    return {'status': 'success'}
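# The batch() helper used throughout these tasks is defined elsewhere in the
# project. A minimal sketch of the assumed behaviour -- yield successive lists
# of at most chunk_size items from any iterable -- purely for illustration,
# not the project's actual implementation:
from itertools import islice


def batch(iterable, chunk_size):
    """Yield lists of up to chunk_size items until the iterable is exhausted."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk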
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    qs = BuildingSnapshot.objects.filter(super_organization=org_pk)
    ids = qs.values_list('id', flat=True)
    deleting_cache_key = get_prog_key(
        'delete_organization_buildings', org_pk
    )
    if not ids:
        cache.set(deleting_cache_key, 100)
        return

    # delete the canonical buildings
    can_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True)
    _delete_canonical_buildings.delay(can_ids)

    step = float(chunk_size) / len(ids)
    cache.set(deleting_cache_key, 0)
    tasks = []
    for del_ids in batch(ids, chunk_size):
        # we could also use .s instead of .subtask and not wrap the *args
        tasks.append(
            _delete_organization_buildings_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)
            )
        )
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
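# _delete_organization_buildings_chunk is defined elsewhere; a plausible
# sketch of the chunk worker, assuming each chunk task deletes its slice of
# snapshots and advances the cached progress value by `step` (the real task
# may differ, and the shared_task decorator is an assumption):
from celery import shared_task


@shared_task
def _delete_organization_buildings_chunk(del_ids, prog_key, step, org_pk,
                                         *args, **kwargs):
    """Delete one batch of BuildingSnapshots and bump the progress cache."""
    BuildingSnapshot.objects.filter(pk__in=del_ids).delete()
    cache.set(prog_key, min(cache.get(prog_key, 0) + step, 100))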
def split_rows(self, chunk_size, callback, *args, **kwargs):
    """Break up the CSV into smaller pieces for parallel processing."""
    row_num = 0
    for batch in utils.batch(self.next(), chunk_size):
        row_num += len(batch)
        callback(batch, *args, **kwargs)
    return row_num
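# Hypothetical use of split_rows, assuming it is a method on the parser class
# built via reader.MCMParser above; the callback only needs to accept a list
# of rows, and any extra positional/keyword arguments are forwarded to it:
def _collect_sizes(rows, sizes=None):
    """Toy callback: remember how many rows each chunk contained."""
    if sizes is not None:
        sizes.append(len(rows))


chunk_sizes = []
parser = reader.MCMParser(open('example.csv', 'rb'))  # hypothetical local CSV
total_rows = parser.split_rows(100, _collect_sizes, sizes=chunk_sizes)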
def _delete_canonical_buildings(ids, chunk_size=300):
    """deletes CanonicalBuildings

    :param ids: list of ids to delete from CanonicalBuilding
    :param chunk_size: number of CanonicalBuilding instances to delete
        per iteration
    """
    for del_ids in batch(ids, chunk_size):
        CanonicalBuilding.objects.filter(pk__in=del_ids).delete()
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            cache.set(prog_key, result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0

        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

        tasks = add_cache_increment_parameter(tasks)
        import_file.num_columns = parser.num_columns()
        import_file.save()

        if tasks:
            chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
        else:
            finish_raw_save.task(file_pk)
    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()

    cache.set(prog_key, result)
    return result
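# The progress/result dict written under prog_key can be polled by whatever
# kicked off the import. A minimal, hypothetical status helper built only on
# the cache and get_prog_key calls shown above:
def get_save_raw_data_status(file_pk):
    """Return the cached progress dict for a raw save, or a pending stub."""
    progress = cache.get(get_prog_key('save_raw_data', file_pk))
    return progress if progress is not None else {'status': 'pending'}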
def log_deleted_buildings(ids, user_pk, chunk_size=300):
    """
    Log an AuditLog delete entry for the canonical building of each
    BuildingSnapshot in ``ids``
    """
    for del_ids in batch(ids, chunk_size):
        for b in BuildingSnapshot.objects.filter(pk__in=del_ids):
            AuditLog.objects.create(
                user_id=user_pk,
                content_object=b.canonical_building,
                organization=b.super_organization,
                action='delete_building',
                action_note='Deleted building.'
            )
def _map_data(file_pk, *args, **kwargs):
    """Get all of the raw data and process it using appropriate mapping.
    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.
    """
    import_file = ImportFile.objects.get(pk=file_pk)
    # Don't perform this task if it's already been completed.
    if import_file.mapping_done:
        prog_key = get_prog_key('map_data', file_pk)
        cache.set(prog_key, 100)
        return {'status': 'warning', 'message': 'mapping already complete'}

    # If we haven't finished saving, we shouldn't proceed with mapping.
    # Re-queue this task.
    if not import_file.raw_save_done:
        map_data.apply_async(args=[file_pk], countdown=60, expires=120)
        return {'status': 'error', 'message': 'waiting for raw data save.'}

    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_RAW,
        'Assessed Raw': ASSESSED_RAW,
        'Green Button Raw': GREEN_BUTTON_RAW,
    }
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_RAW)

    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    prog_key = get_prog_key('map_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        serialized_data = [obj.extra_data for obj in chunk]
        tasks.append(map_row_chunk.subtask(
            (serialized_data, file_pk, source_type, prog_key)
        ))

    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        chord(tasks, interval=15)(finish_mapping.subtask([file_pk]))
    else:
        finish_mapping.task(file_pk)

    return {'status': 'success'}
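# The chord(...) pattern used in these tasks fans out one subtask per chunk
# and fires a single finalizer once every chunk task has completed. A toy,
# self-contained illustration (not project code; shared_task is assumed):
from celery import chord, shared_task


@shared_task
def _double(n):
    return n * 2


@shared_task
def _summarize(results):
    # The chord callback receives the list of results from the header tasks.
    return sum(results)


def run_demo_chord():
    """Dispatch five _double tasks, then _summarize over their results."""
    return chord(
        [_double.subtask((n,)) for n in range(5)], interval=15
    )(_summarize.subtask())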