def delete_model(self, request, obj):
    """
    On delete, update full text metadata of related datasets.
    """
    datasets = list(obj.datasets.all())

    obj.delete()

    for dataset in datasets:
        dataset.update_full_text()

    solr.commit(settings.SOLR_DATASETS_CORE)

def save_model(self, request, obj, form, change):
    """
    On save, update full text metadata of related datasets.
    """
    if change:
        datasets = list(obj.datasets.all())

        obj.save()

        for dataset in datasets:
            dataset.update_full_text(commit=False)

        solr.commit(settings.SOLR_DATASETS_CORE)
    else:
        obj.save()

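# The two admin hooks above rely on Dataset.update_full_text() accepting a
# commit keyword, so a batch of metadata updates can share a single Solr
# commit. A minimal, hypothetical sketch of that contract -- the real method
# lives on the Dataset model and builds its document differently:
def update_full_text(self, commit=True):
    # build_full_text_document() is a hypothetical helper name; this only
    # illustrates the commit=False pattern assumed by save_model()/delete_model().
    doc = build_full_text_document(self)

    solr.add(settings.SOLR_DATASETS_CORE, [doc])

    if commit:
        solr.commit(settings.SOLR_DATASETS_CORE)
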
def save(self, *args, **kwargs):
    super(User, self).save(*args, **kwargs)

    if self.first_name != self.__original_first_name or \
            self.last_name != self.__original_last_name or \
            self.email != self.__original_email:
        if self.datasets.count():
            for dataset in self.datasets.all():
                dataset.update_full_text(commit=False)

            solr.commit(settings.SOLR_DATASETS_CORE)

    self.__original_first_name = self.first_name
    self.__original_last_name = self.last_name
    self.__original_email = self.email

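# The save() override above compares against __original_* attributes, which
# must be captured when the instance is loaded. A minimal sketch of that
# initialization, assuming it lives on the same User class as save() (so
# Python name mangling resolves identically):
def __init__(self, *args, **kwargs):
    super(User, self).__init__(*args, **kwargs)

    # Cache the values as loaded so save() can tell whether any of the
    # Solr-indexed fields actually changed.
    self.__original_first_name = self.first_name
    self.__original_last_name = self.last_name
    self.__original_email = self.email
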
def run(self, dataset_slug, *args, **kwargs):
    """
    Execute reindex.
    """
    from panda.models import Dataset

    log = logging.getLogger(self.name)
    log.info("Beginning reindex, dataset_slug: %s" % dataset_slug)

    dataset = Dataset.objects.get(slug=dataset_slug)

    task_status = dataset.current_task
    task_status.begin("Preparing to reindex")

    if self.is_aborted():
        task_status.abort("Aborted during preparation")
        log.warning("Reindex aborted, dataset_slug: %s" % dataset_slug)

        return

    read_buffer = []
    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value("PERF", "TASK_THROTTLE")

    i = 0

    while i < dataset.row_count:
        if not read_buffer:
            query = "dataset_slug: %s" % (dataset.slug)
            response = solr.query(settings.SOLR_DATA_CORE, query, limit=SOLR_READ_BUFFER_SIZE, offset=i)
            read_buffer = response["response"]["docs"]

        data = read_buffer.pop(0)
        row = json.loads(data["data"])

        new_data = utils.solr.make_data_row(dataset, row)
        new_data["id"] = data["id"]
        new_data["data_upload_id"] = data["data_upload_id"]
        new_data = data_typer(new_data, row)

        add_buffer.append(new_data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update("%.0f%% complete" % floor(float(i) / float(dataset.row_count) * 100))

            if self.is_aborted():
                task_status.abort("Aborted after reindexing %.0f%%" % floor(float(i) / float(dataset.row_count) * 100))
                log.warning("Reindex aborted, dataset_slug: %s" % dataset_slug)

                return

            time.sleep(throttle)

        i += 1

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update("100% complete")

    # Refresh dataset
    dataset = Dataset.objects.get(slug=dataset_slug)
    dataset.column_schema = data_typer.schema
    dataset.save()

    log.info("Finished reindex, dataset_slug: %s" % dataset_slug)

    return data_typer

def run(self, dataset_slug, *args, **kwargs):
    """
    Execute reindex.
    """
    from panda.models import Dataset

    log = logging.getLogger(self.name)
    log.info('Beginning reindex, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Reindexing failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    task_status = dataset.current_task
    task_status.begin(ugettext('Preparing to reindex'))

    if self.is_aborted():
        task_status.abort(ugettext('Aborted during preparation'))
        log.warning('Reindex aborted, dataset_slug: %s' % dataset_slug)

        return

    read_buffer = []
    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    i = 0

    while i < dataset.row_count:
        if not read_buffer:
            query = 'dataset_slug: %s' % (dataset.slug)
            response = solr.query(settings.SOLR_DATA_CORE, query, limit=SOLR_READ_BUFFER_SIZE, offset=i)
            read_buffer = response['response']['docs']

        data = read_buffer.pop(0)
        row = json.loads(data['data'])

        new_data = utils.solr.make_data_row(dataset, row)
        new_data['id'] = data['id']
        new_data['data_upload_id'] = data['data_upload_id']
        new_data = data_typer(new_data, row)

        add_buffer.append(new_data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update(ugettext('%.0f%% complete') % floor(float(i) / float(dataset.row_count) * 100))

            if self.is_aborted():
                task_status.abort(ugettext('Aborted after reindexing %.0f%%') % floor(float(i) / float(dataset.row_count) * 100))
                log.warning('Reindex aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

        i += 1

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update(ugettext('100% complete'))

    # Refresh dataset
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Reindexing could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    dataset.column_schema = data_typer.schema
    dataset.save()

    log.info('Finished reindex, dataset_slug: %s' % dataset_slug)

    return data_typer

def run(self, dataset_slug, *args, **kwargs):
    """
    Execute reindex.
    """
    from panda.models import Dataset

    log = logging.getLogger(self.name)
    log.info('Beginning reindex, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Reindexing failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    task_status = dataset.current_task
    task_status.begin(ugettext('Preparing to reindex'))

    if self.is_aborted():
        task_status.abort(ugettext('Aborted during preparation'))
        log.warning('Reindex aborted, dataset_slug: %s' % dataset_slug)

        return

    read_buffer = []
    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    i = 0

    while i < dataset.row_count:
        if not read_buffer:
            query = 'dataset_slug: %s' % (dataset.slug)
            response = solr.query(settings.SOLR_DATA_CORE, query, limit=SOLR_READ_BUFFER_SIZE, offset=i)
            read_buffer = response['response']['docs']

        data = read_buffer.pop(0)
        row = json.loads(data['data'])

        new_data = utils.solr.make_data_row(dataset, row)
        new_data['id'] = data['id']
        new_data['data_upload_id'] = data['data_upload_id']
        new_data = data_typer(new_data, row)

        add_buffer.append(new_data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update(ugettext('%.0f%% complete') % floor(float(i) / float(dataset.row_count) * 100))

            if self.is_aborted():
                task_status.abort(ugettext('Aborted after reindexing %.0f%%') % floor(float(i) / float(dataset.row_count) * 100))
                log.warning('Reindex aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

        i += 1

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update(ugettext('100% complete'))

    # Refresh dataset
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Reindexing could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    dataset.column_schema = data_typer.schema
    dataset.save()

    log.info('Finished reindex, dataset_slug: %s' % dataset_slug)

    return data_typer

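# The reindex variants above page through the Solr data core with limit/offset
# and re-add documents in batches. They assume two module-level buffer sizes
# and a thin solr wrapper (query/add/commit). The values below are
# illustrative only; the real constants are defined elsewhere in the module:
SOLR_READ_BUFFER_SIZE = 1000  # rows fetched per solr.query() page (assumed value)
SOLR_ADD_BUFFER_SIZE = 500    # rows accumulated before each solr.add() (assumed value)

# Exactly one document is popped from read_buffer per loop iteration, so when
# the buffer empties the offset `i` points at the first row not yet read.
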
def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    dataset = Dataset.objects.get(slug=dataset_slug)
    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin('Preparing to import')

    book = load_workbook(upload.get_path(), use_iterators=True)
    sheet = book.get_active_sheet()
    row_count = sheet.get_highest_row()

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    for i, row in enumerate(sheet.iter_rows()):
        # Skip header
        if i == 0:
            continue

        values = []

        for c in row:
            value = c.internal_value

            if value.__class__ is datetime.datetime:
                value = utils.xlsx.normalize_date(value)
            elif value.__class__ is float:
                if value % 1 == 0:
                    value = int(value)

            if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
                value = value.isoformat()

            values.append(value)

        external_id = None

        if external_id_field_index is not None:
            external_id = values[external_id_field_index]

        data = utils.solr.make_data_row(dataset, values, external_id=external_id)
        data = data_typer(data, values)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update('%.0f%% complete' % floor(float(i) / float(row_count) * 100))

            if self.is_aborted():
                task_status.abort('Aborted after importing %.0f%%' % floor(float(i) / float(row_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update('100% complete')

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    dataset = Dataset.objects.get(slug=dataset_slug)

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer

def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin(ugettext('Preparing to import'))

    book = xlrd.open_workbook(upload.get_path(), on_demand=True)
    sheet = book.sheet_by_index(0)
    row_count = sheet.nrows

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    for i in range(1, row_count):
        values = sheet.row_values(i)
        types = sheet.row_types(i)

        normal_values = []

        for v, t in zip(values, types):
            if t == xlrd.biffh.XL_CELL_DATE:
                v = utils.xls.normalize_date(v, book.datemode)
            elif t == xlrd.biffh.XL_CELL_NUMBER:
                if v % 1 == 0:
                    v = int(v)

            normal_values.append(unicode(v))

        external_id = None

        if external_id_field_index is not None:
            external_id = values[external_id_field_index]

        data = utils.solr.make_data_row(dataset, normal_values, data_upload=upload, external_id=external_id)
        data = data_typer(data, normal_values)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update(ugettext('%.0f%% complete') % floor(float(i) / float(row_count) * 100))

            if self.is_aborted():
                task_status.abort(ugettext('Aborted after importing %.0f%%') % floor(float(i) / float(row_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update(ugettext('100% complete'))

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer

def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    dataset = Dataset.objects.get(slug=dataset_slug)
    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin('Preparing to import')

    line_count = self._count_lines(upload.get_path())

    if self.is_aborted():
        task_status.abort('Aborted during preparation')
        log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

        return

    f = open(upload.get_path(), 'r')

    reader = CSVKitReader(f, encoding=upload.encoding, **upload.dialect_as_parameters())
    reader.next()

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    i = 0

    while True:
        # The row number which is about to be read, for error handling and indexing
        i += 1

        try:
            row = reader.next()
        except StopIteration:
            i -= 1
            break
        except UnicodeDecodeError:
            raise DataImportError('This CSV file contains characters that are not %s encoded in or after row %i. You need to re-upload this file and input the correct encoding in order to import data from this file.' % (upload.encoding, i))

        external_id = None

        if external_id_field_index is not None:
            external_id = row[external_id_field_index]

        data = utils.solr.make_data_row(dataset, row, external_id=external_id)
        data = data_typer(data, row)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update('%.0f%% complete (estimated)' % floor(float(i) / float(line_count) * 100))

            if self.is_aborted():
                task_status.abort('Aborted after importing %.0f%% (estimated)' % floor(float(i) / float(line_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    f.close()

    task_status.update('100% complete')

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    dataset = Dataset.objects.get(slug=dataset_slug)

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer

def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    dataset = Dataset.objects.get(slug=dataset_slug)
    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    self.task_start(task_status, 'Preparing to import')

    book = xlrd.open_workbook(upload.get_path(), on_demand=True)
    sheet = book.sheet_by_index(0)
    row_count = sheet.nrows

    add_buffer = []

    for i in range(1, row_count):
        values = sheet.row_values(i)
        types = sheet.row_types(i)

        normal_values = []

        for v, t in zip(values, types):
            if t == xlrd.biffh.XL_CELL_DATE:
                v = utils.xls.normalize_date(v, book.datemode)
            elif t == xlrd.biffh.XL_CELL_NUMBER:
                if v % 1 == 0:
                    v = int(v)

            normal_values.append(unicode(v))

        external_id = None

        if external_id_field_index is not None:
            external_id = values[external_id_field_index]

        data = utils.solr.make_data_row(dataset, values, external_id=external_id)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.message = '%.0f%% complete' % floor(float(i) / float(row_count) * 100)
            task_status.save()

            if self.is_aborted():
                self.task_abort(task_status, 'Aborted after importing %.0f%%' % floor(float(i) / float(row_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    self.task_update(task_status, '100% complete')

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    dataset = Dataset.objects.get(slug=dataset_slug)

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin(ugettext('Preparing to import'))

    book = xlrd.open_workbook(upload.get_path(), on_demand=True)
    sheet = book.sheet_by_index(0)
    row_count = sheet.nrows

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    for i in range(1, row_count):
        values = sheet.row_values(i)
        types = sheet.row_types(i)

        normal_values = []

        for v, t in zip(values, types):
            if t == xlrd.biffh.XL_CELL_DATE:
                v = utils.xls.normalize_date(v, book.datemode)
            elif t == xlrd.biffh.XL_CELL_NUMBER:
                if v % 1 == 0:
                    v = int(v)

            normal_values.append(unicode(v))

        external_id = None

        if external_id_field_index is not None:
            external_id = values[external_id_field_index]

        data = utils.solr.make_data_row(dataset, normal_values, data_upload=upload, external_id=external_id)
        data = data_typer(data, normal_values)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update(ugettext('%.0f%% complete') % floor(float(i) / float(row_count) * 100))

            if self.is_aborted():
                task_status.abort(ugettext('Aborted after importing %.0f%%') % floor(float(i) / float(row_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update(ugettext('100% complete'))

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer

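# The xlrd-based importers above delegate Excel date handling to
# utils.xls.normalize_date(). A hedged sketch of such a helper, assuming it
# decodes serial dates with xlrd and returns an ISO 8601 string (the real
# implementation may differ, e.g. in how it treats date-only cells):
import datetime

import xlrd


def normalize_date(value, datemode):
    # xlrd exposes dates as floats; xldate_as_tuple() decodes them using the
    # workbook's datemode (1900- vs 1904-based epochs).
    year, month, day, hour, minute, second = xlrd.xldate_as_tuple(value, datemode)

    if (year, month, day) == (0, 0, 0):
        # Time-only cell
        return datetime.time(hour, minute, second).isoformat()

    return datetime.datetime(year, month, day, hour, minute, second).isoformat()
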
def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin('Preparing to import')

    line_count = self._count_lines(upload.get_path())

    if self.is_aborted():
        task_status.abort('Aborted during preparation')
        log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

        return

    f = open(upload.get_path(), 'r')

    reader = CSVKitReader(f, encoding=upload.encoding, **upload.dialect_as_parameters())
    reader.next()

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    i = 0

    while True:
        # The row number which is about to be read, for error handling and indexing
        i += 1

        try:
            row = reader.next()
        except StopIteration:
            i -= 1
            break
        except UnicodeDecodeError:
            raise DataImportError('This CSV file contains characters that are not %s encoded in or after row %i. You need to re-upload this file and input the correct encoding in order to import data from this file.' % (upload.encoding, i))

        external_id = None

        if external_id_field_index is not None:
            external_id = row[external_id_field_index]

        data = utils.solr.make_data_row(dataset, row, data_upload=upload, external_id=external_id)
        data = data_typer(data, row)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update('%.0f%% complete (estimated)' % floor(float(i) / float(line_count) * 100))

            if self.is_aborted():
                task_status.abort('Aborted after importing %.0f%% (estimated)' % floor(float(i) / float(line_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    f.close()

    task_status.update('100% complete')

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer

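# The CSV importers above call self._count_lines() to turn progress into a
# rough percentage. A minimal, hypothetical sketch of that helper; the real
# one may buffer reads differently:
def _count_lines(self, path):
    """
    Count the lines in a file so progress can be reported as an estimate.
    """
    with open(path, 'r') as f:
        return sum(1 for line in f)
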
def run(self, dataset_slug, upload_id, external_id_field_index=None, *args, **kwargs):
    """
    Execute import.
    """
    from panda.models import Dataset, DataUpload

    log = logging.getLogger(self.name)
    log.info('Beginning import, dataset_slug: %s' % dataset_slug)

    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import failed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    upload = DataUpload.objects.get(id=upload_id)

    task_status = dataset.current_task
    task_status.begin('Preparing to import')

    book = load_workbook(upload.get_path(), use_iterators=True)
    sheet = book.get_active_sheet()
    row_count = sheet.get_highest_row()

    add_buffer = []
    data_typer = DataTyper(dataset.column_schema)
    throttle = config_value('PERF', 'TASK_THROTTLE')

    for i, row in enumerate(sheet.iter_rows()):
        # Skip header
        if i == 0:
            continue

        values = []

        for c in row:
            value = c.internal_value

            if value.__class__ is datetime.datetime:
                value = utils.xlsx.normalize_date(value)
            elif value.__class__ is float:
                if value % 1 == 0:
                    value = int(value)

            if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
                value = value.isoformat()

            values.append(value)

        external_id = None

        if external_id_field_index is not None:
            external_id = values[external_id_field_index]

        data = utils.solr.make_data_row(dataset, values, data_upload=upload, external_id=external_id)
        data = data_typer(data, values)

        add_buffer.append(data)

        if i % SOLR_ADD_BUFFER_SIZE == 0:
            solr.add(settings.SOLR_DATA_CORE, add_buffer)
            add_buffer = []

            task_status.update('%.0f%% complete' % floor(float(i) / float(row_count) * 100))

            if self.is_aborted():
                task_status.abort('Aborted after importing %.0f%%' % floor(float(i) / float(row_count) * 100))
                log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

                return

            time.sleep(throttle)

    if add_buffer:
        solr.add(settings.SOLR_DATA_CORE, add_buffer)
        add_buffer = []

    solr.commit(settings.SOLR_DATA_CORE)

    task_status.update('100% complete')

    # Refresh dataset from database so there is no chance of clobbering changes made since the task started
    try:
        dataset = Dataset.objects.get(slug=dataset_slug)
    except Dataset.DoesNotExist:
        log.warning('Import could not be completed due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

        return

    if not dataset.row_count:
        dataset.row_count = i
    else:
        dataset.row_count += i

    dataset.column_schema = data_typer.schema

    dataset.save()

    # Refresh the upload and mark it as imported
    upload = DataUpload.objects.get(id=upload_id)
    upload.imported = True
    upload.save()

    log.info('Finished import, dataset_slug: %s' % dataset_slug)

    return data_typer