def apply_changes(data_file, revert=False):
    # short JSON keys -> model attribute names
    mapping = {
        'pk': 'pk',
        'ob': 'old_storage_backend',
        'nb': 'new_storage_backend',
        'of': 'old_storage_file_id',
        'nf': 'new_storage_file_id',
        'on': 'old_filename',
        'nn': 'new_filename',
    }
    click.echo('Parsing data...')
    data = defaultdict(list)
    for line in data_file:
        line_data = json.loads(line)
        converted = {mapping[k]: v for k, v in line_data.viewitems() if k in mapping}
        data[line_data['m']].append(converted)
    models = {model: len(data[model.__name__])
              for model in StoredFileMixin.__subclasses__()
              if model.__name__ in data and len(data[model.__name__])}
    labels = {model: cformat('Processing %{blue!}{}%{reset} (%{cyan}{}%{reset} rows)').format(model.__name__, total)
              for model, total in models.iteritems()}
    max_length = max(len(x) for x in labels.itervalues())
    labels = {model: label.ljust(max_length) for model, label in labels.iteritems()}
    for model, total in sorted(models.items(), key=itemgetter(1)):
        pks = inspect(model).primary_key
        with click.progressbar(data[model.__name__], length=total, label=labels[model],
                               show_percent=True, show_pos=True) as entries:
            for entry in committing_iterator(entries, 1000):
                updates = {}
                # when reverting, write back the old values instead of the new ones
                key = 'old' if revert else 'new'
                if key + '_storage_backend' in entry:
                    updates[model.storage_backend] = entry[key + '_storage_backend']
                if key + '_storage_file_id' in entry:
                    updates[model.storage_file_id] = entry[key + '_storage_file_id']
                if key + '_filename' in entry:
                    updates[model.filename] = entry[key + '_filename']
                # match the row by its composite primary key and update it in bulk
                model.query.filter(Tuple(*pks) == entry['pk']).update(updates, synchronize_session=False)
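The key trick above is pairing inspect(model).primary_key with a SQL tuple so a row can be addressed by its composite primary key inside a bulk UPDATE. Below is a minimal standalone sketch of that filter using SQLAlchemy's public tuple_() helper; the Attachment model, its columns, and the 's3' value are made up for illustration, and the sketch only prints the generated SQL because executing row-value comparisons needs backend support (PostgreSQL, MySQL, or SQLite 3.15+).

# Minimal sketch (SQLAlchemy 1.4+ assumed); Attachment is a hypothetical model.
from sqlalchemy import Column, Integer, String, inspect, tuple_, update
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Attachment(Base):
    __tablename__ = 'attachments'
    event_id = Column(Integer, primary_key=True)
    file_id = Column(Integer, primary_key=True)
    storage_backend = Column(String)

pks = inspect(Attachment).primary_key   # (attachments.event_id, attachments.file_id)
stmt = (
    update(Attachment)
    .where(tuple_(*pks) == (1, 2))      # row-value equality on the composite PK
    .values(storage_backend='s3')
)
# Prints roughly: UPDATE attachments SET storage_backend=:storage_backend
#                 WHERE (attachments.event_id, attachments.file_id) = (:param_1, :param_2)
print(stmt)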
def query_chunked(self, model, chunk_size):
    pks = inspect(model).primary_key
    query = base_query = self.make_query(model).order_by(*pks)
    while True:
        row = None
        for row in query.limit(chunk_size):
            yield row
        if row is None:  # no rows in the query
            break
        query = base_query.filter(Tuple(*pks) > inspect(row).identity)
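This is keyset pagination: order by the primary key and restart each chunk just after the last key seen, which avoids the cost of ever-deeper offset()/limit() scans. The sketch below is a self-contained, runnable version under simplified assumptions: a hypothetical User model with a single-column primary key (so it runs on plain SQLite without row-value support), and an ordinary session.query() in place of the original make_query().

# Runnable sketch; User and the in-memory SQLite database are illustrative only.
from sqlalchemy import Column, Integer, String, create_engine, inspect, tuple_
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)

def query_chunked(session, model, chunk_size):
    pks = inspect(model).primary_key
    query = base_query = session.query(model).order_by(*pks)
    while True:
        row = None
        for row in query.limit(chunk_size):
            yield row
        if row is None:  # no rows left
            break
        # continue strictly after the last primary key seen so far
        query = base_query.filter(tuple_(*pks) > inspect(row).identity)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add_all(User(name='u%d' % i) for i in range(10))
    session.commit()
    names = [u.name for u in query_chunked(session, User, chunk_size=3)]
    assert names == ['u%d' % i for i in range(10)]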
def delete(self, entities_ids):
    """
    Delete entities according to the entities_ids set/list

    :param entities_ids: a list/set of entity ids to be deleted, e.g. ((1, 2), (1, 3))
    :return: number of deleted entities
    """
    deleted_entities = 0
    try:
        self.session.query(self.EntityModel) \
            .filter(Tuple(self.EntityModel.user_id, self.EntityModel.subject_id).in_(entities_ids)) \
            .delete(synchronize_session='fetch')
        self.session.commit()
        deleted_entities = len(entities_ids)
    except Exception as exception:
        self.session.rollback()
        logger.error('Skipping due to unhandled exception')
        logger.error(exception)
    return deleted_entities
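Both delete() variants hinge on a composite-key IN filter. Here is an isolated sketch of just that criterion; the EntityModel below is a stand-in for whatever self.EntityModel is in the real repository, and the sketch only prints the statement instead of executing it, since tuple IN needs backend support (PostgreSQL and MySQL provide it).

# Sketch only; model, table, and ids are placeholders (SQLAlchemy 1.4+ assumed).
from sqlalchemy import Column, Integer, delete, tuple_
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class EntityModel(Base):
    __tablename__ = 'entities'
    user_id = Column(Integer, primary_key=True)
    subject_id = Column(Integer, primary_key=True)

entities_ids = [(1, 2), (1, 3)]
stmt = delete(EntityModel).where(
    tuple_(EntityModel.user_id, EntityModel.subject_id).in_(entities_ids)
)
# Prints roughly: DELETE FROM entities
#                 WHERE (entities.user_id, entities.subject_id) IN (...)
print(stmt)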
def delete(self, entities_ids, batch_size=500):
    """
    Delete entities according to the entities_ids set/list

    :param entities_ids: a list/set of entity ids to be deleted, e.g. ((1, 2), (1, 3))
    :param batch_size: number of records to delete per batch
    :return: number of deleted entities
    """
    deleted_entities = 0
    entities_ids_list = list(entities_ids)
    num_of_iteration = len(entities_ids_list) // batch_size + 1
    for i in range(num_of_iteration):
        logger.info('Starting deleting iteration number %s', i)
        batch_ids = entities_ids_list[i * batch_size:(i + 1) * batch_size]
        try:
            self.session.query(self.EntityModel) \
                .filter(Tuple(self.EntityModel.user_id, self.EntityModel.subject_id).in_(batch_ids)) \
                .delete(synchronize_session='fetch')
            self.session.commit()
            deleted_entities += len(batch_ids)
        except Exception as exception:
            self.session.rollback()
            logger.error('Skipping due to unhandled exception')
            logger.error(exception)
    return deleted_entities
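The batched variant only adds slicing on top of the same composite IN filter, committing after each batch so a failure loses at most one batch. The snippet below is a small standalone check of how num_of_iteration and the slices behave; plain integers stand in for the (user_id, subject_id) tuples.

# Batching arithmetic check: ids split into len//batch_size + 1 slices.
entities_ids_list = list(range(1201))
batch_size = 500
num_of_iteration = len(entities_ids_list) // batch_size + 1
batches = [entities_ids_list[i * batch_size:(i + 1) * batch_size]
           for i in range(num_of_iteration)]
assert [len(b) for b in batches] == [500, 500, 201]
# If the length divides evenly, the final slice is empty and that iteration
# simply matches (and deletes) nothing.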