# Imports needed by this snippet; cformat, StoredFileMixin and
# committing_iterator come from the surrounding (Indico) code base.
import json
from collections import defaultdict
from operator import itemgetter

import click
from sqlalchemy import inspect
from sqlalchemy.sql.expression import Tuple


def apply_changes(data_file, revert=False):
    mapping = {
        'pk': 'pk',
        'ob': 'old_storage_backend',
        'nb': 'new_storage_backend',
        'of': 'old_storage_file_id',
        'nf': 'new_storage_file_id',
        'on': 'old_filename',
        'nn': 'new_filename',
    }
    click.echo('Parsing data...')
    data = defaultdict(list)
    for line in data_file:
        line_data = json.loads(line)
        converted = {
            mapping[k]: v
            for k, v in line_data.items() if k in mapping
        }
        data[line_data['m']].append(converted)

    models = {
        model: len(data[model.__name__])
        for model in StoredFileMixin.__subclasses__()
        if model.__name__ in data and len(data[model.__name__])
    }
    labels = {
        model: cformat(
            'Processing %{blue!}{}%{reset} (%{cyan}{}%{reset} rows)').format(
                model.__name__, total)
        for model, total in models.items()
    }
    max_length = max(len(x) for x in labels.values())
    labels = {
        model: label.ljust(max_length)
        for model, label in labels.items()
    }
    for model, total in sorted(models.items(), key=itemgetter(1)):
        pks = inspect(model).primary_key
        with click.progressbar(data[model.__name__],
                               length=total,
                               label=labels[model],
                               show_percent=True,
                               show_pos=True) as entries:
            for entry in committing_iterator(entries, 1000):
                updates = {}
                key = 'old' if revert else 'new'
                if key + '_storage_backend' in entry:
                    updates[model.storage_backend] = entry[key +
                                                           '_storage_backend']
                if key + '_storage_file_id' in entry:
                    updates[model.storage_file_id] = entry[key +
                                                           '_storage_file_id']
                if key + '_filename' in entry:
                    updates[model.filename] = entry[key + '_filename']
                model.query.filter(Tuple(*pks) == entry['pk']).update(
                    updates, synchronize_session=False)
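
For context, apply_changes expects data_file to yield one JSON object per line: 'm' carries the model name and the short keys from the mapping above carry the old/new values. A minimal sketch of such a line (the model name and values here are made up for illustration):

import json

# Hypothetical migration record: move one row of a StoredFileMixin model
# to a new storage backend and file id.
line = json.dumps({
    'm': 'Attachment',       # model name; rows are grouped per model
    'pk': [42],              # primary key (tuple) of the row to update
    'ob': 'legacy-fs',       # old_storage_backend
    'nb': 's3',              # new_storage_backend
    'of': '2017/abc.bin',    # old_storage_file_id
    'nf': 'attachments/42',  # new_storage_file_id
})
# apply_changes(data_file) writes the new_* values;
# apply_changes(data_file, revert=True) switches back to the old_* values.
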
    def query_chunked(self, model, chunk_size):
        pks = inspect(model).primary_key
        query = base_query = self.make_query(model).order_by(*pks)
        while True:
            row = None
            for row in query.limit(chunk_size):
                yield row
            if row is None:
                # no rows in the query
                break
            query = base_query.filter(Tuple(*pks) > inspect(row).identity)
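
query_chunked implements keyset pagination: it orders by the full primary key and restarts each chunk from the identity of the last row seen, so no OFFSET scans are needed. A hypothetical usage, assuming repo is an instance of the class providing this method and Attachment is a mapped model:

# Iterate a large table in chunks of 1000 rows without loading everything at once.
for attachment in repo.query_chunked(Attachment, 1000):
    process(attachment)  # process() is a placeholder for per-row work
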
Example #3
    def delete(self, entities_ids):
        """
        Delete entities according to the entities_ids set/list
        :param entities_ids: Expect to be a list/set of entities_ids that'll be delete, i.e. ((1,2), (1,3))
        :return: number of deleted
        """
        deleted_entities = 0

        try:
            self.session.query(self.EntityModel) \
                .filter(Tuple(self.EntityModel.user_id, self.EntityModel.subject_id).in_(entities_ids)) \
                .delete(synchronize_session='fetch')
            self.session.commit()
            deleted_entities = len(entities_ids)
        except Exception as exception:
            self.session.rollback()
            logger.error('Skipping due to unhandled exception')
            logger.error(exception)
        return deleted_entities
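
A hypothetical call, assuming repo is an instance of the repository class defining delete() and EntityModel is keyed by (user_id, subject_id):

# Delete two rows identified by their (user_id, subject_id) pairs.
removed = repo.delete({(1, 2), (1, 3)})
print('deleted {} entities'.format(removed))
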
Example #4
    def delete(self, entities_ids, batch_size=500):
        """
        Delete entities according to the entities_ids set/list, in batches.
        :param entities_ids: a list/set of (user_id, subject_id) tuples to delete, e.g. ((1, 2), (1, 3))
        :param batch_size: number of records to delete per batch
        :return: number of deleted entities
        """
        deleted_entities = 0
        entities_ids_list = list(entities_ids)
        # ceil division, so an exact multiple of batch_size does not add a trailing empty batch
        num_of_iterations = -(-len(entities_ids_list) // batch_size)
        for i in range(num_of_iterations):
            logger.info('Starting delete iteration number %s', i)
            batch_ids = entities_ids_list[i * batch_size:(i + 1) * batch_size]
            try:
                self.session.query(self.EntityModel) \
                    .filter(Tuple(self.EntityModel.user_id, self.EntityModel.subject_id).in_(batch_ids)) \
                    .delete(synchronize_session='fetch')
                self.session.commit()
                deleted_entities += len(batch_ids)
            except Exception as exception:
                self.session.rollback()
                logger.error('Skipping batch due to unhandled exception')
                logger.error(exception)
        return deleted_entities
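
The batched variant behaves the same from the caller's perspective; batch_size only bounds how many tuples end up in a single IN clause. A hypothetical call, assuming the same repo/EntityModel setup as above:

# Delete a large set of (user_id, subject_id) pairs, 200 per DELETE statement.
ids = {(user_id, 7) for user_id in range(1, 1001)}
removed = repo.delete(ids, batch_size=200)
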