def backfill_column(table, old_columns, new_columns):
    """Copy values from ``old_columns`` into ``new_columns`` for each row.

    Columns are paired by position: ``new_columns[i]`` receives the value
    selected for ``old_columns[i]``. Rows are addressed through
    ``table._meta.primary_key``. A row whose selected values are all falsy
    is skipped and not counted as written.

    The whole scan runs inside one ``db.transaction()`` block; the
    transaction is committed after every 1000 written rows and once more
    when the scan finishes. Progress is printed every 10000 scanned rows.

    Args:
        table: model class providing ``select()``, ``update()`` and
            ``_meta.primary_key`` (looks like a peewee model -- confirm).
        old_columns: sequence of columns to read from.
        new_columns: sequence of columns to write to; each must expose
            ``.name``. Must not be longer than ``old_columns``.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    total = table.select().count()
    primary_key = table._meta.primary_key
    query = table.select(primary_key, *old_columns).tuples()
    scanned = 0
    modified = 0
    start = time.time()
    with db.transaction() as txn:
        for values in query:
            scanned += 1
            if scanned % 10000 == 0:
                print('[%ss] Backfilling %s %s/%s (wrote %s)' % (
                    time.time() - start, str(table), scanned, total,
                    modified))

            # values[0] is the primary key, so old column i is at i + 1.
            obj = {
                new_column.name: values[i + 1]
                for i, new_column in enumerate(new_columns)
            }
            # NOTE: all-falsy rows (None, but also 0, '' and False) are
            # treated as having nothing to copy.
            if not any(obj.values()):
                continue

            table.update(**obj).where(primary_key == values[0]).execute()
            modified += 1

            # Commit in batches of 1000 writes. The previous check
            # (`if modified % 1000:`) was inverted and committed on almost
            # every scanned row.
            if modified % 1000 == 0:
                txn.commit()
        txn.commit()
    print('DONE, %s scanned %s written' % (scanned, modified))
def backfill_column(self, table, old_columns, new_columns, pkeys=None,
                    cast_funcs=None):
    """Copy values from ``old_columns`` into ``new_columns`` for each row.

    Columns are paired by position: ``new_columns[i]`` receives the value
    selected for ``old_columns[i]``, optionally passed through a cast
    function first. Rows are addressed by ``pkeys`` (all key columns must
    match). A row whose resulting values are all falsy is skipped and not
    counted as written.

    The whole scan runs inside one ``database.transaction()`` block; the
    transaction is committed after every 1000 written rows and once more
    when the scan finishes. Progress is printed every 10000 scanned rows.

    Args:
        table: model class providing ``select()``, ``update()`` and
            ``_meta.primary_key`` (looks like a peewee model -- confirm).
        old_columns: sequence of columns to read from.
        new_columns: sequence of columns to write to; each must expose
            ``.name``. Must not be longer than ``old_columns``.
        pkeys: optional sequence of columns identifying a row. Defaults to
            ``[table._meta.primary_key]`` when empty or ``None``.
        cast_funcs: optional mapping from a new column object to a
            one-argument callable applied to the old value before writing.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    # `reduce` is a builtin only on Python 2; functools has it on both.
    from functools import reduce

    total = table.select().count()
    key_columns = list(pkeys) if pkeys else [table._meta.primary_key]
    key_count = len(key_columns)
    query = table.select(*(key_columns + list(old_columns))).tuples()
    scanned = 0
    modified = 0
    start = time.time()
    with database.transaction() as txn:
        for values in query:
            scanned += 1
            if scanned % 10000 == 0:
                print('[%ss] Backfilling %s %s/%s (wrote %s)' % (
                    time.time() - start, str(table), scanned, total,
                    modified))

            # The key columns come first in each tuple, so old column i is
            # at values[i + key_count].
            obj = {}
            for i, new_column in enumerate(new_columns):
                value = values[i + key_count]
                if cast_funcs and new_column in cast_funcs:
                    value = cast_funcs[new_column](value)
                obj[new_column.name] = value

            # NOTE: all-falsy rows (None, but also 0, '' and False) are
            # treated as having nothing to copy.
            if not any(obj.values()):
                continue

            row_filter = reduce(
                operator.and_,
                [key_column == values[i]
                 for i, key_column in enumerate(key_columns)])
            # Write the (possibly cast) values; the previous code rebuilt
            # the dict from raw values, so cast_funcs had no effect.
            table.update(**obj).where(row_filter).execute()
            modified += 1

            # Commit in batches of 1000 writes. The previous check
            # (`if modified % 1000:`) was inverted and committed on almost
            # every scanned row.
            if modified % 1000 == 0:
                txn.commit()
        txn.commit()
    print('DONE, %s scanned %s written' % (scanned, modified))