def backfill_column(table, old_columns, new_columns):
    """Copy values from ``old_columns`` into ``new_columns`` for every row.

    The table is scanned once, selecting the primary key followed by
    ``old_columns``. For each row, the i-th entry of ``new_columns`` receives
    the value read from the i-th entry of ``old_columns``, written with one
    UPDATE filtered on the row's primary key.

    Rows whose source values are all falsy (``None``, ``0``, ``''``, ...) are
    skipped and not counted as written.

    :param table: model class exposing ``select``/``update`` and
        ``_meta.primary_key`` (looks like a peewee model -- confirm).
    :param old_columns: sequence of columns to read from.
    :param new_columns: sequence of columns to write to; must not be longer
        than ``old_columns`` or the positional lookup raises ``IndexError``.
    """
    total = table.select().count()
    primary_key = table._meta.primary_key

    q = table.select(primary_key, *old_columns).tuples()

    idx = 0
    modified = 0

    start = time.time()
    with db.transaction() as txn:
        for values in q:
            idx += 1

            # Progress report every 10000 scanned rows.
            if idx % 10000 == 0:
                print('[%ss] Backfilling %s %s/%s (wrote %s)' % (
                    time.time() - start, str(table), idx, total, modified))

            # values[0] is the primary key, so source values start at index 1.
            obj = {
                new_column.name: values[i + 1]
                for i, new_column in enumerate(new_columns)
            }
            if not any(obj.values()):
                continue

            modified += 1
            table.update(**obj).where(primary_key == values[0]).execute()

            # Flush in batches of 1000 writes. Checking right after a write
            # avoids committing on every scanned row.
            if modified % 1000 == 0:
                txn.commit()

    # No explicit commit here: this relies on the transaction context manager
    # committing on a clean exit (peewee's documented behaviour -- confirm if
    # ``db`` is a different kind of handle).
    print('DONE, %s scanned %s written' % (idx, modified))
Example #2
0
    def backfill_column(self,
                        table,
                        old_columns,
                        new_columns,
                        pkeys=None,
                        cast_funcs=None):
        """Copy values from ``old_columns`` into ``new_columns`` for every row.

        The table is scanned once, selecting ``pkeys`` followed by
        ``old_columns``. For each row, the i-th entry of ``new_columns``
        receives the value read from the i-th entry of ``old_columns``
        (optionally converted), written with one UPDATE filtered on all of
        the row's key columns.

        Rows whose resulting values are all falsy (``None``, ``0``, ``''``,
        ...) are skipped and not counted as written.

        :param table: model class exposing ``select``/``update`` and
            ``_meta.primary_key`` (looks like a peewee model -- confirm).
        :param old_columns: sequence of columns to read from.
        :param new_columns: sequence of columns to write to; must not be
            longer than ``old_columns`` or the positional lookup raises
            ``IndexError``.
        :param pkeys: columns identifying a row; defaults to the table's
            primary key when empty or ``None``.
        :param cast_funcs: optional mapping of new column -> callable applied
            to the source value before it is checked and written.
        """
        total = table.select().count()

        if not pkeys:
            pkeys = [table._meta.primary_key]
        # Normalise to lists so tuple arguments can be concatenated too.
        pkeys = list(pkeys)
        offset = len(pkeys)

        q = table.select(*(pkeys + list(old_columns))).tuples()

        idx = 0
        modified = 0

        start = time.time()
        with database.transaction() as txn:
            for values in q:
                idx += 1

                # Progress report every 10000 scanned rows.
                if idx % 10000 == 0:
                    print('[%ss] Backfilling %s %s/%s (wrote %s)' %
                          (time.time() - start, str(table), idx, total,
                           modified))

                # The first ``offset`` entries of ``values`` are the key
                # columns; source values follow them.
                obj = {}
                for i, new_column in enumerate(new_columns):
                    value = values[i + offset]
                    if cast_funcs and new_column in cast_funcs:
                        value = cast_funcs[new_column](value)
                    obj[new_column.name] = value

                if not any(obj.values()):
                    continue

                modified += 1
                # Write the converted values, not the raw ones, so that
                # ``cast_funcs`` actually affects what is stored.
                table.update(**obj).where(
                    reduce(operator.and_,
                           [(pkey == value)
                            for pkey, value in zip(pkeys, values)])).execute()

                # Flush in batches of 1000 writes. Checking right after a
                # write avoids committing on every scanned row.
                if modified % 1000 == 0:
                    txn.commit()

        # No explicit commit here: this relies on the transaction context
        # manager committing on a clean exit (peewee's documented behaviour
        # -- confirm if ``database`` is a different kind of handle).
        print('DONE, %s scanned %s written' % (idx, modified))