def needsRecovery(self):
        """Check for recoverable holding-table data left by a previous run.

        Returns True if pouring can resume from existing holding tables,
        False otherwise.
        """

        cur = cursor()

        # pour() processes self.tables in order, so any interrupted run
        # that produced pourable data always left a holding table for the
        # final entry.  No such table means there is nothing to recover.
        if not postgresql.have_table(
                cur, self.getRawHoldingTableName(self.tables[-1])):
            return False

        # A surviving new_id column on the first table's holding table
        # means the pouring process never began, so the data never reached
        # a pourable state; treat it as unfit for recovery.
        if postgresql.table_has_column(
                cur, self.getRawHoldingTableName(self.tables[0]), 'new_id'):
            self.logger.info(
                "Previous run aborted too early for recovery; redo all")
            return False

        self.logger.info("Recoverable data found")
        return True
    def needsRecovery(self):
        """Do we have holding tables with recoverable data from previous run?

        Returns Boolean answer.
        """

        cur = cursor()

        tables = self.tables

        # The last table that pour() handles is the sentinel: if any
        # holding tables remain from an earlier run, its holding table
        # must be among them.
        final_holding = self.getRawHoldingTableName(tables[-1])
        if not postgresql.have_table(cur, final_holding):
            return False

        # When the first table's holding table still carries its new_id
        # column, extraction was interrupted before pouring began; assume
        # the data was not ready for pouring.
        initial_holding = self.getRawHoldingTableName(tables[0])
        if postgresql.table_has_column(cur, initial_holding, 'new_id'):
            self.logger.info(
                "Previous run aborted too early for recovery; redo all")
            return False

        self.logger.info("Recoverable data found")
        return True
    def pour(self, transaction_manager):
        """Pour data from holding tables back into source tables.

        Rows in the holding table that have their new_id set to null are
        skipped.

        The transaction manager is committed and re-opened after every batch
        run.

        Batch sizes are dynamically adjusted to meet the stated time goal.
        """
        # Refuse to pour unless every table was extracted, or we are
        # recovering data left behind by an earlier, interrupted run.
        if self.last_extracted_table is None:
            if not self.needsRecovery():
                raise AssertionError("Can't pour: no tables extracted")
        elif self.last_extracted_table != len(self.tables) - 1:
            raise AssertionError(
                "Not safe to pour: last table '%s' was not extracted"
                % self.tables[-1])

        cur = self._commit(transaction_manager)

        # Keep the planner off sequential scans while we pour: deleting
        # rows degrades the holding table's vital "id" index, which could
        # otherwise push postgres toward slow sequential scans.
        postgresql.allow_sequential_scans(cur, False)

        # Walk the copied source tables in order; any that still has a
        # matching holding table gets prepared, poured back, and dropped.
        for source_table in self.tables:
            raw_holding = self.getRawHoldingTableName(source_table)

            # We already know we are in a pourable state, so a missing
            # holding table means an earlier pass of this loop (before the
            # failure we're recovering from) poured it completely and
            # dropped it.
            if not postgresql.have_table(cur, raw_holding):
                continue

            quoted_holding = self.getHoldingTableName(source_table)
            self.logger.info("Pouring %s back into %s..."
                         % (quoted_holding, source_table))

            started_at = time.time()

            has_new_id_column = postgresql.table_has_column(
                cur, raw_holding, 'new_id')

            self._pourTable(
                quoted_holding, source_table, has_new_id_column,
                transaction_manager)

            # The holding table may still hold rows whose id is null;
            # those must never be poured, so drop the table outright.
            postgresql.drop_tables(cursor(), quoted_holding)

            self.logger.debug(
                "Pouring %s took %.3f seconds."
                % (quoted_holding, time.time() - started_at))

            cur = self._commit(transaction_manager)

        # From here on, let the database choose sequential scans again
        # whenever it decides that's best.
        postgresql.allow_sequential_scans(cur, True)
    def pour(self, transaction_manager):
        """Move held data from the holding tables back into source tables.

        Holding-table rows whose new_id is set to null are not poured.

        After every batch run the transaction manager is committed and
        re-opened.

        Batch sizes adapt dynamically to meet the stated time goal.
        """
        # Sanity check: pouring is only safe when all tables were
        # extracted, or when we are resuming a previous interrupted run.
        last_index = self.last_extracted_table
        if last_index is None:
            if not self.needsRecovery():
                raise AssertionError("Can't pour: no tables extracted")
        elif last_index != len(self.tables) - 1:
            raise AssertionError(
                "Not safe to pour: last table '%s' was not extracted" %
                self.tables[-1])

        cur = self._commit(transaction_manager)

        # Disable sequential scans for the duration of the pour.  As rows
        # are removed, the holding table's vital "id" index degrades, and
        # postgres might otherwise revert to slow sequential scans.
        postgresql.allow_sequential_scans(cur, False)

        # Main loop over the copied tables: each one whose holding table
        # survives is prepared, poured back into its source, and dropped.
        for table in self.tables:
            unquoted_name = self.getRawHoldingTableName(table)

            if not postgresql.have_table(cur, unquoted_name):
                # Given that we're in a pourable state, this table must
                # have been poured out completely and dropped by an
                # earlier iteration, before the failure we are now
                # recovering from.
                continue

            holding = self.getHoldingTableName(table)
            self.logger.info("Pouring %s back into %s..." %
                             (holding, table))

            start_time = time.time()

            carries_new_id = postgresql.table_has_column(cur,
                                                         unquoted_name,
                                                         'new_id')

            self._pourTable(holding, table, carries_new_id,
                            transaction_manager)

            # Rows with a null id may remain in the holding table; they
            # must not be poured, so the whole table is simply dropped.
            postgresql.drop_tables(cursor(), holding)

            self.logger.debug("Pouring %s took %.3f seconds." %
                              (holding, time.time() - start_time))

            cur = self._commit(transaction_manager)

        # Hand the choice back to the database: sequential scans are
        # allowed again whenever the planner decides they're best.
        postgresql.allow_sequential_scans(cur, True)