def __init__(self, con):
    self.swift_enabled = getFeatureFlag(
        'librarian.swift.enabled') or False
    self.con = con
    self.index = 1
    self.total_deleted = 0

    log.info("Deleting unreferenced LibraryFileContents.")
    cur = con.cursor()

    drop_tables(cur, "UnreferencedLibraryFileContent")
    cur.execute("""
        CREATE TEMPORARY TABLE UnreferencedLibraryFileContent (
            id bigserial PRIMARY KEY,
            content bigint UNIQUE)
        """)
    cur.execute("""
        INSERT INTO UnreferencedLibraryFileContent (content)
        SELECT DISTINCT LibraryFileContent.id
        FROM LibraryFileContent
        LEFT OUTER JOIN LibraryFileAlias
            ON LibraryFileContent.id = LibraryFileAlias.content
        WHERE LibraryFileAlias.content IS NULL
        """)
    cur.execute("""
        SELECT COALESCE(max(id), 0)
        FROM UnreferencedLibraryFileContent
        """)
    self.max_id = cur.fetchone()[0]
    log.info(
        "%d unreferenced LibraryFileContents to remove." % self.max_id)
def __init__(self, con):
    self.con = con
    self.index = 1
    self.total_deleted = 0

    cur = con.cursor()

    drop_tables(cur, "UnreferencedLibraryFileContent")
    cur.execute("""
        CREATE TEMPORARY TABLE UnreferencedLibraryFileContent (
            id serial PRIMARY KEY,
            content integer UNIQUE)
        """)
    cur.execute("""
        INSERT INTO UnreferencedLibraryFileContent (content)
        SELECT DISTINCT LibraryFileContent.id
        FROM LibraryFileContent
        LEFT OUTER JOIN LibraryFileAlias
            ON LibraryFileContent.id = LibraryFileAlias.content
        WHERE LibraryFileAlias.content IS NULL
        """)
    cur.execute("""
        SELECT COALESCE(max(id), 0)
        FROM UnreferencedLibraryFileContent
        """)
    self.max_id = cur.fetchone()[0]
    log.debug(
        "%d unreferenced LibraryFileContent rows to remove."
        % self.max_id)
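# Both constructors above only stage the doomed ids in a temporary
# table keyed by a serial id and record max_id; the rows are then
# deleted in id-ordered batches by a companion method.  The sketch
# below shows one plausible batch step for the content pruner.  It is
# illustrative only: the method name, the chunksize handling, and
# deleting straight from LibraryFileContent are assumptions, not the
# class's real interface.
def prune_batch(self, chunksize):
    """Hypothetical batch step: delete the next chunksize doomed rows."""
    cur = self.con.cursor()
    cur.execute("""
        DELETE FROM LibraryFileContent
        USING UnreferencedLibraryFileContent
        WHERE LibraryFileContent.id =
                UnreferencedLibraryFileContent.content
            AND UnreferencedLibraryFileContent.id BETWEEN %s AND %s
        """, (self.index, self.index + chunksize - 1))
    self.total_deleted += cur.rowcount
    self.index += chunksize
    self.con.commit()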
def __init__(self, con):
    self.con = con  # Database connection to use
    self.total_deleted = 0  # Running total
    self.index = 1

    log.info("Deleting unreferenced LibraryFileAliases")

    cur = con.cursor()

    drop_tables(cur, "ReferencedLibraryFileAlias")
    cur.execute("""
        CREATE TEMPORARY TABLE ReferencedLibraryFileAlias (
            alias integer)
        """)

    # Determine what columns link to LibraryFileAlias.
    # references = [(table, column), ...]
    references = [
        tuple(ref[:2])
        for ref in listReferences(cur, 'libraryfilealias', 'id')
        if ref[0] != 'libraryfiledownloadcount'
        ]
    assert len(references) > 10, (
        'Database introspection returned nonsense')
    log.debug(
        "Found %d columns referencing LibraryFileAlias",
        len(references))

    # Find all relevant LibraryFileAlias references and fill in
    # ReferencedLibraryFileAlias.
    for table, column in references:
        cur.execute("""
            INSERT INTO ReferencedLibraryFileAlias
            SELECT LibraryFileAlias.id
            FROM LibraryFileAlias, %(table)s
            WHERE LibraryFileAlias.id = %(table)s.%(column)s
            """ % {
                'table': quoteIdentifier(table),
                'column': quoteIdentifier(column)})
        log.debug("%s.%s references %d LibraryFileAlias rows." % (
            table, column, cur.rowcount))
        con.commit()

    log.debug("Calculating unreferenced LibraryFileAlias set.")
    drop_tables(cur, "UnreferencedLibraryFileAlias")
    cur.execute("""
        CREATE TEMPORARY TABLE UnreferencedLibraryFileAlias (
            id serial PRIMARY KEY,
            alias integer UNIQUE)
        """)
    # Calculate the set of unreferenced LibraryFileAlias.
    # We also exclude all unexpired records - we don't remove them
    # even if they are unlinked.  We currently don't remove stuff
    # until it has been expired for more than one week, but we will
    # change this if disk space becomes short and it actually will
    # make a noticeable difference.  We handle excluding recently
    # created content here rather than earlier when creating the
    # ReferencedLibraryFileAlias table to handle uploads going on
    # while this script is running.
    cur.execute("""
        INSERT INTO UnreferencedLibraryFileAlias (alias)
        SELECT id AS alias FROM LibraryFileAlias
        WHERE
            content IS NULL
            OR ((expires IS NULL OR
                 expires < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                     - interval '1 week'
                 )
                AND date_created <
                    CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                        - interval '1 week'
                )
        EXCEPT
        SELECT alias FROM ReferencedLibraryFileAlias
        """)
    con.commit()
    drop_tables(cur, "ReferencedLibraryFileAlias")
    cur.execute(
        "SELECT COALESCE(max(id),0) FROM UnreferencedLibraryFileAlias")
    self.max_id = cur.fetchone()[0]
    log.debug(
        "%d unreferenced LibraryFileAliases to remove." % self.max_id)
    con.commit()
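# The drop_tables() helper used throughout takes a cursor and either a
# single table name or a sequence of names.  A minimal sketch, assuming
# plain DROP TABLE IF EXISTS semantics; the real helper may differ:
def drop_tables(cur, tables):
    """Drop the given table(s), silently skipping any that don't exist."""
    if isinstance(tables, str):
        tables = [tables]
    for table in tables:
        cur.execute("DROP TABLE IF EXISTS %s" % table)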
def pour(self, transaction_manager):
    """Pour data from holding tables back into source tables.

    Rows in the holding table that have their new_id set to null are
    skipped.

    The transaction manager is committed and re-opened after every
    batch run.

    Batch sizes are dynamically adjusted to meet the stated time goal.
    """
    if self.last_extracted_table is None:
        if not self.needsRecovery():
            raise AssertionError("Can't pour: no tables extracted")
    elif self.last_extracted_table != len(self.tables) - 1:
        raise AssertionError(
            "Not safe to pour: last table '%s' was not extracted"
            % self.tables[-1])

    cur = self._commit(transaction_manager)

    # Don't let postgres revert to slow sequential scans while we pour.
    # That might otherwise happen to the holding table as its vital
    # "id" index degrades with the removal of rows.
    postgresql.allow_sequential_scans(cur, False)

    # Main loop: for each of the source tables being copied, see if
    # there's a matching holding table.  If so, prepare it, pour it
    # back into the source table, and drop.
    for table in self.tables:
        holding_table_unquoted = self.getRawHoldingTableName(table)

        if not postgresql.have_table(cur, holding_table_unquoted):
            # We know we're in a suitable state for pouring.  If this
            # table does not exist, it must be because it's been poured
            # out completely and dropped in an earlier instance of this
            # loop, before the failure we're apparently recovering
            # from.
            continue

        holding_table = self.getHoldingTableName(table)
        self.logger.info(
            "Pouring %s back into %s..." % (holding_table, table))

        tablestarttime = time.time()

        has_new_id = postgresql.table_has_column(
            cur, holding_table_unquoted, 'new_id')

        self._pourTable(
            holding_table, table, has_new_id, transaction_manager)

        # Drop holding table.  It may still contain rows with id set
        # to null.  Those must not be poured.
        postgresql.drop_tables(cursor(), holding_table)

        self.logger.debug(
            "Pouring %s took %.3f seconds."
            % (holding_table, time.time() - tablestarttime))

        cur = self._commit(transaction_manager)

    # In future, let the database perform sequential scans again if it
    # decides that's best.
    postgresql.allow_sequential_scans(cur, True)
def dropHoldingTables(self):
    """Drop any holding tables that may exist for this MultiTableCopy."""
    holding_tables = [
        self.getHoldingTableName(table) for table in self.tables]
    postgresql.drop_tables(cursor(), holding_tables)
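# The intended extract-then-pour call order for MultiTableCopy, as a
# hedged sketch.  Only pour() and dropHoldingTables() appear above; the
# constructor arguments and the extract() signature here are
# assumptions for illustration.
def example_multi_table_copy(transaction_manager):
    copier = MultiTableCopy('my-copy', ['person', 'emailaddress'])
    try:
        for table in copier.tables:
            copier.extract(table)         # fill the holding tables
        copier.pour(transaction_manager)  # pour back, batch by batch
    except Exception:
        copier.dropHoldingTables()        # leave no holding tables behind
        raise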
def remove_translations(logger=None, submitter=None, reviewer=None,
                        reject_license=False, ids=None, potemplate=None,
                        language_code=None, not_language=False,
                        is_current_ubuntu=None, is_current_upstream=None,
                        msgid_singular=None, origin=None):
    """Remove specified translation messages.

    :param logger: Optional logger to write output to.
    :param submitter: Delete only messages submitted by this person.
    :param reviewer: Delete only messages reviewed by this person.
    :param reject_license: Delete only messages submitted by persons who
        have rejected the licensing agreement.
    :param ids: Delete only messages with these `TranslationMessage` ids.
    :param potemplate: Delete only messages in this template.
    :param language_code: Language code.  Depending on `not_language`,
        either delete messages in this language or spare messages in
        this language that would otherwise be deleted.
    :param not_language: Whether to spare (True) or delete (False)
        messages in this language.
    :param is_current_ubuntu: Delete only messages with this
        is_current_ubuntu value.
    :param is_current_upstream: Delete only messages with this
        is_current_upstream value.
    :param msgid_singular: Delete only messages with this singular
        msgid.
    :param origin: Delete only messages with this `TranslationOrigin`
        code.

    :return: Number of messages deleted.
    """
    joins = set()
    conditions = set()
    if submitter is not None:
        conditions.add(
            'TranslationMessage.submitter = %s' % sqlvalues(submitter))
    if reviewer is not None:
        conditions.add(
            'TranslationMessage.reviewer = %s' % sqlvalues(reviewer))
    if reject_license:
        joins.add('TranslationRelicensingAgreement')
        conditions.add(
            'TranslationMessage.submitter = '
            'TranslationRelicensingAgreement.person')
        conditions.add(
            'NOT TranslationRelicensingAgreement.allow_relicensing')
    if ids is not None:
        conditions.add('TranslationMessage.id IN %s' % sqlvalues(ids))
    if potemplate is not None:
        joins.add('TranslationTemplateItem')
        conditions.add(
            'TranslationTemplateItem.potmsgset '
            ' = TranslationMessage.potmsgset')
        conditions.add(
            'TranslationTemplateItem.potemplate = %s'
            % sqlvalues(potemplate))

    if language_code is not None:
        joins.add('Language')
        conditions.add('Language.id = TranslationMessage.language')
        language_match = compose_language_match(language_code)
        if not_language:
            conditions.add('NOT (%s)' % language_match)
        else:
            conditions.add(language_match)

    add_bool_match(
        conditions, 'TranslationMessage.is_current_ubuntu',
        is_current_ubuntu)
    add_bool_match(
        conditions, 'TranslationMessage.is_current_upstream',
        is_current_upstream)

    if msgid_singular is not None:
        joins.add('POTMsgSet')
        conditions.add('POTMsgSet.id = TranslationMessage.potmsgset')
        joins.add('POMsgID')
        conditions.add('POMsgID.id = POTMsgSet.msgid_singular')
        conditions.add('POMsgID.msgid = %s' % sqlvalues(msgid_singular))

    if origin is not None:
        conditions.add(
            'TranslationMessage.origin = %s' % sqlvalues(origin))

    assert len(conditions) > 0, "That would delete ALL translations, maniac!"

    cur = cursor()
    drop_tables(cur, 'temp_doomed_message')

    joins.add('TranslationMessage')
    from_text = ', '.join(joins)
    where_text = ' AND\n        '.join(conditions)

    warn_about_deleting_current_messages(
        cur, from_text, where_text, logger)

    # Keep track of messages we're going to delete.
    # Don't bother indexing this.  We'd more likely end up optimizing
    # away the operator's "oh-shit-ctrl-c" time than helping anyone.
query = """ CREATE TEMP TABLE temp_doomed_message AS SELECT TranslationMessage.id, NULL::integer AS imported_message FROM %s WHERE %s """ % (from_text, where_text) cur.execute(query) # Note which shared messages are masked by the messages we're # going to delete. We'll be making those the current ones. query = """ UPDATE temp_doomed_message SET imported_message = Imported.id FROM TranslationMessage Doomed, TranslationMessage Imported WHERE Doomed.id = temp_doomed_message.id AND -- Is alternative for the message we're about to delete. Imported.potmsgset = Doomed.potmsgset AND Imported.language = Doomed.language AND Imported.potemplate IS NULL AND Doomed.potemplate IS NULL AND -- Is used upstream. Imported.is_current_upstream IS TRUE AND -- Was masked by the message we're about to delete. Doomed.is_current_ubuntu IS TRUE AND Imported.id <> Doomed.id """ cur.execute(query) if logger is not None and logger.getEffectiveLevel() <= logging.DEBUG: # Dump sample of doomed messages for debugging purposes. cur.execute(""" SELECT * FROM temp_doomed_message ORDER BY id LIMIT 20 """) rows = cur.fetchall() if cur.rowcount > 0: logger.debug("Sample of messages to be deleted follows.") logger.debug("%10s %10s" % ("[message]", "[unmasks]")) for (doomed, unmasked) in rows: if unmasked is None: unmasked = '--' logger.debug("%10s %10s" % (doomed, unmasked)) cur.execute(""" DELETE FROM TranslationMessage USING temp_doomed_message WHERE TranslationMessage.id = temp_doomed_message.id """) rows_deleted = cur.rowcount if logger is not None: if rows_deleted > 0: logger.info("Deleting %d message(s)." % rows_deleted) else: logger.warn("No rows match; not deleting anything.") cur.execute(""" UPDATE TranslationMessage SET is_current_ubuntu = TRUE FROM temp_doomed_message WHERE TranslationMessage.id = temp_doomed_message.imported_message """) if cur.rowcount > 0 and logger is not None: logger.debug("Unmasking %d imported message(s)." % cur.rowcount) drop_tables(cur, 'temp_doomed_message') return rows_deleted