Example #1
    def __init__(self, con):
        self.swift_enabled = getFeatureFlag(
            'librarian.swift.enabled') or False
        self.con = con
        self.index = 1
        self.total_deleted = 0
        log.info("Deleting unreferenced LibraryFileContents.")
        cur = con.cursor()
        drop_tables(cur, "UnreferencedLibraryFileContent")
        cur.execute("""
            CREATE TEMPORARY TABLE UnreferencedLibraryFileContent (
                id bigserial PRIMARY KEY,
                content bigint UNIQUE)
            """)
        cur.execute("""
            INSERT INTO UnreferencedLibraryFileContent (content)
            SELECT DISTINCT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.content IS NULL
            """)
        cur.execute("""
            SELECT COALESCE(max(id), 0) FROM UnreferencedLibraryFileContent
            """)
        self.max_id = cur.fetchone()[0]
        log.info(
            "%d unreferenced LibraryFileContents to remove." % self.max_id)
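
This constructor only stages the work: it fills a temporary table with the doomed ids and records the highest one in self.max_id. Below is a minimal, hypothetical sketch of how the index/max_id/total_deleted state might then drive a chunked delete; the isDone()/__call__(chunksize) protocol and the DELETE statement are assumptions for illustration, not part of the example above.

# Hypothetical sketch only: consuming the staged ids in chunks.
# The isDone()/__call__(chunksize) protocol is assumed, not shown above.
class UnreferencedContentPrunerSketch:
    def __init__(self, con, max_id):
        self.con = con
        self.index = 1
        self.total_deleted = 0
        self.max_id = max_id  # highest staged UnreferencedLibraryFileContent.id

    def isDone(self):
        # Finished once the cursor has walked past the last staged row.
        return self.index > self.max_id

    def __call__(self, chunksize):
        # Delete one slice of staged content rows, then advance.
        cur = self.con.cursor()
        cur.execute("""
            DELETE FROM LibraryFileContent
            USING UnreferencedLibraryFileContent
            WHERE LibraryFileContent.id = UnreferencedLibraryFileContent.content
                AND UnreferencedLibraryFileContent.id BETWEEN %s AND %s
            """, (self.index, self.index + chunksize - 1))
        self.total_deleted += cur.rowcount
        self.index += chunksize
        self.con.commit()
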
Example #2
    def __init__(self, con):
        self.con = con
        self.index = 1
        self.total_deleted = 0
        cur = con.cursor()
        drop_tables(cur, "UnreferencedLibraryFileContent")
        cur.execute("""
            CREATE TEMPORARY TABLE UnreferencedLibraryFileContent (
                id serial PRIMARY KEY,
                content integer UNIQUE)
            """)
        cur.execute("""
            INSERT INTO UnreferencedLibraryFileContent (content)
            SELECT DISTINCT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.content IS NULL
            """)
        cur.execute("""
            SELECT COALESCE(max(id), 0) FROM UnreferencedLibraryFileContent
            """)
        self.max_id = cur.fetchone()[0]
        log.debug(
            "%d unreferenced LibraryFileContent rows to remove."
            % self.max_id)
Example #3
    def __init__(self, con):
        self.con = con  # Database connection to use
        self.total_deleted = 0  # Running total
        self.index = 1

        log.info("Deleting unreferenced LibraryFileAliases")

        cur = con.cursor()

        drop_tables(cur, "ReferencedLibraryFileAlias")
        cur.execute("""
            CREATE TEMPORARY TABLE ReferencedLibraryFileAlias (
                alias integer)
            """)

        # Determine what columns link to LibraryFileAlias
        # references = [(table, column), ...]
        references = [
            tuple(ref[:2])
            for ref in listReferences(cur, 'libraryfilealias', 'id')
            if ref[0] != 'libraryfiledownloadcount'
            ]
        assert len(references) > 10, (
            'Database introspection returned nonsense')
        log.debug(
            "Found %d columns referencing LibraryFileAlias", len(references))

        # Find all relevant LibraryFileAlias references and fill in
        # ReferencedLibraryFileAlias
        for table, column in references:
            cur.execute("""
                INSERT INTO ReferencedLibraryFileAlias
                SELECT LibraryFileAlias.id
                FROM LibraryFileAlias, %(table)s
                WHERE LibraryFileAlias.id = %(table)s.%(column)s
                """ % {
                    'table': quoteIdentifier(table),
                    'column': quoteIdentifier(column)})
            log.debug("%s.%s references %d LibraryFileContent rows." % (
                table, column, cur.rowcount))
            con.commit()

        log.debug("Calculating unreferenced LibraryFileAlias set.")
        drop_tables(cur, "UnreferencedLibraryFileAlias")
        cur.execute("""
            CREATE TEMPORARY TABLE UnreferencedLibraryFileAlias (
                id serial PRIMARY KEY,
                alias integer UNIQUE)
            """)
        # Calculate the set of unreferenced LibraryFileAlias.
        # We also exclude all unexpired records - we don't remove them
        # even if they are unlinked. We currently don't remove stuff
        # until it has been expired for more than one week, but we will
        # change this if disk space becomes short and it actually will
        # make a noticeable difference. We handle excluding recently
        # created content here rather than earlier when creating the
        # ReferencedLibraryFileAlias table to handle uploads going on
        # while this script is running.
        cur.execute("""
            INSERT INTO UnreferencedLibraryFileAlias (alias)
            SELECT id AS alias FROM LibraryFileAlias
            WHERE
                content IS NULL
                OR ((expires IS NULL OR
                     expires <
                         CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                             - interval '1 week'
                    )
                    AND date_created <
                        CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                            - interval '1 week'
                   )
            EXCEPT
            SELECT alias FROM ReferencedLibraryFileAlias
            """)
        con.commit()
        drop_tables(cur, "ReferencedLibraryFileAlias")
        cur.execute(
            "SELECT COALESCE(max(id),0) FROM UnreferencedLibraryFileAlias")
        self.max_id = cur.fetchone()[0]
        log.debug(
            "%d unreferenced LibraryFileContent to remove." % self.max_id)
        con.commit()
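
Note that the tuple(ref[:2]) slice above assumes that whatever listReferences yields, its first two items are the referring table and column; any trailing fields are discarded. A small illustration with made-up rows (the table and column names are invented, not taken from the real schema):

# Made-up rows standing in for listReferences() output; only the first two
# fields (table, column) are assumed to matter here.
sample_refs = [
    ('bugattachment', 'libraryfile', 'libraryfilealias', 'id'),
    ('libraryfiledownloadcount', 'libraryfilealias', 'libraryfilealias', 'id'),
]
references = [
    tuple(ref[:2])
    for ref in sample_refs
    if ref[0] != 'libraryfiledownloadcount'
]
assert references == [('bugattachment', 'libraryfile')]
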
    def pour(self, transaction_manager):
        """Pour data from holding tables back into source tables.

        Rows in the holding table that have their new_id set to null are
        skipped.

        The transaction manager is committed and re-opened after every batch
        run.

        Batch sizes are dynamically adjusted to meet the stated time goal.
        """
        if self.last_extracted_table is None:
            if not self.needsRecovery():
                raise AssertionError("Can't pour: no tables extracted")
        elif self.last_extracted_table != len(self.tables) - 1:
            raise AssertionError(
                "Not safe to pour: last table '%s' was not extracted"
                % self.tables[-1])

        cur = self._commit(transaction_manager)

        # Don't let postgres revert to slow sequential scans while we pour.
        # That might otherwise happen to the holding table as its vital "id"
        # index degrades with the removal of rows.
        postgresql.allow_sequential_scans(cur, False)

        # Main loop: for each of the source tables being copied, see if
        # there's a matching holding table.  If so, prepare it, pour it back
        # into the source table, and drop.
        for table in self.tables:
            holding_table_unquoted = self.getRawHoldingTableName(table)

            if not postgresql.have_table(cur, holding_table_unquoted):
                # We know we're in a suitable state for pouring.  If this
                # table does not exist, it must be because it's been poured
                # out completely and dropped in an earlier instance of this
                # loop, before the failure we're apparently recovering from.
                continue

            holding_table = self.getHoldingTableName(table)
            self.logger.info("Pouring %s back into %s..."
                         % (holding_table, table))

            tablestarttime = time.time()

            has_new_id = postgresql.table_has_column(
                cur, holding_table_unquoted, 'new_id')

            self._pourTable(
                holding_table, table, has_new_id, transaction_manager)

            # Drop the holding table.  It may still contain rows with new_id
            # set to null.  Those must not be poured.
            postgresql.drop_tables(cursor(), holding_table)

            self.logger.debug(
                "Pouring %s took %.3f seconds."
                % (holding_table, time.time() - tablestarttime))

            cur = self._commit(transaction_manager)

        # Now that pouring is done, let the database perform sequential scans
        # again if it decides that's best.
        postgresql.allow_sequential_scans(cur, True)
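
The docstring above promises dynamically adjusted batch sizes, but the _pourTable helper that does the adjustment is not shown in this excerpt. A rough sketch of one such adjustment rule follows; the seconds_per_batch goal and the clamping bounds are assumptions, not taken from the real implementation.

# Hypothetical sketch of time-goal batch sizing; not the real _pourTable.
def adjust_batch_size(batch_size, elapsed_seconds, seconds_per_batch=2.0):
    """Scale the next batch toward the per-batch time goal."""
    if elapsed_seconds <= 0:
        return batch_size * 2
    scaled = int(batch_size * seconds_per_batch / elapsed_seconds)
    # Never drop to zero rows, never more than double in a single step.
    return max(1, min(scaled, batch_size * 2))

A caller would time each slice of rows it moves out of the holding table and feed the elapsed time back in before sizing the next slice.
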
    def dropHoldingTables(self):
        """Drop any holding tables that may exist for this MultiTableCopy."""
        holding_tables = [
            self.getHoldingTableName(table) for table in self.tables]
        postgresql.drop_tables(cursor(), holding_tables)
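
Both of these methods deal with interrupted copies: needsRecovery (checked inside pour) says whether holding tables from an earlier run are still waiting, and dropHoldingTables discards them. A small hypothetical sketch of how a caller might choose between resuming and starting clean; 'copier' stands for a MultiTableCopy-like object and is an assumption here:

# Hypothetical usage sketch; only needsRecovery(), pour() and
# dropHoldingTables() from the excerpts above are used.
def resume_or_start_clean(copier, transaction_manager):
    if copier.needsRecovery():
        # Holding tables from an interrupted run still exist: pour them back.
        copier.pour(transaction_manager)
    else:
        # Nothing to resume; make sure no stale holding tables linger.
        copier.dropHoldingTables()
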
def remove_translations(logger=None, submitter=None, reviewer=None,
                        reject_license=False, ids=None, potemplate=None,
                        language_code=None, not_language=False,
                        is_current_ubuntu=None, is_current_upstream=None,
                        msgid_singular=None, origin=None):
    """Remove specified translation messages.

    :param logger: Optional logger to write output to.
    :param submitter: Delete only messages submitted by this person.
    :param reviewer: Delete only messages reviewed by this person.
    :param reject_license: Delete only messages submitted by persons who
        have rejected the licensing agreement.
    :param ids: Delete only messages with these `TranslationMessage` ids.
    :param potemplate: Delete only messages in this template.
    :param language_code: Language code.  Depending on `not_language`,
        either delete messages in this language or spare messages in this
        language that would otherwise be deleted.
    :param not_language: Whether to spare (True) or delete (False)
        messages in this language.
    :param is_current_ubuntu: Delete only messages with this is_current_ubuntu
        value.
    :param is_current_upstream: Delete only messages with this
        is_current_upstream value.
    :param msgid_singular: Delete only messages with this singular msgid.
    :param origin: Delete only messages with this `TranslationOrigin` code.

    :return: Number of messages deleted.
    """
    joins = set()
    conditions = set()
    if submitter is not None:
        conditions.add(
            'TranslationMessage.submitter = %s' % sqlvalues(submitter))
    if reviewer is not None:
        conditions.add(
            'TranslationMessage.reviewer = %s' % sqlvalues(reviewer))
    if reject_license:
        joins.add('TranslationRelicensingAgreement')
        conditions.add(
            'TranslationMessage.submitter = '
            'TranslationRelicensingAgreement.person')
        conditions.add(
            'NOT TranslationRelicensingAgreement.allow_relicensing')
    if ids is not None:
        conditions.add('TranslationMessage.id IN %s' % sqlvalues(ids))
    if potemplate is not None:
        joins.add('TranslationTemplateItem')
        conditions.add(
            'TranslationTemplateItem.potmsgset '
            '= TranslationMessage.potmsgset')
        conditions.add(
            'TranslationTemplateItem.potemplate = %s' % sqlvalues(potemplate))

    if language_code is not None:
        joins.add('Language')
        conditions.add('Language.id = TranslationMessage.language')
        language_match = compose_language_match(language_code)
        if not_language:
            conditions.add('NOT (%s)' % language_match)
        else:
            conditions.add(language_match)

    add_bool_match(
        conditions, 'TranslationMessage.is_current_ubuntu', is_current_ubuntu)
    add_bool_match(
        conditions, 'TranslationMessage.is_current_upstream',
        is_current_upstream)

    if msgid_singular is not None:
        joins.add('POTMsgSet')
        conditions.add('POTMsgSet.id = TranslationMessage.potmsgset')
        joins.add('POMsgID')
        conditions.add('POMsgID.id = POTMsgSet.msgid_singular')
        conditions.add('POMsgID.msgid = %s' % sqlvalues(msgid_singular))

    if origin is not None:
        conditions.add('TranslationMessage.origin = %s' % sqlvalues(origin))

    assert len(conditions) > 0, "That would delete ALL translations, maniac!"

    cur = cursor()
    drop_tables(cur, 'temp_doomed_message')

    joins.add('TranslationMessage')
    from_text = ', '.join(joins)
    where_text = ' AND\n    '.join(conditions)

    warn_about_deleting_current_messages(cur, from_text, where_text, logger)

    # Keep track of messages we're going to delete.
    # Don't bother indexing this.  We'd more likely end up optimizing
    # away the operator's "oh-shit-ctrl-c" time than helping anyone.
    query = """
        CREATE TEMP TABLE temp_doomed_message AS
        SELECT TranslationMessage.id, NULL::integer AS imported_message
        FROM %s
        WHERE %s
        """ % (from_text, where_text)
    cur.execute(query)

    # Note which shared messages are masked by the messages we're
    # going to delete.  We'll be making those the current ones.
    query = """
        UPDATE temp_doomed_message
        SET imported_message = Imported.id
        FROM TranslationMessage Doomed, TranslationMessage Imported
        WHERE
            Doomed.id = temp_doomed_message.id AND
            -- Is alternative for the message we're about to delete.
            Imported.potmsgset = Doomed.potmsgset AND
            Imported.language = Doomed.language AND
            Imported.potemplate IS NULL AND
            Doomed.potemplate IS NULL AND
            -- Is used upstream.
            Imported.is_current_upstream IS TRUE AND
            -- Was masked by the message we're about to delete.
            Doomed.is_current_ubuntu IS TRUE AND
            Imported.id <> Doomed.id
            """
    cur.execute(query)

    if logger is not None and logger.getEffectiveLevel() <= logging.DEBUG:
        # Dump sample of doomed messages for debugging purposes.
        cur.execute("""
            SELECT *
            FROM temp_doomed_message
            ORDER BY id
            LIMIT 20
            """)
        rows = cur.fetchall()
        if cur.rowcount > 0:
            logger.debug("Sample of messages to be deleted follows.")
            logger.debug("%10s %10s" % ("[message]", "[unmasks]"))
            for (doomed, unmasked) in rows:
                if unmasked is None:
                    unmasked = '--'
                logger.debug("%10s %10s" % (doomed, unmasked))

    cur.execute("""
        DELETE FROM TranslationMessage
        USING temp_doomed_message
        WHERE TranslationMessage.id = temp_doomed_message.id
        """)

    rows_deleted = cur.rowcount
    if logger is not None:
        if rows_deleted > 0:
            logger.info("Deleting %d message(s)." % rows_deleted)
        else:
            logger.warn("No rows match; not deleting anything.")

    cur.execute("""
        UPDATE TranslationMessage
        SET is_current_ubuntu = TRUE
        FROM temp_doomed_message
        WHERE TranslationMessage.id = temp_doomed_message.imported_message
        """)

    if cur.rowcount > 0 and logger is not None:
        logger.debug("Unmasking %d imported message(s)." % cur.rowcount)

    drop_tables(cur, 'temp_doomed_message')

    return rows_deleted
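
A hedged usage sketch of the function above: removing the non-current Ubuntu messages of one template in a single language. The template id, language code, and logger are placeholders invented for illustration; only the keyword names come from the signature.

import logging

log = logging.getLogger('remove-translations-example')
some_template = 42  # placeholder template id, for illustration only

deleted = remove_translations(
    logger=log,
    potemplate=some_template,
    language_code='de',
    is_current_ubuntu=False,  # only messages not current in Ubuntu
)
log.info("Removed %d translation message(s).", deleted)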