Example #1
def massacre(database):
    con = connect()
    con.set_isolation_level(0)  # Autocommit
    cur = con.cursor()

    # Allow connections to the doomed database if something turned this off,
    # such as an aborted run of this script.
    cur.execute("UPDATE pg_database SET datallowconn=TRUE WHERE datname=%s",
                [database])

    # Rollback prepared transactions.
    rollback_prepared_transactions(database)

    try:
        # Stop connections to the doomed database.
        cur.execute(
            "UPDATE pg_database SET datallowconn=FALSE WHERE datname=%s",
            [database])

        # New connections are disabled, but pg_stat_activity is only
        # updated every 500ms. Ensure that pg_stat_activity has
        # been refreshed to catch any connections that opened
        # immediately before setting datallowconn.
        time.sleep(1)

        # Terminate open connections.
        cur.execute(
            """
            SELECT %(pid)s, pg_terminate_backend(%(pid)s)
            FROM pg_stat_activity
            WHERE datname=%%s AND %(pid)s <> pg_backend_pid()
            """ % activity_cols(cur), [database])
        for pid, success in cur.fetchall():
            if not success:
                print >> sys.stderr, ("pg_terminate_backend(%s) failed" % pid)
        con.close()

        if still_open(database):
            print >> sys.stderr, (
                "Unable to kill all backends! Database not destroyed.")
            return 9

        # Destroy the database.
        con = connect()
        # AUTOCOMMIT required to execute commands like DROP DATABASE.
        con.set_isolation_level(0)
        cur = con.cursor()
        cur.execute("DROP DATABASE %s" % database)  # Not quoted.
        con.close()
        return 0
    finally:
        # In case something messed up, allow connections again so we can
        # inspect the damage.
        con = connect()
        con.set_isolation_level(0)
        cur = con.cursor()
        cur.execute(
            "UPDATE pg_database SET datallowconn=TRUE WHERE datname=%s",
            [database])
        con.close()
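
Every example on this page builds its SQL in two steps: the query template is first interpolated with activity_cols(cur) (which is why the real bind parameters are written as %%s), and only then handed to psycopg2 with its parameter list. Below is a minimal sketch of what such a helper could look like, assuming all it needs to do is bridge the pg_stat_activity column renames introduced in PostgreSQL 9.2 (procpid became pid, current_query became query); the project's actual helper may differ, and Example #9 passes a Storm store rather than a cursor, so the real one presumably accepts either.

def activity_cols(cur):
    """Map logical names to pg_stat_activity column names (sketch only).

    PostgreSQL 9.2 renamed procpid to pid and current_query to query,
    so the right names depend on the connected server's version.
    """
    if cur.connection.server_version >= 90200:
        return {'pid': 'pid', 'query': 'query'}
    return {'pid': 'procpid', 'query': 'current_query'}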
Example #2
    def check_fragile_connections(self):
        """Fail if any FRAGILE_USERS are connected to the cluster.

        If we interrupt these processes, we may have a mess to clean
        up. If they are connected, the preflight check should fail.
        """
        success = True
        for node in self.lpmain_nodes:
            cur = node.con.cursor()
            cur.execute(("""
                SELECT datname, usename, COUNT(*) AS num_connections
                FROM pg_stat_activity
                WHERE
                    datname=current_database()
                    AND %(pid)s <> pg_backend_pid()
                    AND usename IN %%s
                GROUP BY datname, usename
                """ % activity_cols(cur))
                % sqlvalues(FRAGILE_USERS))
            for datname, usename, num_connections in cur.fetchall():
                self.log.fatal(
                    "Fragile system %s running. %s has %d connections.",
                    usename, datname, num_connections)
                success = False
        if success:
            self.log.debug(
                "No fragile systems connected to the cluster (%s)"
                % ', '.join(FRAGILE_USERS))
        return success
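
sqlvalues(FRAGILE_USERS) fills the single %s left behind once activity_cols() has been interpolated, so it has to render the user list as an already-quoted SQL tuple such as ('some_user', 'another_user') (placeholder names). The implementation below is not the project's sqlvalues(); it is a rough stand-in built on psycopg2's own adapters, shown only to illustrate the shape of the value that must be produced.

from psycopg2.extensions import adapt

def sqlvalues(values):
    """Hypothetical stand-in: render values as a quoted SQL tuple literal."""
    # getquoted() returns a str under Python 2, matching these examples.
    return '(%s)' % ', '.join(adapt(value).getquoted() for value in values)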
Example #3
def still_open(database, max_wait=120):
    """Return True if there are still open connections, apart from our own.

    Waits a while to ensure that connections shutting down have a chance
    to do so. This might take a while if there is a big transaction to
    roll back.
    """
    con = connect()
    con.set_isolation_level(0)  # Autocommit.
    cur = con.cursor()
    # Keep checking until the timeout is reached, returning False as soon
    # as all of the other backends are gone.
    start = time.time()
    while time.time() < start + max_wait:
        cur.execute(
            """
            SELECT TRUE FROM pg_stat_activity
            WHERE
                datname=%%s
                AND %(pid)s != pg_backend_pid()
            LIMIT 1
            """ % activity_cols(cur), [database])
        if cur.fetchone() is None:
            con.close()
            return False
        time.sleep(0.6)  # Stats only updated every 500ms.
    con.close()
    return True
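
The doubled %% in these templates is what survives the activity_cols() interpolation as a plain %s for psycopg2 to bind. Assuming the mapping {'pid': 'pid'}, the query that still_open() actually executes collapses to the following, with [database] bound by the driver:

    cur.execute(
        """
        SELECT TRUE FROM pg_stat_activity
        WHERE
            datname=%s
            AND pid != pg_backend_pid()
        LIMIT 1
        """, [database])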
Example #4
    def check_open_connections(self):
        """False if any lpmain nodes have connections from non-system users.

        We only check on subscribed nodes, as there will be active systems
        connected to other nodes in the replication cluster (such as the
        SSO servers).

        System users are defined by SYSTEM_USERS.
        """
        success = True
        for node in self.lpmain_nodes:
            cur = node.con.cursor()
            cur.execute("""
                SELECT datname, usename, COUNT(*) AS num_connections
                FROM pg_stat_activity
                WHERE
                    datname=current_database()
                    AND %(pid)s <> pg_backend_pid()
                GROUP BY datname, usename
                """ % activity_cols(cur))
            for datname, usename, num_connections in cur.fetchall():
                if usename in SYSTEM_USERS:
                    self.log.debug(
                        "%s has %d connections by %s",
                        datname, num_connections, usename)
                else:
                    self.log.fatal(
                        "%s has %d connections by %s",
                        datname, num_connections, usename)
                    success = False
        if success:
            self.log.info("Only system users connected to the cluster")
        return success
Example #5
    def dropDb(self):
        '''Drop the database if it exists.

        Raises an exception if there are open connections
        '''
        attempts = 100
        for i in range(0, attempts):
            try:
                con = self.superuser_connection(self.template)
            except psycopg2.OperationalError as x:
                if 'does not exist' in str(x):
                    return
                raise
            try:
                con.set_isolation_level(0)

                # Kill all backend connections, if this helper happens to
                # be available. We could create the helper ourselves if its
                # absence ever becomes a problem.
                try:
                    cur = con.cursor()
                    cur.execute(
                        """
                        SELECT pg_terminate_backend(%(pid)s)
                        FROM pg_stat_activity
                        WHERE %(pid)s <> pg_backend_pid() AND datname=%%s
                        """ % activity_cols(cur), [self.dbname])
                except psycopg2.DatabaseError:
                    pass

                # Drop the database, trying for a number of seconds in case
                # connections are slow in dropping off.
                try:
                    cur = con.cursor()
                    cur.execute('DROP DATABASE %s' % self.dbname)
                except psycopg2.DatabaseError as x:
                    if i == attempts - 1:
                        # Too many failures - raise an exception
                        raise
                    if 'being accessed by other users' in str(x):
                        if i < attempts - 1:
                            time.sleep(0.1)
                            continue
                    if 'does not exist' in str(x):
                        break
                    raise
                PgTestSetup._vacuum_shdepend_counter += 1
                if (PgTestSetup._vacuum_shdepend_counter %
                        PgTestSetup.vacuum_shdepend_every) == 0:
                    cur.execute('VACUUM pg_catalog.pg_shdepend')
            finally:
                con.close()
        # Any further setUp's must make a new DB.
        PgTestSetup._reset_db = True
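
Both this helper and Example #1 have to race the terminated backends, because older PostgreSQL releases refuse to drop a database that still has live connections. On PostgreSQL 13 or later the retry loop can usually be avoided, since DROP DATABASE ... WITH (FORCE) terminates the remaining sessions itself. A minimal sketch follows; the connection details are placeholders rather than part of the original code.

import psycopg2

def force_drop_database(conninfo, dbname):
    """Drop dbname, evicting other sessions first (PostgreSQL 13+ only)."""
    con = psycopg2.connect(conninfo)  # connect to some *other* database
    con.set_isolation_level(0)  # DROP DATABASE requires autocommit
    cur = con.cursor()
    cur.execute('DROP DATABASE IF EXISTS "%s" WITH (FORCE)' % dbname)
    con.close()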
Example #6
    def check_open_connections(self):
        """Kill all non-system connections to Launchpad databases.

        If replication is paused, only connections on the master database
        are killed.

        System users are defined by SYSTEM_USERS.
        """
        # We keep trying to terminate connections every 0.1 seconds, for
        # up to 2 seconds in total.
        num_tries = 20
        seconds_to_pause = 0.1
        if self.replication_paused:
            nodes = set([self.lpmain_master_node])
        else:
            nodes = self.lpmain_nodes

        for loop_count in range(num_tries):
            all_clear = True
            for node in nodes:
                cur = node.con.cursor()
                cur.execute(("""
                    SELECT
                        %(pid)s, datname, usename,
                        pg_terminate_backend(%(pid)s)
                    FROM pg_stat_activity
                    WHERE
                        datname=current_database()
                        AND %(pid)s <> pg_backend_pid()
                        AND usename NOT IN %%s
                    """ % activity_cols(cur))
                    % sqlvalues(SYSTEM_USERS))
                for pid, datname, usename, ignored in cur.fetchall():
                    all_clear = False
                    if loop_count == num_tries - 1:
                        self.log.fatal(
                            "Unable to kill %s [%s] on %s.",
                            usename, pid, datname)
                    elif usename in BAD_USERS:
                        self.log.info(
                            "Killed %s [%s] on %s.",
                            usename, pid, datname)
                    else:
                        self.log.warning(
                            "Killed %s [%s] on %s.",
                            usename, pid, datname)
            if all_clear:
                break

            # Wait a little for any terminated connections to actually
            # terminate.
            time.sleep(seconds_to_pause)
        return all_clear
Example #7
def report_open_connections(database):
    con = connect()
    cur = con.cursor()
    cur.execute("""
        SELECT usename, datname, count(*)
        FROM pg_stat_activity
        WHERE %(pid)s != pg_backend_pid()
        GROUP BY usename, datname
        ORDER BY datname, usename
        """ % activity_cols(cur))
    for usename, datname, num_connections in cur.fetchall():
        print >> sys.stderr, "%d connections by %s to %s" % (num_connections,
                                                             usename, datname)
    con.close()
Example #8
def main():
    parser = OptionParser()
    logger_options(parser)
    db_options(parser)

    options, args = parser.parse_args()

    if len(args) > 0:
        parser.error("Too many arguments.")

    log = logger(options)

    log.debug("Connecting")
    con = connect()
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()

    log.debug("Disabling autovacuum on all tables in the database.")
    cur.execute("""
        SELECT nspname,relname
        FROM pg_namespace, pg_class
        WHERE relnamespace = pg_namespace.oid
            AND relkind = 'r' AND nspname <> 'pg_catalog'
        """)
    for namespace, table in list(cur.fetchall()):
        cur.execute("""
            ALTER TABLE ONLY "%s"."%s" SET (
                autovacuum_enabled=false,
                toast.autovacuum_enabled=false)
            """ % (namespace, table))

    log.debug("Killing existing autovacuum processes")
    num_autovacuums = -1
    while num_autovacuums != 0:
        # Sleep long enough for pg_stat_activity to be updated.
        time.sleep(0.6)
        cur.execute("""
            SELECT %(pid)s FROM pg_stat_activity
            WHERE
                datname=current_database()
                AND %(query)s LIKE 'autovacuum: %%'
            """ % activity_cols(cur))
        autovacuums = [row[0] for row in cur.fetchall()]
        num_autovacuums = len(autovacuums)
        for pid in autovacuums:
            log.debug("Cancelling %d" % pid)
            cur.execute("SELECT pg_cancel_backend(%d)" % pid)
Example #9
    def _blockForLongRunningTransactions(self):
        """If there are long running transactions, block to avoid making
        bloat worse."""
        if self.long_running_transaction is None:
            return
        from lp.services.librarian.model import LibraryFileAlias
        store = IMasterStore(LibraryFileAlias)
        msg_counter = 0
        while not self._isTimedOut():
            results = list(
                store.execute(("""
                SELECT
                    CURRENT_TIMESTAMP - xact_start,
                    %(pid)s,
                    usename,
                    datname,
                    %(query)s
                FROM activity()
                WHERE xact_start < CURRENT_TIMESTAMP - interval '%%f seconds'
                    AND datname = current_database()
                ORDER BY xact_start LIMIT 4
                """ % activity_cols(store)) %
                              self.long_running_transaction).get_all())
            if not results:
                break

            # Check for long running transactions every 10 seconds, but
            # only report every 10 minutes to avoid log spam.
            msg_counter += 1
            if msg_counter % 60 == 1:
                for runtime, pid, usename, datname, query in results:
                    self.log.info("Blocked on %s old xact %s@%s/%d - %s.",
                                  runtime, usename, datname, pid, query)
                self.log.info("Sleeping for up to 10 minutes.")
            # Don't become a long running transaction!
            transaction.abort()
            self._sleep(10)
Example #10
def setup_test_database():
    """Set up a test instance of our postgresql database.

    Returns 0 for success, 1 for errors.
    """
    # Sanity check PostgreSQL version. No point in trying to create a test
    # database when PostgreSQL is too old.
    con = psycopg2.connect('dbname=template1')
    if con.server_version < 100000:
        print 'Your PostgreSQL version is too old.  You need at least 10.x'
        print 'You have %s' % con.get_parameter_status('server_version')
        return 1

    # Drop the template database if it exists - the Makefile does this
    # too, but we can explicitly check for errors here.
    con = psycopg2.connect('dbname=template1')
    con.set_isolation_level(0)
    cur = con.cursor()
    try:
        cur.execute('drop database launchpad_ftest_template')
    except psycopg2.ProgrammingError as x:
        if 'does not exist' not in str(x):
            raise

    # If there are existing database connections, we cannot rebuild.
    # Report the rogue processes still connected to the database, give
    # them a chance to exit, and give up if they are still there.
    for loop in range(2):
        cur.execute("""
            SELECT usename, %(query)s
            FROM pg_stat_activity
            WHERE datname IN (
                'launchpad_dev', 'launchpad_ftest_template', 'launchpad_ftest')
            """ % activity_cols(cur))
        results = list(cur.fetchall())
        if not results:
            break
        # Rogue processes. Report, sleep for a bit, and try again.
        for usename, query in results:
            print '!! Open connection %s - %s' % (usename, query)
        print 'Sleeping'
        time.sleep(20)
    else:
        print 'Cannot rebuild database. There are open connections.'
        return 1

    cur.close()
    con.close()

    # Build the template database. Tests duplicate this.
    schema_dir = os.path.join(HERE, 'database', 'schema')
    if os.system('cd %s; make test > /dev/null' % (schema_dir)) != 0:
        print 'Failed to create database or load sampledata.'
        return 1

    # Sanity check the database. No point running tests if the
    # bedrock is crumbling.
    con = psycopg2.connect('dbname=launchpad_ftest_template')
    cur = con.cursor()
    cur.execute("""
        select pg_encoding_to_char(encoding) as encoding from pg_database
        where datname='launchpad_ftest_template'
        """)
    enc = cur.fetchone()[0]
    if enc not in ('UNICODE', 'UTF8'):
        print 'Database encoding incorrectly set'
        return 1
    cur.execute(r"""
        SELECT setting FROM pg_settings
        WHERE context='internal' AND name='lc_ctype'
        """)
    loc = cur.fetchone()[0]
    #if not (loc.startswith('en_') or loc in ('C', 'en')):
    if loc != 'C':
        print 'Database locale incorrectly set. Need to rerun initdb.'
        return 1

    # Explicitly close our connections - things will fail if we leave open
    # connections.
    cur.close()
    del cur
    con.close()
    del con

    return 0
Example #11
def main():
    parser = OptionParser()
    parser.add_option(
        '-c',
        '--connection',
        type='string',
        dest='connect_string',
        default='',
        help="Psycopg connection string",
    )
    parser.add_option(
        '-s',
        '--max-seconds',
        type='int',
        dest='max_seconds',
        default=60 * 60,
        help='Maximum seconds time connections are allowed to remain active.',
    )
    parser.add_option(
        '-q',
        '--quiet',
        action='store_true',
        dest="quiet",
        default=False,
        help='Silence output',
    )
    parser.add_option(
        '-n',
        '--dry-run',
        action='store_true',
        default=False,
        dest='dry_run',
        help="Dry run - don't kill anything",
    )
    parser.add_option('-u',
                      '--user',
                      action='append',
                      dest='users',
                      help='Kill connection of users matching REGEXP',
                      metavar='REGEXP')
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error('Too many arguments')
    if not options.users:
        parser.error('--user is required')

    user_match_sql = 'AND (%s)' % ' OR '.join(
        ['usename ~* %s'] * len(options.users))

    con = psycopg2.connect(options.connect_string)
    cur = con.cursor()
    cur.execute(("""
        SELECT usename, %(pid)s, backend_start, xact_start
        FROM pg_stat_activity
        WHERE xact_start < CURRENT_TIMESTAMP - '%%d seconds'::interval %%s
        ORDER BY %(pid)s
        """ % activity_cols(cur)) % (options.max_seconds, user_match_sql),
                options.users)

    rows = list(cur.fetchall())

    if len(rows) == 0:
        if not options.quiet:
            print 'No transactions to kill'
        return 0

    for usename, pid, backend_start, transaction_start in rows:
        print 'Killing %s (%d), %s, %s' % (
            usename,
            pid,
            backend_start,
            transaction_start,
        )
        if not options.dry_run:
            os.kill(pid, signal.SIGTERM)
    return 0
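
os.kill(pid, signal.SIGTERM) only works when this script runs on the database host as an operating-system user allowed to signal the postgres backends. A server-side variant terminates each offender through SQL instead, which works over a remote connection and needs database privileges (superuser, or pg_signal_backend membership on 9.6+) rather than shell access. A sketch of how the loop above could be rewritten:

    for usename, pid, backend_start, transaction_start in rows:
        print 'Killing %s (%d), %s, %s' % (
            usename, pid, backend_start, transaction_start)
        if not options.dry_run:
            # pg_terminate_backend() sends the same SIGTERM, server-side.
            cur.execute("SELECT pg_terminate_backend(%s)", (pid,))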
Example #12
def main():
    parser = OptionParser()
    parser.add_option(
        '-c',
        '--connection',
        type='string',
        dest='connect_string',
        default='',
        help="Psycopg connection string",
    )
    parser.add_option(
        '-s',
        '--max-idle-seconds',
        type='int',
        dest='max_idle_seconds',
        default=10 * 60,
        help='Maximum seconds time idle but open transactions are allowed',
    )
    parser.add_option(
        '-q',
        '--quiet',
        action='store_true',
        dest="quiet",
        default=False,
        help='Silence output',
    )
    parser.add_option(
        '-n',
        '--dry-run',
        action='store_true',
        default=False,
        dest='dryrun',
        help="Dry run - don't kill anything",
    )
    parser.add_option('-i',
                      '--ignore',
                      action='append',
                      dest='ignore',
                      help='Ignore connections by USER',
                      metavar='USER')
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error('Too many arguments')

    ignore_sql = ' AND usename <> %s' * len(options.ignore or [])

    con = psycopg2.connect(options.connect_string)
    cur = con.cursor()
    cur.execute(("""
        SELECT usename, %(pid)s, backend_start, query_start
        FROM pg_stat_activity
        WHERE %(query)s = '<IDLE> in transaction'
            AND query_start < CURRENT_TIMESTAMP - '%%d seconds'::interval %%s
        ORDER BY %(pid)s
        """ % activity_cols(cur)) % (options.max_idle_seconds, ignore_sql),
                options.ignore)

    rows = cur.fetchall()

    if len(rows) == 0:
        if not options.quiet:
            print 'No IDLE transactions to kill'
        return 0

    for usename, pid, backend_start, query_start in rows:
        print 'Killing %s(%d), %s, %s' % (
            usename,
            pid,
            backend_start,
            query_start,
        )
        if not options.dryrun:
            os.kill(pid, signal.SIGTERM)
    return 0
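
The '<IDLE> in transaction' marker in current_query only exists on PostgreSQL releases older than 9.2; newer servers report this in the separate state column instead, and also track when the state last changed. A rough equivalent of the query above for a 9.2+ server is shown below (PostgreSQL 9.6 and later can also enforce the same policy server-side with the idle_in_transaction_session_timeout setting):

    cur.execute("""
        SELECT usename, pid, backend_start, state_change
        FROM pg_stat_activity
        WHERE state = 'idle in transaction'
            AND state_change < CURRENT_TIMESTAMP - %s * interval '1 second'
        ORDER BY pid
        """, (options.max_idle_seconds,))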