def liverebuild(con):
    """Rebuild the data in all the fti columns against possibly live database.

    Works through every table in ALL_FTI in small batches so locks are
    held only briefly, making it safe to run against a live system.
    """
    # Update number of rows per transaction.
    batch_size = 50
    cur = con.cursor()
    for table, ignored in ALL_FTI:
        table = quote_identifier(table)
        cur.execute("SELECT max(id) FROM %s" % table)
        max_id = cur.fetchone()[0]
        if max_id is None:
            log.info("No data in %s - skipping", table)
            continue
        log.info("Rebuilding fti column on %s", table)
        for id in range(0, max_id, batch_size):
            try:
                query = """
                    UPDATE %s SET fti=NULL WHERE id BETWEEN %d AND %d
                    """ % (table, id + 1, id + batch_size)
                log.debug(query)
                cur.execute(query)
            except psycopg2.Error:
                # No commit - we are in autocommit mode
                log.exception('psycopg error')
                con = connect()
                con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
                # Recreate the cursor too: the old one is still bound to
                # the broken connection and every subsequent execute()
                # would fail.
                cur = con.cursor()
def main():
    """Close the accounts named on the command line; exit 0 on success."""
    parser = OptionParser(
        '%prog [options] (username|email) [...]'
        )
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("Must specify username (Person.name)")

    log = logger(options)

    con = None
    try:
        log.debug("Connecting to database")
        con = connect()
        for username in args:
            # Roll back and bail out on the first failure so a partial
            # run never commits.
            if not close_account(con, log, username):
                log.debug("Rolling back")
                con.rollback()
                return 1
        log.debug("Committing changes")
        con.commit()
        return 0
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt and SystemExit still propagate.
        log.exception("Unhandled exception")
        log.debug("Rolling back")
        if con is not None:
            con.rollback()
        return 1
def main():
    """Close the accounts named on the command line; exit 0 on success."""
    parser = OptionParser('%prog [options] (username|email) [...]')
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("Must specify username (Person.name)")

    log = logger(options)

    con = None
    try:
        log.debug("Connecting to database")
        con = connect()
        for username in args:
            # Roll back and bail out on the first failure so a partial
            # run never commits.
            if not close_account(con, log, username):
                log.debug("Rolling back")
                con.rollback()
                return 1
        log.debug("Committing changes")
        con.commit()
        return 0
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt and SystemExit still propagate.
        log.exception("Unhandled exception")
        log.debug("Rolling back")
        if con is not None:
            con.rollback()
        return 1
def main():
    """Disable autovacuum on every table, then cancel running autovacuums."""
    parser = OptionParser()
    logger_options(parser)
    db_options(parser)
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many arguments.")
    log = logger(options)

    log.debug("Connecting")
    con = connect()
    # Autocommit: each catalog change takes effect immediately.
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()
    cur.execute('show server_version')
    pg_version = LooseVersion(cur.fetchone()[0])

    log.debug("Disabling autovacuum on all tables in the database.")
    if pg_version < LooseVersion('8.4.0'):
        # Before PostgreSQL 8.4 per-table autovacuum settings lived in
        # the pg_autovacuum catalog table; -1 means "use the default".
        cur.execute("""
            INSERT INTO pg_autovacuum
            SELECT pg_class.oid, FALSE, -1,-1,-1,-1,-1,-1,-1,-1
            FROM pg_class
            WHERE relkind in ('r','t')
                AND pg_class.oid NOT IN (SELECT vacrelid FROM pg_autovacuum)
            """)
    else:
        # 8.4+ stores the setting as per-table storage parameters.
        cur.execute("""
            SELECT nspname,relname
            FROM pg_namespace, pg_class
            WHERE relnamespace = pg_namespace.oid
                AND relkind = 'r' AND nspname <> 'pg_catalog'
            """)
        for namespace, table in list(cur.fetchall()):
            cur.execute("""
                ALTER TABLE ONLY "%s"."%s" SET (
                    autovacuum_enabled=false,
                    toast.autovacuum_enabled=false)
                """ % (namespace, table))

    log.debug("Killing existing autovacuum processes")
    num_autovacuums = -1
    while num_autovacuums != 0:
        # Sleep long enough for pg_stat_activity to be updated.
        time.sleep(0.6)
        # NOTE(review): procpid/current_query were renamed to pid/query
        # in PostgreSQL 9.2 -- this query assumes an older server.
        cur.execute("""
            SELECT procpid FROM pg_stat_activity
            WHERE
                datname=current_database()
                AND current_query LIKE 'autovacuum: %'
            """)
        autovacuums = [row[0] for row in cur.fetchall()]
        num_autovacuums = len(autovacuums)
        for procpid in autovacuums:
            log.debug("Cancelling %d" % procpid)
            cur.execute("SELECT pg_cancel_backend(%d)" % procpid)
def setUp(self):
    """Open a database connection and a throwaway logger for the test."""
    # We need some fake options so that this test doesn't try to parse
    # sys.args.  We don't care about the log messages, so just throw
    # them away.
    class _StubOptions:
        log_file = None
        loglevel = 1000
        verbose = False
        milliseconds = False

    self.con = connect()
    self.log = logger(_StubOptions())
def test_connect(self):
    """connect() defaults to read committed and honours isolation=."""
    # The default isolation level must survive a rollback.
    default_con = connect()
    self.assertEqual('read committed', self.getCurrentIsolation(default_con))
    default_con.rollback()
    self.assertEqual('read committed', self.getCurrentIsolation(default_con))

    # An explicitly requested isolation level sticks across rollback.
    explicit_con = connect(isolation=ISOLATION_LEVEL_SERIALIZABLE)
    self.assertEqual('serializable', self.getCurrentIsolation(explicit_con))
    explicit_con.rollback()
    self.assertEqual('serializable', self.getCurrentIsolation(explicit_con))

    # Changing the level on a fresh connection also works and sticks.
    fresh_con = connect()
    fresh_con.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
    self.assertEqual('serializable', self.getCurrentIsolation(fresh_con))
    fresh_con.rollback()
    self.assertEqual('serializable', self.getCurrentIsolation(fresh_con))
def test_connect(self):
    """connect() defaults to read committed and honours isolation=."""
    # Ensure connect() method returns a connection with the correct
    # default isolation.  failUnlessEqual is a deprecated alias of
    # assertEqual; use the canonical name.
    con = connect()
    self.assertEqual(self.getCurrentIsolation(con), 'read committed')
    con.rollback()
    self.assertEqual(self.getCurrentIsolation(con), 'read committed')

    # Ensure that changing the isolation sticks.
    con = connect(isolation=ISOLATION_LEVEL_SERIALIZABLE)
    self.assertEqual(self.getCurrentIsolation(con), 'serializable')
    con.rollback()
    self.assertEqual(self.getCurrentIsolation(con), 'serializable')

    # But on a fresh connection, it works just fine.
    con = connect()
    con.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
    self.assertEqual(self.getCurrentIsolation(con), 'serializable')
    con.rollback()
    self.assertEqual(self.getCurrentIsolation(con), 'serializable')
def main(options, master_con=None):
    """Reset database permissions as described by security.cfg.

    :param options: parsed command-line options, forwarded to
        reset_permissions.
    :param master_con: optional existing connection; opened fresh
        when omitted.
    :return: 0, the process exit code for success.
    """
    # The config file lives alongside this script.
    config = SafeConfigParser(CONFIG_DEFAULTS)
    config_path = os.path.join(os.path.dirname(__file__), 'security.cfg')
    config.read([config_path])

    if master_con is None:
        master_con = connect()

    log.info("Resetting permissions.")
    reset_permissions(master_con, config, options)
    return 0
def main(options): con = connect() cur = con.cursor() cur.execute(""" SELECT relname FROM pg_class,pg_namespace WHERE pg_class.relnamespace = pg_namespace.oid AND pg_namespace.nspname='public' AND pg_class.relkind = 'r' ORDER BY relname """) for table in (row[0] for row in cur.fetchall()): cur.execute("SELECT TRUE FROM public.%s LIMIT 1" % quote_identifier(table)) if cur.fetchone() is None: print table
def main(options): con = connect() cur = con.cursor() cur.execute(""" SELECT relname FROM pg_class,pg_namespace WHERE pg_class.relnamespace = pg_namespace.oid AND pg_namespace.nspname='public' AND pg_class.relkind = 'r' ORDER BY relname """) for table in (row[0] for row in cur.fetchall()): cur.execute( "SELECT TRUE FROM public.%s LIMIT 1" % quote_identifier(table) ) if cur.fetchone() is None: print table
def main():
    """Disable autovacuum on every table, then cancel running autovacuums."""
    parser = OptionParser()
    logger_options(parser)
    db_options(parser)
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many arguments.")
    log = logger(options)

    log.debug("Connecting")
    con = connect()
    # Autocommit: each ALTER TABLE takes effect immediately.
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()

    log.debug("Disabling autovacuum on all tables in the database.")
    cur.execute("""
        SELECT nspname,relname
        FROM pg_namespace, pg_class
        WHERE relnamespace = pg_namespace.oid
            AND relkind = 'r' AND nspname <> 'pg_catalog'
        """)
    for namespace, table in list(cur.fetchall()):
        cur.execute("""
            ALTER TABLE ONLY "%s"."%s" SET (
                autovacuum_enabled=false,
                toast.autovacuum_enabled=false)
            """ % (namespace, table))

    log.debug("Killing existing autovacuum processes")
    num_autovacuums = -1
    while num_autovacuums != 0:
        # Sleep long enough for pg_stat_activity to be updated.
        time.sleep(0.6)
        # activity_cols supplies the version-dependent column names
        # (pid vs procpid, query vs current_query).
        cur.execute("""
            SELECT %(pid)s FROM pg_stat_activity
            WHERE
                datname=current_database()
                AND %(query)s LIKE 'autovacuum: %%'
            """ % activity_cols(cur))
        autovacuums = [row[0] for row in cur.fetchall()]
        num_autovacuums = len(autovacuums)
        for pid in autovacuums:
            log.debug("Cancelling %d" % pid)
            cur.execute("SELECT pg_cancel_backend(%d)" % pid)
def setUp(self):
    """Prepare duplicate library files on disk and a GC database connection."""
    super(TestLibrarianGarbageCollection, self).setUp()
    self.client = LibrarianClient()
    self.patch(librariangc, 'log', BufferLogger())

    # A value we use in a number of tests. This represents the
    # stay of execution hard coded into the garbage collector.
    # We don't destroy any data unless it has been waiting to be
    # destroyed for longer than this period. We pick a value
    # that is close enough to the stay of execution so that
    # forgetting timezone information will break things, but
    # far enough so that how long it takes the test to run
    # is not an issue. 'stay_of_execution - 1 hour' fits these
    # criteria.
    self.recent_past = utc_now() - timedelta(days=6, hours=23)
    # A time beyond the stay of execution.
    self.ancient_past = utc_now() - timedelta(days=30)

    self.f1_id, self.f2_id = self._makeDupes()

    switch_dbuser(config.librarian_gc.dbuser)
    self.ztm = self.layer.txn

    # Make sure the files exist. We do this in setup, because we
    # need to use the get_file_path method later in the setup and we
    # want to be sure it is working correctly.
    path = librariangc.get_file_path(self.f1_id)
    # NOTE(review): failUnless is a deprecated alias of assertTrue.
    self.failUnless(os.path.exists(path), "Librarian uploads failed")

    # Make sure that every file the database knows about exists on disk.
    # We manually remove them for tests that need to cope with missing
    # library items.
    self.ztm.begin()
    cur = cursor()
    cur.execute("SELECT id FROM LibraryFileContent")
    for content_id in (row[0] for row in cur.fetchall()):
        path = librariangc.get_file_path(content_id)
        if not os.path.exists(path):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            open(path, 'w').write('whatever')
    self.ztm.abort()

    # Autocommit connection used directly by the GC code under test.
    self.con = connect(
        user=config.librarian_gc.dbuser,
        isolation=ISOLATION_LEVEL_AUTOCOMMIT)
def main(con=None):
    """Apply all outstanding database patches and report their timings.

    :param con: optional existing connection; opened fresh when omitted.
    :return: 0, the process exit code for success.

    NOTE(review): reads a module-level ``options`` global for the
    commit flag -- confirm it is initialised before calling.
    """
    if con is None:
        con = connect()

    patches = get_patchlist(con)

    log.info("Applying patches.")
    apply_patches_normal(con)

    report_patch_times(con, patches)

    # Commit changes
    if options.commit:
        log.debug("Committing changes")
        con.commit()

    return 0
def preamble(con=None):
    """Return the preamble needed at the start of all slonik scripts.

    :param con: optional existing connection; opened fresh when omitted.
    :return: slonik script text declaring the cluster, set ids and
        admin conninfo for every known node.
    """
    if con is None:
        con = connect(user='******')

    master_node = get_master_node(con)
    nodes = get_all_cluster_nodes(con)
    # A single-node cluster implicitly makes that node the master.
    if master_node is None and len(nodes) == 1:
        master_node = nodes[0]

    preamble = [
        dedent("""\
            #
            # Every slonik script must start with a clustername, which cannot
            # be changed once the cluster is initialized.
            #
            cluster name = sl;

            # Symbolic ids for replication sets.
            define lpmain_set %d;
            define holding_set %d;
            define sso_set %d;
            define lpmirror_set %d;
            """ % (LPMAIN_SET_ID, HOLDING_SET_ID, SSO_SET_ID, LPMIRROR_SET_ID))
        ]

    if master_node is not None:
        preamble.append(
            dedent("""\
                # Symbolic id for the main replication set master node.
                define master_node %d;
                define master_node_conninfo '%s';
                """ % (master_node.node_id, master_node.connection_string)))

    for node in nodes:
        preamble.append(
            dedent("""\
                define %s %d;
                define %s_conninfo '%s';
                node @%s admin conninfo = @%s_conninfo;
                """ % (node.nickname, node.node_id,
                    node.nickname, node.connection_string,
                    node.nickname, node.nickname)))

    return '\n\n'.join(preamble)
def preamble(con=None):
    """Return the preamble needed at the start of all slonik scripts.

    :param con: optional existing connection; opened fresh when omitted.
    :return: slonik script text declaring the cluster, set ids and
        admin conninfo for every known node.
    """
    if con is None:
        con = connect(user='******')

    master_node = get_master_node(con)
    nodes = get_all_cluster_nodes(con)
    # A single-node cluster implicitly makes that node the master.
    if master_node is None and len(nodes) == 1:
        master_node = nodes[0]

    preamble = [dedent("""\
        #
        # Every slonik script must start with a clustername, which cannot
        # be changed once the cluster is initialized.
        #
        cluster name = sl;

        # Symbolic ids for replication sets.
        define lpmain_set %d;
        define holding_set %d;
        define sso_set %d;
        define lpmirror_set %d;
        """ % (LPMAIN_SET_ID, HOLDING_SET_ID, SSO_SET_ID, LPMIRROR_SET_ID))]

    if master_node is not None:
        preamble.append(dedent("""\
            # Symbolic id for the main replication set master node.
            define master_node %d;
            define master_node_conninfo '%s';
            """ % (master_node.node_id, master_node.connection_string)))

    for node in nodes:
        preamble.append(dedent("""\
            define %s %d;
            define %s_conninfo '%s';
            node @%s admin conninfo = @%s_conninfo;
            """ % (
                node.nickname, node.node_id,
                node.nickname, node.connection_string,
                node.nickname, node.nickname)))

    return '\n\n'.join(preamble)
def main():
    """Entry point: NULL out or live-rebuild the full text index columns."""
    parser = OptionParser()
    parser.add_option(
        "-0", "--null", dest="null", action="store_true",
        default=False,
        help="Set all full text index column values to NULL.",
        )
    parser.add_option(
        "-l", "--live-rebuild", dest="liverebuild", action="store_true",
        default=False,
        help="Rebuild all the indexes against a live database.",
        )
    db_options(parser)
    logger_options(parser)

    global options, args, log
    (options, args) = parser.parse_args()

    # The two modes are mutually exclusive.
    if options.null and options.liverebuild:
        parser.error("Incompatible options")

    log = logger(options)

    con = connect()
    if options.liverebuild:
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        liverebuild(con)
    elif options.null:
        con.set_isolation_level(ISOLATION_LEVEL_READ_COMMITTED)
        nullify(con)
    else:
        parser.error("Required argument not specified")
    con.commit()
    return 0
def main():
    """Check that monitored scripts ran recently; email failures; exit 2/1/0."""
    # XXX: Tom Haddon 2007-07-12
    # There's a lot of untested stuff here: parsing options and sending
    # emails - this should be moved into a testable location.
    # Also duplicated code in scripts/script-monitor-nagios.py
    parser = OptionParser(
        '%prog [options] (minutes) (host:scriptname) [host:scriptname]')
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.error("Must specify at time in minutes and "
            "at least one host and script")

    # First argument is the number of minutes into the past
    # we want to look for the scripts on the specified hosts
    try:
        minutes_ago, args = int(args[0]), args[1:]
        start_date = datetime.now() - timedelta(minutes=minutes_ago)

        completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple())
        completed_to = strftime(
            "%Y-%m-%d %H:%M:%S", datetime.now().timetuple())

        hosts_scripts = []
        for arg in args:
            try:
                hostname, scriptname = arg.split(':')
            except ValueError:
                # Unpacking a split that does not yield exactly two
                # parts raises ValueError (the old `except TypeError`
                # could never fire, so the generic outer message was
                # shown instead of this specific one).
                parser.error("%r is not in the format 'host:scriptname'"
                    % (arg, ))
            hosts_scripts.append((hostname, scriptname))
    except ValueError:
        parser.error("Must specify time in minutes and "
            "at least one host and script")

    log = logger(options)

    try:
        log.debug("Connecting to database")
        con = connect()
        error_found = False
        msg, subj = [], []
        for hostname, scriptname in hosts_scripts:
            failure_msg = check_script(con, log, hostname, scriptname,
                completed_from, completed_to)
            if failure_msg is not None:
                msg.append(failure_msg)
                subj.append("%s:%s" % (hostname, scriptname))
                error_found = 2
        if error_found:
            # Construct our email.
            msg = MIMEText('\n'.join(msg))
            msg['Subject'] = "Scripts failed to run: %s" % ", ".join(subj)
            msg['From'] = '*****@*****.**'
            msg['Reply-To'] = '*****@*****.**'
            msg['To'] = '*****@*****.**'

            # Send out the email.
            smtp = smtplib.SMTP()
            smtp.connect()
            smtp.sendmail('*****@*****.**', ['*****@*****.**'],
                msg.as_string())
            smtp.close()
            return 2
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # SystemExit and KeyboardInterrupt still propagate.
        log.exception("Unhandled exception")
        return 1
def main():
    """Report Librarian disk usage broken down by referring table."""
    parser = OptionParser()
    db_options(parser)
    parser.add_option("-f", "--from", dest="from_date", default=None,
        metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)")
    parser.add_option("-u", "--until", dest="until_date", default=None,
        metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)")
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many command line arguments.")

    # Handle date filters. We use LibraryFileContent.datecreated rather
    # than LibraryFileAlias.datecreated as this report is about actual
    # disk space usage. A new row in the database linking to a
    # previously existing file in the Librarian takes up no new space.
    if options.from_date is not None:
        from_date = 'AND LFC.datecreated >= %s' % sqlvalues(options.from_date)
    else:
        from_date = ''
    if options.until_date is not None:
        until_date = 'AND LFC.datecreated <= %s' % sqlvalues(
            options.until_date)
    else:
        until_date = ''

    con = connect()
    cur = con.cursor()

    # Collect direct references to the LibraryFileAlias table.
    references = set(
        (from_table, from_column)
        # Note that listReferences is recursive, which we don't
        # care about in this simple report. We also ignore the
        # irrelevant constraint type update and delete flags.
        for from_table, from_column, to_table, to_column, update, delete
            in listReferences(cur, 'libraryfilealias', 'id')
        if to_table == 'libraryfilealias')

    totals = set()
    for referring_table, referring_column in sorted(references):
        # Download counts reference aliases but hold no file data.
        if referring_table == 'libraryfiledownloadcount':
            continue
        quoted_referring_table = quoteIdentifier(referring_table)
        quoted_referring_column = quoteIdentifier(referring_column)
        # DISTINCT ON (LFC.id) so a file referenced many times from one
        # table is only counted once for that table.
        cur.execute("""
            SELECT
                COALESCE(SUM(filesize), 0),
                pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)),
                COUNT(*)
            FROM (
                SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize
                FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s
                WHERE LFC.id = LFA.content
                    AND LFA.id = %s.%s
                    AND (
                        LFA.expires IS NULL
                        OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
                    %s %s
                ORDER BY LFC.id
                ) AS Whatever
            """ % (quoted_referring_table, quoted_referring_table,
                quoted_referring_column, from_date, until_date))
        total_bytes, formatted_size, num_files = cur.fetchone()
        totals.add((total_bytes, referring_table, formatted_size, num_files))

    # Largest consumers first.
    for total_bytes, tab_name, formatted_size, num_files in sorted(
            totals, reverse=True):
        print '%-10s %s in %d files' % (formatted_size, tab_name, num_files)

    return 0
def main(): parser = LPOptionParser() db_options(parser) parser.add_option("-f", "--from", dest="from_ts", type=datetime, default=None, metavar="TIMESTAMP", help="Use statistics collected since TIMESTAMP.") parser.add_option("-u", "--until", dest="until_ts", type=datetime, default=None, metavar="TIMESTAMP", help="Use statistics collected up until TIMESTAMP.") parser.add_option( "-i", "--interval", dest="interval", type=str, default=None, metavar="INTERVAL", help=("Use statistics collected over the last INTERVAL period. " "INTERVAL is a string parsable by PostgreSQL " "such as '5 minutes'.")) parser.add_option("-n", "--limit", dest="limit", type=int, default=15, metavar="NUM", help="Display the top NUM items in each category.") parser.add_option( "-b", "--bloat", dest="bloat", type=float, default=40, metavar="BLOAT", help="Display tables and indexes bloated by more than BLOAT%.") parser.add_option( "--min-bloat", dest="min_bloat", type=int, default=10000000, metavar="BLOAT", help="Don't report tables bloated less than BLOAT bytes.") parser.set_defaults(dbuser="******") options, args = parser.parse_args() if options.from_ts and options.until_ts and options.interval: parser.error( "Only two of --from, --until and --interval may be specified.") con = connect() cur = con.cursor() tables = list(get_table_stats(cur, options)) if len(tables) == 0: parser.error("No statistics available in that time range.") arbitrary_table = tables[0] interval = arbitrary_table.date_end - arbitrary_table.date_start per_second = float(interval.days * 24 * 60 * 60 + interval.seconds) if per_second == 0: parser.error("Only one sample in that time range.") user_cpu = get_cpu_stats(cur, options) print "== Most Active Users ==" print for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]: print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10) print print "== Most Written Tables ==" print tables_sort = [ 'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname' ] 
most_written_tables = sorted(tables, key=attrgetter(*tables_sort), reverse=True) for table in most_written_tables[:options.limit]: print "%40s || %10.2f tuples/sec" % ( table.relname, table.total_tup_written / per_second) print print "== Most Read Tables ==" print # These match the pg_user_table_stats view. schemaname is the # namespace (normally 'public'), relname is the table (relation) # name. total_tup_red is the total number of rows read. # idx_tup_fetch is the number of rows looked up using an index. tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname'] most_read_tables = sorted(tables, key=attrgetter(*tables_sort), reverse=True) for table in most_read_tables[:options.limit]: print "%40s || %10.2f tuples/sec" % (table.relname, table.total_tup_read / per_second) table_bloat_stats = get_bloat_stats(cur, options, 'r') if not table_bloat_stats: print print "(There is no bloat information available in this time range.)" else: print print "== Most Bloated Tables ==" print for bloated_table in table_bloat_stats[:options.limit]: print "%40s || %2d%% || %s of %s" % ( bloated_table.name, bloated_table.end_bloat_percent, bloated_table.bloat_size, bloated_table.table_size) index_bloat_stats = get_bloat_stats(cur, options, 'i') print print "== Most Bloated Indexes ==" print for bloated_index in index_bloat_stats[:options.limit]: print "%65s || %2d%% || %s of %s" % ( bloated_index.sub_name, bloated_index.end_bloat_percent, bloated_index.bloat_size, bloated_index.table_size) # Order bloat delta report by size of bloat increase. # We might want to change this to percentage bloat increase. 
bloating_sort_key = lambda x: x.delta_bloat_len table_bloating_stats = sorted(table_bloat_stats, key=bloating_sort_key, reverse=True) if table_bloating_stats[0].num_samples <= 1: print print fill( dedent("""\ (There are not enough samples in this time range to display bloat change statistics) """)) else: print print "== Most Bloating Tables ==" print for bloated_table in table_bloating_stats[:options.limit]: # Bloat decreases are uninteresting, and would need to be in # a separate table sorted in reverse anyway. if bloated_table.delta_bloat_percent > 0: print "%40s || +%4.2f%% || +%s" % ( bloated_table.name, bloated_table.delta_bloat_percent, bloated_table.delta_bloat_size) index_bloating_stats = sorted(index_bloat_stats, key=bloating_sort_key, reverse=True) print print "== Most Bloating Indexes ==" print for bloated_index in index_bloating_stats[:options.limit]: # Bloat decreases are uninteresting, and would need to be in # a separate table sorted in reverse anyway. if bloated_index.delta_bloat_percent > 0: print "%65s || +%4.2f%% || +%s" % ( bloated_index.sub_name, bloated_index.delta_bloat_percent, bloated_index.delta_bloat_size)
The sampledata does not update the current values of all the sequences
used to populate the primary keys (this was removed to aid in merging
changes to the sampledata).

This script resets all of these sequences to the correct value based on
the maximum value currently found in the corresponding table.
"""

__metaclass__ = type

import _pythonpath

from optparse import OptionParser

from lp.services.database.postgresql import resetSequences
from lp.services.database.sqlbase import connect
from lp.services.scripts import db_options


if __name__ == '__main__':
    parser = OptionParser()
    db_options(parser)
    (options, args) = parser.parse_args()
    if args:
        parser.error("Too many options given")
    if not options.dbname:
        parser.error("Required option --dbname not given")
    con = connect()
    # resetSequences does the real work against this cursor's database.
    resetSequences(con.cursor())
    con.commit()
def main():
    """Report Librarian disk usage broken down by referring table."""
    parser = OptionParser()
    db_options(parser)
    parser.add_option(
        "-f", "--from", dest="from_date", default=None,
        metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)")
    parser.add_option(
        "-u", "--until", dest="until_date", default=None,
        metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)")
    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many command line arguments.")

    # Handle date filters. We use LibraryFileContent.datecreated rather
    # than LibraryFileAlias.datecreated as this report is about actual
    # disk space usage. A new row in the database linking to a
    # previously existing file in the Librarian takes up no new space.
    if options.from_date is not None:
        from_date = 'AND LFC.datecreated >= %s' % sqlvalues(
            options.from_date)
    else:
        from_date = ''
    if options.until_date is not None:
        until_date = 'AND LFC.datecreated <= %s' % sqlvalues(
            options.until_date)
    else:
        until_date = ''

    con = connect()
    cur = con.cursor()

    # Collect direct references to the LibraryFileAlias table.
    references = set(
        (from_table, from_column)
        # Note that listReferences is recursive, which we don't
        # care about in this simple report. We also ignore the
        # irrelevant constraint type update and delete flags.
        for from_table, from_column, to_table, to_column, update, delete
            in listReferences(cur, 'libraryfilealias', 'id')
        if to_table == 'libraryfilealias'
        )

    totals = set()
    for referring_table, referring_column in sorted(references):
        # Download counts reference aliases but hold no file data.
        if referring_table == 'libraryfiledownloadcount':
            continue
        quoted_referring_table = quoteIdentifier(referring_table)
        quoted_referring_column = quoteIdentifier(referring_column)
        # DISTINCT ON (LFC.id) so a file referenced many times from one
        # table is only counted once for that table.
        cur.execute("""
            SELECT
                COALESCE(SUM(filesize), 0),
                pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)),
                COUNT(*)
            FROM (
                SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize
                FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s
                WHERE LFC.id = LFA.content
                    AND LFA.id = %s.%s
                    AND (
                        LFA.expires IS NULL
                        OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
                    %s %s
                ORDER BY LFC.id
                ) AS Whatever
            """ % (
                quoted_referring_table, quoted_referring_table,
                quoted_referring_column, from_date, until_date))
        total_bytes, formatted_size, num_files = cur.fetchone()
        totals.add((total_bytes, referring_table, formatted_size, num_files))

    # Largest consumers first.
    for total_bytes, tab_name, formatted_size, num_files in sorted(
            totals, reverse=True):
        print '%-10s %s in %d files' % (formatted_size, tab_name, num_files)

    return 0
# GNU Affero General Public License version 3 (see the file LICENSE). """Generate a preamble for slonik(1) scripts based on the current LPCONFIG. """ __metaclass__ = type __all__ = [] import _pythonpath from optparse import OptionParser import time from lp.services import scripts from lp.services.config import config from lp.services.database.sqlbase import connect import replication.helpers if __name__ == '__main__': parser = OptionParser() scripts.db_options(parser) (options, args) = parser.parse_args() if args: parser.error("Too many arguments") scripts.execute_zcml_for_scripts(use_web_security=False) con = connect() print '# slonik(1) preamble generated %s' % time.ctime() print '# LPCONFIG=%s' % config.instance_name print print replication.helpers.preamble(con)
def main():
    """Check that monitored scripts ran recently; email failures; exit 2/1/0."""
    # XXX: Tom Haddon 2007-07-12
    # There's a lot of untested stuff here: parsing options and sending
    # emails - this should be moved into a testable location.
    # Also duplicated code in scripts/script-monitor-nagios.py
    parser = OptionParser(
        '%prog [options] (minutes) (host:scriptname) [host:scriptname]'
        )
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.error("Must specify at time in minutes and "
            "at least one host and script")

    # First argument is the number of minutes into the past
    # we want to look for the scripts on the specified hosts
    try:
        minutes_ago, args = int(args[0]), args[1:]
        start_date = datetime.now() - timedelta(minutes=minutes_ago)

        completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple())
        completed_to = strftime(
            "%Y-%m-%d %H:%M:%S", datetime.now().timetuple())

        hosts_scripts = []
        for arg in args:
            try:
                hostname, scriptname = arg.split(':')
            except ValueError:
                # Unpacking a split that does not yield exactly two
                # parts raises ValueError (the old `except TypeError`
                # could never fire, so the generic outer message was
                # shown instead of this specific one).
                parser.error(
                    "%r is not in the format 'host:scriptname'" % (arg,))
            hosts_scripts.append((hostname, scriptname))
    except ValueError:
        parser.error("Must specify time in minutes and "
            "at least one host and script")

    log = logger(options)

    try:
        log.debug("Connecting to database")
        con = connect()
        error_found = False
        msg, subj = [], []
        for hostname, scriptname in hosts_scripts:
            failure_msg = check_script(con, log, hostname, scriptname,
                completed_from, completed_to)
            if failure_msg is not None:
                msg.append(failure_msg)
                subj.append("%s:%s" % (hostname, scriptname))
                error_found = 2
        if error_found:
            # Construct our email.
            msg = MIMEText('\n'.join(msg))
            msg['Subject'] = "Scripts failed to run: %s" % ", ".join(subj)
            msg['From'] = '*****@*****.**'
            msg['Reply-To'] = '*****@*****.**'
            msg['To'] = '*****@*****.**'

            # Send out the email.
            smtp = smtplib.SMTP()
            smtp.connect()
            smtp.sendmail(
                '*****@*****.**', ['*****@*****.**'], msg.as_string())
            smtp.close()
            return 2
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # SystemExit and KeyboardInterrupt still propagate.
        log.exception("Unhandled exception")
        return 1
def setUp(self):
    """Create three blob fixtures: unclaimed-expired, claimed-expired
    (still referenced elsewhere) and unexpired."""
    super(TestBlobCollection, self).setUp()
    # Add in some sample data
    cur = cursor()

    # First a blob that has been unclaimed and expired.
    cur.execute("""
        INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
        VALUES (666, 'whatever', 'whatever', 'whatever')
        """)
    cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
    self.expired_lfc_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO LibraryFileAlias (
            content, filename, mimetype, expires)
        VALUES (
            %s, 'whatever', 'whatever',
            CURRENT_TIMESTAMP - '1 day'::interval
            )
        """, (self.expired_lfc_id,))
    cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
    self.expired_lfa_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO TemporaryBlobStorage (uuid, file_alias)
        VALUES ('uuid', %s)
        """, (self.expired_lfa_id,))
    cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
    self.expired_blob_id = cur.fetchone()[0]

    # Add ApportJob and Job entries - these need to be removed
    # too.
    cur.execute("""
        INSERT INTO Job (status, date_finished)
        VALUES (0, CURRENT_TIMESTAMP - interval '2 days')
        RETURNING id
        """)
    self.expired_job_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO ApportJob (job, blob, job_type)
        VALUES (%s, %s, 0)
        RETURNING id
        """, (self.expired_job_id, self.expired_blob_id))
    self.expired_apportjob_id = cur.fetchone()[0]

    # Next a blob that has expired, but claimed and now linked to
    # elsewhere in the database
    cur.execute("""
        INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
        VALUES (666, 'whatever', 'whatever', 'whatever')
        """)
    cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
    self.expired2_lfc_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO LibraryFileAlias (
            content, filename, mimetype, expires)
        VALUES (
            %s, 'whatever', 'whatever',
            CURRENT_TIMESTAMP - '1 day'::interval
            )
        """, (self.expired2_lfc_id,))
    cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
    self.expired2_lfa_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO TemporaryBlobStorage (uuid, file_alias)
        VALUES ('uuid2', %s)
        """, (self.expired2_lfa_id,))
    cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
    self.expired2_blob_id = cur.fetchone()[0]
    # Link it somewhere else, unexpired
    cur.execute("""
        INSERT INTO LibraryFileAlias (content, filename, mimetype)
        VALUES (%s, 'whatever', 'whatever')
        """, (self.expired2_lfc_id,))
    cur.execute("""
        UPDATE Person SET mugshot=currval('libraryfilealias_id_seq')
        WHERE name='stub'
        """)

    # And a non expired blob
    cur.execute("""
        INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
        VALUES (666, 'whatever', 'whatever', 'whatever')
        """)
    cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
    self.unexpired_lfc_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO LibraryFileAlias (
            content, filename, mimetype, expires)
        VALUES (
            %s, 'whatever', 'whatever',
            CURRENT_TIMESTAMP + '1 day'::interval
            )
        """, (self.unexpired_lfc_id,))
    cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
    self.unexpired_lfa_id = cur.fetchone()[0]
    cur.execute("""
        INSERT INTO TemporaryBlobStorage (uuid, file_alias)
        VALUES ('uuid3', %s)
        """, (self.unexpired_lfa_id,))
    cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
    self.unexpired_blob_id = cur.fetchone()[0]
    self.layer.txn.commit()

    # Make sure all the librarian files actually exist on disk
    cur = cursor()
    cur.execute("SELECT id FROM LibraryFileContent")
    for content_id in (row[0] for row in cur.fetchall()):
        path = librariangc.get_file_path(content_id)
        if not os.path.exists(path):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            open(path, 'w').write('whatever')
    self.layer.txn.abort()

    switch_dbuser(config.librarian_gc.dbuser)

    # Open a connection for our test
    self.con = connect(
        user=config.librarian_gc.dbuser,
        isolation=ISOLATION_LEVEL_AUTOCOMMIT)

    self.patch(librariangc, 'log', BufferLogger())
def main(): parser = LPOptionParser() db_options(parser) parser.add_option( "-f", "--from", dest="from_ts", type=datetime, default=None, metavar="TIMESTAMP", help="Use statistics collected since TIMESTAMP.") parser.add_option( "-u", "--until", dest="until_ts", type=datetime, default=None, metavar="TIMESTAMP", help="Use statistics collected up until TIMESTAMP.") parser.add_option( "-i", "--interval", dest="interval", type=str, default=None, metavar="INTERVAL", help=( "Use statistics collected over the last INTERVAL period. " "INTERVAL is a string parsable by PostgreSQL " "such as '5 minutes'.")) parser.add_option( "-n", "--limit", dest="limit", type=int, default=15, metavar="NUM", help="Display the top NUM items in each category.") parser.add_option( "-b", "--bloat", dest="bloat", type=float, default=40, metavar="BLOAT", help="Display tables and indexes bloated by more than BLOAT%.") parser.add_option( "--min-bloat", dest="min_bloat", type=int, default=10000000, metavar="BLOAT", help="Don't report tables bloated less than BLOAT bytes.") parser.set_defaults(dbuser="******") options, args = parser.parse_args() if options.from_ts and options.until_ts and options.interval: parser.error( "Only two of --from, --until and --interval may be specified.") con = connect() cur = con.cursor() tables = list(get_table_stats(cur, options)) if len(tables) == 0: parser.error("No statistics available in that time range.") arbitrary_table = tables[0] interval = arbitrary_table.date_end - arbitrary_table.date_start per_second = float(interval.days * 24 * 60 * 60 + interval.seconds) if per_second == 0: parser.error("Only one sample in that time range.") user_cpu = get_cpu_stats(cur, options) print "== Most Active Users ==" print for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]: print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10) print print "== Most Written Tables ==" print tables_sort = [ 'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname'] 
most_written_tables = sorted( tables, key=attrgetter(*tables_sort), reverse=True) for table in most_written_tables[:options.limit]: print "%40s || %10.2f tuples/sec" % ( table.relname, table.total_tup_written / per_second) print print "== Most Read Tables ==" print # These match the pg_user_table_stats view. schemaname is the # namespace (normally 'public'), relname is the table (relation) # name. total_tup_red is the total number of rows read. # idx_tup_fetch is the number of rows looked up using an index. tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname'] most_read_tables = sorted( tables, key=attrgetter(*tables_sort), reverse=True) for table in most_read_tables[:options.limit]: print "%40s || %10.2f tuples/sec" % ( table.relname, table.total_tup_read / per_second) table_bloat_stats = get_bloat_stats(cur, options, 'r') if not table_bloat_stats: print print "(There is no bloat information available in this time range.)" else: print print "== Most Bloated Tables ==" print for bloated_table in table_bloat_stats[:options.limit]: print "%40s || %2d%% || %s of %s" % ( bloated_table.name, bloated_table.end_bloat_percent, bloated_table.bloat_size, bloated_table.table_size) index_bloat_stats = get_bloat_stats(cur, options, 'i') print print "== Most Bloated Indexes ==" print for bloated_index in index_bloat_stats[:options.limit]: print "%65s || %2d%% || %s of %s" % ( bloated_index.sub_name, bloated_index.end_bloat_percent, bloated_index.bloat_size, bloated_index.table_size) # Order bloat delta report by size of bloat increase. # We might want to change this to percentage bloat increase. 
bloating_sort_key = lambda x: x.delta_bloat_len table_bloating_stats = sorted( table_bloat_stats, key=bloating_sort_key, reverse=True) if table_bloating_stats[0].num_samples <= 1: print print fill(dedent("""\ (There are not enough samples in this time range to display bloat change statistics) """)) else: print print "== Most Bloating Tables ==" print for bloated_table in table_bloating_stats[:options.limit]: # Bloat decreases are uninteresting, and would need to be in # a separate table sorted in reverse anyway. if bloated_table.delta_bloat_percent > 0: print "%40s || +%4.2f%% || +%s" % ( bloated_table.name, bloated_table.delta_bloat_percent, bloated_table.delta_bloat_size) index_bloating_stats = sorted( index_bloat_stats, key=bloating_sort_key, reverse=True) print print "== Most Bloating Indexes ==" print for bloated_index in index_bloating_stats[:options.limit]: # Bloat decreases are uninteresting, and would need to be in # a separate table sorted in reverse anyway. if bloated_index.delta_bloat_percent > 0: print "%65s || +%4.2f%% || +%s" % ( bloated_index.sub_name, bloated_index.delta_bloat_percent, bloated_index.delta_bloat_size)
def main(): # XXX: Tom Haddon 2007-07-12 # There's a lot of untested stuff here: parsing options - # this should be moved into a testable location. # Also duplicated code in scripts/script-monitor.py parser = OptionParser( '%prog [options] (minutes) (host:scriptname) [host:scriptname]' ) db_options(parser) logger_options(parser) (options, args) = parser.parse_args() if len(args) < 2: print "Must specify time in minutes and " \ "at least one host and script" return 3 # First argument is the number of minutes into the past # we want to look for the scripts on the specified hosts try: minutes_ago, args = int(args[0]), args[1:] start_date = datetime.now() - timedelta(minutes=minutes_ago) completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple()) completed_to = strftime( "%Y-%m-%d %H:%M:%S", datetime.now().timetuple()) hosts_scripts = [] for arg in args: try: hostname, scriptname = arg.split(':') except TypeError: print "%r is not in the format 'host:scriptname'" % arg return 3 hosts_scripts.append((hostname, scriptname)) except ValueError: print "Must specify time in minutes and " \ "at least one host and script" return 3 log = logger(options) try: log.debug("Connecting to database") con = connect() error_found = False msg = [] for hostname, scriptname in hosts_scripts: failure_msg = check_script(con, log, hostname, scriptname, completed_from, completed_to) if failure_msg is not None: msg.append("%s:%s" % (hostname, scriptname)) error_found = True if error_found: # Construct our return message print "Scripts failed to run: %s" % ', '.join(msg) return 2 else: # Construct our return message print "All scripts ran as expected" return 0 except Exception as e: # Squeeze the exception type and stringification of the exception # value on to one line. print "Unhandled exception: %s %r" % (e.__class__.__name__, str(e)) return 3