Example #1
def liverebuild(con):
    """Rebuild the data in all the fti columns against possibly live database.
    """
    # Number of rows to update per transaction.
    batch_size = 50

    cur = con.cursor()
    for table, ignored in ALL_FTI:
        table = quote_identifier(table)
        cur.execute("SELECT max(id) FROM %s" % table)
        max_id = cur.fetchone()[0]
        if max_id is None:
            log.info("No data in %s - skipping", table)
            continue

        log.info("Rebuilding fti column on %s", table)
        for id in range(0, max_id, batch_size):
            try:
                query = """
                    UPDATE %s SET fti=NULL WHERE id BETWEEN %d AND %d
                    """ % (table, id + 1, id + batch_size)
                log.debug(query)
                cur.execute(query)
            except psycopg2.Error:
                # No commit - we are in autocommit mode
                log.exception('psycopg error')
                con = connect()
                con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
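
Note (not part of the original script): psycopg2 can bind the id bounds as
query parameters, so only the table identifier needs string interpolation.
A minimal sketch using the same table, id and batch_size names:

# Identifiers cannot be bound parameters, but values can; %%s survives the
# first formatting pass and becomes the psycopg2 placeholder %s.
query = "UPDATE %s SET fti=NULL WHERE id BETWEEN %%s AND %%s" % table
cur.execute(query, (id + 1, id + batch_size))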
Example #3
def main():
    parser = OptionParser(
            '%prog [options] (username|email) [...]'
            )
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("Must specify username (Person.name)")

    log = logger(options)

    con = None
    try:
        log.debug("Connecting to database")
        con = connect()
        for username in args:
            if not close_account(con, log, username):
                log.debug("Rolling back")
                con.rollback()
                return 1
        log.debug("Committing changes")
        con.commit()
        return 0
    except:
        log.exception("Unhandled exception")
        log.debug("Rolling back")
        if con is not None:
            con.rollback()
        return 1
Example #4
def main():
    parser = OptionParser('%prog [options] (username|email) [...]')
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("Must specify username (Person.name)")

    log = logger(options)

    con = None
    try:
        log.debug("Connecting to database")
        con = connect()
        for username in args:
            if not close_account(con, log, username):
                log.debug("Rolling back")
                con.rollback()
                return 1
        log.debug("Committing changes")
        con.commit()
        return 0
    except:
        log.exception("Unhandled exception")
        log.debug("Rolling back")
        if con is not None:
            con.rollback()
        return 1
Example #6
def main():
    parser = OptionParser()
    logger_options(parser)
    db_options(parser)

    options, args = parser.parse_args()

    if len(args) > 0:
        parser.error("Too many arguments.")

    log = logger(options)

    log.debug("Connecting")
    con = connect()
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()

    cur.execute('show server_version')
    pg_version = LooseVersion(cur.fetchone()[0])

    log.debug("Disabling autovacuum on all tables in the database.")
    if pg_version < LooseVersion('8.4.0'):
        cur.execute("""
            INSERT INTO pg_autovacuum
            SELECT pg_class.oid, FALSE, -1,-1,-1,-1,-1,-1,-1,-1
            FROM pg_class
            WHERE relkind in ('r','t')
                AND pg_class.oid NOT IN (SELECT vacrelid FROM pg_autovacuum)
            """)
    else:
        cur.execute("""
            SELECT nspname,relname
            FROM pg_namespace, pg_class
            WHERE relnamespace = pg_namespace.oid
                AND relkind = 'r' AND nspname <> 'pg_catalog'
            """)
        for namespace, table in list(cur.fetchall()):
            cur.execute("""
                ALTER TABLE ONLY "%s"."%s" SET (
                    autovacuum_enabled=false,
                    toast.autovacuum_enabled=false)
                """ % (namespace, table))

    log.debug("Killing existing autovacuum processes")
    num_autovacuums = -1
    while num_autovacuums != 0:
        # Sleep long enough for pg_stat_activity to be updated.
        time.sleep(0.6)
        cur.execute("""
            SELECT procpid FROM pg_stat_activity
            WHERE
                datname=current_database()
                AND current_query LIKE 'autovacuum: %'
            """)
        autovacuums = [row[0] for row in cur.fetchall()]
        num_autovacuums = len(autovacuums)
        for procpid in autovacuums:
            log.debug("Cancelling %d" % procpid)
            cur.execute("SELECT pg_cancel_backend(%d)" % procpid)
    def setUp(self):
        # We need some fake options so that this test doesn't try to parse
        # sys.argv.  We don't care about the log messages, so just throw them
        # away.
        class FakeOptions:
            log_file = None
            loglevel = 1000
            verbose = False
            milliseconds = False
        self.con = connect()
        self.log = logger(FakeOptions())

    def test_connect(self):
        # Ensure connect() method returns a connection with the correct
        # default isolation
        con = connect()
        self.assertEqual(self.getCurrentIsolation(con), 'read committed')
        con.rollback()
        self.assertEqual(self.getCurrentIsolation(con), 'read committed')

        # Ensure that changing the isolation sticks.
        con = connect(isolation=ISOLATION_LEVEL_SERIALIZABLE)
        self.assertEqual(self.getCurrentIsolation(con), 'serializable')
        con.rollback()
        self.assertEqual(self.getCurrentIsolation(con), 'serializable')

        # But on a fresh connection, it works just fine.
        con = connect()
        con.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
        self.assertEqual(self.getCurrentIsolation(con), 'serializable')
        con.rollback()
        self.assertEqual(self.getCurrentIsolation(con), 'serializable')
    def test_connect(self):
        # Ensure connect() method returns a connection with the correct
        # default isolation
        con = connect()
        self.failUnlessEqual(self.getCurrentIsolation(con), 'read committed')
        con.rollback()
        self.failUnlessEqual(self.getCurrentIsolation(con), 'read committed')

        # Ensure that changing the isolation sticks.
        con = connect(isolation=ISOLATION_LEVEL_SERIALIZABLE)
        self.failUnlessEqual(self.getCurrentIsolation(con), 'serializable')
        con.rollback()
        self.failUnlessEqual(self.getCurrentIsolation(con), 'serializable')

        # But on a fresh connection, it works just fine.
        con = connect()
        con.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
        self.failUnlessEqual(self.getCurrentIsolation(con), 'serializable')
        con.rollback()
        self.failUnlessEqual(self.getCurrentIsolation(con), 'serializable')
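
getCurrentIsolation() is not shown in these listings. A plausible helper,
assuming it simply asks PostgreSQL for the session's transaction isolation
level, could look like this:

    def getCurrentIsolation(self, con):
        # Returns e.g. 'read committed' or 'serializable' for this session.
        cur = con.cursor()
        cur.execute("SHOW transaction_isolation")
        return cur.fetchone()[0]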
Example #10
def main(options, master_con=None):
    # Load the config file
    config = SafeConfigParser(CONFIG_DEFAULTS)
    configfile_name = os.path.join(os.path.dirname(__file__), 'security.cfg')
    config.read([configfile_name])

    if master_con is None:
        master_con = connect()

    log.info("Resetting permissions.")
    reset_permissions(master_con, config, options)
    return 0
Example #11
    def setUp(self):
        # We need some fake options so that this test doesn't try to parse
        # sys.argv.  We don't care about the log messages, so just throw them
        # away.
        class FakeOptions:
            log_file = None
            loglevel = 1000
            verbose = False
            milliseconds = False

        self.con = connect()
        self.log = logger(FakeOptions())
Example #12
def main(options):
    con = connect()
    cur = con.cursor()
    cur.execute("""
        SELECT relname FROM pg_class,pg_namespace
        WHERE pg_class.relnamespace = pg_namespace.oid
            AND pg_namespace.nspname='public'
            AND pg_class.relkind = 'r'
        ORDER BY relname
        """)
    for table in (row[0] for row in cur.fetchall()):
        cur.execute("SELECT TRUE FROM public.%s LIMIT 1" %
                    quote_identifier(table))
        if cur.fetchone() is None:
            print table
Example #13
def main(options):
    con = connect()
    cur = con.cursor()
    cur.execute("""
        SELECT relname FROM pg_class,pg_namespace
        WHERE pg_class.relnamespace = pg_namespace.oid
            AND pg_namespace.nspname='public'
            AND pg_class.relkind = 'r'
        ORDER BY relname
        """)
    for table in (row[0] for row in cur.fetchall()):
        cur.execute(
                "SELECT TRUE FROM public.%s LIMIT 1" % quote_identifier(table)
                )
        if cur.fetchone() is None:
            print table
Example #14
def main():
    parser = OptionParser()
    logger_options(parser)
    db_options(parser)

    options, args = parser.parse_args()

    if len(args) > 0:
        parser.error("Too many arguments.")

    log = logger(options)

    log.debug("Connecting")
    con = connect()
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()

    log.debug("Disabling autovacuum on all tables in the database.")
    cur.execute("""
        SELECT nspname,relname
        FROM pg_namespace, pg_class
        WHERE relnamespace = pg_namespace.oid
            AND relkind = 'r' AND nspname <> 'pg_catalog'
        """)
    for namespace, table in list(cur.fetchall()):
        cur.execute("""
            ALTER TABLE ONLY "%s"."%s" SET (
                autovacuum_enabled=false,
                toast.autovacuum_enabled=false)
            """ % (namespace, table))

    log.debug("Killing existing autovacuum processes")
    num_autovacuums = -1
    while num_autovacuums != 0:
        # Sleep long enough for pg_stat_activity to be updated.
        time.sleep(0.6)
        cur.execute("""
            SELECT %(pid)s FROM pg_stat_activity
            WHERE
                datname=current_database()
                AND %(query)s LIKE 'autovacuum: %%'
            """ % activity_cols(cur))
        autovacuums = [row[0] for row in cur.fetchall()]
        num_autovacuums = len(autovacuums)
        for pid in autovacuums:
            log.debug("Cancelling %d" % pid)
            cur.execute("SELECT pg_cancel_backend(%d)" % pid)
Example #15
    def setUp(self):
        super(TestLibrarianGarbageCollection, self).setUp()
        self.client = LibrarianClient()
        self.patch(librariangc, 'log', BufferLogger())

        # A value we use in a number of tests. This represents the
        # stay of execution hard coded into the garbage collector.
        # We don't destroy any data unless it has been waiting to be
        # destroyed for longer than this period. We pick a value
        # that is close enough to the stay of execution so that
        # forgetting timezone information will break things, but
        # far enough so that how long it takes the test to run
        # is not an issue. 'stay_of_execution - 1 hour' fits these
        # criteria.
        self.recent_past = utc_now() - timedelta(days=6, hours=23)
        # A time beyond the stay of execution.
        self.ancient_past = utc_now() - timedelta(days=30)

        self.f1_id, self.f2_id = self._makeDupes()

        switch_dbuser(config.librarian_gc.dbuser)
        self.ztm = self.layer.txn

        # Make sure the files exist. We do this in setup, because we
        # need to use the get_file_path method later in the setup and we
        # want to be sure it is working correctly.
        path = librariangc.get_file_path(self.f1_id)
        self.failUnless(os.path.exists(path), "Librarian uploads failed")

        # Make sure that every file the database knows about exists on disk.
        # We manually remove them for tests that need to cope with missing
        # library items.
        self.ztm.begin()
        cur = cursor()
        cur.execute("SELECT id FROM LibraryFileContent")
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            if not os.path.exists(path):
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                open(path, 'w').write('whatever')
        self.ztm.abort()

        self.con = connect(
            user=config.librarian_gc.dbuser,
            isolation=ISOLATION_LEVEL_AUTOCOMMIT)
Example #16
def main(con=None):
    if con is None:
        con = connect()

    patches = get_patchlist(con)

    log.info("Applying patches.")
    apply_patches_normal(con)

    report_patch_times(con, patches)

    # Commit changes
    if options.commit:
        log.debug("Committing changes")
        con.commit()

    return 0
Example #18
def preamble(con=None):
    """Return the preable needed at the start of all slonik scripts."""

    if con is None:
        con = connect(user='******')

    master_node = get_master_node(con)
    nodes = get_all_cluster_nodes(con)
    if master_node is None and len(nodes) == 1:
        master_node = nodes[0]

    preamble = [
        dedent("""\
        #
        # Every slonik script must start with a clustername, which cannot
        # be changed once the cluster is initialized.
        #
        cluster name = sl;

        # Symbolic ids for replication sets.
        define lpmain_set   %d;
        define holding_set  %d;
        define sso_set      %d;
        define lpmirror_set %d;
        """ % (LPMAIN_SET_ID, HOLDING_SET_ID, SSO_SET_ID, LPMIRROR_SET_ID))
    ]

    if master_node is not None:
        preamble.append(
            dedent("""\
        # Symbolic id for the main replication set master node.
        define master_node %d;
        define master_node_conninfo '%s';
        """ % (master_node.node_id, master_node.connection_string)))

    for node in nodes:
        preamble.append(
            dedent("""\
            define %s %d;
            define %s_conninfo '%s';
            node @%s admin conninfo = @%s_conninfo;
            """ % (node.nickname, node.node_id, node.nickname,
                   node.connection_string, node.nickname, node.nickname)))

    return '\n\n'.join(preamble)
Example #19
def preamble(con=None):
    """Return the preable needed at the start of all slonik scripts."""

    if con is None:
        con = connect(user='******')

    master_node = get_master_node(con)
    nodes = get_all_cluster_nodes(con)
    if master_node is None and len(nodes) == 1:
        master_node = nodes[0]

    preamble = [dedent("""\
        #
        # Every slonik script must start with a clustername, which cannot
        # be changed once the cluster is initialized.
        #
        cluster name = sl;

        # Symbolic ids for replication sets.
        define lpmain_set   %d;
        define holding_set  %d;
        define sso_set      %d;
        define lpmirror_set %d;
        """ % (LPMAIN_SET_ID, HOLDING_SET_ID, SSO_SET_ID, LPMIRROR_SET_ID))]

    if master_node is not None:
        preamble.append(dedent("""\
        # Symbolic id for the main replication set master node.
        define master_node %d;
        define master_node_conninfo '%s';
        """ % (master_node.node_id, master_node.connection_string)))

    for node in nodes:
        preamble.append(dedent("""\
            define %s %d;
            define %s_conninfo '%s';
            node @%s admin conninfo = @%s_conninfo;
            """ % (
                node.nickname, node.node_id,
                node.nickname, node.connection_string,
                node.nickname, node.nickname)))

    return '\n\n'.join(preamble)
Example #20
def main():
    parser = OptionParser()
    parser.add_option(
        "-0",
        "--null",
        dest="null",
        action="store_true",
        default=False,
        help="Set all full text index column values to NULL.",
    )
    parser.add_option(
        "-l",
        "--live-rebuild",
        dest="liverebuild",
        action="store_true",
        default=False,
        help="Rebuild all the indexes against a live database.",
    )
    db_options(parser)
    logger_options(parser)

    global options, args
    (options, args) = parser.parse_args()

    if options.null + options.liverebuild > 1:
        parser.error("Incompatible options")

    global log
    log = logger(options)

    con = connect()

    if options.liverebuild:
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        liverebuild(con)
    elif options.null:
        con.set_isolation_level(ISOLATION_LEVEL_READ_COMMITTED)
        nullify(con)
    else:
        parser.error("Required argument not specified")

    con.commit()
    return 0
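
nullify() is not shown in this listing. A hypothetical sketch, reusing the
ALL_FTI, quote_identifier and log names from the liverebuild example above,
might simply blank every fti column with one UPDATE per table:

def nullify(con):
    # Hypothetical: NULL out each fti column in a single statement per
    # table (the --live-rebuild path above batches this work instead).
    cur = con.cursor()
    for table, ignored in ALL_FTI:
        table = quote_identifier(table)
        log.info("Removing full text index data from %s", table)
        cur.execute("UPDATE %s SET fti = NULL" % table)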
Example #21
def main():
    parser = OptionParser()
    parser.add_option(
            "-0", "--null", dest="null",
            action="store_true", default=False,
            help="Set all full text index column values to NULL.",
            )
    parser.add_option(
            "-l", "--live-rebuild", dest="liverebuild",
            action="store_true", default=False,
            help="Rebuild all the indexes against a live database.",
            )
    db_options(parser)
    logger_options(parser)

    global options, args
    (options, args) = parser.parse_args()

    if options.null + options.liverebuild > 1:
        parser.error("Incompatible options")

    global log
    log = logger(options)

    con = connect()

    if options.liverebuild:
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        liverebuild(con)
    elif options.null:
        con.set_isolation_level(ISOLATION_LEVEL_READ_COMMITTED)
        nullify(con)
    else:
        parser.error("Required argument not specified")

    con.commit()
    return 0
Example #22
def main():
    # XXX: Tom Haddon 2007-07-12
    # There's a lot of untested stuff here: parsing options and sending
    # emails - this should be moved into a testable location.
    # Also duplicated code in scripts/script-monitor-nagios.py
    parser = OptionParser(
        '%prog [options] (minutes) (host:scriptname) [host:scriptname]')
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.error("Must specify at time in minutes and "
                     "at least one host and script")

    # First argument is the number of minutes into the past
    # we want to look for the scripts on the specified hosts
    try:
        minutes_ago, args = int(args[0]), args[1:]
        start_date = datetime.now() - timedelta(minutes=minutes_ago)

        completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple())
        completed_to = strftime("%Y-%m-%d %H:%M:%S",
                                datetime.now().timetuple())

        hosts_scripts = []
        for arg in args:
            try:
                hostname, scriptname = arg.split(':')
            except ValueError:
                parser.error("%r is not in the format 'host:scriptname'" %
                             (arg, ))
            hosts_scripts.append((hostname, scriptname))
    except ValueError:
        parser.error("Must specify time in minutes and "
                     "at least one host and script")

    log = logger(options)

    try:
        log.debug("Connecting to database")
        con = connect()
        error_found = False
        msg, subj = [], []
        for hostname, scriptname in hosts_scripts:
            failure_msg = check_script(con, log, hostname, scriptname,
                                       completed_from, completed_to)
            if failure_msg is not None:
                msg.append(failure_msg)
                subj.append("%s:%s" % (hostname, scriptname))
                error_found = 2
        if error_found:
            # Construct our email.
            msg = MIMEText('\n'.join(msg))
            msg['Subject'] = "Scripts failed to run: %s" % ", ".join(subj)
            msg['From'] = '*****@*****.**'
            msg['Reply-To'] = '*****@*****.**'
            msg['To'] = '*****@*****.**'

            # Send out the email.
            smtp = smtplib.SMTP()
            smtp.connect()
            smtp.sendmail('*****@*****.**',
                          ['*****@*****.**'], msg.as_string())
            smtp.close()
            return 2
    except:
        log.exception("Unhandled exception")
        return 1
Example #23
def main():
    parser = OptionParser()

    db_options(parser)
    parser.add_option("-f",
                      "--from",
                      dest="from_date",
                      default=None,
                      metavar="DATE",
                      help="Only count new files since DATE (yyyy/mm/dd)")
    parser.add_option("-u",
                      "--until",
                      dest="until_date",
                      default=None,
                      metavar="DATE",
                      help="Only count new files until DATE (yyyy/mm/dd)")

    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many command line arguments.")

    # Handle date filters. We use LibraryFileContent.datecreated rather
    # than LibraryFileAlias.datecreated as this report is about actual
    # disk space usage. A new row in the database linking to a
    # previously existing file in the Librarian takes up no new space.
    if options.from_date is not None:
        from_date = 'AND LFC.datecreated >= %s' % sqlvalues(options.from_date)
    else:
        from_date = ''
    if options.until_date is not None:
        until_date = 'AND LFC.datecreated <= %s' % sqlvalues(
            options.until_date)
    else:
        until_date = ''

    con = connect()
    cur = con.cursor()

    # Collect direct references to the LibraryFileAlias table.
    references = set(
        (from_table, from_column)
        # Note that listReferences is recursive, which we don't
        # care about in this simple report. We also ignore the
        # irrelevant constraint type update and delete flags.
        for from_table, from_column, to_table, to_column, update, delete in
        listReferences(cur, 'libraryfilealias', 'id')
        if to_table == 'libraryfilealias')

    totals = set()
    for referring_table, referring_column in sorted(references):
        if referring_table == 'libraryfiledownloadcount':
            continue
        quoted_referring_table = quoteIdentifier(referring_table)
        quoted_referring_column = quoteIdentifier(referring_column)
        cur.execute("""
            SELECT
                COALESCE(SUM(filesize), 0),
                pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)),
                COUNT(*)
            FROM (
                SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize
                FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s
                WHERE LFC.id = LFA.content
                    AND LFA.id = %s.%s
                    AND (
                        LFA.expires IS NULL
                        OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
                    %s %s
                ORDER BY LFC.id
                ) AS Whatever
            """ % (quoted_referring_table, quoted_referring_table,
                   quoted_referring_column, from_date, until_date))
        total_bytes, formatted_size, num_files = cur.fetchone()
        totals.add((total_bytes, referring_table, formatted_size, num_files))

    for total_bytes, tab_name, formatted_size, num_files in sorted(
            totals, reverse=True):
        print '%-10s %s in %d files' % (formatted_size, tab_name, num_files)

    return 0
def main():
    parser = LPOptionParser()
    db_options(parser)
    parser.add_option("-f",
                      "--from",
                      dest="from_ts",
                      type=datetime,
                      default=None,
                      metavar="TIMESTAMP",
                      help="Use statistics collected since TIMESTAMP.")
    parser.add_option("-u",
                      "--until",
                      dest="until_ts",
                      type=datetime,
                      default=None,
                      metavar="TIMESTAMP",
                      help="Use statistics collected up until TIMESTAMP.")
    parser.add_option(
        "-i",
        "--interval",
        dest="interval",
        type=str,
        default=None,
        metavar="INTERVAL",
        help=("Use statistics collected over the last INTERVAL period. "
              "INTERVAL is a string parsable by PostgreSQL "
              "such as '5 minutes'."))
    parser.add_option("-n",
                      "--limit",
                      dest="limit",
                      type=int,
                      default=15,
                      metavar="NUM",
                      help="Display the top NUM items in each category.")
    parser.add_option(
        "-b",
        "--bloat",
        dest="bloat",
        type=float,
        default=40,
        metavar="BLOAT",
        help="Display tables and indexes bloated by more than BLOAT%.")
    parser.add_option(
        "--min-bloat",
        dest="min_bloat",
        type=int,
        default=10000000,
        metavar="BLOAT",
        help="Don't report tables bloated less than BLOAT bytes.")
    parser.set_defaults(dbuser="******")
    options, args = parser.parse_args()

    if options.from_ts and options.until_ts and options.interval:
        parser.error(
            "Only two of --from, --until and --interval may be specified.")

    con = connect()
    cur = con.cursor()

    tables = list(get_table_stats(cur, options))
    if len(tables) == 0:
        parser.error("No statistics available in that time range.")
    arbitrary_table = tables[0]
    interval = arbitrary_table.date_end - arbitrary_table.date_start
    per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
    if per_second == 0:
        parser.error("Only one sample in that time range.")

    user_cpu = get_cpu_stats(cur, options)
    print "== Most Active Users =="
    print
    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)

    print
    print "== Most Written Tables =="
    print
    tables_sort = [
        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname'
    ]
    most_written_tables = sorted(tables,
                                 key=attrgetter(*tables_sort),
                                 reverse=True)
    for table in most_written_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (
            table.relname, table.total_tup_written / per_second)

    print
    print "== Most Read Tables =="
    print
    # These match the pg_user_table_stats view. schemaname is the
    # namespace (normally 'public'), relname is the table (relation)
    # name. total_tup_read is the total number of rows read.
    # idx_tup_fetch is the number of rows looked up using an index.
    tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname']
    most_read_tables = sorted(tables,
                              key=attrgetter(*tables_sort),
                              reverse=True)
    for table in most_read_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (table.relname,
                                             table.total_tup_read / per_second)

    table_bloat_stats = get_bloat_stats(cur, options, 'r')

    if not table_bloat_stats:
        print
        print "(There is no bloat information available in this time range.)"

    else:
        print
        print "== Most Bloated Tables =="
        print
        for bloated_table in table_bloat_stats[:options.limit]:
            print "%40s || %2d%% || %s of %s" % (
                bloated_table.name, bloated_table.end_bloat_percent,
                bloated_table.bloat_size, bloated_table.table_size)

        index_bloat_stats = get_bloat_stats(cur, options, 'i')

        print
        print "== Most Bloated Indexes =="
        print
        for bloated_index in index_bloat_stats[:options.limit]:
            print "%65s || %2d%% || %s of %s" % (
                bloated_index.sub_name, bloated_index.end_bloat_percent,
                bloated_index.bloat_size, bloated_index.table_size)

        # Order bloat delta report by size of bloat increase.
        # We might want to change this to percentage bloat increase.
        bloating_sort_key = lambda x: x.delta_bloat_len

        table_bloating_stats = sorted(table_bloat_stats,
                                      key=bloating_sort_key,
                                      reverse=True)

        if table_bloating_stats[0].num_samples <= 1:
            print
            print fill(
                dedent("""\
                (There are not enough samples in this time range to display
                bloat change statistics)
                """))
        else:
            print
            print "== Most Bloating Tables =="
            print

            for bloated_table in table_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_table.delta_bloat_percent > 0:
                    print "%40s || +%4.2f%% || +%s" % (
                        bloated_table.name, bloated_table.delta_bloat_percent,
                        bloated_table.delta_bloat_size)

            index_bloating_stats = sorted(index_bloat_stats,
                                          key=bloating_sort_key,
                                          reverse=True)

            print
            print "== Most Bloating Indexes =="
            print
            for bloated_index in index_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_index.delta_bloat_percent > 0:
                    print "%65s || +%4.2f%% || +%s" % (
                        bloated_index.sub_name,
                        bloated_index.delta_bloat_percent,
                        bloated_index.delta_bloat_size)
The sampledata does not update the current values of all the sequences
used to populate the primary keys (this was removed to aid in merging changes
to the sampledata).

This script resets all of these sequences to the correct value based on the
maximum value currently found in the corresponding table.
"""

__metaclass__ = type

import _pythonpath

from optparse import OptionParser

from lp.services.database.postgresql import resetSequences
from lp.services.database.sqlbase import connect
from lp.services.scripts import db_options


if __name__ == '__main__':
    parser = OptionParser()
    db_options(parser)
    (options, args) = parser.parse_args()
    if args:
        parser.error("Too many options given")
    if not options.dbname:
        parser.error("Required option --dbname not given")
    con = connect()
    resetSequences(con.cursor())
    con.commit()
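
resetSequences() comes from lp.services.database.postgresql and is not shown
here. For a single table, the equivalent raw SQL is roughly the following,
with person used purely as an illustrative table name and the conventional
<table>_id_seq sequence naming assumed:

cur = con.cursor()
# Hypothetical single-table equivalent of resetSequences(); 'person' is
# only an illustrative table name.
cur.execute(
    "SELECT setval('person_id_seq', "
    "COALESCE((SELECT max(id) FROM person), 1))")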
Example #26
def main():
    parser = OptionParser()

    db_options(parser)
    parser.add_option(
        "-f", "--from", dest="from_date", default=None,
        metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)")
    parser.add_option(
        "-u", "--until", dest="until_date", default=None,
        metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)")

    options, args = parser.parse_args()
    if len(args) > 0:
        parser.error("Too many command line arguments.")

    # Handle date filters. We use LibraryFileContent.datecreated rather
    # than LibraryFileAlias.datecreated as this report is about actual
    # disk space usage. A new row in the database linking to a
    # previously existing file in the Librarian takes up no new space.
    if options.from_date is not None:
        from_date = 'AND LFC.datecreated >= %s' % sqlvalues(
            options.from_date)
    else:
        from_date = ''
    if options.until_date is not None:
        until_date = 'AND LFC.datecreated <= %s' % sqlvalues(
            options.until_date)
    else:
        until_date = ''

    con = connect()
    cur = con.cursor()

    # Collect direct references to the LibraryFileAlias table.
    references = set(
        (from_table, from_column)
        # Note that listReferences is recursive, which we don't
        # care about in this simple report. We also ignore the
        # irrelevant constraint type update and delete flags.
        for from_table, from_column, to_table, to_column, update, delete
            in listReferences(cur, 'libraryfilealias', 'id')
        if to_table == 'libraryfilealias'
        )

    totals = set()
    for referring_table, referring_column in sorted(references):
        if referring_table == 'libraryfiledownloadcount':
            continue
        quoted_referring_table = quoteIdentifier(referring_table)
        quoted_referring_column = quoteIdentifier(referring_column)
        cur.execute("""
            SELECT
                COALESCE(SUM(filesize), 0),
                pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)),
                COUNT(*)
            FROM (
                SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize
                FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s
                WHERE LFC.id = LFA.content
                    AND LFA.id = %s.%s
                    AND (
                        LFA.expires IS NULL
                        OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
                    %s %s
                ORDER BY LFC.id
                ) AS Whatever
            """ % (
                quoted_referring_table, quoted_referring_table,
                quoted_referring_column, from_date, until_date))
        total_bytes, formatted_size, num_files = cur.fetchone()
        totals.add((total_bytes, referring_table, formatted_size, num_files))

    for total_bytes, tab_name, formatted_size, num_files in sorted(
        totals, reverse=True):
        print '%-10s %s in %d files' % (formatted_size, tab_name, num_files)

    return 0
Example #27
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Generate a preamble for slonik(1) scripts based on the current LPCONFIG.
"""

__metaclass__ = type
__all__ = []

import _pythonpath

from optparse import OptionParser
import time

from lp.services import scripts
from lp.services.config import config
from lp.services.database.sqlbase import connect
import replication.helpers

if __name__ == '__main__':
    parser = OptionParser()
    scripts.db_options(parser)
    (options, args) = parser.parse_args()
    if args:
        parser.error("Too many arguments")
    scripts.execute_zcml_for_scripts(use_web_security=False)

    con = connect()
    print '# slonik(1) preamble generated %s' % time.ctime()
    print '# LPCONFIG=%s' % config.instance_name
    print
    print replication.helpers.preamble(con)
Example #28
def main():
    # XXX: Tom Haddon 2007-07-12
    # There's a lot of untested stuff here: parsing options and sending
    # emails - this should be moved into a testable location.
    # Also duplicated code in scripts/script-monitor-nagios.py
    parser = OptionParser(
            '%prog [options] (minutes) (host:scriptname) [host:scriptname]'
            )
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.error("Must specify at time in minutes and "
            "at least one host and script")

    # First argument is the number of minutes into the past
    # we want to look for the scripts on the specified hosts
    try:
        minutes_ago, args = int(args[0]), args[1:]
        start_date = datetime.now() - timedelta(minutes=minutes_ago)

        completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple())
        completed_to = strftime(
            "%Y-%m-%d %H:%M:%S", datetime.now().timetuple())

        hosts_scripts = []
        for arg in args:
            try:
                hostname, scriptname = arg.split(':')
            except ValueError:
                parser.error(
                    "%r is not in the format 'host:scriptname'" % (arg,))
            hosts_scripts.append((hostname, scriptname))
    except ValueError:
        parser.error("Must specify time in minutes and "
            "at least one host and script")

    log = logger(options)

    try:
        log.debug("Connecting to database")
        con = connect()
        error_found = False
        msg, subj = [], []
        for hostname, scriptname in hosts_scripts:
            failure_msg = check_script(con, log, hostname,
                scriptname, completed_from, completed_to)
            if failure_msg is not None:
                msg.append(failure_msg)
                subj.append("%s:%s" % (hostname, scriptname))
                error_found = 2
        if error_found:
            # Construct our email.
            msg = MIMEText('\n'.join(msg))
            msg['Subject'] = "Scripts failed to run: %s" % ", ".join(subj)
            msg['From'] = '*****@*****.**'
            msg['Reply-To'] = '*****@*****.**'
            msg['To'] = '*****@*****.**'

            # Send out the email.
            smtp = smtplib.SMTP()
            smtp.connect()
            smtp.sendmail(
                '*****@*****.**',
                ['*****@*****.**'], msg.as_string())
            smtp.close()
            return 2
    except:
        log.exception("Unhandled exception")
        return 1
Example #29
    def setUp(self):
        super(TestBlobCollection, self).setUp()
        # Add in some sample data
        cur = cursor()

        # First a blob that has been unclaimed and expired.
        cur.execute("""
            INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
            VALUES (666, 'whatever', 'whatever', 'whatever')
            """)
        cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
        self.expired_lfc_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO LibraryFileAlias (
                content, filename, mimetype, expires)
            VALUES (
                %s, 'whatever', 'whatever',
                CURRENT_TIMESTAMP - '1 day'::interval
                )
            """, (self.expired_lfc_id,))
        cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
        self.expired_lfa_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO TemporaryBlobStorage (uuid, file_alias)
            VALUES ('uuid', %s)
            """, (self.expired_lfa_id,))
        cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
        self.expired_blob_id = cur.fetchone()[0]

        # Add ApportJob and Job entries - these need to be removed
        # too.
        cur.execute("""
            INSERT INTO Job (status, date_finished)
            VALUES (0, CURRENT_TIMESTAMP - interval '2 days') RETURNING id
            """)
        self.expired_job_id = cur.fetchone()[0]
        cur.execute("""
            INSERT INTO ApportJob (job, blob, job_type)
            VALUES (%s, %s, 0) RETURNING id
            """, (self.expired_job_id, self.expired_blob_id))
        self.expired_apportjob_id = cur.fetchone()[0]

        # Next, a blob that has expired but has been claimed and is now
        # linked elsewhere in the database.
        cur.execute("""
            INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
            VALUES (666, 'whatever', 'whatever', 'whatever')
            """)
        cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
        self.expired2_lfc_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO LibraryFileAlias (
                content, filename, mimetype, expires)
            VALUES (
                %s, 'whatever', 'whatever',
                CURRENT_TIMESTAMP - '1 day'::interval
                )
            """, (self.expired2_lfc_id,))
        cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
        self.expired2_lfa_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO TemporaryBlobStorage (uuid, file_alias)
            VALUES ('uuid2', %s)
            """, (self.expired2_lfa_id,))
        cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
        self.expired2_blob_id = cur.fetchone()[0]

        # Link it somewhere else, unexpired
        cur.execute("""
            INSERT INTO LibraryFileAlias (content, filename, mimetype)
            VALUES (%s, 'whatever', 'whatever')
            """, (self.expired2_lfc_id,))
        cur.execute("""
            UPDATE Person SET mugshot=currval('libraryfilealias_id_seq')
            WHERE name='stub'
            """)

        # And a non expired blob
        cur.execute("""
            INSERT INTO LibraryFileContent (filesize, sha1, md5, sha256)
            VALUES (666, 'whatever', 'whatever', 'whatever')
            """)
        cur.execute("""SELECT currval('libraryfilecontent_id_seq')""")
        self.unexpired_lfc_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO LibraryFileAlias (
                content, filename, mimetype, expires)
            VALUES (
                %s, 'whatever', 'whatever',
                CURRENT_TIMESTAMP + '1 day'::interval
                )
            """, (self.unexpired_lfc_id,))
        cur.execute("""SELECT currval('libraryfilealias_id_seq')""")
        self.unexpired_lfa_id = cur.fetchone()[0]

        cur.execute("""
            INSERT INTO TemporaryBlobStorage (uuid, file_alias)
            VALUES ('uuid3', %s)
            """, (self.unexpired_lfa_id,))
        cur.execute("""SELECT currval('temporaryblobstorage_id_seq')""")
        self.unexpired_blob_id = cur.fetchone()[0]
        self.layer.txn.commit()

        # Make sure all the librarian files actually exist on disk
        cur = cursor()
        cur.execute("SELECT id FROM LibraryFileContent")
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            if not os.path.exists(path):
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                open(path, 'w').write('whatever')
        self.layer.txn.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # Open a connection for our test
        self.con = connect(
            user=config.librarian_gc.dbuser,
            isolation=ISOLATION_LEVEL_AUTOCOMMIT)

        self.patch(librariangc, 'log', BufferLogger())
def main():
    parser = LPOptionParser()
    db_options(parser)
    parser.add_option(
        "-f", "--from", dest="from_ts", type=datetime,
        default=None, metavar="TIMESTAMP",
        help="Use statistics collected since TIMESTAMP.")
    parser.add_option(
        "-u", "--until", dest="until_ts", type=datetime,
        default=None, metavar="TIMESTAMP",
        help="Use statistics collected up until TIMESTAMP.")
    parser.add_option(
        "-i", "--interval", dest="interval", type=str,
        default=None, metavar="INTERVAL",
        help=(
            "Use statistics collected over the last INTERVAL period. "
            "INTERVAL is a string parsable by PostgreSQL "
            "such as '5 minutes'."))
    parser.add_option(
        "-n", "--limit", dest="limit", type=int,
        default=15, metavar="NUM",
        help="Display the top NUM items in each category.")
    parser.add_option(
        "-b", "--bloat", dest="bloat", type=float,
        default=40, metavar="BLOAT",
        help="Display tables and indexes bloated by more than BLOAT%.")
    parser.add_option(
        "--min-bloat", dest="min_bloat", type=int,
        default=10000000, metavar="BLOAT",
        help="Don't report tables bloated less than BLOAT bytes.")
    parser.set_defaults(dbuser="******")
    options, args = parser.parse_args()

    if options.from_ts and options.until_ts and options.interval:
        parser.error(
            "Only two of --from, --until and --interval may be specified.")

    con = connect()
    cur = con.cursor()

    tables = list(get_table_stats(cur, options))
    if len(tables) == 0:
        parser.error("No statistics available in that time range.")
    arbitrary_table = tables[0]
    interval = arbitrary_table.date_end - arbitrary_table.date_start
    per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
    if per_second == 0:
        parser.error("Only one sample in that time range.")

    user_cpu = get_cpu_stats(cur, options)
    print "== Most Active Users =="
    print
    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)

    print
    print "== Most Written Tables =="
    print
    tables_sort = [
        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
    most_written_tables = sorted(
        tables, key=attrgetter(*tables_sort), reverse=True)
    for table in most_written_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (
            table.relname, table.total_tup_written / per_second)

    print
    print "== Most Read Tables =="
    print
    # These match the pg_user_table_stats view. schemaname is the
    # namespace (normally 'public'), relname is the table (relation)
    # name. total_tup_read is the total number of rows read.
    # idx_tup_fetch is the number of rows looked up using an index.
    tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname']
    most_read_tables = sorted(
        tables, key=attrgetter(*tables_sort), reverse=True)
    for table in most_read_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (
            table.relname, table.total_tup_read / per_second)

    table_bloat_stats = get_bloat_stats(cur, options, 'r')

    if not table_bloat_stats:
        print
        print "(There is no bloat information available in this time range.)"

    else:
        print
        print "== Most Bloated Tables =="
        print
        for bloated_table in table_bloat_stats[:options.limit]:
            print "%40s || %2d%% || %s of %s" % (
                bloated_table.name,
                bloated_table.end_bloat_percent,
                bloated_table.bloat_size,
                bloated_table.table_size)

        index_bloat_stats = get_bloat_stats(cur, options, 'i')

        print
        print "== Most Bloated Indexes =="
        print
        for bloated_index in index_bloat_stats[:options.limit]:
            print "%65s || %2d%% || %s of %s" % (
                bloated_index.sub_name,
                bloated_index.end_bloat_percent,
                bloated_index.bloat_size,
                bloated_index.table_size)

        # Order bloat delta report by size of bloat increase.
        # We might want to change this to percentage bloat increase.
        bloating_sort_key = lambda x: x.delta_bloat_len

        table_bloating_stats = sorted(
            table_bloat_stats, key=bloating_sort_key, reverse=True)

        if table_bloating_stats[0].num_samples <= 1:
            print
            print fill(dedent("""\
                (There are not enough samples in this time range to display
                bloat change statistics)
                """))
        else:
            print
            print "== Most Bloating Tables =="
            print

            for bloated_table in table_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_table.delta_bloat_percent > 0:
                    print "%40s || +%4.2f%% || +%s" % (
                        bloated_table.name,
                        bloated_table.delta_bloat_percent,
                        bloated_table.delta_bloat_size)

            index_bloating_stats = sorted(
                index_bloat_stats, key=bloating_sort_key, reverse=True)

            print
            print "== Most Bloating Indexes =="
            print
            for bloated_index in index_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_index.delta_bloat_percent > 0:
                    print "%65s || +%4.2f%% || +%s" % (
                        bloated_index.sub_name,
                        bloated_index.delta_bloat_percent,
                        bloated_index.delta_bloat_size)
def main():
    # XXX: Tom Haddon 2007-07-12
    # There's a lot of untested stuff here: parsing options -
    # this should be moved into a testable location.
    # Also duplicated code in scripts/script-monitor.py
    parser = OptionParser(
            '%prog [options] (minutes) (host:scriptname) [host:scriptname]'
            )
    db_options(parser)
    logger_options(parser)

    (options, args) = parser.parse_args()

    if len(args) < 2:
        print "Must specify time in minutes and " \
            "at least one host and script"
        return 3

    # First argument is the number of minutes into the past
    # we want to look for the scripts on the specified hosts
    try:
        minutes_ago, args = int(args[0]), args[1:]
        start_date = datetime.now() - timedelta(minutes=minutes_ago)

        completed_from = strftime("%Y-%m-%d %H:%M:%S", start_date.timetuple())
        completed_to = strftime(
            "%Y-%m-%d %H:%M:%S", datetime.now().timetuple())

        hosts_scripts = []
        for arg in args:
            try:
                hostname, scriptname = arg.split(':')
            except ValueError:
                print "%r is not in the format 'host:scriptname'" % arg
                return 3
            hosts_scripts.append((hostname, scriptname))
    except ValueError:
        print "Must specify time in minutes and " \
            "at least one host and script"
        return 3

    log = logger(options)

    try:
        log.debug("Connecting to database")
        con = connect()
        error_found = False
        msg = []
        for hostname, scriptname in hosts_scripts:
            failure_msg = check_script(con, log, hostname,
                scriptname, completed_from, completed_to)
            if failure_msg is not None:
                msg.append("%s:%s" % (hostname, scriptname))
                error_found = True
        if error_found:
            # Construct our return message
            print "Scripts failed to run: %s" % ', '.join(msg)
            return 2
        else:
            # Construct our return message
            print "All scripts ran as expected"
            return 0
    except Exception as e:
        # Squeeze the exception type and stringification of the exception
        # value on to one line.
        print "Unhandled exception: %s %r" % (e.__class__.__name__, str(e))
        return 3