Example #1
0
def check_schema(zk_prefix, tablename, tbl_hash):
    """Verify that a table across an entire tier has the expected schema

    Args:
    zk_prefix - The prefix of the key in the DS ZK node
    tablename - the name of the table to verify
    tbl_hash - the md5sum of the desired CREATE TABLE for the table

    Returns:
    A dictionary with keys that are the hash of the CREATE TABLE statement
    and the values are sets of hostname:port followed by a space and then the
    db on which the incorrect schema was found.
    """
    incorrect = dict()
    zk = host_utils.MysqlZookeeper()
    config = zk.get_ds_mysql_config()
    for (replica_set, info) in config.iteritems():
        if not replica_set.startswith(zk_prefix):
            continue
        master = host_utils.HostAddr(':'.join((info['master']['host'],
                                               str(info['master']['port']))))
        slave = host_utils.HostAddr(':'.join((info['slave']['host'],
                                              str(info['slave']['port']))))
        # Merge per-instance results into the tier-wide mapping. The master
        # and slave loops were previously duplicated verbatim.
        for hashes in (check_instance_table(master, tablename, tbl_hash),
                       check_instance_table(slave, tablename, tbl_hash)):
            for (create_hash, dbs) in hashes.iteritems():
                if create_hash not in incorrect:
                    incorrect[create_hash] = set()
                incorrect[create_hash] = incorrect[create_hash].union(dbs)
    return incorrect
def main():
    """Parse CLI arguments and restore a MySQL backup onto localhost."""
    parser = argparse.ArgumentParser(
        description='Utility to download and restore MySQL xbstream backups')
    parser.add_argument('-b', '--backup_type',
                        help='Type of backup to restore. Default: xtrabackup',
                        default=backup.BACKUP_TYPE_XBSTREAM,
                        choices=(backup.BACKUP_TYPE_LOGICAL,
                                 backup.BACKUP_TYPE_XBSTREAM))
    parser.add_argument('-s', '--source_instance',
                        help=('Which instances backups to restore. Default is '
                              'a best guess based on the hostname.'),
                        default=None)
    parser.add_argument('-d', '--date',
                        help='attempt to restore from a specific date')
    parser.add_argument('-p', '--destination_port',
                        help='Port on localhost on to restore. Default 3306.',
                        default='3306')
    parser.add_argument('--no_repl',
                        help='Setup replication but do not run START SLAVE',
                        default='REQ', action='store_const', const='SKIP')
    parser.add_argument('--add_to_zk',
                        help=('By default the instance will not be added to '
                              'zk. This option will attempt to add the '
                              'instance to zk.'),
                        default='SKIP', action='store_const', const='REQ')
    parser.add_argument('--skip_production_check',
                        help=('DANGEROUS! Skip check of whether the instance '
                              'to be built is already in use'),
                        default=False, action='store_true')
    args = parser.parse_args()

    # Source defaults to None so restore_instance can guess from hostname.
    source = (host_utils.HostAddr(args.source_instance)
              if args.source_instance else None)
    destination = host_utils.HostAddr(
        ':'.join((host_utils.HOSTNAME, args.destination_port)))

    restore_instance(backup_type=args.backup_type,
                     restore_source=source,
                     destination=destination,
                     no_repl=args.no_repl,
                     date=args.date,
                     add_to_zk=args.add_to_zk,
                     skip_production_check=args.skip_production_check)
Example #3
0
def main():
    parser = argparse.ArgumentParser(description='MySQL replication checker')
    parser.add_argument('replica',
                        help='Replica MySQL instance to sanity check '
                        'hostname[:port]')
    parser.add_argument('-w',
                        '--watch_for_catch_up',
                        help='Watch replication for catch up ',
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    slave_hostaddr = host_utils.HostAddr(args.replica)

    if args.watch_for_catch_up:
        mysql_lib.wait_for_catch_up(slave_hostaddr)
    else:
        ret = mysql_lib.calc_slave_lag(slave_hostaddr)
        print "Heartbeat_seconds_behind: {sbm}".format(sbm=ret['sbm'])
        print "Slave_IO_Running: {Slave_IO_Running} ".format(
            Slave_IO_Running=ret['ss']['Slave_IO_Running'])
        print "IO_lag_bytes: {io_bytes}".format(io_bytes=ret['io_bytes'])
        print "IO_lag_binlogs: {io_binlogs}".format(
            io_binlogs=ret['io_binlogs'])
        print "Slave_SQL_Running: {Slave_IO_Running} ".format(
            Slave_IO_Running=ret['ss']['Slave_SQL_Running'])
        print "SQL_lag_bytes: {sql_bytes}".format(sql_bytes=ret['sql_bytes'])
        print "SQL_lag_binlogs: {sql_binlogs}".format(
            sql_binlogs=ret['sql_binlogs'])
def main():
    """CLI entry point for CSV backups of the local MySQL instance."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--db', default=None,
                        help='DB to export, default is all databases.')
    parser.add_argument('--force_table', default=None,
                        help='Table to export, default is all tables.')
    parser.add_argument('--force_reupload', default=False,
                        action='store_true',
                        help='Ignore existing uploads, reupload everyting')
    parser.add_argument('--loglevel', default='INFO',
                        help='Change logging verbosity',
                        choices=set(['INFO', 'DEBUG']))
    parser.add_argument('--dev_bucket', default=False,
                        action='store_true',
                        help='Use the dev bucket, useful for testing')
    args = parser.parse_args()
    logging.basicConfig(level=getattr(logging, args.loglevel.upper(), None))

    # Nope, don't even start.
    if os.path.isfile(backup.CSV_BACKUP_SKIP_FILE):
        log.info('Found {}. Skipping CSV backup '
                 'run.'.format(backup.CSV_BACKUP_SKIP_FILE))
        return

    # If we ever want to run multi instance, this wil need to be updated
    local_instance = host_utils.HostAddr(host_utils.HOSTNAME)
    exporter = mysql_backup_csv(local_instance, args.db, args.force_table,
                                args.force_reupload, args.dev_bucket)
    exporter.backup_instance()
Example #5
0
def collectTableStats(db):
    """ Collect table stats

    Args:
    db - a db object
    """
    # Schemas that map to a namespace are aggregated per-namespace; the
    # rest are aggregated together in one catch-all group.
    global collection_time, last_collection_time
    instance = host_utils.HostAddr(':'.join((socket.gethostname(),
                                             db.port)))
    by_namespace = dict()
    no_namespace = set()
    for schema in mysql_lib.get_dbs(instance):
        ns = get_namespace_from_schema(schema)
        if ns:
            by_namespace.setdefault(ns, set()).add(schema)
        else:
            no_namespace.add(schema)
    # Table-level stats: per namespace, then the catch-all group.
    for ns, schemas in by_namespace.iteritems():
        for row in get_tablestats(db, schemas):
            printmetrics_tablestat(db, row, ns)
    if no_namespace:
        for row in get_tablestats(db, no_namespace):
            printmetrics_tablestat(db, row)
    # Schema-level stats, same grouping.
    for ns, schemas in by_namespace.iteritems():
        for row in get_schemastats(db, schemas):
            printmetrics_schemastats(db, row, ns)
    if no_namespace:
        for row in get_schemastats(db, no_namespace):
            printmetrics_schemastats(db, row)
    db.query("FLUSH NO_WRITE_TO_BINLOG TABLE_STATISTICS")
def determine_replacement_role(conn, instance_id):
    """ Try to determine the role an instance should be placed into

    Args:
    conn - A connection to the reporting server
    instance_id - The replacement instance

    Returns:
    The replication role which should be either 'slave' or 'dr_slave'

    Raises:
    Exception - if the replaced host cannot be found, was a master, or has
                no determinable role.
    """
    zk = host_utils.MysqlZookeeper()
    cursor = conn.cursor()
    sql = ("SELECT old_host "
           "FROM mysqlops.host_replacement_log "
           "WHERE new_instance = %(new_instance)s ")
    params = {'new_instance': instance_id}
    cursor.execute(sql, params)
    log.info(cursor._executed)
    result = cursor.fetchone()
    if result is None:
        raise Exception('Could not determine replacement host')

    old_host = host_utils.HostAddr(result['old_host'])
    log.info('Host to be replaced is {old_host}'
             ''.format(old_host=old_host.hostname))

    (_, repl_type) = zk.get_replica_set_from_instance(old_host)

    # The new host inherits the old host's role; replacing a master this
    # way would be unsafe, so refuse outright.
    if repl_type == host_utils.REPLICA_ROLE_MASTER:
        raise Exception('Cowardly refusing to replace a master!')
    elif repl_type is None:
        raise Exception('Could not determine replacement role')
    else:
        return repl_type
def main():
    description = ("MySQL orpahned shard detector\n\n"
                   "This utility will attempt to find orphaned databases "
                   "across sharddb and modsharddb")

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Check a single instance rather than all',
                        default=False)
    args = parser.parse_args()

    if args.instance:
        instance = host_utils.HostAddr(args.instance)
    else:
        instance = False

    orphaned, orphaned_but_used, missing = find_shard_mismatches(instance)

    for orphan in orphaned:
        print 'Orphan dbs {host} {dbs}'.format(host=orphan,
                                               dbs=','.join(orphaned[orphan]))
    for orphan in orphaned_but_used:
        print 'Orphan but still used dbs {host} {dbs}'.format(
            host=orphan, dbs=','.join(orphaned_but_used[orphan]))
    for orphan in missing:
        print 'Missing dbs {host} {dbs}'.format(host=orphan,
                                                dbs=','.join(missing[orphan]))

    if not orphaned and not orphaned_but_used and not missing:
        print "No problems found"
def main():
    """Check whether the CSV backup (ETL) is running on the paired slave.

    Exits through one of the exit_* helpers with a status suitable for
    monitoring.
    """
    parser = argparse.ArgumentParser(description="Is ETL running on a "
                                     "different instance?")
    parser.add_argument('instance',
                        nargs='?',
                        help="Instance to inspect, default is localhost:3306",
                        default=''.join((host_utils.HOSTNAME, ':3306')))
    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)

    zk = host_utils.MysqlZookeeper()
    (replica_set, replica_type) = zk.get_replica_set_from_instance(instance)

    # The "other" instance is the slave/dr_slave partner of this one.
    if replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
        inst = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_SLAVE)
    elif replica_type == host_utils.REPLICA_ROLE_SLAVE:
        inst = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_DR_SLAVE)
    else:
        exit_unknown_error()

    if not inst:
        # if there is not another slave in zk, there is not possibility
        # it is ok
        exit_other_slave_not_running_etl()
    try:
        running = mysql_backup_status.csv_backups_running(instance)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are
        # no longer swallowed; any probe failure still means "not running".
        exit_other_slave_not_running_etl()

    if not running:
        exit_other_slave_not_running_etl()

    exit_other_slave_running_etl()
def main():
    """Parse CLI arguments and run a MySQL master failover."""
    parser = argparse.ArgumentParser()
    parser.add_argument('instance', help='The master to be demoted')
    parser.add_argument('--trust_me_its_dead',
                        help=('You say you know what you are doing. We are '
                              'going to trust you and hope for the best'),
                        default=False,
                        action='store_true')
    # Typo fixes in help text: 'alreaedy' and 'what you looking for'.
    parser.add_argument('--ignore_dr_slave',
                        help=('Need to promote, but already have a dead '
                              'dr_slave? This option is what you are looking '
                              'for. The dr_slave will be completely '
                              'ignored.'),
                        default=False,
                        action='store_true')
    parser.add_argument('--dry_run',
                        help=('Do not actually run a promotion, just run '
                              'safety checks, etc...'),
                        default=False,
                        action='store_true')
    parser.add_argument('--skip_lock',
                        help=('Do not take a promotion lock. Scary.'),
                        default=False,
                        action='store_true')
    parser.add_argument('--kill_old_master',
                        help=('If we can not get the master into read_only, '
                              ' send a mysqladmin kill to the old master.'),
                        default=False,
                        action='store_true')
    args = parser.parse_args()

    instance = host_utils.HostAddr(args.instance)
    mysql_failover(instance, args.dry_run, args.skip_lock,
                   args.ignore_dr_slave, args.trust_me_its_dead,
                   args.kill_old_master)
Example #10
0
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('-i',
                        '--instance',
                        help='The instance to query.  This should '
                        'be the master of a replica set, but '
                        'if you supply a non-master, the script '
                        'will query the master anyway.')
    parser.add_argument('timestamp',
                        help='The timestamp to rewind to.  This must '
                        'be in MySQL format: YYYY-MM-DD HH:MM:SS')
    args = parser.parse_args()
    try:
        instance = host_utils.HostAddr(args.instance)
        zk = host_utils.MysqlZookeeper()
        rt = zk.get_replica_type_from_instance(instance)
        if rt != host_utils.REPLICA_ROLE_MASTER:
            instance = zk.get_mysql_instance_from_replica_set(
                zk.get_replica_set_from_instance(instance),
                host_utils.REPLICA_ROLE_MASTER)
            log.info('Detected master of {i} as {m}'.format(i=args.instance,
                                                            m=instance))
        timestamp = dt.datetime.strptime(args.timestamp, MYSQL_DT_FORMAT)
    except Exception as e:
        log.error("Error in argument parsing: {}".format(e))

    gtid = find_gtid_for_timestamp(instance, timestamp)
    if gtid:
        print gtid
    else:
        sys.exit(255)
Example #11
0
def main():
    """Find orphaned databases across sharded MySQL systems and log them."""
    # Typo fix in user-facing text: 'orpahned' -> 'orphaned'.
    description = ("MySQL orphaned shard detector\n\n"
                   "This utility will attempt to find orphaned databases "
                   "across sharded MySQL systems")

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Check a single instance rather than all',
                        default=False)
    args = parser.parse_args()

    if args.instance:
        instance = host_utils.HostAddr(args.instance)
    else:
        instance = False

    orphaned, orphaned_but_used, missing = find_shard_mismatches(instance)

    for o in orphaned:
        log.info('Orphan dbs: {host} {dbs}'.format(host=o,
                                                   dbs=','.join(orphaned[o])))

    for obu in orphaned_but_used:
        log.info('Orphan, but still used, dbs: {host} {dbs}'.format(
            host=obu, dbs=','.join(orphaned_but_used[obu])))

    for m in missing:
        log.info('Missing dbs:{host} {dbs}'.format(host=m,
                                                   dbs=','.join(missing[m])))

    if not (orphaned or orphaned_but_used or missing):
        log.info('No problems found!')
def main():
    """Kill in-flight MySQL backups on this host unless the stop file exists."""
    local_host = host_utils.HostAddr(host_utils.HOSTNAME)
    if os.path.isfile(TOUCH_STOP_KILLING):
        log.info('Found {path}.  Will not kill backups.\n'
                 'Exiting now.'.format(path=TOUCH_STOP_KILLING))
    else:
        kill_mysql_backup(local_host)
        kill_xtrabackup()
def main():
    """Run xtrabackup against the local instance on the requested port."""
    parser = argparse.ArgumentParser(description='xtrabackup wrapper')
    parser.add_argument('-p', '--port',
                        help='Port to backup on localhost (default: 3306)',
                        default='3306')
    args = parser.parse_args()
    target = host_utils.HostAddr(':'.join((socket.getfqdn(), args.port)))
    xtrabackup_backup_instance(target)
def main():
    """Purge old backups (and fix permissions) for the local instance."""
    parser = argparse.ArgumentParser(description='cleanup and chmod backups')
    parser.add_argument('-p',
                        '--port',
                        help='Port to backup on localhost (default: 3306)',
                        default='3306')
    args = parser.parse_args()
    # Consistency: build host:port with ':'.join((host, port)) like the
    # sibling utilities, instead of ''.join((host, ':', port)).
    instance = host_utils.HostAddr(':'.join((socket.getfqdn(), args.port)))
    purge_mysql_backups(instance)
def main():
    parser = argparse.ArgumentParser(description='MySQL schema verifier')
    parser.add_argument(
        'instance_type',
        help='Type of MySQL instance to verify',
        choices=environment_specific.SHARDED_DBS_PREFIX_MAP.keys())
    parser.add_argument(
        'table',
        help='Table to check',
    )
    parser.add_argument(
        'seed_instance',
        help=('Which host from which to fetch a table '
              ' definition. (format hostname[:port])'),
    )
    parser.add_argument('seed_db',
                        help=('Which db on --seed_instance from which to fetch'
                              ' a table definition. (ex pbdata012345)'))
    args = parser.parse_args()
    zk_prefix = environment_specific.SHARDED_DBS_PREFIX_MAP[
        args.instance_type]['zk_prefix']
    seed_instance = host_utils.HostAddr(args.seed_instance)
    desired = mysql_lib.show_create_table(seed_instance, args.seed_db,
                                          args.table)
    tbl_hash = hashlib.md5(desired).hexdigest()
    print("Desired table definition:\n{desired}").format(desired=desired)
    incorrect = check_schema(zk_prefix, args.table, tbl_hash)
    if len(incorrect) == 0:
        print "It appears that all schema is synced"
        sys.exit(0)

    d = difflib.Differ()
    for problem in incorrect.iteritems():
        represenative = list(problem[1])[0].split(' ')
        hostaddr = host_utils.HostAddr(represenative[0])
        create = mysql_lib.show_create_table(hostaddr, represenative[1],
                                             args.table)
        diff = d.compare(desired.splitlines(), create.splitlines())
        print 'The following difference has been found:'
        print '\n'.join(diff)
        print "It is present on the following db's:"
        print '\n'.join(list(problem[1]))
    sys.exit(1)
def main():
    """Rename, revert, or drop sharded databases on a MySQL instance."""
    action_desc = """Action description

rename - after checking no recent changes and shard not in zk,
         create a db with the old name appended to 'dropme_'. Then
         copy all tables to the new db
revert_rename - Copy all tables back from a 'dropme_' to their original table
drop - This should be run a few days after a rename. Drop the empty original
       db, and drop the 'dropme_' db.
"""

    parser = argparse.ArgumentParser(description='MySQL shard cleanup utility',
                                     epilog=action_desc,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Instance to act on if other than localhost:3306',
                        default=''.join((socket.getfqdn(), ':3306')))
    parser.add_argument('-a',
                        '--action',
                        choices=('rename',
                                 'revert_rename',
                                 'drop',),
                        required=True)
    # Typo fix in help text: 'seperated' -> 'separated'.
    parser.add_argument('-d',
                        '--dbs',
                        help=("Comma separated list of db's to act upon"),
                        required=True)
    parser.add_argument('-r',
                        '--dry_run',
                        help=("Do not change any state"),
                        default=False,
                        action='store_true')
    parser.add_argument('-v',
                        '--verbose',
                        default=False,
                        action='store_true')

    args = parser.parse_args()
    dbs = set(args.dbs.split(','))
    instance = host_utils.HostAddr(args.instance)

    if args.action == 'rename':
        rename_db_to_drop(instance, dbs, args.verbose, args.dry_run)
    elif args.action == 'revert_rename':
        conn = mysql_lib.connect_mysql(instance)
        for db in dbs:
            mysql_lib.move_db_contents(conn=conn,
                                       old_db=''.join((DB_PREPEND, db)),
                                       new_db=db,
                                       verbose=args.verbose,
                                       dry_run=args.dry_run)
    elif args.action == 'drop':
        drop_db_after_rename(instance, dbs, args.verbose, args.dry_run)
Example #17
0
def main():
    """Parse arguments and fence the named server (optionally dry-run)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('server', help='The server to be fenced')
    parser.add_argument('--dry_run',
                        help=('Do not actually fence the host, '
                              'just show the intended '
                              'configuration'),
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    target = host_utils.HostAddr(args.server)
    add_fence_to_host(hostname=target, dry_run=args.dry_run)
Example #18
0
def check_replication_for_migration(source_replica_set,
                                    destination_replica_set):
    """ Confirm that replication is sane for finishing a shard migration

    Args:
    source_replica_set - Where shards are coming from
    destination_replica_set - Where shards are being sent

    Raises:
    Exception - if the source master is itself replicating, or the
                destination master is not replicating from the source master.
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    # First we will confirm that the slave of the source is caught up
    # this is important for row count comparisons
    mysql_lib.assert_replication_unlagged(
        source_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the slave of the destination replica set for the same reason
    mysql_lib.assert_replication_unlagged(
        destination_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the destination master is relatively caught up to the source master
    mysql_lib.assert_replication_unlagged(
        destination_master, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # We will also verify that the source master is not replicating. A scary
    # scenario is if there is some sort of ring replication going and db
    # drops of blackhole db's would propagate to the source db.
    try:
        source_slave_status = mysql_lib.get_slave_status(source_master)
    except mysql_lib.ReplicationError:
        source_slave_status = None

    if source_slave_status:
        raise Exception('Source master is setup for replication '
                        'this is super dangerous!')

    # We will also verify that the destination master is replicating from the
    # source master
    slave_status = mysql_lib.get_slave_status(destination_master)
    master_of_destination_master = host_utils.HostAddr(':'.join(
        (slave_status['Master_Host'], str(slave_status['Master_Port']))))
    if source_master != master_of_destination_master:
        # Bug fix: the message previously reported the destination master as
        # the expected value; the expected master is the source master.
        raise Exception('Master of destination {d} is {actual} rather than '
                        'expected {expected} '
                        ''.format(d=destination_master,
                                  actual=master_of_destination_master,
                                  expected=source_master))
    log.info('Replication looks ok for migration')
Example #19
0
def collectReplicationStatus(db):
    """ Collect replication stats using mysql_lib.calc_slave_lag """
    local_instance = host_utils.HostAddr(
        ':'.join((socket.gethostname(), db.port)))
    lag = mysql_lib.calc_slave_lag(local_instance)
    # Emit one metric per lag dimension; thread states become 0/1 flags.
    printmetric(db, "slave.seconds_behind_master", lag['sbm'])
    printmetric(db, "slave.io_bytes_behind", lag["io_bytes"])
    printmetric(db, "slave.sql_bytes_behind", lag["sql_bytes"])
    io_running = lag['ss']['Slave_IO_Running'].lower() == 'yes'
    sql_running = lag['ss']['Slave_SQL_Running'].lower() == 'yes'
    printmetric(db, "slave.thread_io_running", int(io_running))
    printmetric(db, "slave.thread_sql_running", int(sql_running))
Example #20
0
def process_mysql_shutdown(hostname=None, dry_run=False):
    """ Check stats, and shutdown MySQL instances

    Args:
    hostname - If supplied, only act on this host (it must already be in
               the shutdown queue)
    dry_run - If True, log intent but do not change any state
    """
    zk = host_utils.MysqlZookeeper()
    # NOTE: dropped an unused get_mysql_user_for_role('admin') lookup.
    shutdown_instances = get_retirement_queue_servers(SHUTDOWN_MYSQL)

    if hostname:
        if hostname in shutdown_instances:
            log.info('Only acting on {}'.format(hostname))
            shutdown_instances = {hostname: shutdown_instances[hostname]}
        else:
            log.info('Supplied host {} is not ready '
                     'for shutdown'.format(hostname))
            return

    for instance in shutdown_instances:
        if instance in get_protected_hosts('set'):
            # Bug fix: previously logged/dequeued the 'hostname' argument
            # (often None) rather than the instance being examined.
            log.warning('Host {hostname} is protected from '
                        'retirement'.format(hostname=instance))
            remove_from_retirement_queue(instance)
            continue
        in_zk = False
        for active_instance in zk.get_all_mysql_instances():
            if active_instance.hostname == instance:
                log.warning("It appears {instance} is in zk. This is "
                            "very dangerous! If you got to here, you may be "
                            "trying to turn down a replica set. Please remove "
                            "it from zk and try again"
                            "".format(instance=instance))
                in_zk = True
                break
        if in_zk:
            # Bug fix: the old 'continue' only continued the inner zk loop,
            # so a host found in zk would still be shut down.
            continue

        if dry_run:
            log.info('In dry_run mode, not changing state')
            continue

        try:
            if check_for_user_activity(shutdown_instances[instance]):
                log.info('Activity detected on {}, removing from queue'
                         ''.format(instance))
                # Bug fix: dequeue the active instance, not the (possibly
                # None) 'hostname' argument.
                remove_from_retirement_queue(instance)
                continue
            else:
                log.info('Shutting down mysql on {}'.format(instance))
                mysql_lib.shutdown_mysql(host_utils.HostAddr(instance))
        except MySQLdb.OperationalError as detail:
            (error_code, msg) = detail.args
            if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
                raise
            log.warning("Can't connect to MySQL on {}".format(instance))

        log_to_retirement_queue(instance,
                                shutdown_instances[instance]['instance_id'],
                                SHUTDOWN_MYSQL)
def main():
    parser = argparse.ArgumentParser(description="Print out the replica role "
                                     "for an instance")
    parser.add_argument('instance',
                        nargs='?',
                        help="Instance to inspect, default is localhost:3306",
                        default=''.join((host_utils.HOSTNAME, ':3306')))
    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)

    zk = host_utils.MysqlZookeeper()
    replica_type = zk.get_replica_type_from_instance(instance)
    print replica_type
Example #22
0
def check_for_user_activity(instance):
    """ Check whether a retirement-queue server still has user activity

    Args:
    instance - a retirement queue entry (dict with at least 'hostname')

    Returns:
    True if unexpected user activity or connections are found, False
    otherwise (including when the check itself could not be completed).
    """
    # NOTE: dropped an unused get_mysql_user_for_role('admin') lookup.
    # check mysql activity
    log.info('Checking activity on {}'.format(instance['hostname']))

    activity = mysql_lib.get_user_activity(
        host_utils.HostAddr(instance['hostname']))
    unexpected = set(activity.keys()).difference(IGNORABLE_USERS)

    if unexpected:
        log.error('Unexpected activity on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True

    log.info('Checking current connections on '
             '{instance}'.format(instance=instance['hostname']))
    # try catch here due to the query creates the temp file will break our
    # code if disk space is full
    try:
        connected_users = mysql_lib.get_connected_users(
            host_utils.HostAddr(instance['hostname']))
    except MySQLdb.InternalError as detail:
        (err_code, msg) = detail.args
        if err_code == mysql_lib.MYSQL_ERROR_CANT_CREATE_WRITE_TO_FILE:
            log.info('No space left on device')
            return False
        # Bug fix: previously fell through with connected_users unbound,
        # causing a NameError below. Re-raise unknown InternalErrors.
        raise
    except Exception:
        # Narrowed from a bare except; any other failure is treated as
        # best-effort "no activity detected".
        log.info('Something else is not correct here')
        return False
    unexpected = connected_users.difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected connection on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True
    return False
def find_shard_mismatches(instance=False):
    """ Find shards that are missing or unexpected in modsharddb and sharddb

    Args:
    instance - If supplied, only check this instance.

    Returns:
    orphaned - A dict of unexpected and (according to table statistics)
               unused shards. Key is master instance, value is a set.
    orphaned_but_used - A dict of unexpected but used shards.
                        Data structure is the same as orphaned.
    missing - A dict of expected but missing shards.
              Data structure is the same as orphaned.
    """
    orphaned = dict()
    orphaned_but_used = dict()
    missing_shards = dict()

    zk = host_utils.MysqlZookeeper()
    host_shard_map = zk.get_host_shard_map()

    # Restrict the map to a single master when an instance was supplied.
    if instance:
        key = str(instance)
        host_shard_map = {key: host_shard_map[key]}

    for master, expected_shards in host_shard_map.iteritems():
        master_instance = host_utils.HostAddr(master)
        conn = mysql_lib.connect_mysql(master_instance)
        activity = mysql_lib.get_dbs_activity(conn)
        actual_shards = mysql_lib.get_dbs(conn)
        unexpected_shards = actual_shards.difference(expected_shards)
        missing = expected_shards.difference(actual_shards)
        if missing:
            missing_shards[master] = missing

        for db in unexpected_shards:
            # A shard with changed rows counts as "used"; otherwise it is
            # merely orphaned.
            if activity[db]['ROWS_CHANGED'] != 0:
                orphaned_but_used.setdefault(master, set()).add(db)
            else:
                orphaned.setdefault(master, set()).add(db)

    return orphaned, orphaned_but_used, missing_shards
def main():
    """ CLI entry point: back up the local MySQL instance.

    Parses the port and backup type from the command line, builds a
    HostAddr for localhost:port and hands it to mysql_backup.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--port',
                        default='3306',
                        help='Port to backup on localhost (default: 3306)')
    parser.add_argument('-b', '--backup_type',
                        default=backup.BACKUP_TYPE_XBSTREAM,
                        choices=backup.BACKUP_TYPES,
                        help='Type of backup to run.')
    args = parser.parse_args()

    target = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, args.port)))
    mysql_backup(target, args.backup_type)
def main():
    parser = argparse.ArgumentParser(description='MySQL backup reporting')
    parser.add_argument("-d",
                        "--date",
                        default=time.strftime('%Y-%m-%d'),
                        help="Backup date. Ex: 2013-12-12")
    parser.add_argument("-f",
                        "--show_found",
                        default=False,
                        action='store_true',
                        help="Display found backups")
    parser.add_argument("-i",
                        "--instance",
                        default=host_utils.HOSTNAME,
                        help=("Check backup status for this instance if "
                              "not the default (localhost:3306)"))
    parser.add_argument("-a",
                        "--all",
                        action='store_true',
                        help="Check all replica sets")

    args = parser.parse_args()
    zk = host_utils.MysqlZookeeper()

    if args.all:
        replica_sets = zk.get_all_mysql_replica_sets()
    else:
        instance = host_utils.HostAddr(args.instance)

        # if we aren't in ZK, we will exit with a special return code
        # that can be picked up by the nagios check.
        try:
            (replica_set, _) = zk.get_replica_set_from_instance(instance)
            replica_sets = set([replica_set])
        except Exception as e:
            print "Nothing known about backups for {i}: {e}".format(i=instance,
                                                                    e=e)
            sys.exit(BACKUP_NOT_IN_ZK_RETURN)

    return_code = BACKUP_OK_RETURN
    for replica_set in replica_sets:
        found_backup = find_mysql_backup(replica_set, args.date)
        if found_backup is not None:
            if args.show_found:
                print "{file}".format(file=found_backup)
        else:
            print "Backup not found for replica set {rs}".format(
                rs=replica_set)
            return_code = BACKUP_MISSING_RETURN
    sys.exit(return_code)
def find_unused_server_name(replica_set, conn, dry_run):
    """ Find the next unused numeric hostname for a replica set

    The current naming convention for db servers is:
    {Shard Type}-{Shard number}-{Server number}


    Note: The previous naming convention for db servers was:
    {Shard Type}{Shard number}{Server letter}

    Old-style single-letter identifiers are skipped; this function finds
    the next server number that is not used.

    Args:
    replica_set - The replica set of the host to be replaced
    conn -  A mysql connection to the reporting server
    dry_run - don't log that a hostname will be used

    Returns:
    The first hostname '{replica_set}-{N}' not already claimed.
    """
    cmdb_servers = environment_specific.get_all_replica_set_servers(
        replica_set)
    next_host_num = 1
    for server in cmdb_servers:
        host = host_utils.HostAddr(server['config.name'])

        # We should be able to iterate over everything that came back from the
        # cmdb and find out the greatest host number in use for a replica set
        if not host.host_identifier:
            # unparsable, probably not previously under dba management
            continue

        # BUG FIX: the original used range(ord('a'), ord('z')) whose upper
        # bound is exclusive, so an identifier of exactly 'z' slipped past
        # this guard and int('z') below raised ValueError. Compare against
        # the inclusive a..z range instead.
        if (len(host.host_identifier) == 1
                and 'a' <= host.host_identifier <= 'z'):
            # old style hostname
            continue

        if int(host.host_identifier) >= next_host_num:
            next_host_num = int(host.host_identifier) + 1
    new_hostname = '-'.join((replica_set, str(next_host_num)))

    while True:
        if is_hostname_new(new_hostname, conn):
            if not dry_run:
                log_new_hostname(new_hostname, conn)
            return new_hostname

        # Name was reserved in the log but never materialized; keep probing.
        log.info('Hostname {hostname} has been logged to be in use but is not '
                 'in brood or dns'.format(hostname=new_hostname))
        next_host_num = next_host_num + 1
        new_hostname = '-'.join((replica_set, str(next_host_num)))
def archive_mysql_binlogs(port, dry_run):
    """ Flush logs and upload all binary logs that don't exist to s3

    Arguments:
    port - Port of the MySQL instance on which to act
    dry_run - Display output but do not upload
    """
    binlog_rotator.rotate_binlogs_if_needed(port, dry_run)
    zk = host_utils.MysqlZookeeper()
    instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME,
                                             str(port))))

    if zk.get_replica_set_from_instance(instance)[0] is None:
        log.info('Instance is not in production, exiting')
        return

    lock_handle = None
    ensure_binlog_archiving_table_sanity(instance)
    try:
        log.info('Taking binlog archiver lock')
        lock_handle = host_utils.take_flock_lock(BINLOG_LOCK_FILE)
        log_bin_dir = host_utils.get_cnf_setting('log_bin', port)
        bin_logs = mysql_lib.get_master_logs(instance)
        logged_uploads = get_logged_binlog_uploads(instance)
        # Skip the last binlog in the list: it is the active one still
        # being written to.
        for binlog in bin_logs[:-1]:
            err_count = 0
            local_file = os.path.join(os.path.dirname(log_bin_dir),
                                      binlog['Log_name'])
            if already_uploaded(instance, local_file, logged_uploads):
                continue
            success = False
            while not success:
                try:
                    upload_binlog(instance, local_file, dry_run)
                    success = True
                # BUG FIX: was a bare 'except:', which also swallowed
                # KeyboardInterrupt/SystemExit and made the retry loop
                # nearly impossible to abort.
                except Exception:
                    if err_count > MAX_ERRORS:
                        # BUG FIX: the message referenced a nonexistent
                        # MAX_THREAD_ERROR; the constant checked is MAX_ERRORS.
                        log.error('Error count in thread > MAX_ERRORS. '
                                  'Aborting :(')
                        raise

                    log.error('error: {e}'.format(e=traceback.format_exc()))
                    err_count = err_count + 1
                    # Linear backoff between retries.
                    time.sleep(err_count*2)
        log.info('Archiving complete')
    finally:
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
def main():
    """ CLI entry point: add or swap MySQL replicas in ZooKeeper.

    Parses an action and an instance from the command line and dispatches
    to the matching zk-modification helper. Dry runs suppress chat
    notifications; swap_master_and_slave additionally requires the
    --dangerous flag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('action',
                        choices=[
                            'add_slave', 'add_dr_slave', 'auto',
                            'swap_master_and_slave', 'swap_slave_and_dr_slave'
                        ],
                        default='auto',
                        help=("What modification to make. If 'auto', the host "
                              "replacement log will be used to determine what "
                              "what role to use. Default is auto."))
    parser.add_argument('instance', help='What instance to act upon')
    parser.add_argument('--dry_run',
                        action='store_true',
                        default=False,
                        help=('Do not actually modify zk, just show '
                              'what would be modify'))
    parser.add_argument('--dangerous',
                        action='store_true',
                        default=False,
                        help=('If you need to swap_master_and_slave in zk'
                              'outside of the failover script, that is '
                              'dangerous and you will need this flag.'))
    args = parser.parse_args()
    action = args.action
    target = host_utils.HostAddr(args.instance)

    if args.dry_run:
        # Dry runs should not notify chat.
        log.removeHandler(chat_handler)

    if action == 'auto':
        auto_add_instance_to_zk(target, args.dry_run)
    elif action == 'add_slave':
        add_replica_to_zk(target, host_utils.REPLICA_ROLE_SLAVE,
                          args.dry_run)
    elif action == 'add_dr_slave':
        add_replica_to_zk(target, host_utils.REPLICA_ROLE_DR_SLAVE,
                          args.dry_run)
    elif action == 'swap_slave_and_dr_slave':
        swap_slave_and_dr_slave(target, args.dry_run)
    elif action == 'swap_master_and_slave':
        if not args.dangerous:
            raise Exception('To swap_master_and_slave in zk outside of the '
                            'failover script is very dangerous and the '
                            '--dangerous flag was not supplied.')
        swap_master_and_slave(target, args.dry_run)
    else:
        raise Exception('Invalid action: {action}'.format(action=action))
# Example #29
def check_for_user_activity(instance):
    """ Check an instance for signs of non-system user activity

    Args:
    instance - a dict with at least 'hostname' and 'internal_ip' keys

    Returns:
    True if unexpected user activity or connections (i.e. users not in
    IGNORABLE_USERS) were found, False otherwise.
    """
    # NOTE(review): zk appears unused here; kept in case the constructor
    # has side effects relied on elsewhere — confirm and remove if not.
    zk = host_utils.MysqlZookeeper()
    username, password = mysql_lib.get_mysql_user_for_role('admin')

    # check mysql activity
    log.info('Checking activity on {instance}'.format(
        instance=instance['hostname']))
    # Connectivity probe only: fail fast (3s) if the server is unreachable.
    with timeout.timeout(3):
        conn = MySQLdb.connect(host=instance['internal_ip'],
                               user=username,
                               passwd=password,
                               cursorclass=MySQLdb.cursors.DictCursor)
    if not conn:
        raise Exception('Could not connect to {ip}'
                        ''.format(ip=instance['internal_ip']))
    # BUG FIX: the probe connection was never closed, leaking a MySQL
    # connection on every call.
    conn.close()

    activity = mysql_lib.get_user_activity(
        host_utils.HostAddr(instance['hostname']))
    unexpected = set(activity.keys()).difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected activity on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True

    log.info('Checking current connections on '
             '{instance}'.format(instance=instance['hostname']))
    connected_users = mysql_lib.get_connected_users(
        host_utils.HostAddr(instance['hostname']))
    unexpected = connected_users.difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected connection on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True
    return False
# Example #30
def get_db_type(port):
    """ Get status in replica set via service discovery

    Args:
        port: the port on localhost

    Returns: 'master', 'slave', 'dr_slave' or 'undef'
    """
    try:
        instance = host_utils.HostAddr(':'.join((socket.gethostname(),
                                                 str(port))))
        zk = host_utils.MysqlZookeeper()
        (_, replica_type) = zk.get_replica_set_from_instance(instance)
        return replica_type
    # BUG FIX: was a bare 'except:', which also swallowed
    # KeyboardInterrupt/SystemExit; deliberately best-effort otherwise,
    # so any lookup failure still maps to 'undef'.
    except Exception:
        return 'undef'