Example #1
def confirm_max_replica_lag(replicas,
                            lag_tolerance,
                            dead_master,
                            replicas_synced=False,
                            timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    lag_tolerance - Max computed replication lag in seconds. If 0 is supplied,
                    the exec position of each replica is compared to the
                    master rather than using a computed seconds-behind value,
                    as the heartbeat will be blocked by read_only.
    dead_master - Boolean. If True, the master is assumed dead, so only the
                  SQL thread and the configured master are checked.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long, in seconds, to wait for replication to reach the
              desired state.
    """
    start = time.time()
    if dead_master:
        replication_checks = set(
            [mysql_lib.CHECK_SQL_THREAD, mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        acceptable = True
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                acceptable = False

        if replicas_synced and not confirm_replicas_in_sync(replicas):
            acceptable = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(r=replica))
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)
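
For context, a failover script might gate promotion on this check. A minimal
usage sketch, assuming hostaddr objects come from host_utils.HostAddr as in
the other examples here (hostnames are hypothetical):

# Wait up to 5 minutes for both replicas to be within 2 seconds of the
# dead master and executed to the same binary log position.
replicas = set([host_utils.HostAddr('db1:3306'),
                host_utils.HostAddr('db2:3306')])
confirm_max_replica_lag(replicas,
                        lag_tolerance=2,
                        dead_master=True,
                        replicas_synced=True,
                        timeout=300)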
Example #2
def is_master_alive(master, replicas):
    """ Determine if the master is alive

    The function will:
    1. Attempt to connect to the master via the mysql protocol. If successful,
       the master is considered alive.
    2. If #1 fails, check the io thread of the replica instance(s). If the io
       thread is not running, the master will be considered dead. If step #1
       fails and step #2 succeeds, we are in a weird state and will throw an
       exception.

    Args:
    master - A hostaddr object for the master instance
    replicas -  A set of hostaddr objects for the replica instances

    Returns:
    A mysql connection to the master if the master is alive, False otherwise.
    """
    if len(replicas) == 0:
        raise Exception('At least one replica must be present to determine '
                        'whether a master is dead')
    try:
        master_conn = mysql_lib.connect_mysql(master)
        return master_conn
    except MySQLdb.OperationalError as detail:
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        master_conn = False
        log.info('Unable to connect to current master {master} from '
                 '{hostname}, will check replica servers before declaring '
                 'the master dead'.format(master=master,
                                          hostname=host_utils.HOSTNAME))
    except:
        log.info('This is an unknown connection error. If you are very sure '
                 'that the master is dead, please put a "return False" at the '
                 'top of is_master_alive and then send rwultsch a stack trace')
        raise

    # We can not get a connection to the master, so poll the replica servers
    for replica in replicas:
        # If replication has not hit a timeout, a dead master can still have
        # a replica which thinks it is ok. "STOP SLAVE; START SLAVE" followed
        # by a sleep will get us the truth.
        mysql_lib.restart_replication(replica)
        try:
            mysql_lib.assert_replication_sanity(replica)
        except:
            # This exception is expected: the replica's io thread can not
            # reach the master either.
            log.info('Replica {replica} also can not connect to master '
                     '{master}.'.format(replica=replica,
                                        master=master))
            continue
        raise Exception('Replica {replica} thinks it can connect to '
                        'master {master}, but failover script can not. '
                        'Possible network partition!'
                        ''.format(replica=replica,
                                  master=master))
    return False
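
Callers get double duty from the return value: it is a liveness flag and,
when truthy, an open connection to the master. A sketch of that pattern
(promote_replacement is a hypothetical helper, not part of these examples):

master_conn = is_master_alive(master, replicas)
if master_conn:
    # Master is reachable and we already hold a connection to it.
    cursor = master_conn.cursor()
    cursor.execute('SELECT @@global.read_only')
else:
    # Every replica agrees the master is gone; proceed with failover.
    promote_replacement(replicas)  # hypothetical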
Example #3
def mysql_backup(instance,
                 backup_type=backup.BACKUP_TYPE_XBSTREAM,
                 initial_build=False):
    """ Run a file based backup on a supplied local instance

    Args:
    instance - A hostaddr object
    backup_type - backup.BACKUP_TYPE_LOGICAL or backup.BACKUP_TYPE_XBSTREAM
    initial_build - Boolean, if this is being created right after the server
                    was built
    """
    log.info('Confirming sanity of replication (if applicable)')
    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(instance)
    except:
        # instance is not in production
        replica_type = None

    if replica_type and replica_type != host_utils.REPLICA_ROLE_MASTER:
        mysql_lib.assert_replication_sanity(instance)

    log.info('Logging initial status to mysqlops')
    start_timestamp = time.localtime()
    lock_handle = None
    backup_id = mysql_lib.start_backup_log(instance, backup_type,
                                           start_timestamp)

    # Take a lock to prevent multiple backups from running concurrently
    try:
        log.info('Taking backup lock')
        lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

        # Actually run the backup
        log.info('Running backup')
        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            backup_file = backup.xtrabackup_instance(instance, start_timestamp,
                                                     initial_build)
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            backup_file = backup.logical_backup_instance(
                instance, start_timestamp, initial_build)
        else:
            raise Exception('Unsupported backup type {backup_type}'
                            ''.format(backup_type=backup_type))
    finally:
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)

    # Update database with additional info now that backup is done.
    if backup_id:
        log.info("Updating database log entry with final backup info")
        mysql_lib.finalize_backup_log(backup_id, backup_file)
    else:
        log.info("The backup is complete, but we were not able to "
                 "write to the central log DB.")
Example #4
def confirm_max_replica_lag(replicas, lag_tolerance, dead_master,
                            replicas_synced=False, timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    lag_tolerance - Max computed replication lag in seconds. If 0 is supplied,
                    the exec position of each replica is compared to the
                    master rather than using a computed seconds-behind value,
                    as the heartbeat will be blocked by read_only.
    dead_master - Boolean. If True, the master is assumed dead, so only the
                  SQL thread and the configured master are checked.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long, in seconds, to wait for replication to reach the
              desired state.
    """
    start = time.time()
    if dead_master:
        replication_checks = set([mysql_lib.CHECK_SQL_THREAD,
                                  mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        acceptable = True
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica, replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                mysql_lib.assert_replication_sanity(replica, replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance, dead_master)
            except Exception as e:
                log.warning(e)
                acceptable = False

        if replicas_synced and not confirm_replicas_in_sync(replicas):
            acceptable = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(r=replica))
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)
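
The restart-then-recheck step above is worth isolating: one restart is
attempted, and a second failure propagates to the caller. A compact sketch
of that retry idiom against the same assumed mysql_lib API:

def ensure_replication_sane(replica, checks, pause=20):
    # Try once; on failure restart replication, wait, and re-assert.
    try:
        mysql_lib.assert_replication_sanity(replica, checks)
    except Exception as e:
        log.warning(e)
        mysql_lib.restart_replication(replica)
        time.sleep(pause)
        mysql_lib.assert_replication_sanity(replica, checks)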
Example #5
def mysql_backup(instance, backup_type=backup.BACKUP_TYPE_XBSTREAM, initial_build=False):
    """ Run a file based backup on a supplied local instance

    Args:
    instance - A hostaddr object
    backup_type - backup.BACKUP_TYPE_LOGICAL or backup.BACKUP_TYPE_XBSTREAM
    initial_build - Boolean, if this is being created right after the server
                    was built
    """
    log.info('Confirming sanity of replication (if applicable)')
    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(instance)
    except:
        # instance is not in production
        replica_type = None

    if replica_type and replica_type != host_utils.REPLICA_ROLE_MASTER:
        mysql_lib.assert_replication_sanity(instance)

    log.info('Logging initial status to mysqlops')
    start_timestamp = time.localtime()
    lock_handle = None
    backup_id = mysql_lib.start_backup_log(instance, backup_type,
                                           start_timestamp)

    # Take a lock to prevent multiple backups from running concurrently
    try:
        log.info('Taking backup lock')
        lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

        # Actually run the backup
        log.info('Running backup')
        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            backup_file = backup.xtrabackup_instance(instance, start_timestamp, initial_build)
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            backup_file = backup.logical_backup_instance(instance, start_timestamp, initial_build)
        else:
            raise Exception('Unsupported backup type {backup_type}'
                            ''.format(backup_type=backup_type))
    finally:
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)

    # Update database with additional info now that backup is done.
    if backup_id:
        log.info("Updating database log entry with final backup info")
        mysql_lib.finalize_backup_log(backup_id, backup_file)
    else:
        log.info("The backup is complete, but we were not able to "
                 "write to the central log DB.")
Example #6
def main():
    description = ("MySQL checksum wrapper\n\n"
                   "Wrapper of pt-table-checksum and pt-table-sync.\n"
                   "Defaults to checksumming 1/{k}th of databases on instance.\n"
                   "If diffs are found, use pt-table-sync to measure actual "
                   "divergence,\nbut only if the number of diffs is between "
                   "--min_diffs and --max_diffs.").format(k=DB_CHECK_FRACTION)

    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Instance to act on if other than localhost:3306',
                        default=''.join((socket.getfqdn(),
                                         ':3306')))
    parser.add_argument('-a',
                        '--all',
                        help='Checksums all dbs rather than the default',
                        action='store_true',
                        default=False)
    parser.add_argument('-d',
                        '--dbs',
                        help=("Comma separated list of db's to check rather "
                              "than the default"),
                        default=False)
    parser.add_argument('-q',
                        '--quiet',
                        help=("Do not print output to stdout"),
                        action='store_true',
                        default=False)
    parser.add_argument('-m',
                        '--min_diffs',
                        help=("Do per-row check if chunk diff count is at "
                              "least this value"),
                        dest='min_diffs',
                        default=MIN_DIFFS)
    parser.add_argument('-M',
                        '--max_diffs',
                        help=("Do not do per-row check if chunk diff count "
                              "is greater than this value"),
                        dest='max_diffs',
                        default=MAX_DIFFS)
    parser.add_argument('-C',
                        '--no_create_table',
                        help=("If test.checksum_detail is missing, do "
                              "not try to create it."),
                        dest='create_table',
                        action='store_false',
                        default=True)
    parser.add_argument('-v',
                        '--verbose',
                        help=("Store raw output from PT tools in the DB?"),
                        action='store_true',
                        default=False)
    parser.add_argument('-c',
                        '--check_fraction',
                        help=('Check this fraction of databases.'),
                        default=DB_CHECK_FRACTION)

    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)
    zk = host_utils.MysqlZookeeper()

    if instance not in \
            zk.get_all_mysql_instances_by_type(host_utils.REPLICA_ROLE_MASTER):
        raise Exception("Instance is not a master in ZK")

    # If enabled, try to create the table that holds the checksum info.
    # If not enabled, make sure that the table exists.
    if not mysql_lib.does_table_exist(instance, mysql_lib.METADATA_DB, CHECKSUM_TBL):
        if args.create_table:
            create_checksum_detail_table(instance)
        else:
            raise Exception("Checksum table not found.  Unable to continue."
                            "Consider not using the -C option or create it "
                            "yourself.")

    # Determine what replica set we belong to and get a list of slaves.
    replica_set = zk.get_replica_set_from_instance(instance)[0]
    slaves = set()
    for rtype in host_utils.REPLICA_ROLE_SLAVE, host_utils.REPLICA_ROLE_DR_SLAVE:
        s = zk.get_mysql_instance_from_replica_set(replica_set, rtype)
        if s:
            slaves.add(s)

    if len(slaves) == 0:
        log.info("This server has no slaves.  Nothing to do.")
        sys.exit(0)

    # before we even start this, make sure replication is OK.
    for slave in slaves:
        mysql_lib.assert_replication_sanity(slave)

    if args.dbs:
        db_to_check = set(args.dbs.split(','))
    else:
        dbs = mysql_lib.get_dbs(instance)

        if args.all:
            db_to_check = dbs
        else:
            # default behaviour, check a given DB every N days based on
            # day of year.  minimizes month-boundary issues.
            db_to_check = set()
            check_modulus = int(time.strftime("%j")) % int(args.check_fraction)
            counter = 0
            for db in dbs:
                modulus = counter % int(args.check_fraction)
                if modulus == check_modulus:
                    db_to_check.add(db)
                counter = counter + 1

    # Iterate through the list of DBs and check one table at a time.
    # We do it this way to ensure more coverage in case pt-table-checksum
    # loses its DB connection and errors out before completing a full scan
    # of a given database.
    #
    for db in db_to_check:
        tables_to_check = mysql_lib.get_tables(instance, db, skip_views=True)
        for tbl in tables_to_check:
            c_cmd, c_out, c_err, c_ret = checksum_tbl(instance, db, tbl)
            if not args.quiet:
                log.info("Checksum command executed was:\n{cmd}".format(cmd=c_cmd))
                log.info("Standard out:\n{out}".format(out=c_out))
                log.info("Standard error:\n{err}".format(err=c_err))
                log.info("Return code: {ret}".format(ret=c_ret))

            # parse each line of STDOUT (there should only be one with
            # actual data).  We only care about errors, rows, chunks, and
            # skipped, since we'll need to figure out diffs separately for
            # each slave box.
            for line in c_out.split("\n"):
                results = parse_checksum_row(line)
                if results:
                    chunk_errors = int(results[1])
                    row_count = int(results[3])
                    chunk_count = int(results[4])
                    chunk_skips = int(results[5])

                    for slave in slaves:
                        rows_checked = 'NO'
                        sync_cmd = ""
                        sync_out = ""
                        sync_err = ""
                        sync_ret = -1
                        row_diffs = 0

                        elapsed_time_ms,\
                            chunk_diffs = check_one_replica(slave,
                                                            db, tbl)

                        # if we skipped some chunks or there were errors,
                        # this means we can't have complete information about the
                        # state of the replica. in the case of a hard error,
                        # we'll just stop.  in the case of a skipped chunk, we will
                        # treat it as a different chunk for purposes of deciding
                        # whether or not to do a more detailed analysis.
                        #
                        checkable_chunks = chunk_skips + chunk_diffs

                        if chunk_errors > 0:
                            checksum_status = 'ERRORS_IN_CHECKSUM_PROCESS'
                        elif checkable_chunks == 0:
                            checksum_status = 'GOOD'
                        else:
                            if checkable_chunks > int(args.max_diffs):
                                # too many chunk diffs, don't bother checking
                                # further.  not good.
                                checksum_status = 'TOO_MANY_CHUNK_DIFFS'
                            elif checkable_chunks < int(args.min_diffs):
                                # some diffs, but not enough that we care.
                                checksum_status = 'CHUNK_DIFFS_FOUND_BUT_OK'
                            else:
                                start_time = int(time.time()*1000)
                                rows_checked = 'YES'

                                # set the proper status - did we do a sync-based check
                                # because of explicit diffs or because of skipped chunks?
                                if chunk_diffs > 0:
                                    checksum_status = 'ROW_DIFFS_FOUND'
                                else:
                                    checksum_status = 'CHUNKS_WERE_SKIPPED'

                                sync_cmd, sync_out, sync_err, sync_ret, \
                                    row_diffs = checksum_tbl_via_sync(slave,
                                                                      db,
                                                                      tbl)

                                # Add in the time it took to do the sync.
                                elapsed_time_ms += int(time.time()*1000) - start_time

                                if not args.quiet:
                                    log.info("Sync command executed was:\n{cmd} ".format(cmd=sync_cmd))
                                    log.info("Standard out:\n {out}".format(out=sync_out))
                                    log.info("Standard error:\n {err}".format(err=sync_err))
                                    log.info("Return code: {ret}".format(ret=sync_ret))
                                    log.info("Row diffs found: {cnt}".format(cnt=row_diffs))

                        # Checksum process is complete, store the results.
                        #
                        data = {'instance': slave,
                                'master_instance': instance,
                                'db': db,
                                'tbl': tbl,
                                'elapsed_time_ms': elapsed_time_ms,
                                'chunk_count': chunk_count,
                                'chunk_errors': chunk_errors,
                                'chunk_diffs': chunk_diffs,
                                'chunk_skips': chunk_skips,
                                'row_count': row_count,
                                'row_diffs': row_diffs,
                                'rows_checked': rows_checked,
                                'checksum_status': checksum_status,
                                'checksum_cmd': None,
                                'checksum_stdout': None,
                                'checksum_stderr': None,
                                'checksum_rc': c_ret,
                                'sync_cmd': None,
                                'sync_stdout': None,
                                'sync_stderr': None,
                                'sync_rc': sync_ret}

                        if args.verbose:
                            data.update({'checksum_cmd': c_cmd,
                                         'checksum_stdout': c_out,
                                         'checksum_stderr': c_err,
                                         'sync_cmd': sync_cmd,
                                         'sync_stdout': sync_out,
                                         'sync_stderr': sync_err,
                                         'sync_rc': sync_ret})

                        write_checksum_status(instance, data)
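
The default scheduling in main() spreads work so that each database is
checksummed once every check_fraction days, keyed on day of year. A small
worked illustration of the modulus selection, with hypothetical database
names and check_fraction = 7:

# On day-of-year 45, check_modulus = 45 % 7 = 3, so only databases whose
# enumeration index i has i % 7 == 3 are checked; the window shifts by one
# each day, covering every database once a week.
dbs = sorted('db%02d' % i for i in range(20))
check_modulus = 45 % 7
todays_dbs = [db for i, db in enumerate(dbs) if i % 7 == check_modulus]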
Example #7
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=dict(),
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides. Available keys are
                'mysql_minor_version', 'hostname', 'vpc_security_group',
                'availability_zone', 'instance_type', and 'mysql_major_version'.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and a reason is not supplied, an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.
    """
    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except:
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we will refuse to run. No ifs, ands,
        # or buts.
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = (datetime.datetime.now() -
                                  existing_replacement['created_at'])
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')
            else:
                log.info("A replacement already exists, but was launched "
                         "{days} days ago. The timeout for servers builds is "
                         "{timeout} days so we are automatically setting "
                         "replace_again.".format(days=age_of_replacement.days,
                                                 timeout=SERVER_BUILD_TIMEOUT))
                replace_again = True

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
        version_server = original_server
    except MySQLdb.OperationalError as detail:
        dead_server = True
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')
        version_server = zk.get_mysql_instance_from_replica_set(
            original_server.get_zk_replica_set()[0],
            repl_type=host_utils.REPLICA_ROLE_MASTER)

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(
        original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {
        'availability_zone': cmdb_data['location'],
        'vpc_security_group': cmdb_data['security_groups'],
        'hostname': find_unused_server_name(
            original_server.get_standardized_replica_set(),
            reporting_conn, dry_run),
        'instance_type': cmdb_data['config.instance_type'],
        'mysql_major_version':
            mysql_lib.get_global_variables(version_server)['version'][0:3],
        'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
        'dry_run': dry_run,
        'skip_name_check': True
    }

    # At this point, all our defaults should be good to go
    config_overridden = False

    # All other overrides
    for key in overrides.keys():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if overrides[key]:
            if replacement_config[key] == overrides[key]:
                log.info('Override for key {key} does not modify '
                         'configuration'.format(key=key))
            else:
                log.info('Overriding {key} to value {new} from {old}'
                         ''.format(key=key,
                                   old=replacement_config[key],
                                   new=overrides[key]))
                reasons.add('changing {key} from {old} to '
                            '{new}'.format(key=key,
                                           old=replacement_config[key],
                                           new=overrides[key]))
                replacement_config[key] = overrides[key]
                config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    if not dead_server:
        try:
            mysql_lib.assert_replication_sanity(original_server)
        except Exception as e:
            log.info('Replication problem: {e}'.format(e=e))
            reasons.add('replication broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(
        **replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
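
A hedged usage sketch of launch_replacement_db_host; the hostname and
instance type here are purely illustrative:

server = host_utils.HostAddr('db-old-42:3306')  # hypothetical host
launch_replacement_db_host(server,
                           dry_run=True,
                           overrides={'instance_type': 'i3.2xlarge'},
                           reason='instance degraded')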
Example #8
    # in theory, we could allow multiple instances of this script to run
    # on one server, as long as they are checksumming different replica sets.
    #
    try:
        lock = host_utils.bind_lock_socket('CHECKSUM_{}'.format(replica_set))
    except socket.error, (code, msg):
        log.error("Unable to bind socket for checksum on {rs} "
                  "(msg: {m}, code:{c})".format(rs=replica_set, m=msg, c=code))
        sys.exit(code)

    log.info("Locked replica set {} for checksum on this "
             "server".format(replica_set))

    # before we even start this, make sure replication is OK.
    for slave in slaves:
        mysql_lib.assert_replication_sanity(slave)

    if args.dbs:
        db_to_check = set(args.dbs.split(','))
    else:
        dbs = mysql_lib.get_dbs(instance)

        if args.all:
            db_to_check = dbs
        else:
            # default behaviour, check a given DB every N days based on
            # day of year.  minimizes month-boundary issues.
            db_to_check = set()
            check_modulus = int(time.strftime("%j")) % int(args.check_fraction)
            counter = 0
            for db in dbs:
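
Binding a UNIX socket as a lock is a neat trick: the OS enforces exclusivity
and the lock disappears automatically if the process dies. A minimal sketch
of what host_utils.bind_lock_socket presumably does (Linux abstract socket
namespace assumed):

import socket

def bind_lock_socket(name):
    # The leading NUL byte puts the socket in the abstract namespace (no
    # filesystem entry to clean up); bind() raises socket.error if another
    # process already holds the name.
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind('\0' + name)
    return sock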
Example #9
def main():
    description = (
        "MySQL checksum wrapper\n\n"
        "Wrapper of pt-table-checksum and pt-table-sync.\n"
        "Defaults to checksumming 1/{k}th of databases on instance.\n"
        "If diffs are found, use pt-table-sync to measure actual "
        "divergence,\nbut only if the number of diffs is between "
        "--min_diffs and --max_diffs.").format(k=DB_CHECK_FRACTION)

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Instance to act on if other than localhost:3306',
                        default=''.join((socket.getfqdn(), ':3306')))
    parser.add_argument('-a',
                        '--all',
                        help='Checksums all dbs rather than the default',
                        action='store_true',
                        default=False)
    parser.add_argument('-d',
                        '--dbs',
                        help=("Comma separated list of db's to check rather "
                              "than the default"),
                        default=False)
    parser.add_argument('-q',
                        '--quiet',
                        help=("Do not print output to stdout"),
                        action='store_true',
                        default=False)
    parser.add_argument('-m',
                        '--min_diffs',
                        help=("Do per-row check if chunk diff count is at "
                              "least this value"),
                        dest='min_diffs',
                        default=MIN_DIFFS)
    parser.add_argument('-M',
                        '--max_diffs',
                        help=("Do not do per-row check if chunk diff count "
                              "is greater than this value"),
                        dest='max_diffs',
                        default=MAX_DIFFS)
    parser.add_argument('-C',
                        '--no_create_table',
                        help=("If test.checksum_detail is missing, do "
                              "not try to create it."),
                        dest='create_table',
                        action='store_false',
                        default=True)
    parser.add_argument('-v',
                        '--verbose',
                        help=("Store raw output from PT tools in the DB?"),
                        action='store_true',
                        default=False)
    parser.add_argument('-c',
                        '--check_fraction',
                        help=('Check this fraction of databases.'),
                        default=DB_CHECK_FRACTION)

    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)
    zk = host_utils.MysqlZookeeper()

    if instance not in \
            zk.get_all_mysql_instances_by_type(host_utils.REPLICA_ROLE_MASTER):
        raise Exception("Instance is not a master in ZK")

    # If enabled, try to create the table that holds the checksum info.
    # If not enabled, make sure that the table exists.
    if not mysql_lib.does_table_exist(instance, mysql_lib.METADATA_DB,
                                      CHECKSUM_TBL):
        if args.create_table:
            create_checksum_detail_table(instance)
        else:
            raise Exception("Checksum table not found.  Unable to continue."
                            "Consider not using the -C option or create it "
                            "yourself.")

    # Determine what replica set we belong to and get a list of slaves.
    replica_set = zk.get_replica_set_from_instance(instance)[0]
    slaves = set()
    for rtype in host_utils.REPLICA_ROLE_SLAVE, host_utils.REPLICA_ROLE_DR_SLAVE:
        s = zk.get_mysql_instance_from_replica_set(replica_set, rtype)
        if s:
            slaves.add(s)

    if len(slaves) == 0:
        log.info("This server has no slaves.  Nothing to do.")
        sys.exit(0)

    # before we even start this, make sure replication is OK.
    for slave in slaves:
        mysql_lib.assert_replication_sanity(slave)

    if args.dbs:
        db_to_check = set(args.dbs.split(','))
    else:
        dbs = mysql_lib.get_dbs(instance)

        if args.all:
            db_to_check = dbs
        else:
            # default behaviour, check a given DB every N days based on
            # day of year.  minimizes month-boundary issues.
            db_to_check = set()
            check_modulus = int(time.strftime("%j")) % int(args.check_fraction)
            counter = 0
            for db in dbs:
                modulus = counter % int(args.check_fraction)
                if modulus == check_modulus:
                    db_to_check.add(db)
                counter = counter + 1

    # Iterate through the list of DBs and check one table at a time.
    # We do it this way to ensure more coverage in case pt-table-checksum
    # loses its DB connection and errors out before completing a full scan
    # of a given database.
    #
    for db in db_to_check:
        tables_to_check = mysql_lib.get_tables(instance, db, skip_views=True)
        for tbl in tables_to_check:
            c_cmd, c_out, c_err, c_ret = checksum_tbl(instance, db, tbl)
            if not args.quiet:
                log.info(
                    "Checksum command executed was:\n{cmd}".format(cmd=c_cmd))
                log.info("Standard out:\n{out}".format(out=c_out))
                log.info("Standard error:\n{err}".format(err=c_err))
                log.info("Return code: {ret}".format(ret=c_ret))

            # parse each line of STDOUT (there should only be one with
            # actual data).  We only care about errors, rows, chunks, and
            # skipped, since we'll need to figure out diffs separately for
            # each slave box.
            for line in c_out.split("\n"):
                results = parse_checksum_row(line)
                if results:
                    chunk_errors = int(results[1])
                    row_count = int(results[3])
                    chunk_count = int(results[4])
                    chunk_skips = int(results[5])

                    for slave in slaves:
                        rows_checked = 'NO'
                        sync_cmd = ""
                        sync_out = ""
                        sync_err = ""
                        sync_ret = -1
                        row_diffs = 0

                        elapsed_time_ms,\
                            chunk_diffs = check_one_replica(slave,
                                                            db, tbl)

                        # if we skipped some chunks or there were errors,
                        # this means we can't have complete information about the
                        # state of the replica. in the case of a hard error,
                        # we'll just stop.  in the case of a skipped chunk, we will
                        # treat it as a different chunk for purposes of deciding
                        # whether or not to do a more detailed analysis.
                        #
                        checkable_chunks = chunk_skips + chunk_diffs

                        if chunk_errors > 0:
                            checksum_status = 'ERRORS_IN_CHECKSUM_PROCESS'
                        elif checkable_chunks == 0:
                            checksum_status = 'GOOD'
                        else:
                            if checkable_chunks > int(args.max_diffs):
                                # too many chunk diffs, don't bother checking
                                # further.  not good.
                                checksum_status = 'TOO_MANY_CHUNK_DIFFS'
                            elif checkable_chunks < int(args.min_diffs):
                                # some diffs, but not enough that we care.
                                checksum_status = 'CHUNK_DIFFS_FOUND_BUT_OK'
                            else:
                                start_time = int(time.time() * 1000)
                                rows_checked = 'YES'

                                # set the proper status - did we do a sync-based check
                                # because of explicit diffs or because of skipped chunks?
                                if chunk_diffs > 0:
                                    checksum_status = 'ROW_DIFFS_FOUND'
                                else:
                                    checksum_status = 'CHUNKS_WERE_SKIPPED'

                                sync_cmd, sync_out, sync_err, sync_ret, \
                                    row_diffs = checksum_tbl_via_sync(slave,
                                                                      db,
                                                                      tbl)

                                # Add in the time it took to do the sync.
                                elapsed_time_ms += int(
                                    time.time() * 1000) - start_time

                                if not args.quiet:
                                    log.info(
                                        "Sync command executed was:\n{cmd} ".
                                        format(cmd=sync_cmd))
                                    log.info("Standard out:\n {out}".format(
                                        out=sync_out))
                                    log.info("Standard error:\n {err}".format(
                                        err=sync_err))
                                    log.info("Return code: {ret}".format(
                                        ret=sync_ret))
                                    log.info("Row diffs found: {cnt}".format(
                                        cnt=row_diffs))

                        # Checksum process is complete, store the results.
                        #
                        data = {
                            'instance': slave,
                            'master_instance': instance,
                            'db': db,
                            'tbl': tbl,
                            'elapsed_time_ms': elapsed_time_ms,
                            'chunk_count': chunk_count,
                            'chunk_errors': chunk_errors,
                            'chunk_diffs': chunk_diffs,
                            'chunk_skips': chunk_skips,
                            'row_count': row_count,
                            'row_diffs': row_diffs,
                            'rows_checked': rows_checked,
                            'checksum_status': checksum_status,
                            'checksum_cmd': None,
                            'checksum_stdout': None,
                            'checksum_stderr': None,
                            'checksum_rc': c_ret,
                            'sync_cmd': None,
                            'sync_stdout': None,
                            'sync_stderr': None,
                            'sync_rc': sync_ret
                        }

                        if args.verbose:
                            data.update({
                                'checksum_cmd': c_cmd,
                                'checksum_stdout': c_out,
                                'checksum_stderr': c_err,
                                'sync_cmd': sync_cmd,
                                'sync_stdout': sync_out,
                                'sync_stderr': sync_err,
                                'sync_rc': sync_ret
                            })

                        write_checksum_status(instance, data)
Example #10
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=dict(),
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides. Available keys are
                'mysql_minor_version', 'hostname', 'vpc_security_group',
                'availability_zone', 'classic_security_group',
                'instance_type', and 'mysql_major_version'.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and a reason is not supplied, an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.
    """
    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except:
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we will refuse to run. No ifs, ands,
        # or buts.
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = datetime.datetime.now() - existing_replacement['created_at']
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')
            else:
                log.info("A replacement already exists, but was launched "
                         "{days} days ago. The timeout for servers builds is "
                         "{timeout} days so we are automatically setting "
                         "replace_again.".format(days=age_of_replacement.days,
                                                 timeout=SERVER_BUILD_TIMEOUT))
                replace_again = True

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {'availability_zone': cmdb_data['location'],
                          'hostname': find_unused_server_name(original_server.get_standardized_replica_set(),
                                                              reporting_conn, dry_run),
                          'instance_type': cmdb_data['config.instance_type'],
                          'mysql_major_version': get_master_mysql_major_version(original_server),
                          'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
                          'dry_run': dry_run,
                          'skip_name_check': True}

    if cmdb_data.pop('cloud.aws.vpc_id', None):
        # Existing server is in VPC
        replacement_config['classic_security_group'] = None
        replacement_config['vpc_security_group'] = cmdb_data['security_groups']
    else:
        # Existing server is in Classic
        replacement_config['classic_security_group'] = cmdb_data['security_groups']
        replacement_config['vpc_security_group'] = None

    # At this point, all our defaults should be good to go
    config_overridden = False
    if replacement_config['classic_security_group'] and overrides.get('vpc_security_group'):
        # a VPC migration
        vpc_migration(replacement_config, overrides)
        reasons.add('vpc migration')
        config_overridden = True

    # All other overrides
    for key in overrides.keys():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if overrides[key]:
            if replacement_config[key] == overrides[key]:
                log.info('Override for key {key} does not modify '
                         'configuration'.format(key=key))
            else:
                log.info('Overriding {key} to value {new} from {old}'
                         ''.format(key=key,
                                   old=replacement_config[key],
                                   new=overrides[key]))
                reasons.add('changing {key} from {old} to '
                            '{new}'.format(key=key,
                                           old=replacement_config[key],
                                           new=overrides[key]))
                replacement_config[key] = overrides[key]
                config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
    except MySQLdb.OperationalError as detail:
        dead_server = True
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')

    if not dead_server:
        try:
            mysql_lib.assert_replication_sanity(original_server)
        except Exception as e:
            log.info('Replication problem: {e}'.format(e=e))
            reasons.add('replication broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(**replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
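
The VPC-versus-Classic branch can be exercised in isolation. A small sketch
of the same security-group selection, with a made-up cmdb record:

def pick_security_groups(cmdb_data):
    # Mirrors the branch above: a vpc_id means the server lives in VPC.
    if cmdb_data.pop('cloud.aws.vpc_id', None):
        return {'classic_security_group': None,
                'vpc_security_group': cmdb_data['security_groups']}
    return {'classic_security_group': cmdb_data['security_groups'],
            'vpc_security_group': None}

print(pick_security_groups({'cloud.aws.vpc_id': 'vpc-123abc',
                            'security_groups': 'sg-0f00'}))
# {'classic_security_group': None, 'vpc_security_group': 'sg-0f00'}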
Example #11
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk
    """
    try:
        if replica_type not in [
                host_utils.REPLICA_ROLE_DR_SLAVE, host_utils.REPLICA_ROLE_SLAVE
        ]:
            raise Exception('Invalid value "{replica_type}" for argument '
                            'replica_type'.format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(
                host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance, master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data,
             version) = get_zk_node_for_replica_set(kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data), dr_meta.version)
        else:
            # we should raise an exception above rather than getting to here
            pass
    except Exception, e:
        log.exception(e)
        raise
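
Passing the znode version back into kazoo_client.set turns each update into
a compare-and-set: a concurrent writer makes the set fail instead of being
silently overwritten. A hedged sketch of that read-modify-write pattern with
the kazoo client:

import simplejson

def update_znode(kazoo_client, path, mutate):
    # Read data plus version, apply a pure mutation, then write back
    # conditionally; kazoo raises BadVersionError if the znode changed
    # underneath us, letting the caller retry.
    data, stat = kazoo_client.get(path)
    new_data = mutate(simplejson.loads(data))
    kazoo_client.set(path, simplejson.dumps(new_data), stat.version)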
Example #12
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk
    """
    try:
        if replica_type not in [host_utils.REPLICA_ROLE_DR_SLAVE,
                                host_utils.REPLICA_ROLE_SLAVE]:
            raise Exception('Invalid value "{replica_type}" for argument '
                            'replica_type'.format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance,
                                      master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data, version) = get_zk_node_for_replica_set(kazoo_client,
                                                                          replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK, simplejson.dumps(new_data), dr_meta.version)
        else:
            # we should raise an exception above rather than getting to here
            pass
    except Exception, e:
        log.exception(e)
        raise
Example #13
def mysql_backup(instance,
                 backup_type=backup.BACKUP_TYPE_XBSTREAM,
                 initial_build=False,
                 lock_handle=None):
    """ Run a file based backup on a supplied local instance

    Args:
    instance - A hostaddr object
    backup_type - backup.BACKUP_TYPE_LOGICAL or backup.BACKUP_TYPE_XBSTREAM
    initial_build - Boolean, if this is being created right after the server
                    was built
    lock_handle - A lock handle, if we have one from the caller.
    """

    if backup_type == backup.BACKUP_TYPE_XBSTREAM and \
            os.path.isfile(backup.XTRABACKUP_SKIP_FILE):
        log.info('Found {}. Skipping xtrabackup '
                 'run.'.format(backup.XTRABACKUP_SKIP_FILE))
        return

    log.info('Confirming sanity of replication (if applicable)')
    zk = host_utils.MysqlZookeeper()
    try:
        replica_type = zk.get_replica_type_from_instance(instance)
    except:
        # instance is not in production
        replica_type = None

    if replica_type and replica_type != host_utils.REPLICA_ROLE_MASTER:
        mysql_lib.assert_replication_sanity(instance)

    log.info('Logging initial status to mysqlops')
    start_timestamp = time.localtime()
    backup_id = mysql_lib.start_backup_log(instance, backup_type,
                                           start_timestamp)

    # Take a lock to prevent multiple backups from running concurrently
    # unless we already have a lock from the caller.  This means we
    # also don't have to release the lock at the end; either we
    # exit the script entirely, and it gets cleaned up or the caller
    # maintains it.
    if lock_handle is None:
        log.info('Taking backup lock')
        lock_handle = host_utils.bind_lock_socket(
            backup.STD_BACKUP_LOCK_SOCKET)
    else:
        log.info('Not acquiring backup lock, we already have one.')

    # Actually run the backup
    log.info('Running backup')
    if backup_type == backup.BACKUP_TYPE_XBSTREAM:
        backup_file = backup.xtrabackup_instance(instance, start_timestamp,
                                                 initial_build)
    elif backup_type == backup.BACKUP_TYPE_LOGICAL:
        # We don't need a backup-skip file here since this isn't
        # regularly scheduled.
        backup_file = backup.logical_backup_instance(instance, start_timestamp,
                                                     initial_build)
    else:
        raise Exception('Unsupported backup type {}'.format(backup_type))

    # Update database with additional info now that backup is done.
    if backup_id:
        log.info("Updating database log entry with final backup info")
        mysql_lib.finalize_backup_log(backup_id, backup_file)
    else:
        log.info("The backup is complete, but we were not able to "
                 "write to the central log DB.")