Example #1
def confirm_max_replica_lag(replicas,
                            lag_tolerance,
                            dead_master,
                            replicas_synced=False,
                            timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    lag_tolerance - Max computed replication lag in seconds. If 0 is supplied,
                    then the exec position of the replica servers is compared
                    to the master rather than using computed seconds behind
                    master, as the heartbeat will be blocked by read_only.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long to wait for replication to be in the desired state
    """
    start = time.time()
    if dead_master:
        replication_checks = set(
            [mysql_lib.CHECK_SQL_THREAD, mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        acceptable = True
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                acceptable = False

        if replicas_synced and not confirm_replicas_in_sync(replicas):
            acceptable = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(r=replica))
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)
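
The example above is built around a poll-until-healthy-or-timeout loop: run every check, return as soon as all replicas pass, raise once the wall-clock timeout is exceeded, and otherwise sleep and retry. A minimal standalone sketch of that pattern (wait_until_healthy, checks and poll_interval are illustrative names, not part of mysql_lib) might look like this:

import time


def wait_until_healthy(checks, timeout=0, poll_interval=5):
    """ Run every check callable until all pass or the timeout expires

    Args:
    checks - An iterable of zero-argument callables that raise on failure
    timeout - Seconds to keep retrying; 0 means a single pass
    poll_interval - Seconds to sleep between passes
    """
    start = time.time()
    while True:
        failures = []
        for check in checks:
            try:
                check()
            except Exception as e:
                failures.append(e)

        if not failures:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Checks still failing after {t} seconds: '
                            '{f}'.format(t=timeout, f=failures))
        else:
            time.sleep(poll_interval)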
Example #2
def is_master_alive(master, replicas):
    """ Determine if the master is alive

    The function will:
    1. Attempt to connect to the master via the mysql protocol. If successful
       the master is considered alive.
    2. If #1 fails, check the io thread of the replica instance(s). If the io
       thread is not running, the master will be considered dead. If step #1
       fails and step #2 succeeds, we are in a weird state and will throw an
       exception.

    Args:
    master - A hostaddr object for the master instance
    replicas - A set of hostaddr objects for the replica instances

    Returns:
    A mysql connection to the master if the master is alive, False otherwise.
    """
    if len(replicas) == 0:
        raise Exception('At least one replica must be present to determine '
                        'if a master is dead')
    try:
        master_conn = mysql_lib.connect_mysql(master)
        return master_conn
    except MySQLdb.OperationalError as detail:
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        master_conn = False
        log.info('Unable to connect to current master {master} from '
                 '{hostname}, will check replica servers before declaring '
                 'the master dead'.format(master=master,
                                          hostname=host_utils.HOSTNAME))
    except:
        log.info('This is an unknown connection error. If you are very sure '
                 'that the master is dead, please put a "return False" at the '
                 'top of is_master_alive and then send rwultsch a stack trace')
        raise

    # We can not get a connection to the master, so poll the replica servers
    for replica in replicas:
        # If replication has not hit a timeout, a dead master can still have
        # a replica which thinks it is ok. "STOP SLAVE; START SLAVE" followed
        # by a sleep will get us truthiness.
        mysql_lib.restart_replication(replica)
        try:
            mysql_lib.assert_replication_sanity(replica)
            raise Exception('Replica {replica} thinks it can connect to '
                            'master {master}, but the failover script can not. '
                            'Possible network partition!'
                            ''.format(replica=replica,
                                      master=master))
        except:
            # The exception is expected in this case
            pass
        log.info('Replica {replica} also can not connect to master '
                 '{master}.'.format(replica=replica,
                                    master=master))
    return False
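
is_master_alive treats exactly one failure mode (MYSQL_ERROR_CONN_HOST_ERROR) as evidence that the master may be down and re-raises everything else so a human can investigate. A rough standalone sketch of that split between expected and unexpected connection failures, using a plain TCP probe instead of the MySQL protocol (a socket-level check is only an approximation of a real connection attempt, and can_reach_tcp is a hypothetical helper, not part of mysql_lib), could look like this:

import errno
import socket


def can_reach_tcp(host, port=3306, timeout=2.0):
    """ Attempt a TCP connection to a host

    Returns:
    True if the connection succeeds, False if the host is unreachable in a
    well-understood way. Any other error is re-raised.
    """
    try:
        sock = socket.create_connection((host, port), timeout=timeout)
        sock.close()
        return True
    except socket.timeout:
        # The host did not answer in time; treat it as unreachable.
        return False
    except socket.error as e:
        if e.errno in (errno.ECONNREFUSED, errno.EHOSTUNREACH,
                       errno.ENETUNREACH):
            # A well-understood "can not get there" failure
            return False
        # Anything else is a surprise, so surface it rather than guessing
        raise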
Example #3
def confirm_max_replica_lag(replicas, lag_tolerance, dead_master,
                            replicas_synced=False, timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    lag_tolerance - Max computed replication lag in seconds. If 0 is supplied,
                    then the exec position of the replica servers is compared
                    to the master rather than using computed seconds behind
                    master, as the heartbeat will be blocked by read_only.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long to wait for replication to be in the desired state
    """
    start = time.time()
    if dead_master:
        replication_checks = set([mysql_lib.CHECK_SQL_THREAD,
                                  mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        acceptable = True
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica, replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                mysql_lib.assert_replication_sanity(replica, replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance, dead_master)
            except Exception as e:
                log.warning(e)
                acceptable = False

        if replicas_synced and not confirm_replicas_in_sync(replicas):
            acceptable = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(r=replica))
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)
Example #4
def restore_instance(backup_type, restore_source, destination,
                     no_repl, date,
                     add_to_zk, skip_production_check):
    """ Restore a MySQL backup on to localhost

    Args:
    backup_type - Type of backup to restore
    restore_source - A hostaddr object for where to pull a backup from
    destination - A hostaddr object for where to restore the backup
    no_repl - Should replication not be started. It will always be set up.
    date - What date should the backup be from
    add_to_zk - Should the instance be added to zk. If so, the log from the
                host being launched will be consulted.
    skip_production_check - Do not check if the host is already in zk for
                            production use.
    """
    log.info('Supplied source is {source}'.format(source=restore_source))
    log.info('Supplied destination is {dest}'.format(dest=destination))
    log.info('Desired date of restore {date}'.format(date=date))
    zk = host_utils.MysqlZookeeper()

    # Try to prevent unintentional destruction of prod servers
    log.info('Confirming no prod instances running on destination')
    prod_check(destination, skip_production_check)

    # Take a lock to prevent multiple restores from running concurrently
    log.info('Taking a lock to block another restore from starting')
    lock_handle = host_utils.bind_lock_socket(backup.STD_BACKUP_LOCK_SOCKET)

    log.info('Looking for a backup to restore')
    if restore_source:
        possible_sources = [restore_source]
    else:
        possible_sources = get_possible_sources(destination, backup_type)
    backup_key = find_a_backup_to_restore(possible_sources, destination,
                                          backup_type, date)

    # Figure out what we will use as the master when we set up replication
    (restore_source, _) = backup.get_metadata_from_backup_file(backup_key.name)
    try:
        replica_set = restore_source.get_zk_replica_set()
        master = zk.get_mysql_instance_from_replica_set(replica_set,
                                                        host_utils.REPLICA_ROLE_MASTER)
    except:
        # ZK has no idea what this replica set is, probably a new replica set.
        master = restore_source

    # Start logging
    row_id = backup.start_restore_log(master, {
                'restore_source': restore_source,
                'restore_port': destination.port,
                'restore_file': backup_key.name,
                'source_instance': destination.hostname,
                'restore_date': date,
                'replication': no_repl,
                'zookeeper': add_to_zk})

    # Giant try to allow logging if anything goes wrong.
    try:
        # If we hit an exception, this status will be used. If not, it will
        # be overwritten
        restore_log_update = {'restore_status': 'BAD'}

        # This also ensures that all needed directories exist
        log.info('Rebuilding local mysql instance')
        lock_handle = mysql_init_server.mysql_init_server(
                        destination,
                        skip_production_check=True,
                        skip_backup=True,
                        lock_handle=lock_handle)

        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            xbstream_restore(backup_key, destination.port)
            if master == restore_source:
                log.info('Pulling replication info for restore from '
                         'backup source')
                (binlog_file,
                 binlog_pos,
                 gtid_purged) = backup.parse_xtrabackup_binlog_info(
                                destination.port)
            else:
                log.info('Pulling replication info for restore from '
                         'master of backup source')
                # if our backup came from a GTID server, we won't have
                # a binlog_file and a binlog_pos, so we need to see if
                # we can get a set of purged GTIDs
                (binlog_file,
                 binlog_pos,
                 gtid_purged) = backup.parse_xtrabackup_slave_info(
                                destination.port)

        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            log.info('Preparing replication')
            # We are importing a mysqldump which was created with
            # --master-data or --dump-slave so there will be a CHANGE MASTER
            # statement at the start of the dump. MySQL will basically just
            # ignore a CHANGE MASTER command if master_host is not already
            # setup. So we are setting master_host, username and password
            # here. We use BOGUS for master_log_file so that the IO thread is
            # intentionally broken.  With no argument for master_log_file,
            # the IO thread would start downloading the first bin log and
            # the SQL thread would start executing...
            mysql_lib.change_master(destination, master, 'BOGUS', 0,
                                    no_start=True)
            # reset master on slave before we load anything to ensure that
            # we can set GTID info from the backup, if it exists.
            mysql_lib.reset_master(destination)
            logical_restore(backup_key, destination)
            host_utils.stop_mysql(destination.port)

        log.info('Running MySQL upgrade')
        host_utils.upgrade_auth_tables(destination.port)

        log.info('Starting MySQL')
        host_utils.start_mysql(
            destination.port,
            options=host_utils.DEFAULTS_FILE_EXTRA_ARG.format(
                defaults_file=host_utils.MYSQL_NOREPL_CNF_FILE))

        # Since we haven't started the slave yet, make sure we've got these
        # plugins installed, whether we use them or not.
        mysql_lib.setup_semisync_plugins(destination)
        mysql_lib.setup_audit_plugin(destination)
        mysql_lib.setup_response_time_metrics(destination)

        restore_log_update = {'restore_status': 'OK'}

        # Try to configure replication.
        log.info('Setting up MySQL replication')
        restore_log_update['replication'] = 'FAIL'
        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            # before we change master, reset master on the
            # slave to clear out any GTID errant transactions.
            mysql_lib.reset_master(destination)
            mysql_lib.change_master(destination,
                                    master,
                                    binlog_file,
                                    binlog_pos,
                                    gtid_purged=gtid_purged,
                                    no_start=(no_repl == 'SKIP'))
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            if no_repl == 'SKIP':
                log.info('As requested, not starting replication.')
            else:
                mysql_lib.restart_replication(destination)
        if no_repl == 'REQ':
            mysql_lib.wait_for_catch_up(destination)
        restore_log_update['replication'] = 'OK'

        host_utils.manage_pt_daemons(destination.port)

    except Exception as e:
        log.error(e)
        if row_id is not None:
            restore_log_update['status_message'] = e
            restore_log_update['finished_at'] = True
        raise
    finally:
        # As with mysql_init_server, we have to do one more restart to
        # clear out lock ownership, but here we have to also do it with
        # the proper config file.
        if lock_handle:
            log.info('Releasing lock and restarting MySQL')
            host_utils.stop_mysql(destination.port)
            time.sleep(5)
            host_utils.release_lock_socket(lock_handle)
            if no_repl == 'SKIP':
                host_utils.start_mysql(
                    destination.port,
                    options=host_utils.DEFAULTS_FILE_EXTRA_ARG.format(
                        defaults_file=host_utils.MYSQL_NOREPL_CNF_FILE))
            else:
                host_utils.start_mysql(destination.port)

        backup.update_restore_log(master, row_id, restore_log_update)

    try:
        if add_to_zk == 'REQ':
            if no_repl == 'REQ':
                log.info('Waiting for replication again, as it may have '
                         'drifted due to restart.')
                mysql_lib.wait_for_catch_up(destination)
                log.info('Waiting for IO lag in case it is still too far '
                         'behind even after waiting for resync')
                mysql_lib.wait_for_catch_up(destination, io=True)
            log.info('Adding instance to zk.')
            modify_mysql_zk.auto_add_instance_to_zk(destination.port,
                                                    dry_run=False)
            backup.update_restore_log(master, row_id, {'zookeeper': 'OK'})
        else:
            log.info('add_to_zk is not set, therefore not adding to zk')
    except Exception as e:
        log.warning("An exception occurred: {}".format(e))
        log.warning("If this is a DB issue, that's fine. "
                    "Otherwise, you should check ZK.")
    backup.update_restore_log(master, row_id, {'finished_at': True})

    if no_repl == 'REQ':
        log.info('Starting a new backup')
        mysql_backup.mysql_backup(destination, initial_build=True)
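
restore_instance wraps most of its work in one large try block so that a status row can always be written: the status starts out as BAD, each sub-step is flipped from FAIL to OK as it completes, and the finally clause records whatever state was reached. A condensed sketch of that logging pattern (run_with_status_log and write_log are illustrative stand-ins, not functions from the backup module) might be:

import time


def run_with_status_log(steps, write_log):
    """ Run steps in order, recording a status row no matter what happens

    Args:
    steps - A list of (name, zero-argument callable) pairs
    write_log - A callable taking a dict; stands in for something like
                backup.update_restore_log
    """
    # Pessimistic default: if anything blows up before the end, this is
    # what gets recorded.
    status = {'restore_status': 'BAD'}
    try:
        for name, step in steps:
            status[name] = 'FAIL'
            step()
            status[name] = 'OK'
        status['restore_status'] = 'OK'
    except Exception as e:
        status['status_message'] = str(e)
        raise
    finally:
        status['finished_at'] = time.time()
        write_log(status)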
Example #5
def confirm_max_replica_lag(replicas,
                            max_lag,
                            dead_master,
                            replicas_synced=False,
                            timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    max_lag - Max computed replication lag in seconds. If 0 is supplied,
              then the exec position of the replica servers is compared
              to the master rather than using computed seconds behind
              master, as the heartbeat will be blocked by read_only.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long to wait for replication to be in the desired state
    """
    repl_checks = dict()
    start = time.time()
    while True:
        acceptable = True
        for replica in replicas:
            repl_check = mysql_lib.calc_slave_lag(replica,
                                                  dead_master=dead_master)
            repl_checks[str(replica)] = ':'.join(
                (repl_check['ss']['Relay_Master_Log_File'],
                 str(repl_check['ss']['Exec_Master_Log_Pos'])))
            # Basic sanity
            if repl_check['sbm'] is None:
                raise Exception(
                    'Computed replication lag is unavailable for {replica}, '
                    'perhaps restart pt-heartbeat '
                    'on the master?'.format(replica=replica))

            if repl_check['ss']['Slave_SQL_Running'] != 'Yes':
                log.info('SQL thread is not running, trying to restart, then '
                         'sleep 20 seconds')
                conn = mysql_lib.connect_mysql(replica)
                mysql_lib.restart_replication(conn)
                time.sleep(20)
                repl_check = mysql_lib.calc_slave_lag(replica,
                                                      dead_master=dead_master)
                if repl_check['ss']['Slave_SQL_Running'] != 'Yes':
                    raise Exception('SQL thread on {replica} has serious '
                                    'problems'.format(replica=replica))

            if max_lag == 0:
                if repl_check['sql_bytes'] != 0:
                    acceptable = False
                    log.warning('Unprocessed log on {replica} is {sql_bytes} '
                                'bytes > 0'
                                ''.format(replica=replica,
                                          sql_bytes=repl_check['sql_bytes']))
                else:
                    log.info('{replica} is in sync with the '
                             'master'.format(replica=replica))
            else:
                if repl_check['sbm'] > max_lag:
                    acceptable = False
                    log.warning('Lag on {replica} is {lag} seconds, which is '
                                'greater than the limit of '
                                '{limit}'.format(replica=replica,
                                                 limit=max_lag,
                                                 lag=repl_check['sbm']))
                else:
                    log.info('Lag on {replica} is {lag} seconds, which is <= '
                             'the limit of '
                             '{limit}'.format(replica=replica,
                                              limit=max_lag,
                                              lag=repl_check['sbm']))

        if replicas_synced and len(set(repl_checks.values())) != 1:
            acceptable = False
            raise Exception(
                'Replica servers are not in sync and replicas_synced '
                'is set. Replication status: '
                '{repl_checks}'.format(repl_checks=repl_checks))
        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state')
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)
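
The replicas_synced check above reduces each replica to a 'Relay_Master_Log_File:Exec_Master_Log_Pos' string and requires the resulting set to have exactly one member. A self-contained sketch of that comparison (replicas_in_sync and the sample host names are illustrative only):

def replicas_in_sync(coordinates):
    """ Check whether every replica reports the same executed position

    Args:
    coordinates - A dict mapping a replica name to a
                  (Relay_Master_Log_File, Exec_Master_Log_Pos) tuple
    """
    positions = set('{f}:{p}'.format(f=log_file, p=pos)
                    for log_file, pos in coordinates.values())
    return len(positions) == 1


# These two replicas have executed to different positions, so the check fails
print(replicas_in_sync({'replica1:3306': ('mysql-bin.000042', 1024),
                        'replica2:3306': ('mysql-bin.000042', 2048)}))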
Example #6
def restore_instance(backup_type, restore_source, destination,
                     no_repl, date,
                     add_to_zk, skip_production_check):
    """ Restore a MySQL backup on to localhost

    Args:
    backup_type - Type of backup to restore
    restore_source - A hostaddr object for where to pull a backup from
    destination - A hostaddr object for where to restore the backup
    no_repl - Should replication not be started. It will always be set up.
    date - What date should the backup be from
    add_to_zk - Should the instance be added to zk. If so, the log from the
                host being launched will be consulted.
    skip_production_check - Do not check if the host is already in zk for
                            production use.
    """
    log.info('Supplied source is {source}'.format(source=restore_source))
    log.info('Supplied destination is {dest}'.format(dest=destination))
    log.info('Desired date of restore {date}'.format(date=date))
    zk = host_utils.MysqlZookeeper()

    # Try to prevent unintentional destruction of prod servers
    log.info('Confirming no prod instances running on destination')
    prod_check(destination, skip_production_check)

    # Take a lock to prevent multiple restores from running concurrently
    log.info('Taking a flock to block another restore from starting')
    lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

    log.info('Looking for a backup to restore')
    if restore_source:
        possible_sources = [restore_source]
    else:
        possible_sources = get_possible_sources(destination, backup_type)
    backup_key = find_a_backup_to_restore(possible_sources, destination,
                                          backup_type, date)

    # Figure out what we will use as the master when we set up replication
    (restore_source, _) = backup.get_metadata_from_backup_file(backup_key.name)
    if restore_source.get_zk_replica_set():
        replica_set = restore_source.get_zk_replica_set()[0]
        master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
    else:
        # ZK has no idea what this replica set is, probably a new replica set.
        master = restore_source

    # Start logging
    row_id = backup.start_restore_log(master, {'restore_source': restore_source,
                                               'restore_port': destination.port,
                                               'restore_file': backup_key.name,
                                               'source_instance': destination.hostname,
                                               'restore_date': date,
                                               'replication': no_repl,
                                               'zookeeper': add_to_zk})
    # Giant try to allow logging if anything goes wrong.
    try:
        # If we hit an exception, this status will be used. If not, it will
        # be overwritten
        restore_log_update = {'restore_status': 'BAD'}

        # This also ensures that all needed directories exist
        log.info('Rebuilding local mysql instance')
        mysql_init_server.mysql_init_server(destination, skip_production_check=True,
                                            skip_backup=True, skip_locking=True)

        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            xbstream_restore(backup_key, destination.port)
            if master == restore_source:
                log.info('Pulling replication info for restore from backup source')
                (binlog_file, binlog_pos) = backup.parse_xtrabackup_binlog_info(destination.port)
            else:
                log.info('Pulling replication info for restore from '
                         'master of backup source')
                (binlog_file, binlog_pos) = backup.parse_xtrabackup_slave_info(destination.port)
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            logical_restore(backup_key, destination)
            host_utils.stop_mysql(destination.port)

        log.info('Running MySQL upgrade')
        host_utils.upgrade_auth_tables(destination.port)

        log.info('Starting MySQL')
        host_utils.start_mysql(destination.port,
                               options=host_utils.DEFAULTS_FILE_EXTRA_ARG.format(defaults_file=host_utils.MYSQL_NOREPL_CNF_FILE))

        # Since we haven't started the slave yet, make sure we've got these
        # plugins installed, whether we use them or not.
        mysql_lib.setup_semisync_plugins(destination)
        restore_log_update = {'restore_status': 'OK'}

        # Try to configure replication.
        log.info('Setting up MySQL replication')
        restore_log_update['replication'] = 'FAIL'
        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            mysql_lib.change_master(destination,
                                    master,
                                    binlog_file,
                                    binlog_pos,
                                    no_start=(no_repl == 'SKIP'))
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            if no_repl == 'SKIP':
                log.info('As requested, not starting replication.')
            else:
                mysql_lib.restart_replication(destination)
        if no_repl == 'REQ':
            mysql_lib.wait_replication_catch_up(destination)
        restore_log_update['replication'] = 'OK'

        host_utils.restart_pt_daemons(destination.port)
        mysql_lib.setup_response_time_metrics(destination)

    except Exception as e:
        log.error(e)
        if row_id is not None:
            restore_log_update['status_message'] = e
            restore_log_update['finished_at'] = True
        raise
    finally:
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
        backup.update_restore_log(master, row_id, restore_log_update)

    try:
        if add_to_zk == 'REQ':
            log.info('Adding instance to zk')
            modify_mysql_zk.auto_add_instance_to_zk(destination.port,
                                                    dry_run=False)
            backup.update_restore_log(master, row_id, {'zookeeper': 'OK'})
        else:
            log.info('add_to_zk is not set, therefore not adding to zk')
    except Exception as e:
        log.warning("An exception occurred: {e}".format(e=e))
        log.warning("If this is a DB issue, that's fine. "
                    "Otherwise, you should check ZK.")
    backup.update_restore_log(master, row_id, {'finished_at': True})

    if no_repl == 'REQ':
        log.info('Starting a new backup')
        mysql_backup.mysql_backup(destination, initial_build=True)
Example #7
def confirm_max_replica_lag(replicas, max_lag, dead_master,
                            replicas_synced=False, timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr objects to be tested for replication lag
    max_lag - Max computed replication lag in seconds. If 0 is supplied,
              then the exec position of the replica servers is compared
              to the master rather than using computed seconds behind
              master, as the heartbeat will be blocked by read_only.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long to wait for replication to be in the desired state
    """
    repl_checks = dict()
    start = time.time()
    while True:
        acceptable = True
        for replica in replicas:
            repl_check = mysql_lib.calc_slave_lag(replica, dead_master=dead_master)
            repl_checks[str(replica)] = ':'.join((repl_check['ss']['Relay_Master_Log_File'],
                                                  str(repl_check['ss']['Exec_Master_Log_Pos'])))
            # Basic sanity
            if repl_check['sbm'] is None:
                raise Exception('Computed replication lag is unavailable for {replica}, '
                                'perhaps restart pt-heartbeat '
                                'on the master?'.format(replica=replica))

            if repl_check['ss']['Slave_SQL_Running'] != 'Yes':
                log.info('SQL thread is not running, trying to restart, then '
                         'sleep 20 seconds')
                conn = mysql_lib.connect_mysql(replica)
                mysql_lib.restart_replication(conn)
                time.sleep(20)
                repl_check = mysql_lib.calc_slave_lag(replica, dead_master=dead_master)
                if repl_check['ss']['Slave_SQL_Running'] != 'Yes':
                    raise Exception('SQL thread on {replica} has serious '
                                    'problems'.format(replica=replica))

            if max_lag == 0:
                if repl_check['sql_bytes'] != 0:
                    acceptable = False
                    log.warning('Unprocessed log on {replica} is {sql_bytes} '
                                'bytes > 0'
                                ''.format(replica=replica,
                                          sql_bytes=repl_check['sql_bytes']))
                else:
                    log.info('{replica} is in sync with the '
                             'master'.format(replica=replica))
            else:
                if repl_check['sbm'] > max_lag:
                    acceptable = False
                    log.warning('Lag on {replica} is {lag} seconds, which is '
                                'greater than the limit of '
                                '{limit}'.format(replica=replica,
                                                 limit=max_lag,
                                                 lag=repl_check['sbm']))
                else:
                    log.info('Lag on {replica} is {lag} seconds, which is <= '
                             'the limit of '
                             '{limit}'.format(replica=replica,
                                              limit=max_lag,
                                              lag=repl_check['sbm']))

        if replicas_synced and len(set(repl_checks.values())) != 1:
            acceptable = False
            raise Exception('Replica servers are not in sync and replicas_synced '
                            'is set. Replication status: '
                            '{repl_checks}'.format(repl_checks=repl_checks))
        if acceptable:
            return
        elif (time.time() - start) > timeout:
            raise Exception('Replication is not in an acceptable state')
        else:
            log.info('Sleeping for 5 seconds to allow replication to catch up')
            time.sleep(5)