def confirm_max_replica_lag(replicas,
                            lag_tolerance,
                            dead_master,
                            replicas_synced=False,
                            timeout=0):
    """ Wait until replication on every replica is sane and unlagged

    Args:
    replicas - A set of hostaddr object to be tested for replication lag
    lag_tolerance - Lag tolerance passed unchanged to
                    mysql_lib.assert_replication_unlagged for each replica.
    dead_master - If set, only mysql_lib.CHECK_SQL_THREAD and
                  mysql_lib.CHECK_CORRECT_MASTER are asserted instead of
                  mysql_lib.ALL_REPLICATION_CHECKS. Also passed through to
                  mysql_lib.assert_replication_unlagged.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log. When set,
                      confirm_replicas_in_sync(replicas) must also pass.
    timeout - How long (in seconds) to keep retrying before giving up

    Raises:
    Exception - If replication is still not acceptable once timeout has
                elapsed. The message lists the replicas that failed the
                lag check, or every replica when only the in-sync check
                failed.
    Any error from mysql_lib.assert_replication_sanity propagates if the
    sanity check still fails after one replication restart.
    """
    start = time.time()
    if dead_master:
        replication_checks = set(
            [mysql_lib.CHECK_SQL_THREAD, mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        # Replicas that failed the lag check during this pass. Tracking
        # them explicitly lets the timeout error name the real offenders
        # instead of whichever replica the loop happened to visit last.
        lagging = []
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                # A second failure is deliberately not caught
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                lagging.append(replica)

        # Short-circuit keeps the sync check from running unless requested
        out_of_sync = bool(replicas_synced and
                           not confirm_replicas_in_sync(replicas))
        if out_of_sync:
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if not lagging and not out_of_sync:
            return
        elif (time.time() - start) > timeout:
            # If only the in-sync check failed there is no single culprit,
            # so report the whole set.
            culprits = lagging if lagging else replicas
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(
                                r=', '.join(sorted(str(c)
                                                   for c in culprits))))
        else:
            log.info('Sleeping for 5 second to allow replication to catch up')
            time.sleep(5)
def confirm_max_replica_lag(replicas, lag_tolerance, dead_master,
                            replicas_synced=False, timeout=0):
    """ Wait until replication on every replica is sane and unlagged

    Args:
    replicas - A set of hostaddr object to be tested for replication lag
    lag_tolerance - Lag tolerance passed unchanged to
                    mysql_lib.assert_replication_unlagged for each replica.
    dead_master - If set, only mysql_lib.CHECK_SQL_THREAD and
                  mysql_lib.CHECK_CORRECT_MASTER are asserted instead of
                  mysql_lib.ALL_REPLICATION_CHECKS. Also passed through to
                  mysql_lib.assert_replication_unlagged.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log. When set,
                      confirm_replicas_in_sync(replicas) must also pass.
    timeout - How long (in seconds) to keep retrying before giving up

    Raises:
    Exception - If replication is still not acceptable once timeout has
                elapsed. The message lists the replicas that failed the
                lag check, or every replica when only the in-sync check
                failed.
    Any error from mysql_lib.assert_replication_sanity propagates if the
    sanity check still fails after one replication restart.
    """
    start = time.time()
    if dead_master:
        replication_checks = set([mysql_lib.CHECK_SQL_THREAD,
                                  mysql_lib.CHECK_CORRECT_MASTER])
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        # Replicas that failed the lag check during this pass. Tracking
        # them explicitly lets the timeout error name the real offenders
        # instead of whichever replica the loop happened to visit last.
        lagging = []
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica, replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                # A second failure is deliberately not caught
                mysql_lib.assert_replication_sanity(replica, replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica, lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                lagging.append(replica)

        # Short-circuit keeps the sync check from running unless requested
        out_of_sync = bool(replicas_synced and
                           not confirm_replicas_in_sync(replicas))
        if out_of_sync:
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if not lagging and not out_of_sync:
            return
        elif (time.time() - start) > timeout:
            # If only the in-sync check failed there is no single culprit,
            # so report the whole set.
            culprits = lagging if lagging else replicas
            failed = ', '.join(sorted(str(c) for c in culprits))
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(r=failed))
        else:
            log.info('Sleeping for 5 second to allow replication to catch up')
            time.sleep(5)
# Example #3
# 0
def wait_for_repl_sync(instance):
    """ Block until replication on an instance is fully caught up

    Polls mysql_lib.assert_replication_unlagged with
    mysql_lib.REPLICATION_TOLERANCE_NONE, sleeping 5 seconds between
    attempts, and gives up once REPL_SYNC_MAX_SECONDS have elapsed.

    args:
    instance - A hostaddr instance

    Raises:
    Exception - If the lag check is still failing after
                REPL_SYNC_MAX_SECONDS.
    """
    began = time.time()
    while True:
        try:
            mysql_lib.assert_replication_unlagged(
                instance, mysql_lib.REPLICATION_TOLERANCE_NONE)
        except Exception as err:
            # Not caught up yet; log and fall through to the retry logic
            log.warning(err)
        else:
            return

        elapsed = time.time() - began
        if elapsed > REPL_SYNC_MAX_SECONDS:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {}'.format(instance))

        log.info('Sleeping for 5 second to allow replication to catch up')
        time.sleep(5)
# Example #4
# 0
def check_replication_for_migration(source_replica_set,
                                    destination_replica_set):
    """ Confirm that replication is sane for finishing a shard migration

    Checks, in order: the source slave, destination slave and destination
    master are unlagged (mysql_lib.REPLICATION_TOLERANCE_NORMAL); the
    source master has no slave status of its own; and the destination
    master replicates from the source master.

    Args:
    source_replica_set - Where shards are coming from
    destination_replica_set - Where shards are being sent

    Raises:
    Exception - If the source master is itself set up for replication, or
                if the destination master's master is not the source
                master.
    Errors from mysql_lib.assert_replication_unlagged and from the
    get_slave_status call on the destination master are not caught here.
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    # First we will confirm that the slave of the source is caught up
    # this is important for row count comparisons
    mysql_lib.assert_replication_unlagged(
        source_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the slave of the destination replica set for the same reason
    mysql_lib.assert_replication_unlagged(
        destination_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the destination master is relatively caught up to the source master
    mysql_lib.assert_replication_unlagged(
        destination_master, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # We will also verify that the source master is not replicating. A scary
    # scenario is if there is some sort of ring replication going and db
    # drops of blackhole db's would propagate to the source db.
    try:
        source_slave_status = mysql_lib.get_slave_status(source_master)
    except mysql_lib.ReplicationError:
        # A ReplicationError here is treated as "not replicating", which
        # is the desired state for the source master.
        source_slave_status = None

    if source_slave_status:
        raise Exception('Source master is setup for replication '
                        'this is super dangerous!')

    # We will also verify that the destination master is replicating from the
    # source master
    slave_status = mysql_lib.get_slave_status(destination_master)
    master_of_destination_master = host_utils.HostAddr(':'.join(
        (slave_status['Master_Host'], str(slave_status['Master_Port']))))
    if source_master != master_of_destination_master:
        # The expected master is the source master; reporting the
        # destination master here would make the message self-referential.
        raise Exception('Master of destination {d} is {actual} rather than '
                        'expected {expected} '
                        ''.format(d=destination_master,
                                  actual=master_of_destination_master,
                                  expected=source_master))
    log.info('Replication looks ok for migration')
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    The replica must pass the replication sanity and lag checks, and its
    master must already be registered in zk as a master. The replica set
    is derived from that master.

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk

    Raises:
    Exception - If replica_type is invalid, no zk connection could be
                obtained, the detected master is not a master in zk, or
                the new config would be identical to the existing one.
    Any exception is logged via log.exception and then re-raised.
    """
    try:
        if replica_type not in [
                host_utils.REPLICA_ROLE_DR_SLAVE, host_utils.REPLICA_ROLE_SLAVE
        ]:
            # .format() must be applied to the message string, not to the
            # Exception instance (which has no format method).
            raise Exception('Invalid value "{replica_type}" for argument '
                            "replica_type".format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(
                host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance, master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data,
             version) = get_zk_node_for_replica_set(kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Work on a deep copy so the original can be compared against
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version read earlier is passed along with the write
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # The replica set's dr entry is replaced wholesale
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data), dr_meta.version)
        else:
            # Unreachable: invalid replica_type values are rejected by the
            # validation at the top of this function.
            pass
    except Exception as e:
        log.exception(e)
        raise
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    The replica must pass the replication sanity and lag checks, and its
    master must already be registered in zk as a master. The replica set
    is derived from that master.

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk

    Raises:
    Exception - If replica_type is invalid, no zk connection could be
                obtained, the detected master is not a master in zk, or
                the new config would be identical to the existing one.
    Any exception is logged via log.exception and then re-raised.
    """
    try:
        if replica_type not in [host_utils.REPLICA_ROLE_DR_SLAVE,
                                host_utils.REPLICA_ROLE_SLAVE]:
            # .format() must be applied to the message string, not to the
            # Exception instance (which has no format method).
            raise Exception('Invalid value "{replica_type}" for argument '
                            "replica_type".format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        known_masters = zk_local.get_all_mysql_instances_by_type(
            host_utils.REPLICA_ROLE_MASTER)
        if master not in known_masters:
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance,
                                      master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data, version) = get_zk_node_for_replica_set(
                kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Work on a deep copy so the original can be compared against
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version read earlier is passed along with the write
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # The replica set's dr entry is replaced wholesale
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data),
                                 dr_meta.version)
        else:
            # Unreachable: invalid replica_type values are rejected by the
            # validation at the top of this function.
            pass
    except Exception as e:
        log.exception(e)
        raise