コード例 #1
0
def swap_slave_and_dr_slave(instance, dry_run):
    """ Swap a slave and a dr_slave in zk

    The slave entry stored in the replica set's own znode and the dr_slave
    entry stored in the DR znode (environment_specific.DR_ZK) are exchanged.
    The two znodes are written one after the other, so the update is not
    atomic.

    Args:
    instance - An instance that is either a slave or dr_slave
    dry_run - If set, log the resulting configuration but do not modify zk

    Raises:
    Exception - If a zk connection can not be obtained, if the replica set
                is missing from the DR znode, or if writing the DR znode
                fails after the replica set's znode was already updated.
    """
    zk_local = host_utils.MysqlZookeeper()
    kazoo_client = environment_specific.get_kazoo_client()
    if not kazoo_client:
        raise Exception('Could not get a zk connection')

    log.info('Instance is {}'.format(instance))
    replica_set = zk_local.get_replica_set_from_instance(instance)

    log.info('Detected replica_set as {}'.format(replica_set))
    (zk_node,
     parsed_data,
     version) = get_zk_node_for_replica_set(kazoo_client, replica_set)
    log.info('Replica set {replica_set} is held in zk_node '
             '{zk_node}'.format(zk_node=zk_node,
                                replica_set=replica_set))

    log.info('Existing config:')
    log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
    new_data = copy.deepcopy(parsed_data)

    dr_znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
    dr_parsed_data = simplejson.loads(dr_znode_data)
    # The membership test has to look at the DR znode contents: that is
    # what the error message reports and what is indexed right below.
    if replica_set not in dr_parsed_data:
        raise Exception('Replica set {replica_set} is not present '
                        'in dr_node'.format(replica_set=replica_set))
    new_dr_data = copy.deepcopy(dr_parsed_data)
    log.info('Existing dr config:')
    log.info(pprint.pformat(remove_auth(dr_parsed_data[replica_set])))

    # Cross-assign: the DR znode's dr_slave becomes the slave and the
    # regular znode's slave becomes the dr_slave.
    new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] = \
        dr_parsed_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE]
    new_dr_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE] = \
        parsed_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]

    log.info('New config:')
    log.info(pprint.pformat(remove_auth(new_data[replica_set])))

    log.info('New dr config:')
    log.info(pprint.pformat(remove_auth(new_dr_data[replica_set])))

    if dry_run:
        log.info('dry_run is set, therefore not modifying zk')
    else:
        log.info('Pushing new configuration for '
                 '{replica_set}:'.format(replica_set=replica_set))
        # Both writes pass the version read earlier, so zk can reject them
        # if the znode changed in the meantime.
        kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        try:
            kazoo_client.set(environment_specific.DR_ZK,
                             simplejson.dumps(new_dr_data), dr_meta.version)
        except Exception as e:
            # The first write already went through, so the two znodes now
            # disagree. Record the underlying error before raising.
            log.error(e)
            raise Exception('DR node is incorrect due to a different change '
                            'blocking this change.  Manual intervention '
                            'is required.')
コード例 #2
0
def swap_slave_and_dr_slave(instance, dry_run):
    """ Swap a slave and a dr_slave in zk

    The slave entry stored in the replica set's own znode and the dr_slave
    entry stored in the DR znode (environment_specific.DR_ZK) are exchanged.
    The two znodes are written one after the other, so the update is not
    atomic.

    Args:
    instance - An instance that is either a slave or dr_slave
    dry_run - If set, log the resulting configuration but do not modify zk

    Raises:
    Exception - If a zk connection can not be obtained, if the replica set
                is missing from the DR znode, or if writing the DR znode
                fails after the replica set's znode was already updated.
    """
    zk_local = host_utils.MysqlZookeeper()
    kazoo_client = environment_specific.get_kazoo_client()
    if not kazoo_client:
        raise Exception('Could not get a zk connection')

    log.info('Instance is {inst}'.format(inst=instance))
    (replica_set, _) = zk_local.get_replica_set_from_instance(instance)
    log.info('Detected replica_set as '
             '{replica_set}'.format(replica_set=replica_set))
    (zk_node,
     parsed_data,
     version) = get_zk_node_for_replica_set(kazoo_client, replica_set)
    log.info('Replica set {replica_set} is held in zk_node '
             '{zk_node}'.format(zk_node=zk_node,
                                replica_set=replica_set))

    log.info('Existing config:')
    log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
    new_data = copy.deepcopy(parsed_data)

    dr_znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
    dr_parsed_data = simplejson.loads(dr_znode_data)
    # The membership test has to look at the DR znode contents: that is
    # what the error message reports and what is indexed right below.
    if replica_set not in dr_parsed_data:
        raise Exception('Replica set {replica_set} is not present '
                        'in dr_node'.format(replica_set=replica_set))
    new_dr_data = copy.deepcopy(dr_parsed_data)
    log.info('Existing dr config:')
    log.info(pprint.pformat(remove_auth(dr_parsed_data[replica_set])))

    # Cross-assign: the DR znode's dr_slave becomes the slave and the
    # regular znode's slave becomes the dr_slave.
    new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] = \
        dr_parsed_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE]
    new_dr_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE] = \
        parsed_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]

    log.info('New config:')
    log.info(pprint.pformat(remove_auth(new_data[replica_set])))

    log.info('New dr config:')
    log.info(pprint.pformat(remove_auth(new_dr_data[replica_set])))

    if dry_run:
        log.info('dry_run is set, therefore not modifying zk')
    else:
        log.info('Pushing new configuration for '
                 '{replica_set}:'.format(replica_set=replica_set))
        # Both writes pass the version read earlier, so zk can reject them
        # if the znode changed in the meantime.
        kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        try:
            kazoo_client.set(environment_specific.DR_ZK,
                             simplejson.dumps(new_dr_data), dr_meta.version)
        except Exception as e:
            # The first write already went through, so the two znodes now
            # disagree. Record the underlying error before raising.
            log.error(e)
            raise Exception('DR node is incorrect due to a different change '
                            'blocking this change. You need to fix it yourself')
コード例 #3
0
def swap_master_and_slave(instance, dry_run):
    """ Exchange the master and slave entries of a replica set in zk.

    Warning: no sanity checks are performed here, the zk configuration is
    updated and nothing else. YOU HAVE BEEN WARNED!

    Args:
    instance - An instance in the replica set. The replica set and the zk
               node holding it are looked up from this instance.
    dry_run - If set, do not modify configuration.

    Raises:
    Exception - If no zk connection is available, or if the swapped
                configuration equals the existing one.
    """
    zookeeper = host_utils.MysqlZookeeper()
    zk_conn = environment_specific.get_kazoo_client()
    if not zk_conn:
        raise Exception('Could not get a zk connection')

    log.info('Instance is {}'.format(instance))

    replica_set = zookeeper.get_replica_set_from_instance(instance)
    log.info('Detected replica_set as {}'.format(replica_set))

    lookup = get_zk_node_for_replica_set(zk_conn, replica_set)
    zk_node, current_config, znode_version = lookup
    log.info('Replica set {replica_set} is held in zk_node '
             '{zk_node}'.format(zk_node=zk_node,
                                replica_set=replica_set))

    log.info('Existing config:')
    log.info(pprint.pformat(remove_auth(current_config[replica_set])))

    # Work on a deep copy so the original stays available for comparison.
    swapped_config = copy.deepcopy(current_config)
    master_role = host_utils.REPLICA_ROLE_MASTER
    slave_role = host_utils.REPLICA_ROLE_SLAVE
    old_roles = current_config[replica_set]
    new_roles = swapped_config[replica_set]
    new_roles[master_role], new_roles[slave_role] = (old_roles[slave_role],
                                                     old_roles[master_role])

    log.info('New config:')
    log.info(pprint.pformat(remove_auth(new_roles)))

    # Refuse to write a configuration that is identical to the current one.
    if swapped_config == current_config:
        raise Exception('No change would be made to zk, '
                        'will not write new config')

    if dry_run:
        log.info('dry_run is set, therefore not modifying zk')
        return

    log.info('Pushing new configuration for '
             '{replica_set}:'.format(replica_set=replica_set))
    zk_conn.set(zk_node, simplejson.dumps(swapped_config), znode_version)
コード例 #4
0
def swap_master_and_slave(instance, dry_run):
    """ Exchange the master and slave entries of a replica set in zk.

    Warning: no sanity checks are performed here, the zk configuration is
    updated and nothing else. YOU HAVE BEEN WARNED!

    Args:
    instance - An instance in the replica set. The replica set and the zk
               node holding it are looked up from this instance.
    dry_run - If set, do not modify configuration.

    Raises:
    Exception - If no zk connection is available, or if the swapped
                configuration equals the existing one.
    """
    zookeeper = host_utils.MysqlZookeeper()
    zk_conn = environment_specific.get_kazoo_client()
    if not zk_conn:
        raise Exception('Could not get a zk connection')

    log.info('Instance is {inst}'.format(inst=instance))
    # Only the replica set name is needed from the returned pair.
    (replica_set, _) = zookeeper.get_replica_set_from_instance(instance)
    log.info('Detected replica_set as '
             '{replica_set}'.format(replica_set=replica_set))

    lookup = get_zk_node_for_replica_set(zk_conn, replica_set)
    zk_node, current_config, znode_version = lookup
    log.info('Replica set {replica_set} is held in zk_node '
             '{zk_node}'.format(zk_node=zk_node,
                                replica_set=replica_set))

    log.info('Existing config:')
    log.info(pprint.pformat(remove_auth(current_config[replica_set])))

    # Work on a deep copy so the original stays available for comparison.
    swapped_config = copy.deepcopy(current_config)
    master_role = host_utils.REPLICA_ROLE_MASTER
    slave_role = host_utils.REPLICA_ROLE_SLAVE
    old_roles = current_config[replica_set]
    new_roles = swapped_config[replica_set]
    new_roles[master_role], new_roles[slave_role] = (old_roles[slave_role],
                                                     old_roles[master_role])

    log.info('New config:')
    log.info(pprint.pformat(remove_auth(new_roles)))

    # Refuse to write a configuration that is identical to the current one.
    if swapped_config == current_config:
        raise Exception('No change would be made to zk, '
                        'will not write new config')

    if dry_run:
        log.info('dry_run is set, therefore not modifying zk')
        return

    log.info('Pushing new configuration for '
             '{replica_set}:'.format(replica_set=replica_set))
    zk_conn.set(zk_node, simplejson.dumps(swapped_config), znode_version)
コード例 #5
0
ファイル: mysql_failover.py プロジェクト: mtanda/mysql_utils
def mysql_failover(master, dry_run, skip_lock,
                   ignore_dr_slave, trust_me_its_dead, kill_old_master):
    """ Promote a new MySQL master

    The slave of the master's replica set is promoted. All checks and the
    demotion of a live master run inside one try block; any failure there
    triggers a rollback and the exception is re-raised. Afterwards zk is
    updated, the new master is made writable and, if the old master is
    considered dead, a replacement host is launched.

    Args:
    master - Hostaddr object of the master instance to be demoted
    dry_run - Do not change state, just do sanity testing and exit. The
              process is terminated with os._exit(0) once the checks pass.
    skip_lock - Do not take a promotion lock
    ignore_dr_slave - Ignore the existence of a dr_slave
    trust_me_its_dead - Do not test to see if the master is dead
    kill_old_master - Send a mysqladmin kill command to the old master
                      (never done in dry_run mode)

    Returns:
    None. The new master is the instance that was registered as the slave
    of the replica set.
    """
    log.info('Master to demote is {master}'.format(master=master))

    zk = host_utils.MysqlZookeeper()
    (replica_set, _) = zk.get_replica_set_from_instance(master, rtypes=['master'])
    log.info('Replica set is detected as '
             '{replica_set}'.format(replica_set=replica_set))

    # take a lock here to make sure nothing changes underneath us
    if not skip_lock and not dry_run:
        log.info('Taking promotion lock on replica set')
        lock_identifier = get_promotion_lock(replica_set)
    else:
        lock_identifier = None

    # giant try. If there any problems we roll back from the except
    try:
        # Set before anything can raise so the rollback handler can always
        # test it.
        master_conn = False
        slave = zk.get_mysql_instance_from_replica_set(replica_set=replica_set,
                                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
        log.info('Slave/new master is detected as {slave}'.format(slave=slave))

        if ignore_dr_slave:
            log.info('Intentionally ignoring a dr_slave')
            dr_slave = None
        else:
            dr_slave = zk.get_mysql_instance_from_replica_set(replica_set,
                                                              host_utils.REPLICA_ROLE_DR_SLAVE)
        log.info('DR slave is detected as {dr_slave}'.format(dr_slave=dr_slave))
        if dr_slave:
            if dr_slave == slave:
                raise Exception('Slave and dr_slave appear to be the same')

            replicas = set([slave, dr_slave])
        else:
            replicas = set([slave])

        # let's make sure that what we think is the master, actually is
        confirm_replica_topology(master, replicas)

        # We use master_conn as a mysql connection to the master server, if
        # it is False, the master is dead
        if trust_me_its_dead:
            master_conn = None
        else:
            master_conn = is_master_alive(master, replicas)
        slave_conn = mysql_lib.connect_mysql(slave)

        # Test to see if the slave is setup for replication. If not, we are hosed
        log.info('Testing to see if Slave/new master is setup to write '
                 'replication logs')
        try:
            mysql_lib.get_master_status(slave_conn)
        except mysql_lib.ReplicationError:
            log.error('New master {slave} is not setup to write replicaiton '
                      'logs!'.format(slave=slave))
            raise
        log.info('Slave/new master is setup to write replication logs')

        # Shutting down the master changes state, so it must never happen
        # in dry_run mode.
        if kill_old_master and not dry_run:
            log.info('Killing old master, we hope you know what you are doing')
            mysql_lib.shutdown_mysql(master)
            master_conn = None

        # A different lag limit is applied depending on whether the master
        # is alive or dead.
        if master_conn:
            log.info('Master is considered alive')
            dead_master = False
            confirm_max_replica_lag(replicas, MAX_ALIVE_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)
        else:
            log.info('Master is considered dead')
            dead_master = True
            confirm_max_replica_lag(replicas, MAX_DEAD_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)

        if dry_run:
            log.info('In dry_run mode, so exiting now')
            # Using os._exit in order to not get catch in the giant try
            os._exit(0)

        log.info('Preliminary sanity checks complete, starting promotion')

        if master_conn:
            log.info('Setting read_only on master')
            mysql_lib.set_global_variable(master_conn, 'read_only', True)
            log.info('Confirming no writes to old master')
            # If there are writes with the master in read_only mode then the
            # promotion can not proceed.
            # A likely reason is a client has the SUPER privilege.
            confirm_no_writes(master_conn)
            log.info('Waiting for replicas to be caught up')
            confirm_max_replica_lag(replicas, 0,
                                    timeout=MAX_ALIVE_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)
            log.info('Setting up replication from old master ({master}) '
                     'to new master ({slave})'.format(master=master,
                                                      slave=slave))
            mysql_lib.setup_replication(new_master=slave, new_replica=master)
        else:
            log.info('Starting up a zk connection to make sure we can connect')
            kazoo_client = environment_specific.get_kazoo_client()
            if not kazoo_client:
                raise Exception('Could not conect to zk')

            log.info('Confirming replica has processed all replication '
                     ' logs')
            confirm_no_writes(slave_conn)
            log.info('Looks like no writes being processed by replica via '
                     'replication or other means')
            if len(replicas) > 1:
                log.info('Confirming relpica servers in sync')
                confirm_max_replica_lag(replicas, MAX_DEAD_MASTER_SLAVE_LAG_SECONDS,
                                        replicas_synced=True,
                                        dead_master=dead_master)
    except:
        # Deliberately catches everything: whatever went wrong, undo the
        # changes made to a live master and release the lock, then re-raise.
        log.info('Starting rollback')
        if master_conn:
            log.info('Releasing read_only on old master')
            mysql_lib.set_global_variable(master_conn, 'read_only', False)

            log.info('Clearing replication settings on old master')
            mysql_lib.reset_slave(master_conn)
        if lock_identifier:
            log.info('Releasing promotion lock')
            release_promotion_lock(lock_identifier)
        log.info('Rollback complete, reraising exception')
        raise

    # From here on there is no rollback: failures are either logged and
    # ignored (dr_slave) or retried (zk).
    if dr_slave:
        try:
            mysql_lib.setup_replication(new_master=slave, new_replica=dr_slave)
        except Exception as e:
            log.error(e)
            log.error('Setting up replication on the dr_slave failed. '
                      'Failing forward!')

    log.info('Updating zk')
    zk_write_attempt = 0
    # A failed write is retried until the attempt counter exceeds
    # MAX_ZK_WRITE_ATTEMPTS; the last failure is then re-raised.
    while True:
        try:
            modify_mysql_zk.swap_master_and_slave(slave, dry_run=False)
            break
        except:
            if zk_write_attempt > MAX_ZK_WRITE_ATTEMPTS:
                log.info('Final failure writing to zk, bailing')
                raise
            else:
                log.info('Write to zk failed, trying again')
                zk_write_attempt = zk_write_attempt+1

    log.info('Removing read_only from new master')
    mysql_lib.set_global_variable(slave_conn, 'read_only', False)
    log.info('Removing replication configuration from new master')
    mysql_lib.reset_slave(slave_conn)
    if lock_identifier:
        log.info('Releasing promotion lock')
        release_promotion_lock(lock_identifier)

    log.info('Failover complete')

    if not master_conn:
        log.info('As master is dead, will try to launch a replacement. Will '
                 'sleep 20 seconds first to let things settle')
        time.sleep(20)
        launch_replacement_db_host.launch_replacement_db_host(master)
コード例 #6
0
def mysql_failover(master, dry_run, skip_lock, ignore_dr_slave,
                   trust_me_its_dead, kill_old_master):
    """ Promote a new MySQL master

    The slave of the master's replica set is promoted. All checks and the
    demotion of a live master run inside one try block; any failure there
    triggers a rollback and the exception is re-raised. Afterwards zk is
    updated, the new master is made writable, a change feed entry is posted
    (best effort) and, if the old master is considered dead, a replacement
    host is launched.

    Args:
    master - Hostaddr object of the master instance to be demoted
    dry_run - Do not change state, just do sanity testing and exit. The
              process is terminated with
              os._exit(environment_specific.DRY_RUN_EXIT_CODE) once the
              checks pass.
    skip_lock - Do not take a promotion lock
    ignore_dr_slave - Ignore the existence of a dr_slave
    trust_me_its_dead - Do not test to see if the master is dead
    kill_old_master - Send a mysqladmin kill command to the old master
                      (not done in dry_run mode)

    Returns:
    None. The new master is the instance that was registered as the slave
    of the replica set.
    """
    log.info('Master to demote is {master}'.format(master=master))

    zk = host_utils.MysqlZookeeper()
    (replica_set, _) = zk.get_replica_set_from_instance(master,
                                                        rtypes=['master'])
    log.info('Replica set is detected as '
             '{replica_set}'.format(replica_set=replica_set))

    # take a lock here to make sure nothing changes underneath us
    if not skip_lock and not dry_run:
        log.info('Taking promotion lock on replica set')
        lock_identifier = get_promotion_lock(replica_set)
    else:
        lock_identifier = None

    # giant try. If there any problems we roll back from the except
    try:
        # Set before anything can raise so the rollback handler can always
        # test it.
        master_conn = False
        slave = zk.get_mysql_instance_from_replica_set(
            replica_set=replica_set, repl_type=host_utils.REPLICA_ROLE_SLAVE)
        log.info('Slave/new master is detected as {slave}'.format(slave=slave))

        if ignore_dr_slave:
            log.info('Intentionally ignoring a dr_slave')
            dr_slave = None
        else:
            dr_slave = zk.get_mysql_instance_from_replica_set(
                replica_set, host_utils.REPLICA_ROLE_DR_SLAVE)
        log.info(
            'DR slave is detected as {dr_slave}'.format(dr_slave=dr_slave))
        if dr_slave:
            if dr_slave == slave:
                raise Exception('Slave and dr_slave appear to be the same')

            replicas = set([slave, dr_slave])
        else:
            replicas = set([slave])

        # We use master_conn as a mysql connection to the master server, if
        # it is False, the master is dead
        if trust_me_its_dead:
            master_conn = None
        else:
            master_conn = is_master_alive(master, replicas)

        # Test to see if the slave is setup for replication. If not, we are hosed
        log.info('Testing to see if Slave/new master is setup to write '
                 'replication logs')
        # NOTE(review): presumably raises when the slave is not writing
        # replication logs; the return value is not used - confirm against
        # mysql_lib.
        mysql_lib.get_master_status(slave)

        # Shutting down the master changes state, so it is skipped in
        # dry_run mode.
        if kill_old_master and not dry_run:
            log.info('Killing old master, we hope you know what you are doing')
            mysql_lib.shutdown_mysql(master)
            master_conn = None

        # A different replication tolerance is applied depending on whether
        # the master is alive or dead.
        if master_conn:
            log.info('Master is considered alive')
            dead_master = False
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_NORMAL,
                                    dead_master)
        else:
            log.info('Master is considered dead')
            dead_master = True
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_LOOSE,
                                    dead_master)

        if dry_run:
            log.info('In dry_run mode, so exiting now')
            # Using os._exit in order to not get catch in the giant try
            os._exit(environment_specific.DRY_RUN_EXIT_CODE)

        log.info('Preliminary sanity checks complete, starting promotion')

        if master_conn:
            log.info('Setting read_only on master')
            mysql_lib.set_global_variable(master, 'read_only', True)
            log.info('Confirming no writes to old master')
            # If there are writes with the master in read_only mode then the
            # promotion can not proceed.
            # A likely reason is a client has the SUPER privilege.
            confirm_no_writes(master)
            log.info('Waiting for replicas to be caught up')
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_NONE,
                                    dead_master, True,
                                    mysql_lib.NORMAL_HEARTBEAT_LAG)
            log.info('Setting up replication from old master ({master}) '
                     'to new master ({slave})'.format(master=master,
                                                      slave=slave))
            mysql_lib.setup_replication(new_master=slave, new_replica=master)
        else:
            log.info('Starting up a zk connection to make sure we can connect')
            # The client itself is not used further in this function; only
            # the ability to connect is checked.
            kazoo_client = environment_specific.get_kazoo_client()
            if not kazoo_client:
                raise Exception('Could not conect to zk')

            log.info('Confirming replica has processed all replication '
                     ' logs')
            confirm_no_writes(slave)
            log.info('Looks like no writes being processed by replica via '
                     'replication or other means')
            if len(replicas) > 1:
                log.info('Confirming replica servers are synced')
                confirm_max_replica_lag(replicas,
                                        mysql_lib.REPLICATION_TOLERANCE_LOOSE,
                                        dead_master, True)
    except:
        # Deliberately catches everything: whatever went wrong, undo the
        # changes made to a live master and release the lock, then re-raise.
        log.info('Starting rollback')
        if master_conn:
            log.info('Releasing read_only on old master')
            mysql_lib.set_global_variable(master, 'read_only', False)

            log.info('Clearing replication settings on old master')
            mysql_lib.reset_slave(master)
        if lock_identifier:
            log.info('Releasing promotion lock')
            release_promotion_lock(lock_identifier)
        log.info('Rollback complete, reraising exception')
        raise

    # From here on there is no rollback: failures are either logged and
    # ignored (dr_slave, change feed) or retried (zk).
    if dr_slave:
        try:
            mysql_lib.setup_replication(new_master=slave, new_replica=dr_slave)
        except Exception as e:
            log.error(e)
            log.error('Setting up replication on the dr_slave failed. '
                      'Failing forward!')

    log.info('Updating zk')
    zk_write_attempt = 0
    # A failed write is retried until the attempt counter exceeds
    # MAX_ZK_WRITE_ATTEMPTS; the last failure is then re-raised.
    while True:
        try:
            modify_mysql_zk.swap_master_and_slave(slave, dry_run=False)
            break
        except:
            if zk_write_attempt > MAX_ZK_WRITE_ATTEMPTS:
                log.info('Final failure writing to zk, bailing')
                raise
            else:
                log.info('Write to zk failed, trying again')
                zk_write_attempt = zk_write_attempt + 1

    log.info('Removing read_only from new master')
    mysql_lib.set_global_variable(slave, 'read_only', False)
    log.info('Removing replication configuration from new master')
    mysql_lib.reset_slave(slave)
    if lock_identifier:
        log.info('Releasing promotion lock')
        release_promotion_lock(lock_identifier)

    log.info('Failover complete')

    # we don't really care if this fails, but we'll print a message anyway.
    try:
        environment_specific.generic_json_post(
            environment_specific.CHANGE_FEED_URL, {
                'type': 'MySQL Failover',
                'environment': replica_set,
                'description': "Failover from {m} to {s}".format(m=master,
                                                                 s=slave),
                'author': host_utils.get_user(),
                'automation': False,
                'source': "mysql_failover.py on {}".format(host_utils.HOSTNAME)
            })
    except Exception as e:
        log.warning("Failover completed, but change feed "
                    "not updated: {}".format(e))

    # master_conn is falsy when the master was declared, found or made dead.
    if not master_conn:
        log.info('As master is dead, will try to launch a replacement. Will '
                 'sleep 20 seconds first to let things settle')
        time.sleep(20)
        launch_replacement_db_host.launch_replacement_db_host(master)
コード例 #7
0
ファイル: mysql_failover.py プロジェクト: zhuhowe/mysql_utils
def mysql_failover(master, dry_run, skip_lock, ignore_dr_slave,
                   trust_me_its_dead, kill_old_master):
    """ Promote a new MySQL master

    The slave of the master's replica set is promoted. All checks and the
    demotion of a live master run inside one try block; any failure there
    triggers a rollback and the exception is re-raised. Afterwards zk is
    updated, the new master is made writable and, if the old master is
    considered dead, a replacement host is launched.

    Args:
    master - Hostaddr object of the master instance to be demoted
    dry_run - Do not change state, just do sanity testing and exit. The
              process is terminated with os._exit(0) once the checks pass.
    skip_lock - Do not take a promotion lock
    ignore_dr_slave - Ignore the existence of a dr_slave
    trust_me_its_dead - Do not test to see if the master is dead
    kill_old_master - Send a mysqladmin kill command to the old master
                      (never done in dry_run mode)

    Returns:
    None. The new master is the instance that was registered as the slave
    of the replica set.
    """
    log.info('Master to demote is {master}'.format(master=master))

    zk = host_utils.MysqlZookeeper()
    (replica_set, _) = zk.get_replica_set_from_instance(master,
                                                        rtypes=['master'])
    log.info('Replica set is detected as '
             '{replica_set}'.format(replica_set=replica_set))

    # take a lock here to make sure nothing changes underneath us
    if not skip_lock and not dry_run:
        log.info('Taking promotion lock on replica set')
        lock_identifier = get_promotion_lock(replica_set)
    else:
        lock_identifier = None

    # giant try. If there any problems we roll back from the except
    try:
        # Set before anything can raise so the rollback handler can always
        # test it.
        master_conn = False
        slave = zk.get_mysql_instance_from_replica_set(
            replica_set=replica_set, repl_type=host_utils.REPLICA_ROLE_SLAVE)
        log.info('Slave/new master is detected as {slave}'.format(slave=slave))

        if ignore_dr_slave:
            log.info('Intentionally ignoring a dr_slave')
            dr_slave = None
        else:
            dr_slave = zk.get_mysql_instance_from_replica_set(
                replica_set, host_utils.REPLICA_ROLE_DR_SLAVE)
        log.info(
            'DR slave is detected as {dr_slave}'.format(dr_slave=dr_slave))
        if dr_slave:
            if dr_slave == slave:
                raise Exception('Slave and dr_slave appear to be the same')

            replicas = set([slave, dr_slave])
        else:
            replicas = set([slave])

        # let's make sure that what we think is the master, actually is
        confirm_replica_topology(master, replicas)

        # We use master_conn as a mysql connection to the master server, if
        # it is False, the master is dead
        if trust_me_its_dead:
            master_conn = None
        else:
            master_conn = is_master_alive(master, replicas)
        slave_conn = mysql_lib.connect_mysql(slave)

        # Test to see if the slave is setup for replication. If not, we are hosed
        log.info('Testing to see if Slave/new master is setup to write '
                 'replication logs')
        try:
            mysql_lib.get_master_status(slave_conn)
        except mysql_lib.ReplicationError:
            log.error('New master {slave} is not setup to write replicaiton '
                      'logs!'.format(slave=slave))
            raise
        log.info('Slave/new master is setup to write replication logs')

        # Shutting down the master changes state, so it must never happen
        # in dry_run mode.
        if kill_old_master and not dry_run:
            log.info('Killing old master, we hope you know what you are doing')
            mysql_lib.shutdown_mysql(master)
            master_conn = None

        # A different lag limit is applied depending on whether the master
        # is alive or dead.
        if master_conn:
            log.info('Master is considered alive')
            dead_master = False
            confirm_max_replica_lag(replicas,
                                    MAX_ALIVE_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)
        else:
            log.info('Master is considered dead')
            dead_master = True
            confirm_max_replica_lag(replicas,
                                    MAX_DEAD_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)

        if dry_run:
            log.info('In dry_run mode, so exiting now')
            # Using os._exit in order to not get catch in the giant try
            os._exit(0)

        log.info('Preliminary sanity checks complete, starting promotion')

        if master_conn:
            log.info('Setting read_only on master')
            mysql_lib.set_global_variable(master_conn, 'read_only', True)
            log.info('Confirming no writes to old master')
            # If there are writes with the master in read_only mode then the
            # promotion can not proceed.
            # A likely reason is a client has the SUPER privilege.
            confirm_no_writes(master_conn)
            log.info('Waiting for replicas to be caught up')
            confirm_max_replica_lag(replicas,
                                    0,
                                    timeout=MAX_ALIVE_MASTER_SLAVE_LAG_SECONDS,
                                    dead_master=dead_master)
            log.info('Setting up replication from old master ({master}) '
                     'to new master ({slave})'.format(master=master,
                                                      slave=slave))
            mysql_lib.setup_replication(new_master=slave, new_replica=master)
        else:
            log.info('Starting up a zk connection to make sure we can connect')
            kazoo_client = environment_specific.get_kazoo_client()
            if not kazoo_client:
                raise Exception('Could not conect to zk')

            log.info('Confirming replica has processed all replication '
                     ' logs')
            confirm_no_writes(slave_conn)
            log.info('Looks like no writes being processed by replica via '
                     'replication or other means')
            if len(replicas) > 1:
                log.info('Confirming relpica servers in sync')
                confirm_max_replica_lag(replicas,
                                        MAX_DEAD_MASTER_SLAVE_LAG_SECONDS,
                                        replicas_synced=True,
                                        dead_master=dead_master)
    except:
        # Deliberately catches everything: whatever went wrong, undo the
        # changes made to a live master and release the lock, then re-raise.
        log.info('Starting rollback')
        if master_conn:
            log.info('Releasing read_only on old master')
            mysql_lib.set_global_variable(master_conn, 'read_only', False)

            log.info('Clearing replication settings on old master')
            mysql_lib.reset_slave(master_conn)
        if lock_identifier:
            log.info('Releasing promotion lock')
            release_promotion_lock(lock_identifier)
        log.info('Rollback complete, reraising exception')
        raise

    # From here on there is no rollback: failures are either logged and
    # ignored (dr_slave) or retried (zk).
    if dr_slave:
        try:
            mysql_lib.setup_replication(new_master=slave, new_replica=dr_slave)
        except Exception as e:
            log.error(e)
            log.error('Setting up replication on the dr_slave failed. '
                      'Failing forward!')

    log.info('Updating zk')
    zk_write_attempt = 0
    # A failed write is retried until the attempt counter exceeds
    # MAX_ZK_WRITE_ATTEMPTS; the last failure is then re-raised.
    while True:
        try:
            modify_mysql_zk.swap_master_and_slave(slave, dry_run=False)
            break
        except:
            if zk_write_attempt > MAX_ZK_WRITE_ATTEMPTS:
                log.info('Final failure writing to zk, bailing')
                raise
            else:
                log.info('Write to zk failed, trying again')
                zk_write_attempt = zk_write_attempt + 1

    log.info('Removing read_only from new master')
    mysql_lib.set_global_variable(slave_conn, 'read_only', False)
    log.info('Removing replication configuration from new master')
    mysql_lib.reset_slave(slave_conn)
    if lock_identifier:
        log.info('Releasing promotion lock')
        release_promotion_lock(lock_identifier)

    log.info('Failover complete')

    if not master_conn:
        log.info('As master is dead, will try to launch a replacement. Will '
                 'sleep 20 seconds first to let things settle')
        time.sleep(20)
        launch_replacement_db_host.launch_replacement_db_host(master)
コード例 #8
0
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    The master of the instance is looked up with
    mysql_lib.get_master_from_instance and must already be registered as a
    master in zk; the replica set of that master decides which entry is
    rewritten. Slaves are stored in the zk node returned by
    get_zk_node_for_replica_set, dr_slaves in environment_specific.DR_ZK.

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk

    Raises:
    Exception - if replica_type is invalid, no zk connection could be
                obtained, the detected master is not a master in zk, or the
                new config would be identical to the existing one. Any
                exception is logged with log.exception and re-raised.
    """
    try:
        if replica_type not in (host_utils.REPLICA_ROLE_DR_SLAVE,
                                host_utils.REPLICA_ROLE_SLAVE):
            # The message must be formatted before it is handed to
            # Exception; calling .format() on the Exception instance raises
            # AttributeError and hides the real problem.
            raise Exception('Invalid value "{replica_type}" for argument '
                            'replica_type'.format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        # Refuse to register a replica that is broken or lagging.
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(
                host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance, master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data,
             version) = get_zk_node_for_replica_set(kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Work on a copy so the original can be compared against below.
            new_data = copy.deepcopy(parsed_data)
            slave_config = new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]
            slave_config['host'] = instance.hostname
            slave_config['port'] = instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version read earlier is passed along with the write.
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        else:
            # replica_type was validated above, so this is the dr_slave case.
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # The whole entry for the replica set is replaced, not merged.
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data), dr_meta.version)
    except Exception as e:
        # Log with traceback, then let the caller decide what to do.
        log.exception(e)
        raise
コード例 #9
0
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    The instance must pass the replication sanity and lag checks, and its
    master (from mysql_lib.get_master_from_instance) must be listed as a
    master in zk. A slave is written to the zk node returned by
    get_zk_node_for_replica_set; a dr_slave is written to
    environment_specific.DR_ZK.

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either 'slave' or 'dr_slave'.
    dry_run - If set, do not modify zk

    Raises:
    Exception - on an invalid replica_type, a missing zk connection, a
                master that is not a master in zk, or when the write would
                not change anything. Every exception is passed to
                log.exception and then re-raised.
    """
    valid_types = (host_utils.REPLICA_ROLE_DR_SLAVE,
                   host_utils.REPLICA_ROLE_SLAVE)
    try:
        if replica_type not in valid_types:
            # Format the string, not the Exception object: Exception has no
            # .format() method, so the old code died with AttributeError.
            message = ('Invalid value "{replica_type}" for argument '
                       'replica_type'.format(replica_type=replica_type))
            raise Exception(message)

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        known_masters = zk_local.get_all_mysql_instances_by_type(
            host_utils.REPLICA_ROLE_MASTER)
        if master not in known_masters:
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance,
                                      master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node,
             parsed_data,
             version) = get_zk_node_for_replica_set(kazoo_client,
                                                    replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Only host and port of the existing slave entry are replaced;
            # the copy keeps parsed_data intact for the comparison below.
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        else:
            # Only REPLICA_ROLE_DR_SLAVE can reach this branch because
            # replica_type was validated at the top of the function.
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # Any previous dr entry for this replica set is overwritten.
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version from the read above is sent with the write.
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data),
                                 dr_meta.version)
    except Exception as e:
        log.exception(e)
        raise
コード例 #10
0
def mysql_failover(master, dry_run, skip_lock,
                   ignore_dr_slave, trust_me_its_dead, kill_old_master):
    """ Promote a new MySQL master

    The slave registered in zk for the master's replica set becomes the new
    master. The work is split in two phases: a sanity-check/preparation
    phase that is rolled back on any error, and a commit phase (dr_slave
    repointing, zk update, enabling writes on the new master) that is not.

    Args:
    master - Hostaddr object of the master instance to be demoted
    dry_run - Do not change state, just do sanity testing and exit
    skip_lock - Do not take a promotion lock
    ignore_dr_slave - Ignore the existence of a dr_slave
    trust_me_its_dead - Do not test to see if the master is dead
    kill_old_master - Shut down mysql on the old master (not done in
                      dry_run mode)

    Returns:
    Nothing; this function has no return statement. In dry_run mode the
    process is terminated with os._exit using
    environment_specific.DRY_RUN_EXIT_CODE once the sanity checks pass.

    Raises:
    Any exception from the preparation phase is re-raised after the
    rollback. A failure to update zk is re-raised once the retry budget is
    exhausted.
    """
    log.info('Master to demote is {master}'.format(master=master))

    zk = host_utils.MysqlZookeeper()
    (replica_set, _) = zk.get_replica_set_from_instance(master, rtypes=['master'])
    log.info('Replica set is detected as '
             '{replica_set}'.format(replica_set=replica_set))

    # take a lock here to make sure nothing changes underneath us
    # (no lock is taken in dry_run mode, so lock_identifier is None there)
    if not skip_lock and not dry_run:
        log.info('Taking promotion lock on replica set')
        lock_identifier = get_promotion_lock(replica_set)
    else:
        lock_identifier = None

    # Giant try. If there are any problems we roll back from the except.
    try:
        # Set before anything can fail so the rollback handler can always
        # test master_conn.
        master_conn = False
        slave = zk.get_mysql_instance_from_replica_set(replica_set=replica_set,
                                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
        log.info('Slave/new master is detected as {slave}'.format(slave=slave))

        if ignore_dr_slave:
            log.info('Intentionally ignoring a dr_slave')
            dr_slave = None
        else:
            dr_slave = zk.get_mysql_instance_from_replica_set(replica_set,
                                                              host_utils.REPLICA_ROLE_DR_SLAVE)
        log.info('DR slave is detected as {dr_slave}'.format(dr_slave=dr_slave))
        if dr_slave:
            if dr_slave == slave:
                raise Exception('Slave and dr_slave appear to be the same')

            replicas = set([slave, dr_slave])
        else:
            replicas = set([slave])

        # master_conn records whether the master is reachable: a falsy value
        # (False/None) means the master is treated as dead for the rest of
        # the function.
        # NOTE(review): the exact return type of is_master_alive is not
        # visible here; only its truthiness is used below.
        if trust_me_its_dead:
            master_conn = None
        else:
            master_conn = is_master_alive(master, replicas)

        # Test to see if the slave is setup for replication. If not, we are hosed
        log.info('Testing to see if Slave/new master is setup to write '
                 'replication logs')
        mysql_lib.get_master_status(slave)

        if kill_old_master and not dry_run:
            log.info('Killing old master, we hope you know what you are doing')
            mysql_lib.shutdown_mysql(master)
            # from here on the master is handled as dead
            master_conn = None

        # A live master gets the normal lag tolerance, a dead one the loose
        # tolerance.
        if master_conn:
            log.info('Master is considered alive')
            dead_master = False
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_NORMAL,
                                    dead_master)
        else:
            log.info('Master is considered dead')
            dead_master = True
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_LOOSE,
                                    dead_master)

        if dry_run:
            log.info('In dry_run mode, so exiting now')
            # Using os._exit so that the exit is not caught by the bare
            # except of the giant try (os._exit does not raise SystemExit).
            os._exit(environment_specific.DRY_RUN_EXIT_CODE)

        log.info('Preliminary sanity checks complete, starting promotion')

        if master_conn:
            # Live master: stop writes, wait for the replicas to catch up,
            # then make the old master a replica of the new master.
            log.info('Setting read_only on master')
            mysql_lib.set_global_variable(master, 'read_only', True)
            log.info('Confirming no writes to old master')
            # If there are writes with the master in read_only mode then the
            # promotion can not proceed.
            # A likely reason is a client has the SUPER privilege.
            confirm_no_writes(master)
            log.info('Waiting for replicas to be caught up')
            confirm_max_replica_lag(replicas,
                                    mysql_lib.REPLICATION_TOLERANCE_NONE,
                                    dead_master,
                                    True,
                                    mysql_lib.NORMAL_HEARTBEAT_LAG)
            log.info('Setting up replication from old master ({master}) '
                     'to new master ({slave})'.format(master=master,
                                                      slave=slave))
            mysql_lib.setup_replication(new_master=slave, new_replica=master)
        else:
            # Dead master: make sure zk is reachable before committing, as
            # the promotion is only recorded in zk further down.
            log.info('Starting up a zk connection to make sure we can connect')
            kazoo_client = environment_specific.get_kazoo_client()
            if not kazoo_client:
                raise Exception('Could not conect to zk')

            log.info('Confirming replica has processed all replication '
                     ' logs')
            confirm_no_writes(slave)
            log.info('Looks like no writes being processed by replica via '
                     'replication or other means')
            if len(replicas) > 1:
                log.info('Confirming replica servers are synced')
                confirm_max_replica_lag(replicas,
                                        mysql_lib.REPLICATION_TOLERANCE_LOOSE,
                                        dead_master,
                                        True)
    except:
        # Rollback: undo what may have been done to a live master, release
        # the lock and re-raise. The bare except means this also runs for
        # non-Exception errors such as KeyboardInterrupt.
        # NOTE(review): if one of the rollback calls itself raises, the
        # promotion lock below is never released - confirm this is accepted.
        log.info('Starting rollback')
        if master_conn:
            log.info('Releasing read_only on old master')
            mysql_lib.set_global_variable(master, 'read_only', False)

            log.info('Clearing replication settings on old master')
            mysql_lib.reset_slave(master)
        if lock_identifier:
            log.info('Releasing promotion lock')
            release_promotion_lock(lock_identifier)
        log.info('Rollback complete, reraising exception')
        raise

    # Commit phase: from here on errors are no longer rolled back.
    if dr_slave:
        # Repoint the dr_slave at the new master. A failure is logged and
        # otherwise ignored so that the promotion itself still completes.
        try:
            mysql_lib.setup_replication(new_master=slave, new_replica=dr_slave)
        except Exception as e:
            log.error(e)
            log.error('Setting up replication on the dr_slave failed. '
                      'Failing forward!')

    log.info('Updating zk')
    zk_write_attempt = 0
    # Retry the zk swap until it succeeds or the attempt counter exceeds
    # MAX_ZK_WRITE_ATTEMPTS, in which case the last error is re-raised.
    # NOTE(review): the counter starts at 0 and the check is '>', so the
    # swap appears to be tried up to MAX_ZK_WRITE_ATTEMPTS + 2 times -
    # confirm this is intended. The bare except also retries on
    # KeyboardInterrupt.
    while True:
        try:
            modify_mysql_zk.swap_master_and_slave(slave, dry_run=False)
            break
        except:
            if zk_write_attempt > MAX_ZK_WRITE_ATTEMPTS:
                log.info('Final failure writing to zk, bailing')
                raise
            else:
                log.info('Write to zk failed, trying again')
                zk_write_attempt = zk_write_attempt+1

    # zk now points at the new master, so open it for writes and drop its
    # replication configuration.
    log.info('Removing read_only from new master')
    mysql_lib.set_global_variable(slave, 'read_only', False)
    log.info('Removing replication configuration from new master')
    mysql_lib.reset_slave(slave)
    if lock_identifier:
        log.info('Releasing promotion lock')
        release_promotion_lock(lock_identifier)

    log.info('Failover complete')

    # we don't really care if this fails, but we'll print a message anyway.
    try:
        environment_specific.generic_json_post(
            environment_specific.CHANGE_FEED_URL,
            {'type': 'MySQL Failover',
             'environment': replica_set,
             'description': "Failover from {m} to {s}".format(m=master, s=slave),
             'author': host_utils.get_user(),
             'automation': False,
             'source': "mysql_failover.py on {}".format(host_utils.HOSTNAME)})
    except Exception as e:
        log.warning("Failover completed, but change feed "
                    "not updated: {}".format(e))

    # A falsy master_conn means the old master was dead (or was shut down
    # above), so a replacement host is requested for it.
    if not master_conn:
        log.info('As master is dead, will try to launch a replacement. Will '
                 'sleep 20 seconds first to let things settle')
        time.sleep(20)
        launch_replacement_db_host.launch_replacement_db_host(master)