def confirm_max_replica_lag(replicas, lag_tolerance, dead_master,
                            replicas_synced=False, timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr object to be tested for replication lag
    lag_tolerance - Lag tolerance handed unchanged to
                    mysql_lib.assert_replication_unlagged. Other callers in
                    this file pass mysql_lib.REPLICATION_TOLERANCE_*
                    constants for this argument.
    dead_master - If set, only the SQL thread and correct master checks
                  are run (instead of mysql_lib.ALL_REPLICATION_CHECKS).
                  Also passed to mysql_lib.assert_replication_unlagged.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long (in seconds) to wait for replication to be in the
              desired state

    Raises:
    Exception - If replication is still not acceptable once timeout has
                elapsed. The message names the replicas that failed the
                lag check, or every replica when only the sync check
                failed.
    Any exception from mysql_lib.assert_replication_sanity that persists
    after one replication restart attempt propagates to the caller.
    """
    start = time.time()
    if dead_master:
        replication_checks = {mysql_lib.CHECK_SQL_THREAD,
                              mysql_lib.CHECK_CORRECT_MASTER}
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        # Replicas that failed the lag check during this pass
        lagging = []
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                # A second failure is not caught: it aborts the whole check
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica,
                                                      lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                lagging.append(replica)

        in_sync = True
        if replicas_synced and not confirm_replicas_in_sync(replicas):
            in_sync = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if in_sync and not lagging:
            return
        elif (time.time() - start) > timeout:
            # Report the replicas that actually failed rather than whatever
            # the loop variable happened to hold last. When only the sync
            # check failed, the problem concerns the whole set.
            culprits = lagging if lagging else replicas
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(
                                r=', '.join(str(c) for c in culprits)))
        else:
            log.info('Sleeping for 5 second to allow replication to catch up')
            time.sleep(5)
def confirm_max_replica_lag(replicas, lag_tolerance, dead_master,
                            replicas_synced=False, timeout=0):
    """ Test replication lag

    Args:
    replicas - A set of hostaddr object to be tested for replication lag
    lag_tolerance - Lag tolerance handed unchanged to
                    mysql_lib.assert_replication_unlagged. Other callers in
                    this file pass mysql_lib.REPLICATION_TOLERANCE_*
                    constants for this argument.
    dead_master - If set, only the SQL thread and correct master checks
                  are run (instead of mysql_lib.ALL_REPLICATION_CHECKS).
                  Also passed to mysql_lib.assert_replication_unlagged.
    replicas_synced - Replica servers must have executed to the same
                      position in the binary log.
    timeout - How long (in seconds) to wait for replication to be in the
              desired state

    Raises:
    Exception - If replication is still not acceptable once timeout has
                elapsed. The message names the replicas that failed the
                lag check, or every replica when only the sync check
                failed.
    Any exception from mysql_lib.assert_replication_sanity that persists
    after one replication restart attempt propagates to the caller.
    """
    start = time.time()
    if dead_master:
        replication_checks = {mysql_lib.CHECK_SQL_THREAD,
                              mysql_lib.CHECK_CORRECT_MASTER}
    else:
        replication_checks = mysql_lib.ALL_REPLICATION_CHECKS

    while True:
        # Replicas that failed the lag check during this pass
        lagging = []
        for replica in replicas:
            # Confirm threads are running, expected master
            try:
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)
            except Exception as e:
                log.warning(e)
                log.info('Trying to restart replication, then '
                         'sleep 20 seconds')
                mysql_lib.restart_replication(replica)
                time.sleep(20)
                # A second failure is not caught: it aborts the whole check
                mysql_lib.assert_replication_sanity(replica,
                                                    replication_checks)

            try:
                mysql_lib.assert_replication_unlagged(replica,
                                                      lag_tolerance,
                                                      dead_master)
            except Exception as e:
                log.warning(e)
                lagging.append(replica)

        in_sync = True
        if replicas_synced and not confirm_replicas_in_sync(replicas):
            in_sync = False
            log.warning('Replica servers are not in sync and replicas_synced '
                        'is set')

        if in_sync and not lagging:
            return
        elif (time.time() - start) > timeout:
            # Report the replicas that actually failed rather than whatever
            # the loop variable happened to hold last. When only the sync
            # check failed, the problem concerns the whole set.
            culprits = lagging if lagging else replicas
            raise Exception('Replication is not in an acceptable state on '
                            'replica {r}'.format(
                                r=', '.join(str(c) for c in culprits)))
        else:
            log.info('Sleeping for 5 second to allow replication to catch up')
            time.sleep(5)
def wait_for_repl_sync(instance):
    """ Block until replication on an instance is caught up

    Polls mysql_lib.assert_replication_unlagged with
    mysql_lib.REPLICATION_TOLERANCE_NONE, sleeping 5 seconds between
    attempts.

    Args:
    instance - A hostaddr instance

    Raises:
    Exception - If the check is still failing after more than
                REPL_SYNC_MAX_SECONDS seconds
    """
    began = time.time()
    while True:
        try:
            mysql_lib.assert_replication_unlagged(
                instance, mysql_lib.REPLICATION_TOLERANCE_NONE)
        except Exception as err:
            log.warning(err)
        else:
            # The check passed, nothing left to wait for
            return

        waited = time.time() - began
        if waited > REPL_SYNC_MAX_SECONDS:
            raise Exception('Replication is not in an acceptable state on '
                            'replica {}'.format(instance))

        log.info('Sleeping for 5 second to allow replication to catch up')
        time.sleep(5)
def check_replication_for_migration(source_replica_set,
                                    destination_replica_set):
    """ Confirm that replication is sane for finishing a shard migration

    Args:
    source_replica_set - Where shards are coming from
    destination_replica_set - Where shards are being sent

    Raises:
    Exception - If the source master has a slave status, or if the
                destination master is not replicating from the source
                master.
    Exceptions from mysql_lib.assert_replication_unlagged and
    mysql_lib.get_slave_status(destination_master) are not caught here.
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    # First we will confirm that the slave of the source is caught up
    # this is important for row count comparisons
    mysql_lib.assert_replication_unlagged(
        source_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the slave of the destination replica set for the same reason
    mysql_lib.assert_replication_unlagged(
        destination_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the destination master is relatively caught up to the source
    # master
    mysql_lib.assert_replication_unlagged(
        destination_master, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # We will also verify that the source master is not replicating. A scary
    # scenario is if there is some sort of ring replication going and db
    # drops of blackhole db's would propagate to the source db.
    try:
        source_slave_status = mysql_lib.get_slave_status(source_master)
    except mysql_lib.ReplicationError:
        # A ReplicationError here is treated as "no slave status", which is
        # the desired state for the source master
        source_slave_status = None

    if source_slave_status:
        raise Exception('Source master is setup for replication '
                        'this is super dangerous!')

    # We will also verify that the destination master is replicating from the
    # source master
    slave_status = mysql_lib.get_slave_status(destination_master)
    master_of_destination_master = host_utils.HostAddr(':'.join(
        (slave_status['Master_Host'], str(slave_status['Master_Port']))))
    if source_master != master_of_destination_master:
        # The expected master is the source master, so report that one
        raise Exception('Master of destination {d} is {actual} rather than '
                        'expected {expected} '
                        ''.format(d=destination_master,
                                  actual=master_of_destination_master,
                                  expected=source_master))
    log.info('Replication looks ok for migration')
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either host_utils.REPLICA_ROLE_SLAVE or
                   host_utils.REPLICA_ROLE_DR_SLAVE.
    dry_run - If set, do not modify zk

    Raises:
    Exception - On an invalid replica_type, when no zk connection can be
                obtained, when the detected master is not a master in zk,
                or when the new config would be identical to the existing
                one. Every exception is logged and then re-raised.
    """
    try:
        if replica_type not in (host_utils.REPLICA_ROLE_DR_SLAVE,
                                host_utils.REPLICA_ROLE_SLAVE):
            # Format the message itself; calling .format on the Exception
            # object would fail with an AttributeError
            raise Exception('Invalid value "{replica_type}" for argument '
                            'replica_type'.format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        # Only a sane, caught up replica is allowed into zk
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(
                host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance, master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data, version) = get_zk_node_for_replica_set(
                kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Work on a copy so the original can be compared against below
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version read along with the data is handed back to set
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # The dr entry for the replica set is written whether or not
            # one existed before
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data), dr_meta.version)
        # No other replica_type can get here, it was validated above
    except Exception as e:
        log.exception(e)
        raise
def add_replica_to_zk(instance, replica_type, dry_run):
    """ Add a replica to zk

    Args:
    instance - A hostaddr object of the replica to add to zk
    replica_type - Either host_utils.REPLICA_ROLE_SLAVE or
                   host_utils.REPLICA_ROLE_DR_SLAVE.
    dry_run - If set, do not modify zk

    Raises:
    Exception - On an invalid replica_type, when no zk connection can be
                obtained, when the detected master is not a master in zk,
                or when the new config would be identical to the existing
                one. Every exception is logged and then re-raised.
    """
    try:
        if replica_type not in (host_utils.REPLICA_ROLE_DR_SLAVE,
                                host_utils.REPLICA_ROLE_SLAVE):
            # Format the message itself; calling .format on the Exception
            # object would fail with an AttributeError
            raise Exception('Invalid value "{replica_type}" for argument '
                            'replica_type'.format(replica_type=replica_type))

        zk_local = host_utils.MysqlZookeeper()
        kazoo_client = environment_specific.get_kazoo_client()
        if not kazoo_client:
            raise Exception('Could not get a zk connection')

        log.info('Instance is {inst}'.format(inst=instance))
        # Only a sane, caught up replica is allowed into zk
        mysql_lib.assert_replication_sanity(instance)
        mysql_lib.assert_replication_unlagged(
            instance, mysql_lib.REPLICATION_TOLERANCE_NORMAL)
        master = mysql_lib.get_master_from_instance(instance)
        if master not in zk_local.get_all_mysql_instances_by_type(
                host_utils.REPLICA_ROLE_MASTER):
            raise Exception('Instance {master} is not a master in zk'
                            ''.format(master=master))

        log.info('Detected master of {instance} '
                 'as {master}'.format(instance=instance, master=master))

        (replica_set, _) = zk_local.get_replica_set_from_instance(master)
        log.info('Detected replica_set as '
                 '{replica_set}'.format(replica_set=replica_set))

        if replica_type == host_utils.REPLICA_ROLE_SLAVE:
            (zk_node, parsed_data, version) = get_zk_node_for_replica_set(
                kazoo_client, replica_set)
            log.info('Replica set {replica_set} is held in zk_node '
                     '{zk_node}'.format(zk_node=zk_node,
                                        replica_set=replica_set))
            log.info('Existing config:')
            log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            # Work on a copy so the original can be compared against below
            new_data = copy.deepcopy(parsed_data)
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \
                instance.hostname
            new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \
                instance.port
            log.info('New config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                # The version read along with the data is handed back to set
                kazoo_client.set(zk_node, simplejson.dumps(new_data), version)
        elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
            znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK)
            parsed_data = simplejson.loads(znode_data)
            new_data = copy.deepcopy(parsed_data)
            if replica_set in parsed_data:
                log.info('Existing dr config:')
                log.info(pprint.pformat(remove_auth(parsed_data[replica_set])))
            else:
                log.info('Replica set did not previously have a dr slave')

            # The dr entry for the replica set is written whether or not
            # one existed before
            new_data[replica_set] = \
                {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname,
                                                    'port': instance.port}}
            log.info('New dr config:')
            log.info(pprint.pformat(remove_auth(new_data[replica_set])))

            if new_data == parsed_data:
                raise Exception('No change would be made to zk, '
                                'will not write new config')
            elif dry_run:
                log.info('dry_run is set, therefore not modifying zk')
            else:
                log.info('Pushing new dr configuration for '
                         '{replica_set}:'.format(replica_set=replica_set))
                kazoo_client.set(environment_specific.DR_ZK,
                                 simplejson.dumps(new_data), dr_meta.version)
        # No other replica_type can get here, it was validated above
    except Exception as e:
        log.exception(e)
        raise