def get_retirement_queue_servers(next_state):
    """ Find queued instances that are due to move on to next_state

    Args:
    next_state - The state the servers should be moved to next. Must be
                 one of the constants SHUTDOWN_MYSQL or TERMINATE_INSTANCE.

    Returns:
    A dict keyed by hostname whose values are the matching entries of the
    server metadata returned by the cmdb
    """
    # Each allowed next state has exactly one state that must precede it.
    if next_state == SHUTDOWN_MYSQL:
        transition = (RESET_STATS, SHUTDOWN_MYSQL)
    elif next_state == TERMINATE_INSTANCE:
        transition = (SHUTDOWN_MYSQL, TERMINATE_INSTANCE)
    else:
        raise Exception('Invalid state param '
                        '"{next_state}"'.format(next_state=next_state))
    query_params = {'previous_state': transition[0],
                    'next_state': transition[1]}

    # Hosts that logged the previous state between 1 day and 3 weeks ago
    # and have no row yet for the next state.
    sql = ("SELECT t1.hostname, t1.instance_id "
           "FROM ( "
           " SELECT hostname, instance_id "
           " FROM mysqlops.retirement_queue "
           " WHERE activity = %(previous_state)s "
           " AND happened > now() - INTERVAL 3 WEEK "
           " AND happened < now() - INTERVAL 1 DAY) t1 "
           "LEFT JOIN mysqlops.retirement_queue t2 on t1.instance_id = t2.instance_id "
           "AND t2.activity=%(next_state)s "
           "WHERE t2.hostname IS NULL;")

    reporting_conn = mysql_lib.get_mysqlops_connections()
    cursor = reporting_conn.cursor()
    cursor.execute(sql, query_params)
    queued = cursor.fetchall()

    all_servers = environment_specific.get_all_server_metadata()
    # Refuse to act on a suspiciously small cmdb result.
    if len(all_servers) < MIN_CMDB_RESULTS:
        raise Exception('CMDB returned too few results')

    ret = dict()
    for row in queued:
        hostname = row['hostname']

        if hostname not in all_servers:
            # The host is gone from the cmdb, so drop it from the queue.
            log.error('Something killed {instance}, cleaning up '
                      'retirement queue now'.format(instance=row))
            remove_from_retirement_queue(hostname)
            continue

        metadata = all_servers[hostname]
        if row['instance_id'] != metadata['instance_id']:
            # Same hostname but a different instance: leave it alone.
            log.error('Possibly duplicate hostname for '
                      '{hostname}!'.format(hostname=hostname))
            continue

        ret[hostname] = metadata

    return ret
def add_to_queue(hostname, dry_run):
    """ Add an instance to the retirement queue

    Args:
    hostname - The hostname of the instance to add to the retirement queue
    dry_run - If true, log what would happen but neither reset the
              statistics on the instance nor write to the retirement queue

    Raises:
    Exception if the host is protected, is still present in zk, is not in
    the cmdb, or the connection attempt yields no connection.
    MySQLdb.OperationalError for any connection error other than
    mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR.
    """
    log.info('Adding server {hostname} to retirement '
             'queue'.format(hostname=hostname))

    if hostname in get_protected_hosts('set'):
        raise Exception('Host {hostname} is protected from '
                        'retirement'.format(hostname=hostname))

    # basic sanity check
    zk = host_utils.MysqlZookeeper()
    for instance in zk.get_all_mysql_instances():
        if instance.hostname == hostname:
            raise Exception("It appears {instance} is in zk. This is "
                            "very dangerous!".format(instance=instance))

    all_servers = environment_specific.get_all_server_metadata()
    if hostname not in all_servers:
        raise Exception('Host {hostname} is not cmdb'.format(hostname=hostname))

    instance_metadata = all_servers[hostname]
    log.info(instance_metadata)
    username, password = mysql_lib.get_mysql_user_for_role('admin')

    # Pre-bind so the check below cannot hit an unbound name if the
    # timeout block exits without assigning a connection.
    conn = None
    try:
        log.info('Trying to reset user_statistics on ip '
                 '{ip}'.format(ip=instance_metadata['internal_ip']))
        with timeout.timeout(3):
            conn = MySQLdb.connect(host=instance_metadata['internal_ip'],
                                   user=username,
                                   passwd=password,
                                   cursorclass=MySQLdb.cursors.DictCursor)
        if not conn:
            raise Exception('timeout')

        if dry_run:
            log.info('In dry_run mode, not changing anything')
        else:
            mysql_lib.enable_and_flush_activity_statistics(conn)
        activity = RESET_STATS
    except MySQLdb.OperationalError as detail:
        # Only look at the first arg so an unusual args tuple re-raises the
        # original error instead of failing on the unpack.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        # Host is unreachable, so skip straight to the shutdown state.
        log.info('Could not connect to '
                 '{ip}'.format(ip=instance_metadata['internal_ip']))
        activity = SHUTDOWN_MYSQL

    if dry_run:
        log.info('In dry_run mode, not changing anything')
    else:
        log_to_retirement_queue(hostname, instance_metadata['instance_id'],
                                activity)
def find_unused_db_servers():
    """ Compare zk and AWS to determine which servers are likely not in use

    A server is reported when its hostname starts with a prefix used by a
    host in zk, it was launched more than a week ago, it is not itself in
    zk and it is not in the protected hosts set.

    Returns:
    A set of hosts that appear to not be in use
    """
    # First find out what servers we know about from zk, and make a set
    # of hostname prefixes that we think we own.
    zk = host_utils.MysqlZookeeper()
    config = zk.get_all_mysql_config()
    zk_servers = set()
    zk_prefixes = set()
    for db in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype in config[db]:
                host = config[db][rtype]['host']
                zk_servers.add(host)
                zk_prefixes.add(get_db_host_prefix(host))

    cmdb_servers = environment_specific.get_all_server_metadata()

    # str.startswith accepts a tuple and stops at the first match.
    owned_prefixes = tuple(zk_prefixes)

    # We need to give servers a chance to build and then add themselves
    # to zk, so we will ignore servers for a week. parse_ts returns a naive
    # datetime for a UTC timestamp, so the cutoff must be in UTC as well.
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(weeks=1)

    mysql_aws_hosts = set()
    for host in cmdb_servers:
        if not host.startswith(owned_prefixes):
            continue

        creation = boto.utils.parse_ts(cmdb_servers[host]['launch_time'])
        if creation < cutoff:
            mysql_aws_hosts.add(host)

    hosts_not_in_zk = mysql_aws_hosts.difference(zk_servers)
    hosts_not_protected = hosts_not_in_zk.difference(
        retirement_queue.get_protected_hosts('set'))
    return hosts_not_protected
def find_unused_db_servers():
    """ Compare zk and AWS to determine which servers are likely not in use

    Only cmdb hosts whose name starts with a prefix of some host in zk are
    considered, and only once they are more than a week old.

    Returns:
    A set of hosts that appear to not be in use
    """
    # First find out what servers we know about from zk, and make a set
    # of hostname prefixes that we think we own.
    zk = host_utils.MysqlZookeeper()
    config = zk.get_all_mysql_config()
    zk_servers = set()
    zk_prefixes = set()
    mysql_aws_hosts = set()
    for db in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype in config[db]:
                host = config[db][rtype]['host']
                zk_servers.add(host)
                prefix = get_db_host_prefix(host)
                zk_prefixes.add(prefix)

    cmdb_servers = environment_specific.get_all_server_metadata()

    # We need to give servers a chance to build and then add themselves
    # to zk, so we will ignore servers for a week. The launch time parsed
    # by boto is a naive UTC datetime, so compare it against UTC "now"
    # rather than local time.
    oldest_ignored = datetime.datetime.utcnow() - datetime.timedelta(weeks=1)

    for host in cmdb_servers:
        # any() stops at the first matching prefix.
        if not any(host.startswith(prefix) for prefix in zk_prefixes):
            continue

        creation = boto.utils.parse_ts(cmdb_servers[host]['launch_time'])
        if creation < oldest_ignored:
            mysql_aws_hosts.add(host)

    hosts_not_in_zk = mysql_aws_hosts.difference(zk_servers)
    hosts_not_protected = hosts_not_in_zk.difference(
        retirement_queue.get_protected_hosts('set'))
    return hosts_not_protected
def add_to_queue(hostname, dry_run, skip_production_check=False):
    """ Add an instance to the retirement queue

    Args:
    hostname - The hostname of the instance to add to the retirement queue
    dry_run - If true, log what would happen but neither reset the
              statistics on the instance nor write to the retirement queue
    skip_production_check - If true, a host that is still in zk only
                            produces a warning instead of an exception

    Raises:
    Exception if the host is protected, is in zk (unless
    skip_production_check is set), is not in the cmdb, or the connection
    attempt yields no connection.
    MySQLdb.OperationalError for any connection error other than
    mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR.
    """
    log.info('Adding server {hostname} to retirement '
             'queue'.format(hostname=hostname))

    if hostname in get_protected_hosts('set'):
        raise Exception('Host {hostname} is protected from '
                        'retirement'.format(hostname=hostname))

    # basic sanity check
    zk = host_utils.MysqlZookeeper()
    for instance in zk.get_all_mysql_instances():
        if instance.hostname == hostname:
            if skip_production_check:
                log.warning("It appears {instance} is in zk but "
                            "skip_production_check is set so continuing."
                            "".format(instance=instance))
            else:
                raise Exception("It appears {instance} is in zk. This is "
                                "very dangerous!".format(instance=instance))

    all_servers = environment_specific.get_all_server_metadata()
    if hostname not in all_servers:
        raise Exception(
            'Host {hostname} is not cmdb'.format(hostname=hostname))

    instance_metadata = all_servers[hostname]
    log.info(instance_metadata)
    username, password = mysql_lib.get_mysql_user_for_role('admin')

    # Pre-bind so the checks below cannot hit an unbound name if the
    # timeout block exits without assigning a connection.
    conn = None
    try:
        if check_for_user_activity(instance_metadata):
            log.info('Trying to reset user_statistics on ip '
                     '{ip}'.format(ip=instance_metadata['internal_ip']))
            # This connection is only a reachability probe; the reset below
            # is given the host address, not this connection.
            with timeout.timeout(3):
                conn = MySQLdb.connect(host=instance_metadata['internal_ip'],
                                       user=username,
                                       passwd=password,
                                       cursorclass=MySQLdb.cursors.DictCursor)
            if not conn:
                raise Exception('timeout')

            if dry_run:
                log.info('In dry_run mode, not changing anything')
            else:
                mysql_lib.enable_and_flush_activity_statistics(
                    host_utils.HostAddr(hostname))
        else:
            log.info("No recent user activity, skipping stats reset")

            # We still need to add it to the queue the first time.
            # Check if it was added recently and exit if it was
            if is_host_in_retirement_queue(hostname):
                return
        activity = RESET_STATS
    except MySQLdb.OperationalError as detail:
        # Only look at the first arg so an unusual args tuple re-raises the
        # original error instead of failing on the unpack.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        # Host is unreachable, so skip straight to the shutdown state.
        log.info('Could not connect to '
                 '{ip}'.format(ip=instance_metadata['internal_ip']))
        activity = SHUTDOWN_MYSQL
    finally:
        # Do not leak the probe connection.
        if conn is not None:
            conn.close()

    # We only want to add the host if it wasn't already in the queue
    if is_host_in_retirement_queue(hostname):
        return

    if dry_run:
        log.info('In dry_run mode, not changing anything')
    else:
        log_to_retirement_queue(hostname, instance_metadata['instance_id'],
                                activity)
def add_to_queue(hostname, dry_run, skip_production_check=False):
    """ Add an instance to the retirement queue

    Args:
    hostname - The hostname of the instance to add to the retirement queue
    dry_run - If true, nothing is changed: the statistics reset and the
              write to the retirement queue are replaced by log messages
    skip_production_check - If true, finding the host in zk is logged as
                            a warning rather than raised as an error

    Raises:
    Exception if the host is protected, is in zk (unless
    skip_production_check is set), is not in the cmdb, or the connection
    attempt yields no connection.
    MySQLdb.OperationalError for any connection error other than
    mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR.
    """
    log.info('Adding server {hostname} to retirement '
             'queue'.format(hostname=hostname))

    if hostname in get_protected_hosts('set'):
        raise Exception('Host {hostname} is protected from '
                        'retirement'.format(hostname=hostname))

    # basic sanity check
    zk = host_utils.MysqlZookeeper()
    for instance in zk.get_all_mysql_instances():
        if instance.hostname == hostname:
            if skip_production_check:
                log.warning("It appears {instance} is in zk but "
                            "skip_production_check is set so continuing."
                            "".format(instance=instance))
            else:
                raise Exception("It appears {instance} is in zk. This is "
                                "very dangerous!".format(instance=instance))

    all_servers = environment_specific.get_all_server_metadata()
    if hostname not in all_servers:
        raise Exception('Host {hostname} is not cmdb'.format(hostname=hostname))

    instance_metadata = all_servers[hostname]
    log.info(instance_metadata)
    username, password = mysql_lib.get_mysql_user_for_role('admin')

    # Bound up front: the timeout block may exit without assigning it, and
    # the cleanup in the finally clause needs the name either way.
    conn = None
    try:
        if check_for_user_activity(instance_metadata):
            log.info('Trying to reset user_statistics on ip '
                     '{ip}'.format(ip=instance_metadata['internal_ip']))
            # The connection only proves the host is reachable; the reset
            # below is handed the host address instead.
            with timeout.timeout(3):
                conn = MySQLdb.connect(host=instance_metadata['internal_ip'],
                                       user=username,
                                       passwd=password,
                                       cursorclass=MySQLdb.cursors.DictCursor)
            if not conn:
                raise Exception('timeout')

            if dry_run:
                log.info('In dry_run mode, not changing anything')
            else:
                mysql_lib.enable_and_flush_activity_statistics(host_utils.HostAddr(hostname))
        else:
            log.info("No recent user activity, skipping stats reset")

            # We still need to add it to the queue the first time.
            # Check if it was added recently and exit if it was
            if is_host_in_retirement_queue(hostname):
                return
        activity = RESET_STATS
    except MySQLdb.OperationalError as detail:
        # Read just the error code; a 2-tuple unpack would raise ValueError
        # on an unusual args tuple and hide the real error.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        # Unreachable host: queue it for the shutdown state directly.
        log.info('Could not connect to '
                 '{ip}'.format(ip=instance_metadata['internal_ip']))
        activity = SHUTDOWN_MYSQL
    finally:
        # Release the probe connection on every exit path.
        if conn is not None:
            conn.close()

    # We only want to add the host if it wasn't already in the queue
    if is_host_in_retirement_queue(hostname):
        return

    if dry_run:
        log.info('In dry_run mode, not changing anything')
    else:
        log_to_retirement_queue(hostname, instance_metadata['instance_id'],
                                activity)
def main():
    """ Print every MySQL instance found in zk, one sorted line each

    With -e/--extended, hosts that are present in the server metadata are
    printed with their zone, instance id, instance type and security
    groups; all other hosts use the short format.
    """
    parser = argparse.ArgumentParser(description='MySQL production viewer')
    parser.add_argument('-e',
                        '--extended',
                        help='Include information about hardware, az, etc',
                        default=False,
                        action='store_true')
    args = parser.parse_args()

    zk = host_utils.MysqlZookeeper()
    config = zk.get_all_mysql_config()
    if args.extended:
        servers = environment_specific.get_all_server_metadata()
    else:
        # The short format never consults server metadata.
        servers = dict()

    def security_groups(host):
        """ Security group text exactly as it is printed for host """
        try:
            return ','.join(servers[host]['security_groups'])
        except KeyError:
            return '??VPC??'

    output = list()
    max_rs_length = 10
    max_sg_length = 0

    # iterate through and find the longest replica set name and
    # security group, then use that to set the spacing. The width must be
    # measured on the same text that is printed below, placeholder included.
    for replica_set in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype in config[replica_set]:
                inst = config[replica_set][rtype]
                max_rs_length = max(max_rs_length, len(replica_set))
                if args.extended and inst['host'] in servers:
                    max_sg_length = max(max_sg_length,
                                        len(security_groups(inst['host'])))

    max_rs_length += 4
    max_sg_length += 4
    hostport_length = max_rs_length + 6

    # dynamically generate padding
    format_str = OUTPUT_FORMAT.replace(
        'RS', str(max_rs_length)).replace(
        'HP', str(hostport_length)).replace(
        'SGL', str(max_sg_length))
    format_str_extended = OUTPUT_FORMAT_EXTENDED.replace(
        'RS', str(max_rs_length)).replace(
        'HP', str(hostport_length)).replace(
        'SGL', str(max_sg_length))

    for replica_set in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype in config[replica_set]:
                inst = config[replica_set][rtype]
                hostport = ':'.join([inst['host'], str(inst['port'])])

                if args.extended and inst['host'] in servers:
                    server = servers[inst['host']]
                    az = server['zone']
                    instance_id = server['instance_id']
                    hw = server['instance_type']
                    sg = security_groups(inst['host'])

                    output.append(format_str_extended.format(
                        replica_set=replica_set,
                        replica_type=rtype,
                        hostport=hostport,
                        az=az,
                        hw=hw,
                        sg=sg,
                        id=instance_id))
                else:
                    output.append(format_str.format(
                        replica_set=replica_set,
                        replica_type=rtype,
                        hostport=hostport))

    output.sort()
    # A single parenthesised argument prints the same on Python 2 and 3.
    print('\n'.join(output))
def main():
    """ Show the MySQL replica sets stored in zk as sorted, aligned lines

    Each instance of each replica set is one line. When -e/--extended is
    given, hosts found in the server metadata also show zone, instance id,
    instance type and security groups.
    """
    parser = argparse.ArgumentParser(description='MySQL production viewer')
    parser.add_argument('-e',
                        '--extended',
                        help='Include information about hardware, az, etc',
                        default=False,
                        action='store_true')
    args = parser.parse_args()

    zk = host_utils.MysqlZookeeper()
    config = zk.get_all_mysql_config()
    # Metadata is only fetched when the extended columns are requested.
    servers = dict()
    if args.extended:
        servers = environment_specific.get_all_server_metadata()

    def sg_text(host):
        """ Comma separated security groups of host, or the placeholder """
        try:
            return ','.join(servers[host]['security_groups'])
        except KeyError:
            return '??VPC??'

    output = list()
    max_rs_length = 10
    max_sg_length = 0

    # iterate through and find the longest replica set name and
    # security group, then use that to set the spacing. Measuring the
    # printed text (including the placeholder) keeps the column wide enough.
    for replica_set in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype not in config[replica_set]:
                continue
            inst = config[replica_set][rtype]
            if len(replica_set) > max_rs_length:
                max_rs_length = len(replica_set)
            if args.extended and inst['host'] in servers:
                sg = sg_text(inst['host'])
                if len(sg) > max_sg_length:
                    max_sg_length = len(sg)

    max_rs_length += 4
    max_sg_length += 4
    hostport_length = max_rs_length + 6

    # dynamically generate padding
    format_str = OUTPUT_FORMAT.replace('RS', str(max_rs_length)).replace(
        'HP', str(hostport_length)).replace('SGL', str(max_sg_length))
    format_str_extended = OUTPUT_FORMAT_EXTENDED.replace(
        'RS', str(max_rs_length)).replace('HP', str(hostport_length)).replace(
            'SGL', str(max_sg_length))

    for replica_set in config:
        for rtype in host_utils.REPLICA_TYPES:
            if rtype not in config[replica_set]:
                continue
            inst = config[replica_set][rtype]
            hostport = ':'.join([inst['host'], str(inst['port'])])

            if args.extended and inst['host'] in servers:
                server = servers[inst['host']]
                az = server['zone']
                instance_id = server['instance_id']
                hw = server['instance_type']
                sg = sg_text(inst['host'])

                output.append(
                    format_str_extended.format(replica_set=replica_set,
                                               replica_type=rtype,
                                               hostport=hostport,
                                               az=az,
                                               hw=hw,
                                               sg=sg,
                                               id=instance_id))
            else:
                output.append(
                    format_str.format(replica_set=replica_set,
                                      replica_type=rtype,
                                      hostport=hostport))

    output.sort()
    # Parenthesised so the statement is valid on both Python 2 and 3.
    print('\n'.join(output))
def main():
    """ List the MySQL instances registered in zk as sorted, padded lines

    One line is printed per replica set and replica type. The -e/--extended
    flag adds zone, instance id, instance type and security groups for
    hosts that appear in the server metadata.
    """
    parser = argparse.ArgumentParser(description="MySQL production viewer")
    parser.add_argument(
        "-e", "--extended", help="Include information about hardware, az, etc", default=False, action="store_true"
    )
    args = parser.parse_args()

    zk = host_utils.MysqlZookeeper()
    config = zk.get_all_mysql_config()
    # Without --extended no metadata lookup is made at all.
    servers = environment_specific.get_all_server_metadata() if args.extended else dict()

    def format_security_groups(host):
        """ Security groups of host joined by commas, as printed """
        try:
            return ",".join(servers[host]["security_groups"])
        except KeyError:
            return "??VPC??"

    # Every (replica set, replica type, instance) that will be printed.
    instances = [
        (replica_set, rtype, config[replica_set][rtype])
        for replica_set in config
        for rtype in host_utils.REPLICA_TYPES
        if rtype in config[replica_set]
    ]

    max_rs_length = 10
    max_sg_length = 0

    # iterate through and find the longest replica set name and
    # security group, then use that to set the spacing. The security group
    # width is taken from the exact text printed below.
    for replica_set, rtype, inst in instances:
        max_rs_length = max(max_rs_length, len(replica_set))
        if args.extended and inst["host"] in servers:
            max_sg_length = max(max_sg_length, len(format_security_groups(inst["host"])))

    max_rs_length += 4
    max_sg_length += 4
    hostport_length = max_rs_length + 6

    # dynamically generate padding
    format_str = (
        OUTPUT_FORMAT.replace("RS", str(max_rs_length))
        .replace("HP", str(hostport_length))
        .replace("SGL", str(max_sg_length))
    )
    format_str_extended = (
        OUTPUT_FORMAT_EXTENDED.replace("RS", str(max_rs_length))
        .replace("HP", str(hostport_length))
        .replace("SGL", str(max_sg_length))
    )

    output = list()
    for replica_set, rtype, inst in instances:
        hostport = ":".join([inst["host"], str(inst["port"])])

        if args.extended and inst["host"] in servers:
            server = servers[inst["host"]]
            az = server["zone"]
            instance_id = server["instance_id"]
            hw = server["instance_type"]
            sg = format_security_groups(inst["host"])

            output.append(
                format_str_extended.format(
                    replica_set=replica_set,
                    replica_type=rtype,
                    hostport=hostport,
                    az=az,
                    hw=hw,
                    sg=sg,
                    id=instance_id,
                )
            )
        else:
            output.append(
                format_str.format(
                    replica_set=replica_set,
                    replica_type=rtype,
                    hostport=hostport,
                )
            )

    output.sort()
    # Parenthesised form behaves the same on Python 2 and Python 3.
    print("\n".join(output))