def main(argv=None):
    """Command-line entry point for the orphaned resource provider fixer.

    Builds the argument parser (MySQL connection options injected by
    ``MySqlArgs``, plus ``--quiet``, ``--slack`` and the positional
    ``action``), connects to the database and runs
    ``resource_providers_fixer``. When a Slack webhook file is supplied the
    fixer runs inside the ``Slackbot`` context manager and, for the
    ``update`` action, a summary is posted afterwards.

    :param argv: full argument vector, program name included; falls back
        to ``sys.argv`` when omitted.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Fixes issues with orphaned resource providers.',
    )
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.',
    )
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a '
             'notification to',
    )
    parser.add_argument(
        'action',
        choices=['info', 'update'],
        help='Just display info or actually update them?',
    )

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='orphan-resource-providers')

    fixer_options = dict(
        db=mysqlargs.connect(),
        describe=(args.action == 'info'),
        quiet=args.quiet,
    )

    # Without Slack there is nothing to report to: run the fixer and stop.
    if not slack:
        resource_providers_fixer(**fixer_options)
        return

    # The Slackbot context manager is used here to log exceptions.
    with slack:
        update_count = resource_providers_fixer(**fixer_options)

    # Only the "update" action is announced, and quiet mode stays silent
    # unless something was actually changed.
    if args.action != 'update':
        return
    if args.quiet and not update_count:
        return

    if update_count > 0:
        message = 'Commanded update of *{} resource providers*'.format(
            update_count)
        color = '#000000'
    else:
        message = 'No resource providers to update'
        color = '#cccccc'

    slack.post('orphan-resource-providers', message, color=color)
def main(argv=None):
    """Report Ironic nodes that sit in the "error" provision state.

    OpenStack credentials are collected from environment variables whose
    names start with ``OS_ENV_PREFIX`` and, when ``--osrc`` is given,
    overridden by the values loaded from that file. Every node that is in
    the ``error`` provision state and not flagged for maintenance is
    printed and, if a Slack webhook file was supplied, posted to Slack.

    :param argv: full argument vector, program name included; falls back
        to ``sys.argv`` when omitted.
    :returns: ``-1`` when required OpenStack values are missing,
        otherwise ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser(
        'Strange things, as per someone\'s definition of "strange".')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Environment first, then the rc file so that it takes precedence.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(osapi.load_osrc(args.osrc))

    missing_os_vars = set(osapi.Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        complaint = 'Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars))
        print(complaint, file=sys.stderr)
        return -1

    auth = osapi.Auth(os_vars)
    nodes = osrest.ironic_nodes(auth, details=True)

    errored_nodes = []
    for node in nodes.values():
        if node['provision_state'] == 'error' and not node['maintenance']:
            errored_nodes.append(node)

    if not errored_nodes:
        if args.verbose:
            print('All good.')
        return

    lines = ['Ironic nodes in "error" provision state, not in maintenance']
    for node in errored_nodes:
        lines.append('• `{}`, last error: {}'.format(
            node['uuid'], node.get('last_error')))
    message = '\n'.join(lines)

    # The console output swaps the bullet character for a plain asterisk.
    print(message.replace('•', '*'))
    if slack:
        slack.post(SUBCOMMAND, message, color='xkcd:red')
def main(argv=None):
    """Inspect or reset Ironic nodes stuck in a known, correctable error.

    In ``info`` mode the correctable nodes are printed (and summarised to
    Slack, if configured) and nothing is changed. In ``reset`` mode a
    ``NodeResetter`` is run for every correctable node; nodes whose reset
    raises ``TooManyResets`` are recorded as refused rather than treated
    as failures.

    OpenStack credentials come from environment variables whose names
    start with ``OS_ENV_PREFIX``, overridden by the ``--osrc`` file when
    one is given.

    :param argv: full argument vector, program name included; falls back
        to ``sys.argv`` when omitted.
    :returns: ``-1`` when required OpenStack values are missing,
        otherwise ``None``.
    :raises Exception: any error other than ``TooManyResets`` raised by a
        reset is reported to Slack (if configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Kick Ironic nodes that '
        'are in an common/known error state')
    parser.add_argument('mode', choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument('--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true',
        help='Dry run, don\'t actually do anything')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack)
    else:
        slack = None

    # Environment first, then the rc file so that it takes precedence.
    os_vars = {k: os.environ[k] for k in os.environ if k.startswith(OS_ENV_PREFIX)}
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print(
            'Missing required OS values in env/rcfile: {}'
            .format(', '.join(missing_os_vars)),
            file=sys.stderr
        )
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth, details=True)
    cureable = cureable_nodes(nodes)

    if args.mode == 'info':
        print('{} node(s) in a state that we can treat'.format(len(cureable)))
        for nid in cureable:
            print('-' * 40)
            print('\n'.join(
                '{:<25s} {}'.format(key, nodes[nid].get(key))
                for key in [
                    'uuid',
                    'provision_updated_at',
                    'provision_state',
                    'last_error',
                    'instance_uuid',
                    'extra',
                    'maintenance',
                ]
            ))

        if slack:
            if cureable:
                message = ('{} nodes in correctable error states (no action '
                           'taken)'.format(len(cureable)))
                color = 'xkcd:orange red'
            else:
                # Nothing we can fix; still mention nodes that are in an
                # error state we do not know how to treat.
                error_nodes = sum(
                    1 for n in nodes.values()
                    if not n['maintenance'] and n['provision_state'] == 'error'
                )
                if error_nodes:
                    message = ('No nodes in correctable error states ({} other'
                               ' nodes in error state)').format(error_nodes)
                    color = 'xkcd:yellow'
                else:
                    message = 'No nodes in correctable error states'
                    color = 'xkcd:green'

            slack.post(SUBCOMMAND, message, color=color)
        return

    if not cureable:
        # The "nothing to do" notice (console and Slack) is only emitted in
        # verbose mode; ``message`` is bound only on that path.
        if args.verbose:
            message = 'Nothing to do.'
            print(message)
            if slack:
                slack.post(SUBCOMMAND, message, color='xkcd:light grey')
        return

    print('To correct: {}'.format(repr(cureable)))
    if slack:
        message = ['Ironic nodes in correctable error states']
        for nid in cureable:
            message.append(' • `{}`: "{}"'.format(nid, nodes[nid]['last_error']))
        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color='xkcd:darkish red')

    reset_ok = []   # (node id, reset count) for every attempted reset
    too_many = []   # node ids refused because they hit the reset limit
    for nid in cureable:
        resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
        try:
            resetter.reset()
        except TooManyResets:
            too_many.append(nid)
        except Exception as e:
            if slack:
                error = '{}; check logs'.format(str(e))
                slack.post(SUBCOMMAND, error, color='xkcd:red')
            raise
        else:
            reset_ok.append((nid, resetter.tracker.count()))

    print('Attempted to fix: {}'.format(repr(reset_ok)))
    print('Refused to fix: {}'.format(repr(too_many)))

    if slack:
        message = []
        if reset_ok:
            message.append('Attempted reset of nodes')
            message.extend(' • `{}`: {} resets'.format(*r) for r in reset_ok)
        if too_many:
            message.append('\nAbstained (already at limit)')
            # List the refused nodes themselves. ``too_many`` holds bare
            # node ids, so they are formatted directly, not unpacked.
            message.extend(' • `{}`'.format(nid) for nid in too_many)

        color = 'xkcd:chartreuse'
        if args.dry_run:
            color = 'xkcd:yellow'
            message.append('dry run, no changes actually made.')
        if too_many:
            # A refusal is the more important signal, so it wins the color.
            color = 'xkcd:orange'

        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color=color)
def main(argv=None):
    """Command-line entry point for the floating IP / port reclaimer.

    Parses MySQL and OpenStack options plus the reaper-specific arguments,
    refuses the unvalidated "delete ports on Ocata" combination, loads the
    optional project whitelist, and runs ``reaper``. When a Slack webhook
    file is supplied the reaper runs inside the ``Slackbot`` context
    manager and, for the ``delete`` action, a summary is posted afterwards.

    :param argv: full argument vector, program name included; falls back
        to ``sys.argv`` when omitted.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Floating IP and port reclaimer.',
    )
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    osapi.add_arguments(parser)

    parser.add_argument(
        '-w', '--whitelist',
        type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
             'Ignores case and dashes.',
    )
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.',
    )
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a '
             'notification to',
    )
    parser.add_argument(
        '-d', '--dbversion',
        type=str,
        help='Version of the database. Schemas differ, pick the '
             'appropriate one.',
        choices=[query.LIBERTY, query.OCATA],
        default=query.LIBERTY,
    )
    parser.add_argument('--kvm', help='Run at KVM site', action='store_true')
    parser.add_argument(
        'action',
        choices=['info', 'delete'],
        help='Just display info or actually delete them?',
    )
    parser.add_argument(
        'type',
        choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?',
    )
    parser.add_argument(
        'idle_days',
        type=float,
        help='Number of days since last active instance in project was '
             'deleted to consider it idle.',
    )

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    # Deleting ports against the Ocata schema is not validated; bail out.
    requested = (args.action, args.type, args.dbversion)
    if requested == ('delete', 'port', 'ocata'):
        print(
            'Checking ports on Ocata isn\'t validated, refusing to '
            'automatically delete.',
            file=sys.stderr,
        )
        sys.exit(1)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='neutron-reaper')

    ignored_projects = set()
    if args.whitelist:
        with open(args.whitelist) as handle:
            ignored_projects = {
                normalize_project_name(entry, args.kvm) for entry in handle
            }

    db = mysqlargs.connect()
    db.version = args.dbversion

    reaper_options = dict(
        db=db,
        auth=auth,
        type_=args.type,
        idle_days=args.idle_days,
        whitelist=ignored_projects,
        kvm=args.kvm,
        describe=(args.action == 'info'),
        quiet=args.quiet,
    )

    # Without Slack there is nothing to report to: run the reaper and stop.
    if not slack:
        reaper(**reaper_options)
        return

    # The Slackbot context manager is used here to log exceptions.
    with slack:
        remove_count = reaper(**reaper_options)

    # Only the "delete" action is announced, and quiet mode stays silent
    # unless something was actually removed.
    if args.action != 'delete':
        return
    if args.quiet and not remove_count:
        return

    noun = {'ip': 'floating IP', 'port': 'port'}[args.type]
    plural = '' if remove_count == 1 else 's'
    thing = '{}{}'.format(noun, plural)

    if remove_count > 0:
        message = 'Commanded deletion of *{} {}* ({:.0f} day grace-period)'.format(
            remove_count, thing, args.idle_days)
        color = '#000000'
    else:
        message = 'No {} to delete ({:.0f} day grace-period)'.format(
            thing, args.idle_days)
        color = '#cccccc'

    slack.post('neutron-reaper', message, color=color)
def main(argv=None):
    """Find (and optionally clean) Ironic nodes bound to missing instances.

    A node counts as a "zombie" when its ``instance_uuid`` is set but does
    not appear among the instances returned by Nova. ``info`` mode only
    reports them; ``delete`` mode first runs sanity checks (unless
    disabled with ``--force-sane``) and then either clears the instance
    data of ``available`` nodes or sets the node state to ``deleted``.

    :param argv: full argument vector, program name included; falls back
        to ``sys.argv`` when omitted.
    :returns: ``-1`` when required OpenStack values are missing,
        otherwise ``None``.
    :raises Exception: any error raised while cleaning is reported to
        Slack (if configured) and re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Kick Ironic nodes that refer to a '
                    'deleted/nonexistant Nova instance',
    )
    parser.add_argument(
        'mode',
        choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them',
    )
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a '
             'notification to',
    )
    parser.add_argument(
        '--osrc',
        type=str,
        help='Connection parameter file. Should include password. envars '
             'used if not provided by this file.',
    )
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane',
        action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)',
    )
    # Hidden switch, intended for testing.
    parser.add_argument(
        '--force-insane',
        action='store_true',
        help=argparse.SUPPRESS,
    )

    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Environment first, then the rc file so that it takes precedence.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        complaint = 'Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars))
        print(complaint, file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)

    # Index the nodes that claim an instance by that instance's id.
    node_instance_map = {}
    for candidate in nodes.values():
        if candidate['instance_uuid'] is not None:
            node_instance_map[candidate['instance_uuid']] = candidate

    instance_ids = set(instances)
    unbound_instances = set(node_instance_map) - instance_ids

    if args.mode == 'info':
        # Read-only report.
        print('ZOMBIE INSTANCES ON NODES' if unbound_instances
              else 'No zombies currently.')

        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]
            assert inst_id not in instances, 'contradiction, this should be impossible'

            print('-----')
            print('Ironic Node\n ID: {}'.format(node['uuid']))
            print(' Instance: {}'.format(node['instance_uuid']))
            print(' State: {}'.format(node['provision_state']))

        if slack:
            if unbound_instances:
                message = '{} nodes with dead instances (no action taken)'.format(
                    len(unbound_instances))
                color = 'xkcd:orange red'
            else:
                message = 'No nodes with dead instances.'
                color = 'xkcd:green'
            slack.post(SUBCOMMAND, message, color=color)

    elif args.mode == 'delete':
        checks_enabled = args.force_insane or not args.force_sane
        if checks_enabled:
            # Sanity checks to avoid acting on implausible data.
            if not instance_ids and unbound_instances:
                _thats_crazy('(in)sanity check: 0 running instances(?!)', slack)

            # --force-insane drops the limit so the check always trips.
            ubi_limit = -1 if args.force_insane else 20
            if len(unbound_instances) > ubi_limit:
                _thats_crazy(
                    '(in)sanity check: it thinks there are {} unbound instances'
                    .format(len(unbound_instances)),
                    slack,
                )

        if slack:
            if unbound_instances:
                bullets = []
                for inst_id in unbound_instances:
                    bound_node = node_instance_map[inst_id]
                    bullets.append(' • node `{}` → instance `{}`'.format(
                        bound_node['uuid'], bound_node['instance_uuid']))
                slack.post(
                    SUBCOMMAND,
                    'Possible Ironic nodes with nonexistant instances:\n{}'.format(
                        '\n'.join(bullets)),
                    color='xkcd:darkish red',
                )
            elif args.verbose:
                slack.post(
                    SUBCOMMAND,
                    'No Ironic nodes visibly clinging to dead instances',
                    color='xkcd:light grey',
                )

        try:
            for inst_id in unbound_instances:
                node = node_instance_map[inst_id]
                node_id = node['uuid']
                if node['provision_state'] != 'available':
                    osrest.ironic_node_set_state(auth, node_id, 'deleted')
                else:
                    clear_node_instance_data(auth, node_id)
        except Exception as exc:
            if slack:
                error = '{} while trying to clean instances; check logs for traceback'.format(str(exc))
                slack.post(SUBCOMMAND, error, color='xkcd:red')
            raise
        else:
            if unbound_instances and slack:
                ok_message = 'Cleaned {} instance(s).'.format(
                    len(unbound_instances))
                slack.post(SUBCOMMAND, ok_message, color='xkcd:chartreuse')