def main(argv=None):
    """Clear aggregates of terminated leases, re-home orphans, drop old allocations.

    Steps, in order:

    1. Gather the aggregates of every terminated lease and pass them to
       ``clear_aggregates``, which returns ``(errors, reports)``.
    2. For every host returned by ``orphan_find``: add it to aggregate ``1``
       when ``has_active_allocation`` returns ``False``, or to the aggregate
       whose name equals the returned value otherwise. A ``None`` result is
       only reported.
    3. Delete expired host allocations and commit.
    4. Send the joined report lines to Slack, as an error when ``errors`` is
       truthy and as a plain message otherwise.

    Any exception is reported to Slack (when configured) and re-raised.

    NOTE(review): ``argv`` is unused, and ``args``, ``leases``,
    ``aggregates``, ``auth`` and ``conn`` are read but never bound in this
    function -- presumably the setup code lives elsewhere; confirm before
    relying on this entry point.
    """
    script = 'clean-old-aggregates'
    slack = Slackbot(args.slack, script_name=script) if args.slack else None

    try:
        term_leases = [
            lease for lease in leases.values() if is_terminated(lease)
        ]
        # Leases without aggregates yield None; identity comparison is the
        # correct (and PEP 8 recommended) way to filter those out.
        old_aggregates = [
            aggs
            for aggs in (aggregates_for_lease(lease) for lease in term_leases)
            if aggs is not None
        ]
        aggregate_list = list(itertools.chain.from_iterable(old_aggregates))
        errors, reports = clear_aggregates(aggregate_list)

        orphan_list = orphan_find(aggregates)
        for orphan in orphan_list:
            destiny = has_active_allocation(orphan)
            host = osrest.blazar.host(auth, orphan)
            if destiny is None:
                reports.append(
                    "Error identifying allocation for orphan host {}.".format(
                        orphan))
            elif destiny is False:
                reports.append(
                    "Returning orphan host {} to freepool.".format(orphan)
                    + "\n")
                # presumably aggregate id 1 is the freepool -- TODO confirm
                osrest.nova.aggregate_add_host(
                    auth, 1, host['hypervisor_hostname'])
            else:
                # IndexError here means no aggregate carries that name.
                destination_agg = [
                    aggr['id'] for aggr in aggregates.values()
                    if aggr['name'] == destiny
                ][0]
                reports.append(
                    "Moving orphan host {} to destined aggregate {}.".format(
                        orphan, destination_agg))
                osrest.nova.aggregate_add_host(
                    auth, destination_agg, host['hypervisor_hostname'])

        old_allocations = query.blazar_find_old_host_alloc(conn)
        for alloc in old_allocations:
            hostname, lease_id = del_expired_alloc(conn, alloc)
            reports.append(
                "Deleted host_allocation for host {} matching expired lease {}."
                .format(hostname, lease_id))
        # NOTE(review): committed once after all deletions -- confirm this
        # matches the intended transaction granularity.
        conn.db.commit()

        if reports:
            str_report = '\n'.join(reports)
            if slack:
                if errors:
                    slack.error(str_report)
                else:
                    slack.message(str_report)
    except:
        # Catch-all on purpose: notify Slack about anything, then re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Report on, or fix, orphaned resource providers.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    The positional ``action`` is ``info`` (describe only) or ``update``.
    When an update touched at least one provider, a summary is printed and,
    if a Slack bot is configured, posted to Slack. Any exception is reported
    to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Fixes issues with orphaned resource providers.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument(
        'action', choices=['info', 'update'],
        help='Just display info or actually update them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    slack = Slackbot(
        args.slack,
        script_name='orphan-resource-providers') if args.slack else None

    try:
        db = mysqlargs.connect()
        update_count = resource_providers_fixer(
            db=db, describe=args.action == 'info', quiet=args.quiet)

        if args.action == 'update':
            if update_count > 0:
                message = ('Commanded update of *{} resource providers*'.
                           format(update_count))
                print(message)
                # slack is None when --slack was not supplied; calling it
                # unguarded would turn a successful update into a crash.
                if slack:
                    slack.message(message)
            elif not args.quiet:
                print('No resource providers to delete')
    except:
        # Catch-all on purpose: notify Slack about anything, then re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Show or delete Neutron ports that conflict with inactive Ironic instances.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``-1`` when neither ``--ignore-subnet`` nor
        ``--ignore-from-ironic-conf`` is given (or the mode is unknown),
        otherwise ``None``.

    Raises:
        RuntimeError: in ``delete`` mode when more than 10 conflicts are
            found and ``--force-sane`` was not passed.

    Any exception raised while working is reported to Slack (when
    configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Remove orphan ports in Neutron referring to an '
                         'inactive Ironic instance')
    parser.add_argument(
        'mode', choices=['info', 'delete'],
        help='Just display data on the conflict ports or delete them')
    parser.add_argument(
        '--ignore-subnet', type=str,
        help='Ignore Neutron ports in this subnet (UUID). Must provide either '
             'this or --ignore-from-ironic-conf. This overrides the conf.')
    parser.add_argument(
        '-c', '--ignore-from-ironic-conf', type=str,
        help='Ignore Neutron ports in the subnet(s) under the '
             '"provisioning_network" network in the "neutron" section of '
             'this configuration file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    args = parser.parse_args(argv[1:])

    # Validate args
    slack = None
    if args.slack:
        slack = Slackbot(args.slack, SUBCOMMAND)
    auth = Auth.from_env_or_args(args=args)

    if not (args.ignore_subnet or args.ignore_from_ironic_conf):
        print('Must provide --ignore-subnet or --ignore-from-ironic-conf',
              file=sys.stderr)
        return -1

    if args.ignore_subnet:
        # An explicit subnet wins over the config file.
        ignore_subnets = [args.ignore_subnet]
    else:
        ironic_config = configparser.ConfigParser()
        ironic_config.read(args.ignore_from_ironic_conf)
        net_id = ironic_config['neutron']['provisioning_network']
        ignore_subnets = osrest.neutron.network(auth, net_id)['subnets']

    # Do actual work
    try:
        conflicts = find_conflicts(auth, ignore_subnets)

        if args.mode == 'info':
            show_info(conflicts)
        elif args.mode == 'delete':
            too_many = len(conflicts) > 10
            if too_many and not args.force_sane:
                raise RuntimeError(
                    '(in)sanity check: thinks there are {} conflicting MACs'
                    .format(len(conflicts)))

            for entry in conflicts.values():
                osrest.neutron_port_delete(auth, entry['neutron_port_id'])

            if slack:
                line_format = (
                    ' • Neutron Port `{neutron_port_id}` → `{mac}` '
                    '← Ironic Node `{ironic_node_id}` '
                    '(Port `{ironic_port}`)'
                )
                bullet_lines = [
                    line_format.format(**entry)
                    for entry in conflicts.values()
                ]
                slack.success(
                    'Fixed Ironic/Neutron MAC conflicts\n{}'.format(
                        '\n'.join(bullet_lines)))
        else:
            print('unknown command', file=sys.stderr)
            return -1
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Show or reset Ironic nodes that are in a treatable error state.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``-1`` when required OS variables are missing from the environment
        and rc file, otherwise ``None``.

    In ``info`` mode a fixed set of fields is printed for each treatable
    node. In ``reset`` mode each node is reset through ``NodeResetter`` and
    a summary is printed; the summary goes to Slack only when a bot is
    configured and this is not a dry run. Any exception raised while working
    is reported to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser(
        'Kick Ironic nodes that are in an common/known error state')
    parser.add_argument(
        'mode', choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true',
                        help='Dry run, don\'t actually do anything')
    args = parser.parse_args(argv[1:])

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='ironic-error-resetter')

    # Environment first, then let the rc file override it.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print(
            'Missing required OS values in env/rcfile: {}'.format(
                ', '.join(missing_os_vars)),
            file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    try:
        nodes = osrest.ironic_nodes(auth, details=True)
        cureable = cureable_nodes(nodes)

        if args.mode == 'info':
            shown_fields = [
                'uuid',
                'provision_updated_at',
                'provision_state',
                'last_error',
                'instance_uuid',
                'extra',
                'maintenance',
            ]
            print('{} node(s) in a state that we can treat'.format(
                len(cureable)))
            for nid in cureable:
                print('-' * 40)
                rows = [
                    '{:<25s} {}'.format(field, nodes[nid].get(field))
                    for field in shown_fields
                ]
                print('\n'.join(rows))
            return

        if len(cureable) == 0:
            if args.verbose:
                print('Nothing to do.')
            return

        print('To correct: {}'.format(repr(cureable)))

        reset_ok = []
        # Never filled in this function, so the "Skipped" section below
        # is currently never emitted.
        too_many = []
        for nid in cureable:
            resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
            resetter.reset()
            reset_ok.append((nid, resetter.tracker.count()))

        message_lines = []
        if reset_ok:
            message_lines.append('Performed reset of nodes')
            message_lines += [
                ' • `{}`: {} resets'.format(*entry) for entry in reset_ok
            ]
        if too_many:
            message_lines.append('Skipped (already at limit)')
            message_lines += [' • `{}`'.format(entry) for entry in too_many]
        if args.dry_run:
            message_lines.append('dry run, no changes actually made.')

        message = '\n'.join(message_lines)
        print(message)

        if slack and not args.dry_run:
            slack.success(message)
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Find, and optionally delete, idle floating IPs or ports.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Positional arguments are ``action`` (``info`` or ``delete``), ``type``
    (one of the keys of ``RESOURCE_QUERY``) and ``idle_days``. Resources
    returned by ``find_reapable_resources`` are deleted in ``delete`` mode
    (with a summary printed and sent to Slack when configured) or just
    listed in ``info`` mode. Any exception raised while working is reported
    to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    parser.add_argument(
        '-w', '--whitelist', type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
             'Ignores case and dashes.')
    parser.add_argument(
        'action', choices=['info', 'delete'],
        help='Just display info or actually delete them?')
    parser.add_argument(
        'type', choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?')
    parser.add_argument(
        'idle_days', type=float,
        help='Number of days since last active instance in project was '
             'deleted to consider it idle.')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Authv2.from_env_or_args(args=args)

    slack = Slackbot(
        args.slack, script_name='neutron-reaper') if args.slack else None

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as f:
            # Lines read from a file keep their trailing newline; without
            # stripping, an entry could never equal a bare ID/name. Blank
            # lines are dropped so they do not become an empty entry.
            whitelist = {line.strip() for line in f if line.strip()}

    db = mysqlargs.connect()
    db.version = query.ROCKY

    try:
        to_delete = find_reapable_resources(
            db=db, auth=auth, type_=args.type, idle_days=args.idle_days,
            whitelist=whitelist)

        # Human-readable resource noun, pluralised unless exactly one.
        thing = '{}{}'.format(
            {'ip': 'floating IP', 'port': 'port'}[args.type],
            ('' if len(to_delete) == 1 else 's'),
        )

        if to_delete:
            if args.action == 'delete':
                for resource_id in to_delete:
                    RESOURCE_DELETE_COMMAND[args.type](auth, resource_id)
                message = (
                    'Commanded deletion of *{} {}* ({:.0f} day grace-period)'
                    .format(len(to_delete), thing, args.idle_days)
                )
                print(message)
                if slack:
                    slack.message(message)
            else:
                print((
                    'Found *{} {}* to delete ({:.0f} day grace-period):\n{}'
                    .format(len(to_delete), thing, args.idle_days, to_delete)
                ))
        else:
            print('No {} to delete ({:.0f} day grace-period)'.format(
                thing, args.idle_days))
    except:
        # Catch-all on purpose: notify Slack about anything, then re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Create maintenance reservations for a list of nodes.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    For each requested node a lease is made through ``reserve`` using a
    session scoped to the ``maintenance`` project. The start is either the
    given ``--start-time`` (interpreted as local time and converted to UTC)
    or the value returned by ``get_node_earliest_reserve_time``. A
    per-node summary is printed and sent to Slack when configured. Any
    exception raised while working is reported to Slack (when configured)
    and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Reserve nodes for maintenance')
    append_global_identity_args(parser, argv)
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument('--operator', type=str, required=True,
                        help='Chameleon account username of the operator')
    parser.add_argument('--nodes', type=str, required=True,
                        help='node ids or node names; comma separated')
    parser.add_argument('--reason', type=str, required=True,
                        help='maintenance reasons')
    parser.add_argument('--dry-run', action="store_true",
                        help='perform a trial run without making reservations')
    parser.add_argument(
        '--start-time', type=valid_date, default=None,
        help='lease start time (YYYY-mm-DD HH:MM:SS); if not given, start at the earliest possible datetime')
    parser.add_argument(
        '--estimate-hours', type=int, default=168,
        help='estimated hours required for maintenance; default is 168 hours (1 week)')

    args = parser.parse_args(argv[1:])

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='maintenance-reservation')

    # connect to database
    mysqlargs.extract(args)
    db = mysqlargs.connect()

    # keystone authentication
    auth_args = dict(
        auth_url=args.os_auth_url,
        username=args.os_username,
        password=args.os_password,
        project_name=args.os_project_name,
        region_name=args.os_region_name,
        interface='public',
    )
    if args.os_user_domain_name:
        auth_args['user_domain_name'] = args.os_user_domain_name
    if args.os_project_domain_name:
        auth_args['project_domain_name'] = args.os_project_domain_name

    # admin session: used to look up node information
    admin_sess = get_session(**auth_args)
    # maintenance-project session: used to create the leases
    auth_args['project_name'] = 'maintenance'
    maint_sess = get_session(**auth_args)

    try:
        # get node details
        report_info = {}
        for node in get_nodes(admin_sess, args.nodes.split(',')):
            if args.start_time:
                # convert to utc
                start = args.start_time.replace(
                    tzinfo=tz.tzlocal()).astimezone(tz.gettz('UTC'))
            else:
                # find the earliest reservation time for the node
                start = get_node_earliest_reserve_time(
                    db, node.uuid, args.estimate_hours)

            # reserve
            report_info[node.name] = reserve(
                sess=maint_sess,
                node=node,
                start_time=start,
                requested_hours=args.estimate_hours,
                reason=args.reason,
                operator=args.operator,
                dryrun=args.dry_run,
            )

        # summary
        line_format = ('Node {node_name} at {region} is under maintenance '
                       'from {start_time} to {end_time}')
        report_lines = []
        for node_name, (start_str, end_str) in report_info.items():
            report_lines.append(line_format.format(
                node_name=node_name,
                region=args.os_region_name,
                start_time=start_str,
                end_time=end_str))

        if not report_lines:
            print('nothing reserved!')
        else:
            report = '\n'.join(report_lines)
            print(report)
            if slack:
                slack.message(report)
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Detect orphan instances (and, outside KVM, orphan leases) and report them.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Each non-empty report is printed and, when a Slack bot is configured,
    posted as an error. With ``--kvm`` the instance lookup goes through a
    Keystone client built from ``OS_*`` variables and the lease report is
    skipped. Any exception raised while working is reported to Slack (when
    configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Detects orphan leases and remove them.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    parser.add_argument(
        '-d', '--dbversion', type=str,
        help='Version of the database. Schemas differ, pick the appropriate one.',
        choices=[query.LIBERTY, query.ROCKY],
        default=query.ROCKY)
    parser.add_argument('--kvm', help='Run at KVM site', action='store_true')
    osapi.add_arguments(parser)

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    kvm = args.kvm

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='orphan-detector')

    def _publish(report, nothing_found):
        # Print the report (and flag it on Slack), or say there was none.
        if not report:
            print(nothing_found)
            return
        print(report)
        if slack:
            slack.error(report)

    rule = "-" * 45

    try:
        db = mysqlargs.connect()
        db.version = args.dbversion

        if not kvm:
            orphan_instances = get_orphan_instances(db)
        else:
            # at kvm site
            os_vars = {
                name: value
                for name, value in os.environ.items()
                if name.startswith('OS_')
            }
            if args.osrc:
                os_vars.update(osapi.load_osrc(args.osrc))
            auth = v2.Password(
                username=os_vars['OS_USERNAME'],
                password=os_vars['OS_PASSWORD'],
                tenant_name=os_vars['OS_TENANT_NAME'],
                auth_url=os_vars['OS_AUTH_URL'])
            keystone = client.Client(session=session.Session(auth=auth))
            orphan_instances = get_orphan_instances_kvm(db, keystone)

        _publish(
            generate_report(orphan_instances,
                            rule + "ORPHAN INSTANCES" + rule),
            'No orphan instances detected')

        # Additionally perform lease report for CHI
        if not kvm:
            _publish(
                generate_report(get_orphan_leases(db),
                                rule + "ORPHAN LEASES" + rule),
                'No orphan leases detected')
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Reclaim idle floating IPs and summarise what was reclaimed.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    ``reaper`` does the work; when it returns a non-empty mapping of project
    to IPs and this is not a dry run, one line per project is printed and
    sent to Slack (when configured). Any exception raised while working is
    reported to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('floating IP reaper')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    parser.add_argument(
        '-w', '--whitelist', type=str,
        help='File of project/tenant IDs to ignore, one per line.')
    parser.add_argument(
        '--grace-days', type=int, required=True,
        help='Number of days since last used to consider to be idle')
    parser.add_argument('--dryrun', help='dryrun mode', action='store_true')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='floatingip-reaper')

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as handle:
            for raw_line in handle:
                whitelist.add(raw_line.rstrip('\n'))

    db = mysqlargs.connect()
    db.version = query.ROCKY

    try:
        result = reaper(
            db=db,
            auth=auth,
            grace_days=args.grace_days,
            whitelist=whitelist,
            dryrun=args.dryrun)

        if result and not args.dryrun:
            line_format = (
                'Reclaimed *{} floating ips* from project {} ({:.0f} day grace-period)'
            )
            message = '\n'.join(
                line_format.format(len(ips), proj, args.grace_days)
                for proj, ips in result.items())
            print(message)
            if slack:
                slack.message(message)
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Warn about, and terminate, leases that are in idle violation.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Raises:
        AssertionError: when ``--grace-hours`` is not greater than
            ``--warn-hours``.

    ``find_leases_in_violation`` returns the leases to warn and the leases
    to terminate. In ``delete`` mode, owners of warn-only leases are
    notified, every lease to terminate is deleted and its owner notified,
    and a summary is printed and sent to Slack (when configured). In
    ``info`` mode the lease IDs and their node UUIDs are pretty-printed.
    Exceptions raised while working are reported to Slack (when configured)
    and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Unutilized Lease Reaper')
    parser.add_argument(
        '-w', '--warn-hours', type=int,
        help='Number of hours after which to warn user.',
        default=DEFAULT_WARN_HOURS)
    parser.add_argument(
        '-r', '--grace-hours', type=int,
        help='Number of hours after which to remove lease.',
        default=DEFAULT_GRACE_HOURS)
    parser.add_argument(
        'action', choices=['info', 'delete'],
        help='Just display info or actually delete them?')
    parser.add_argument(
        '--sender', type=str,
        help='Email address of sender.',
        default='*****@*****.**')

    args = parser.parse_args(argv[1:])
    auth = osapi.Auth.from_env_or_args(args=args)

    # NOTE(review): an assert is skipped under ``python -O``; kept as-is so
    # the exception type seen by callers does not change.
    assert args.grace_hours > args.warn_hours, (
        "Grace hours must be greater than warning period.")

    slack = Slackbot(
        args.slack,
        script_name='unutilized-leases-reaper') if args.slack else None

    def _summarize(leases):
        # Reduce each lease to its ID and the UUIDs of its nodes.
        return [
            {'lease_id': entry['id'],
             'nodes': [node['uuid'] for node in entry['nodes']]}
            for entry in leases
        ]

    try:
        sender = args.sender
        warn_period = args.warn_hours
        grace_period = args.grace_hours
        warn, terminate = find_leases_in_violation(
            auth, warn_period, grace_period)

        if len(warn) + len(terminate) <= 0:
            print('No leases to warn or delete.')
        elif args.action != 'delete':
            pprint({
                'warn': _summarize(warn),
                'terminate': _summarize(terminate),
            })
        else:
            for lease in warn:
                # A lease that is about to be terminated gets only the
                # termination notice below.
                if lease in terminate:
                    continue
                subject = "Your lease {} is idle and may be terminated.".format(
                    lease['name'])
                send_notification(
                    auth, lease, sender, warn_period, grace_period,
                    subject, _email.IDLE_LEASE_WARNING_EMAIL_BODY)

            for lease in terminate:
                blazar.lease_delete(auth, lease['id'])
                subject = "Your lease {} has been terminated.".format(
                    lease['name'])
                send_notification(
                    auth, lease, sender, warn_period, grace_period,
                    subject, _email.IDLE_LEASE_TERMINATION_EMAIL_BODY)

            summary_format = ('Warned deletion of *{} idle leases* '
                              'Commanded deletion of *{} idle leases* '
                              '(Unutilized lease violation)')
            message = summary_format.format(len(warn), len(terminate))
            print(message)
            if slack:
                slack.message(message)
    except Exception:
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Show or clean Ironic nodes bound to Nova instances that no longer exist.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``-1`` when required OS variables are missing from the environment
        and rc file, otherwise ``None``.

    In ``info`` mode the affected nodes are printed. In ``delete`` mode,
    after optional sanity checks, each affected node either has its instance
    data cleared (when it is ``available``) or is set to the ``deleted``
    state; a summary is printed and sent to Slack when configured.
    Exceptions raised during the clean-up are reported to Slack (when
    configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser(
        'Kick Ironic nodes that refer to a deleted/nonexistant Nova instance')
    parser.add_argument(
        'mode', choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them')
    parser.add_argument(
        '--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
             'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    parser.add_argument('--force-insane', action='store_true',
                        help=argparse.SUPPRESS)  # for testing

    args = parser.parse_args(argv[1:])

    slack = Slackbot(
        args.slack, script_name='undead-instances') if args.slack else None

    # Environment first, then let the rc file override it.
    os_vars = {
        k: os.environ[k]
        for k in os.environ if k.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
            file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)
    node_instance_map, unbound_instances = find_unbound_instances(
        auth, nodes, instances)

    if args.mode == 'info':
        # no-op
        if unbound_instances:
            print('ZOMBIE INSTANCES ON NODES')
        else:
            print('No zombies currently.')

        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]
            assert inst_id not in instances, 'contradiction, this should be impossible'
            print('-----')
            print('Ironic Node\n'
                  ' ID: {}'.format(node['uuid']))
            print(' Instance: {}'.format(node['instance_uuid']))
            print(' State: {}'.format(node['provision_state']))

    elif args.mode == 'delete':
        if not args.force_sane or args.force_insane:
            # sanity check(s) to avoid doing something stupid
            # The original read an undefined ``instance_ids`` here, which
            # raised NameError; the running instances are in ``instances``.
            if len(instances) == 0 and len(unbound_instances) != 0:
                _thats_crazy('(in)sanity check: 0 running instances(?!)',
                             slack)

            # --force-insane drops the limit below zero so the check
            # always trips (testing aid).
            ubi_limit = 20 if not args.force_insane else -1
            if len(unbound_instances) > ubi_limit:
                _thats_crazy(
                    '(in)sanity check: it thinks there are {} unbound instances'
                    .format(len(unbound_instances)),
                    slack,
                )

        try:
            for inst_id in unbound_instances:
                node = node_instance_map[inst_id]
                node_id = node['uuid']
                if node['provision_state'] == 'available':
                    clear_node_instance_data(auth, node_id)
                else:
                    osrest.ironic_node_set_state(auth, node_id, 'deleted')

            message = 'Fixed Ironic nodes with nonexistant instances:\n{}'.format(
                '\n'.join(' • node `{}` → instance `{}`'.format(
                    node_instance_map[i]['uuid'],
                    node_instance_map[i]['instance_uuid'])
                    for i in unbound_instances))
            print(message)
            if slack:
                slack.success(message)
        except:
            # Catch-all on purpose: notify Slack about anything, re-raise.
            if slack:
                slack.exception()
            raise
def main(argv=None):
    """List or clean the ports returned by ``identify_dirty_ports``.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    With action ``clean`` the ports are passed to ``clean_ports`` and a
    summary is printed and sent to Slack (when configured); with ``info``
    they are only listed. Nothing is printed when no dirty ports are found.
    Any exception raised while working is reported to Slack (when
    configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Quiet mode. No output to Slack if there was nothing to do.')
    parser.add_argument(
        '--multiport', action='store_true',
        help='Enable if Ironic nodes may have multiple ports associated.')
    parser.add_argument(
        'action', choices=['info', 'clean'],
        help='Just display info or actually fix them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='dirty-ports')

    # Single-port assertion is on unless --multiport was given.
    assert_single = not args.multiport
    take_action = args.action == 'clean'

    db = mysqlargs.connect()

    try:
        bad_ports = identify_dirty_ports(auth, assert_single)
        if bad_ports:
            port_lines = [
                ' • port `{uuid}` on node `{node_uuid}`'.format(**port)
                for port in bad_ports
            ]
            str_ports = '\n'.join(port_lines)

            if not take_action:
                print("(read-only mode, not cleaning ports):\n{}".format(
                    str_ports))
            else:
                clean_ports(db, bad_ports)
                message = "Cleaned {} ports with `internal_info` data on `available` nodes:\n{}".format(
                    len(bad_ports), str_ports)
                print(message)
                if slack:
                    slack.success(message)
    except BaseException:
        # Same reach as a bare except: tell Slack about anything, re-raise.
        if slack:
            slack.exception()
        raise