Ejemplo n.º 1
0
def main(argv=None):
    """Command-line entry point for the orphaned resource provider fixer.

    Parses the command line, connects to MySQL, runs
    ``resource_providers_fixer`` and, in ``update`` mode, optionally posts a
    summary of the result to Slack.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Fixes issues with orphaned resource providers.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument(
        '--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        'action', choices=['info', 'update'],
        help='Just display info or actually update them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    slack = (Slackbot(args.slack, script_name='orphan-resource-providers')
             if args.slack else None)

    db = mysqlargs.connect()

    def run_fixer():
        # "info" only describes what would be done; "update" applies it.
        return resource_providers_fixer(
            db=db, describe=args.action == 'info', quiet=args.quiet)

    if slack:
        with slack:  # log exceptions
            update_count = run_fixer()
    else:
        update_count = run_fixer()

    # Slack is only notified for real updates, and quiet mode suppresses the
    # "nothing to do" notification.
    if not slack or args.action != 'update':
        return
    if args.quiet and not update_count:
        return

    if update_count > 0:
        message = 'Commanded update of *{} resource providers*'.format(
            update_count)
        color = '#000000'
    else:
        message = 'No resource providers to update'
        color = '#cccccc'

    slack.post('orphan-resource-providers', message, color=color)
Ejemplo n.º 2
0
def main(argv=None):
    """Report Ironic nodes that are in the "error" state but not in maintenance.

    The report is printed and, when a Slack configuration was supplied, also
    posted to Slack.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing, else ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser(
        'Strange things, as per someone\'s definition of "strange".')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Environment values come first; the rc file (if any) overrides them.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(osapi.load_osrc(args.osrc))

    missing_os_vars = set(osapi.Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
              file=sys.stderr)
        return -1

    auth = osapi.Auth(os_vars)
    nodes = osrest.ironic_nodes(auth, details=True)

    errored_nodes = []
    for node in nodes.values():
        if node['provision_state'] == 'error' and not node['maintenance']:
            errored_nodes.append(node)

    if not errored_nodes:
        if args.verbose:
            print('All good.')
        return

    lines = ['Ironic nodes in "error" provision state, not in maintenance']
    for node in errored_nodes:
        lines.append('• `{}`, last error: {}'.format(
            node['uuid'], node.get('last_error')))
    message = '\n'.join(lines)

    # The console gets plain asterisks instead of the bullet character.
    print(message.replace('•', '*'))
    if slack:
        slack.post(SUBCOMMAND, message, color='xkcd:red')
Ejemplo n.º 3
0
def main(argv=None):
    """Clean aggregates of terminated leases and re-home orphan hosts.

    Steps, in order: clear the aggregates belonging to terminated leases,
    move each orphan host either to aggregate ``1`` or to the aggregate named
    by its active allocation, delete old Blazar host allocations, and report
    the collected messages to Slack (as an error if ``clear_aggregates``
    returned errors).

    NOTE(review): ``args``, ``leases``, ``aggregates``, ``auth`` and ``conn``
    are not defined inside this function; presumably they come from setup
    code that is not visible here -- confirm before relying on this entry
    point.

    Args:
        argv: Accepted for interface compatibility; not read in this body.

    Raises:
        Any exception raised while working is reported to Slack (when
        configured) and then re-raised.
    """
    script = 'clean-old-aggregates'
    slack = Slackbot(args.slack, script_name=script) if args.slack else None

    try:
        term_leases = [
            lease for lease in leases.values() if is_terminated(lease)
        ]
        # Leases without aggregates yield None and are skipped.
        old_aggregates = [
            aggs for aggs in (aggregates_for_lease(lease)
                              for lease in term_leases) if aggs is not None
        ]
        aggregate_list = list(itertools.chain.from_iterable(old_aggregates))
        errors, reports = clear_aggregates(aggregate_list)
        orphan_list = orphan_find(aggregates)

        for orphan in orphan_list:
            destiny = has_active_allocation(orphan)
            host = osrest.blazar.host(auth, orphan)
            if destiny is None:
                reports.append(
                    "Error identifying allocation for orphan host {}.".format(
                        orphan))
            elif destiny is False:
                reports.append(
                    "Returning orphan host {} to freepool.".format(orphan) +
                    "\n")
                # NOTE(review): aggregate id 1 is presumably the freepool.
                osrest.nova.aggregate_add_host(auth, 1,
                                               host['hypervisor_hostname'])
            else:
                # Look up the id of the aggregate whose name matches the
                # allocation; raises IndexError when none matches.
                destination_agg = [
                    aggr['id'] for aggr in aggregates.values()
                    if aggr['name'] == destiny
                ][0]
                reports.append(
                    "Moving orphan host {} to destined aggregate {}.".format(
                        orphan, destination_agg))
                osrest.nova.aggregate_add_host(auth, destination_agg,
                                               host['hypervisor_hostname'])

        old_allocations = query.blazar_find_old_host_alloc(conn)
        for alloc in old_allocations:
            hostname, lease_id = del_expired_alloc(conn, alloc)
            reports.append(
                "Deleted host_allocation for host {} matching expired lease {}."
                .format(hostname, lease_id))
        conn.db.commit()

        if reports:
            str_report = '\n'.join(reports)
            if slack:
                if errors:
                    slack.error(str_report)
                else:
                    slack.message(str_report)
    except:
        # Deliberately broad: report whatever went wrong, then re-raise.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 4
0
def main(argv=None):
    """Find, and optionally delete, Neutron ports that conflict with Ironic.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when neither subnet option was given (or the mode is unknown),
        otherwise ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(description='Remove orphan ports in '
        'Neutron referring to an inactive Ironic instance')

    parser.add_argument(
        'mode', choices=['info', 'delete'],
        help='Just display data on the conflict ports or delete them')
    parser.add_argument(
        '--ignore-subnet', type=str,
        help='Ignore Neutron ports in this subnet (UUID). Must provide either '
             'this or --ignore-from-ironic-conf. This overrides the conf.')
    parser.add_argument(
        '-c', '--ignore-from-ironic-conf', type=str,
        help='Ignore Neutron ports in the subnet(s) under the '
             '"provisioning_network" network in the "neutron" section of '
             'this configuration file.')
    parser.add_argument(
        '--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack, SUBCOMMAND)
    else:
        slack = None
    auth = Auth.from_env_or_args(args=args)

    # One of the two subnet sources is mandatory; the explicit subnet wins.
    if not (args.ignore_subnet or args.ignore_from_ironic_conf):
        print('Must provide --ignore-subnet or --ignore-from-ironic-conf',
              file=sys.stderr)
        return -1

    if args.ignore_subnet:
        ignore_subnets = [args.ignore_subnet]
    else:
        ironic_config = configparser.ConfigParser()
        ironic_config.read(args.ignore_from_ironic_conf)
        provisioning_net = ironic_config['neutron']['provisioning_network']
        ignore_subnets = osrest.neutron.network(
            auth, provisioning_net)['subnets']

    # Run inside the Slackbot context when one exists, else a no-op context.
    with slack or nullcontext():
        conflict_macs = find_conflicts(auth, ignore_subnets, slack=slack)

        mode = args.mode
        if mode == 'info':
            show_info(conflict_macs, slack=slack)
        elif mode == 'delete':
            delete(auth, conflict_macs,
                   safe=not args.force_sane,
                   slack=slack,
                   verbose=args.verbose)
        else:
            print('unknown command', file=sys.stderr)
            return -1
Ejemplo n.º 5
0
def main(argv=None):
    """Command-line entry point for the floating IP and port reclaimer.

    Parses the command line, builds the OpenStack auth object and the MySQL
    connection, then hands everything to ``cleaner``.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    osapi.add_arguments(parser)

    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Quiet mode. No output to Slack if there was nothing to do.')
    parser.add_argument(
        '--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        'action', choices=['info', 'clean'],
        help='Just display info or actually fix them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    slack = (Slackbot(args.slack, script_name='dirty-ports')
             if args.slack else None)

    db = mysqlargs.connect()

    cleaner_kwargs = dict(
        db=db,
        auth=auth,
        take_action=(args.action == 'clean'),
        quiet=args.quiet,
        slack=slack,
    )

    if not slack:
        cleaner(**cleaner_kwargs)
    else:
        with slack:  # log exceptions
            cleaner(**cleaner_kwargs)
def main(argv=None):
    """Command-line entry point for the orphaned resource provider fixer.

    Connects to MySQL and runs ``resource_providers_fixer``. In ``update``
    mode the number of updated providers is printed and, when a Slack
    configuration was supplied, also sent to Slack.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Raises:
        Any exception raised while working is reported to Slack (when
        configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    # NOTE(review): ``args.slack`` is read below but not declared here;
    # presumably base_parser adds the --slack option -- confirm.
    parser = base_parser('Fixes issues with orphaned resource providers.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q',
        '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument('action',
                        choices=['info', 'update'],
                        help='Just display info or actually update them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    slack = Slackbot(
        args.slack,
        script_name='orphan-resource-providers') if args.slack else None

    try:
        db = mysqlargs.connect()

        update_count = resource_providers_fixer(db=db,
                                                describe=args.action == 'info',
                                                quiet=args.quiet)

        if args.action == 'update':
            if update_count > 0:
                message = ('Commanded update of *{} resource providers*'.
                           format(update_count))

                print(message)

                # Slack is optional; without --slack there is no bot to call.
                if slack:
                    slack.message(message)
            elif not args.quiet:
                print('No resource providers to delete')
    except:
        # Deliberately broad: report whatever went wrong, then re-raise.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 7
0
def main(argv=None):
    """Command-line entry point for the retired node state enforcer.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Retired node state enforcer.')
    db_defaults = {
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    }
    mysqlargs = MySqlArgs(db_defaults)
    mysqlargs.inject(parser)
    parser.add_argument('--dryrun', action='store_true', help='dryrun mode')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    conn = mysqlargs.connect()

    if args.slack:
        slack = Slackbot(args.slack, script_name='enforce-retirement')
    else:
        slack = None

    # Find retired nodes and ensure they are non reservable in blazar
    correct_state(conn, slack, dryrun=args.dryrun)
Ejemplo n.º 8
0
def main(argv=None):
    """Show or reset Ironic nodes that are in a known, curable error state.

    In ``info`` mode the curable nodes are printed (and summarised to Slack).
    In ``reset`` mode each curable node is reset through ``NodeResetter``;
    nodes that raise ``TooManyResets`` are skipped and listed separately.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing, else ``None``.

    Raises:
        Any exception other than ``TooManyResets`` raised by a reset is
        posted to Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Kick Ironic nodes that '
        'are in an common/known error state')

    parser.add_argument('mode', choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument('--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true',
        help='Dry run, don\'t actually do anything')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack)
    else:
        slack = None

    # Environment values come first; the rc file (if any) overrides them.
    os_vars = {k: os.environ[k] for k in os.environ if k.startswith(OS_ENV_PREFIX)}
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print(
            'Missing required OS values in env/rcfile: {}'
            .format(', '.join(missing_os_vars)),
            file=sys.stderr
        )
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth, details=True)
    cureable = cureable_nodes(nodes)

    if args.mode == 'info':
        print('{} node(s) in a state that we can treat'.format(len(cureable)))
        for nid in cureable:
            print('-' * 40)
            print('\n'.join(
                '{:<25s} {}'.format(key, nodes[nid].get(key))
                for key
                in [
                    'uuid',
                    'provision_updated_at',
                    'provision_state',
                    'last_error',
                    'instance_uuid',
                    'extra',
                    'maintenance',
                ]
            ))
        if slack:
            if cureable:
                message = ('{} nodes in correctable error states (no action '
                           'taken)'.format(len(cureable)))
                color = 'xkcd:orange red'
            else:
                # Nothing curable: still mention nodes in error that are not
                # in maintenance.
                error_nodes = sum(1
                    for (nid, n)
                    in nodes.items()
                    if (
                        not n['maintenance'] and
                        n['provision_state'] == 'error'
                    )
                )
                if error_nodes:
                    message = ('No nodes in correctable error states ({} other'
                               ' nodes in error state)').format(error_nodes)
                    color = 'xkcd:yellow'
                else:
                    message = 'No nodes in correctable error states'
                    color = 'xkcd:green'

            slack.post(SUBCOMMAND, message, color=color)
        return

    if len(cureable) == 0:
        if args.verbose:
            message = 'Nothing to do.'
            print(message)
            if slack:
                slack.post(SUBCOMMAND, message, color='xkcd:light grey')
        return

    print('To correct: {}'.format(repr(cureable)))
    if slack:
        message = ['Ironic nodes in correctable error states']
        for nid in cureable:
            message.append(' • `{}`: "{}"'.format(nid, nodes[nid]['last_error']))
        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color='xkcd:darkish red')

    reset_ok = []  # (node id, reset count) for every attempted reset
    too_many = []  # node ids that were skipped with TooManyResets
    for nid in cureable:
        resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
        try:
            resetter.reset()
        except TooManyResets:
            too_many.append(nid)
        except Exception as e:
            if slack:
                error = '{}; check logs'.format(str(e))
                slack.post(SUBCOMMAND, error, color='xkcd:red')
            raise
        else:
            reset_ok.append((nid, resetter.tracker.count()))

    print('Attempted to fix: {}'.format(repr(reset_ok)))
    print('Refused to fix:   {}'.format(repr(too_many)))
    if slack:
        message = []
        if reset_ok:
            message.append('Attempted reset of nodes')
            message.extend(' • `{}`: {} resets'.format(*r) for r in reset_ok)
        if too_many:
            message.append('\nAbstained (already at limit)')
            # List the skipped nodes themselves, not the reset ones.
            message.extend(' • `{}`'.format(nid) for nid in too_many)

        color = 'xkcd:chartreuse'
        if args.dry_run:
            color = 'xkcd:yellow'
            message.append('dry run, no changes actually made.')
        if too_many:
            color = 'xkcd:orange'

        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color=color)
Ejemplo n.º 9
0
def main(argv=None):
    """Show or delete Neutron ports whose MAC conflicts with an Ironic node.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when neither subnet option was given (or the mode is unknown),
        otherwise ``None``.

    Raises:
        RuntimeError: In ``delete`` mode, when more than 10 conflicts were
            found and ``--force-sane`` was not passed.
        Any exception raised while working is reported to Slack (when
        configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Remove orphan ports in Neutron referring to an '
                         'inactive Ironic instance')
    parser.add_argument(
        'mode', choices=['info', 'delete'],
        help='Just display data on the conflict ports or delete them')
    parser.add_argument(
        '--ignore-subnet', type=str,
        help='Ignore Neutron ports in this subnet (UUID). Must provide either '
             'this or --ignore-from-ironic-conf. This overrides the conf.')
    parser.add_argument(
        '-c', '--ignore-from-ironic-conf', type=str,
        help='Ignore Neutron ports in the subnet(s) under the '
             '"provisioning_network" network in the "neutron" section of '
             'this configuration file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack, SUBCOMMAND)
    else:
        slack = None
    auth = Auth.from_env_or_args(args=args)

    # One of the two subnet sources is mandatory; the explicit subnet wins.
    if not (args.ignore_subnet or args.ignore_from_ironic_conf):
        print('Must provide --ignore-subnet or --ignore-from-ironic-conf',
              file=sys.stderr)
        return -1

    if args.ignore_subnet:
        ignore_subnets = [args.ignore_subnet]
    else:
        ironic_config = configparser.ConfigParser()
        ironic_config.read(args.ignore_from_ironic_conf)
        provisioning_net = ironic_config['neutron']['provisioning_network']
        ignore_subnets = osrest.neutron.network(
            auth, provisioning_net)['subnets']

    try:
        conflict_macs = find_conflicts(auth, ignore_subnets)

        if args.mode == 'info':
            show_info(conflict_macs)
        elif args.mode == 'delete':
            sanity_checked = not args.force_sane
            if sanity_checked and len(conflict_macs) > 10:
                raise RuntimeError('(in)sanity check: thinks there are {} conflicting MACs'.format(len(conflict_macs)))

            for conflict in conflict_macs.values():
                osrest.neutron_port_delete(auth, conflict['neutron_port_id'])

            if slack:
                line_template = ' • Neutron Port `{neutron_port_id}` → `{mac}` ← Ironic Node `{ironic_node_id}` (Port `{ironic_port}`)'
                details = '\n'.join(
                    line_template.format(**conflict)
                    for conflict in conflict_macs.values())
                slack.success(
                    'Fixed Ironic/Neutron MAC conflicts\n{}'.format(details))
        else:
            print('unknown command', file=sys.stderr)
            return -1
    except:
        # Report whatever went wrong to Slack, then re-raise.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 10
0
def main(argv=None):
    """Show or reset Ironic nodes that are in a known, curable error state.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing, else ``None``.

    Raises:
        Any exception raised while working is reported to Slack (when
        configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser(
        'Kick Ironic nodes that are in an common/known error state')
    parser.add_argument(
        'mode', choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--dry-run', action='store_true',
        help='Dry run, don\'t actually do anything')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack, script_name='ironic-error-resetter')
    else:
        slack = None

    # Environment values come first; the rc file (if any) overrides them.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
              file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    try:
        nodes = osrest.ironic_nodes(auth, details=True)
        cureable = cureable_nodes(nodes)

        if args.mode == 'info':
            shown_fields = [
                'uuid',
                'provision_updated_at',
                'provision_state',
                'last_error',
                'instance_uuid',
                'extra',
                'maintenance',
            ]
            print('{} node(s) in a state that we can treat'.format(
                len(cureable)))
            for nid in cureable:
                node = nodes[nid]
                print('-' * 40)
                print('\n'.join(
                    '{:<25s} {}'.format(field, node.get(field))
                    for field in shown_fields))
            return

        if len(cureable) == 0:
            if args.verbose:
                print('Nothing to do.')
            return

        print('To correct: {}'.format(repr(cureable)))

        reset_ok = []
        # NOTE(review): nothing in this function ever appends to too_many,
        # so the "Skipped" section below is never emitted -- confirm intent.
        too_many = []
        for nid in cureable:
            resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
            resetter.reset()
            reset_ok.append((nid, resetter.tracker.count()))

        message_lines = []
        if reset_ok:
            message_lines.append('Performed reset of nodes')
            for nid, count in reset_ok:
                message_lines.append(' • `{}`: {} resets'.format(nid, count))
        if too_many:
            message_lines.append('Skipped (already at limit)')
            for nid in too_many:
                message_lines.append(' • `{}`'.format(nid))
        if args.dry_run:
            message_lines.append('dry run, no changes actually made.')

        message = '\n'.join(message_lines)
        print(message)

        # Dry runs are not announced on Slack.
        if slack and not args.dry_run:
            slack.success(message)
    except:
        # Report whatever went wrong to Slack, then re-raise.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 11
0
def main(argv=None):
    """Find, and optionally delete, idle floating IPs or ports.

    Resources are selected by ``find_reapable_resources``. In ``delete``
    mode each one is deleted and a summary is printed and sent to Slack
    (when configured); otherwise the candidates are only printed.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Raises:
        Any exception raised while working is reported to Slack (when
        configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument('-w', '--whitelist', type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
             'Ignores case and dashes.')
    parser.add_argument('action', choices=['info', 'delete'],
        help='Just display info or actually delete them?')
    parser.add_argument('type', choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?')
    parser.add_argument('idle_days', type=float,
        help='Number of days since last active instance in project was '
        'deleted to consider it idle.')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Authv2.from_env_or_args(args=args)

    slack = Slackbot(args.slack, script_name='neutron-reaper') if args.slack else None

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as f:
            # Drop line endings and blank lines: an entry that still carries
            # its trailing newline can never equal a bare ID or name.
            whitelist = {
                entry for entry in (line.strip() for line in f) if entry
            }

    db = mysqlargs.connect()
    db.version = query.ROCKY

    try:
        to_delete = find_reapable_resources(db=db, auth=auth, type_=args.type, idle_days=args.idle_days, whitelist=whitelist)

        # Human-readable resource name, pluralised unless exactly one.
        thing = '{}{}'.format(
            {'ip': 'floating IP', 'port': 'port'}[args.type],
            ('' if len(to_delete) == 1 else 's'),
        )

        if to_delete:
            if args.action == 'delete':
                for resource_id in to_delete:
                    RESOURCE_DELETE_COMMAND[args.type](auth, resource_id)
                message = (
                    'Commanded deletion of *{} {}* ({:.0f} day grace-period)'
                    .format(len(to_delete), thing, args.idle_days)
                )

                print(message)

                if slack:
                    slack.message(message)
            else:
                print((
                    'Found *{} {}* to delete ({:.0f} day grace-period):\n{}'
                    .format(len(to_delete), thing, args.idle_days, to_delete)
                ))
        else:
            print('No {} to delete ({:.0f} day grace-period)'.format(thing, args.idle_days))

    except:
        # Deliberately broad: report whatever went wrong, then re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Reserve the given nodes for maintenance and report the lease windows.

    For every requested node a lease is created through ``reserve``, starting
    either at ``--start-time`` (converted from local time to UTC) or at the
    earliest time returned by ``get_node_earliest_reserve_time``.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Raises:
        Any exception raised while reserving is reported to Slack (when
        configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Reserve nodes for maintenance')
    append_global_identity_args(parser, argv)

    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '--operator', type=str, required=True,
        help='Chameleon account username of the operator')
    parser.add_argument(
        '--nodes', type=str, required=True,
        help='node ids or node names; comma separated')
    parser.add_argument(
        '--reason', type=str, required=True,
        help='maintenance reasons')
    parser.add_argument(
        '--dry-run', action='store_true',
        help='perform a trial run without making reservations')
    parser.add_argument(
        '--start-time', type=valid_date, default=None,
        help='lease start time (YYYY-mm-DD HH:MM:SS); if not given, start at the earliest possible datetime')
    parser.add_argument(
        '--estimate-hours', type=int, default=168,
        help='estimated hours required for maintenance; default is 168 hours (1 week)')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack, script_name='maintenance-reservation')
    else:
        slack = None

    # connect to database
    mysqlargs.extract(args)
    db = mysqlargs.connect()

    # keystone authentication
    auth_args = dict(
        auth_url=args.os_auth_url,
        username=args.os_username,
        password=args.os_password,
        project_name=args.os_project_name,
        region_name=args.os_region_name,
        interface='public',
    )
    for option in ('user_domain_name', 'project_domain_name'):
        value = getattr(args, 'os_' + option)
        if value:
            auth_args[option] = value

    # The admin session reads node information; the second session, scoped
    # to the 'maintenance' project, creates the leases.
    admin_sess = get_session(**auth_args)
    auth_args['project_name'] = 'maintenance'
    maint_sess = get_session(**auth_args)

    try:
        nodes = get_nodes(admin_sess, args.nodes.split(','))

        report_info = {}
        for node in nodes:
            if args.start_time:
                # The given time is taken as local time and converted to UTC.
                lease_start_time = args.start_time.replace(
                    tzinfo=tz.tzlocal()).astimezone(tz.gettz('UTC'))
            else:
                # find the earliest reservation time for the node
                lease_start_time = get_node_earliest_reserve_time(
                    db, node.uuid, args.estimate_hours)

            start_time_str, end_time_str = reserve(
                sess=maint_sess,
                node=node,
                start_time=lease_start_time,
                requested_hours=args.estimate_hours,
                reason=args.reason,
                operator=args.operator,
                dryrun=args.dry_run,
            )
            report_info[node.name] = (start_time_str, end_time_str)

        # summary
        line_template = ('Node {node_name} at {region} is under maintenance '
                         'from {start_time} to {end_time}')
        report_lines = []
        for node_name, (start_str, end_str) in report_info.items():
            report_lines.append(line_template.format(
                node_name=node_name,
                region=args.os_region_name,
                start_time=start_str,
                end_time=end_str))

        if not report_lines:
            print('nothing reserved!')
        else:
            report = '\n'.join(report_lines)
            print(report)
            if slack:
                slack.message(report)
    except:
        # Report whatever went wrong to Slack, then re-raise.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 13
0
def main(argv=None):
    """Show or clean Ironic nodes bound to Nova instances that no longer exist.

    A node is a candidate when its ``instance_uuid`` is set but is not among
    the instances returned by ``osrest.nova_instances``.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing, else ``None``.

    Raises:
        Any exception raised while cleaning is posted to Slack (when
        configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(description='Kick Ironic nodes that '
        'refer to a deleted/nonexistant Nova instance')

    parser.add_argument(
        'mode', choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them')
    parser.add_argument(
        '--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    parser.add_argument(
        '--force-insane', action='store_true',
        help=argparse.SUPPRESS)  # for testing

    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Environment values come first; the rc file (if any) overrides them.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print(
            'Missing required OS values in env/rcfile: {}'
            .format(', '.join(missing_os_vars)),
            file=sys.stderr
        )
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)

    # Index the nodes that claim an instance by that instance's id.
    node_instance_map = {}
    for candidate in nodes.values():
        if candidate['instance_uuid'] is not None:
            node_instance_map[candidate['instance_uuid']] = candidate

    instance_ids = set(instances)
    unbound_instances = set(node_instance_map) - instance_ids

    if args.mode == 'info':
        # no-op
        if unbound_instances:
            print('ZOMBIE INSTANCES ON NODES')
        else:
            print('No zombies currently.')

        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]

            assert inst_id not in instances, 'contradiction, this should be impossible'

            print('-----')
            print('Ironic Node\n'
                  '  ID:       {}'.format(node['uuid']))
            print('  Instance: {}'.format(node['instance_uuid']))
            print('  State:    {}'.format(node['provision_state']))

        if slack:
            if unbound_instances:
                message = ('{} nodes with dead instances (no action taken)'
                           .format(len(unbound_instances)))
                color = 'xkcd:orange red'
            else:
                message = 'No nodes with dead instances.'
                color = 'xkcd:green'
            slack.post(SUBCOMMAND, message, color=color)

    elif args.mode == 'delete':
        if not args.force_sane or args.force_insane:
            # sanity check(s) to avoid doing something stupid
            if not instance_ids and unbound_instances:
                _thats_crazy('(in)sanity check: 0 running instances(?!)', slack)

            # --force-insane drops the limit so the check always trips.
            ubi_limit = -1 if args.force_insane else 20
            if len(unbound_instances) > ubi_limit:
                _thats_crazy(
                    '(in)sanity check: it thinks there are {} unbound instances'
                        .format(len(unbound_instances)),
                    slack,
                )

        if slack:
            message = None
            if unbound_instances:
                bullet_lines = '\n'.join(
                    ' • node `{}` → instance `{}`'.format(
                        node_instance_map[inst_id]['uuid'],
                        node_instance_map[inst_id]['instance_uuid'])
                    for inst_id in unbound_instances
                )
                message = 'Possible Ironic nodes with nonexistant instances:\n{}'.format(
                    bullet_lines)
                color = 'xkcd:darkish red'
            elif args.verbose:
                message = 'No Ironic nodes visibly clinging to dead instances'
                color = 'xkcd:light grey'

            if message:
                slack.post(SUBCOMMAND, message, color=color)

        try:
            for inst_id in unbound_instances:
                node = node_instance_map[inst_id]
                node_id = node['uuid']
                # Available nodes only need their instance data cleared;
                # anything else is sent to the 'deleted' state.
                if node['provision_state'] == 'available':
                    clear_node_instance_data(auth, node_id)
                else:
                    osrest.ironic_node_set_state(auth, node_id, 'deleted')
        except Exception as e:
            if slack:
                error = '{} while trying to clean instances; check logs for traceback'.format(str(e))
                slack.post(SUBCOMMAND, error, color='xkcd:red')
            raise
        else:
            if unbound_instances and slack:
                ok_message = 'Cleaned {} instance(s).'.format(
                    len(unbound_instances))
                slack.post(SUBCOMMAND, ok_message, color='xkcd:chartreuse')
Ejemplo n.º 14
0
def main(argv=None):
    """Report orphan instances and, outside KVM sites, orphan leases.

    Parses ``argv`` (``sys.argv`` when omitted), connects to the database
    and prints one report per kind of orphan. If a Slack webhook file was
    given, every non-empty report is also sent to Slack as an error, and
    any exception is forwarded to Slack before being re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Detects orphan leases and remove them.')

    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-d',
        '--dbversion',
        type=str,
        choices=[query.LIBERTY, query.ROCKY],
        default=query.ROCKY,
        help='Version of the database. Schemas differ, '
        'pick the appropriate one.',
    )
    parser.add_argument('--kvm', action='store_true', help='Run at KVM site')
    osapi.add_arguments(parser)

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    at_kvm_site = args.kvm

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='orphan-detector')

    def announce(report, nothing_found):
        # Print the report (mirroring it to Slack when configured), or the
        # all-clear text when the report is empty.
        if not report:
            print(nothing_found)
            return
        print(report)
        if slack:
            slack.error(report)

    rule = '-' * 45  # banner padding placed on both sides of each title

    try:
        db = mysqlargs.connect()
        db.version = args.dbversion

        if at_kvm_site:
            # at kvm site: credentials come from OS_* environment variables,
            # optionally overridden by the rc file.
            os_vars = {
                name: value
                for name, value in os.environ.items()
                if name.startswith('OS_')
            }
            if args.osrc:
                os_vars.update(osapi.load_osrc(args.osrc))

            credentials = v2.Password(
                username=os_vars['OS_USERNAME'],
                password=os_vars['OS_PASSWORD'],
                tenant_name=os_vars['OS_TENANT_NAME'],
                auth_url=os_vars['OS_AUTH_URL'],
            )
            keystone = client.Client(
                session=session.Session(auth=credentials))

            orphans = get_orphan_instances_kvm(db, keystone)
        else:
            orphans = get_orphan_instances(db)

        announce(
            generate_report(orphans, rule + 'ORPHAN INSTANCES' + rule),
            'No orphan instances detected',
        )

        # Additionally perform lease report for CHI
        if not at_kvm_site:
            announce(
                generate_report(get_orphan_leases(db),
                                rule + 'ORPHAN LEASES' + rule),
                'No orphan leases detected',
            )
    except BaseException:
        # Catch-all on purpose: whatever stops the run is pushed to Slack
        # first and then propagated unchanged.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 15
0
def main(argv=None):
    """Run the floating IP reaper and report what was reclaimed.

    Parses ``argv`` (``sys.argv`` when omitted), loads the optional
    whitelist file, connects to the database and calls ``reaper``. When
    the reaper returns a non-empty result and this is not a dry run, one
    summary line per project is printed and, if configured, sent to Slack.

    Raises:
        Exception: anything raised while connecting or reaping is
            forwarded to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('floating IP reaper')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-w',
        '--whitelist',
        type=str,
        help='File of project/tenant IDs to ignore, one per line.')
    parser.add_argument(
        '--grace-days',
        type=int,
        required=True,
        help='Number of days since last used to consider to be idle')
    parser.add_argument('--dryrun', help='dryrun mode', action='store_true')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    slack = Slackbot(args.slack,
                     script_name='floatingip-reaper') if args.slack else None

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as f:
            # Strip all surrounding whitespace (including the '\r' left by
            # CRLF files) and drop blank lines, so a protected project is
            # never missed because of invisible characters in the file.
            stripped = (line.strip() for line in f)
            whitelist = {entry for entry in stripped if entry}

    try:
        # Connect inside the try so connection failures reach Slack too.
        db = mysqlargs.connect()
        db.version = query.ROCKY

        result = reaper(db=db,
                        auth=auth,
                        grace_days=args.grace_days,
                        whitelist=whitelist,
                        dryrun=args.dryrun)
        if result and not args.dryrun:
            # result is iterated as project -> collection of reclaimed IPs.
            message = '\n'.join(
                'Reclaimed *{} floating ips* from project {} ({:.0f} day grace-period)'
                .format(len(ips), proj, args.grace_days)
                for proj, ips in result.items())
            print(message)

            if slack:
                slack.message(message)
    except Exception:
        if slack:
            slack.exception()
        raise
Ejemplo n.º 16
0
def main(argv=None):
    """List, and optionally clean, dirty ports found on Ironic nodes.

    Parses ``argv`` (``sys.argv`` when omitted). With the ``clean`` action
    the ports returned by ``identify_dirty_ports`` are passed to
    ``clean_ports`` and a summary is printed and sent to Slack (when
    configured); with ``info`` they are only printed. Any exception is
    forwarded to Slack before being re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output to Slack if there was nothing to do.',
    )
    parser.add_argument(
        '--multiport',
        action='store_true',
        help='Enable if Ironic nodes may have multiple ports associated.',
    )
    parser.add_argument(
        'action',
        choices=['info', 'clean'],
        help='Just display info or actually fix them?',
    )

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    auth = osapi.Auth.from_env_or_args(args=args)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='dirty-ports')

    # --multiport switches off the single-port assumption.
    assert_single = not args.multiport
    take_action = args.action == 'clean'

    db = mysqlargs.connect()

    try:
        bad_ports = identify_dirty_ports(auth, assert_single)
        if not bad_ports:
            return

        bullet_lines = [
            ' • port `{uuid}` on node `{node_uuid}`'.format(**port)
            for port in bad_ports
        ]
        str_ports = '\n'.join(bullet_lines)

        if not take_action:
            print("(read-only mode, not cleaning ports):\n{}".format(
                str_ports))
            return

        clean_ports(db, bad_ports)
        message = "Cleaned {} ports with `internal_info` data on `available` nodes:\n{}".format(
            len(bad_ports), str_ports)
        print(message)

        if slack:
            slack.success(message)
    except BaseException:
        # Catch-all on purpose: report whatever stopped the run to Slack,
        # then propagate it unchanged.
        if slack:
            slack.exception()
        raise
Ejemplo n.º 17
0
def main(argv=None):
    """Entry point of the neutron reaper (floating IPs / ports).

    Parses ``argv`` (``sys.argv`` when omitted), refuses the unvalidated
    "delete ports on Ocata" combination, then calls ``reaper`` with the
    parsed options. In ``delete`` mode a summary is posted to Slack (when
    configured) unless ``--quiet`` is set and nothing was removed.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    osapi.add_arguments(parser)

    # Options first, then the three positionals; the registration order is
    # significant for positional parsing and for the generated help text.
    parser.add_argument(
        '-w', '--whitelist',
        type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
        'Ignores case and dashes.',
    )
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.',
    )
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information '
        'to send a notification to',
    )
    parser.add_argument(
        '-d', '--dbversion',
        type=str,
        choices=[query.LIBERTY, query.OCATA],
        default=query.LIBERTY,
        help='Version of the database. Schemas differ, '
        'pick the appropriate one.',
    )
    parser.add_argument('--kvm', action='store_true', help='Run at KVM site')

    parser.add_argument(
        'action',
        choices=['info', 'delete'],
        help='Just display info or actually delete them?',
    )
    parser.add_argument(
        'type',
        choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?',
    )
    parser.add_argument(
        'idle_days',
        type=float,
        help='Number of days since last active instance in project was '
        'deleted to consider it idle.',
    )

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    deleting = args.action == 'delete'

    if deleting and args.type == 'port' and args.dbversion == 'ocata':
        print(
            "Checking ports on Ocata isn't validated, refusing to "
            "automatically delete.",
            file=sys.stderr,
        )
        sys.exit(1)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='neutron-reaper')

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as handle:
            whitelist = {
                normalize_project_name(entry, args.kvm) for entry in handle
            }

    db = mysqlargs.connect()
    db.version = args.dbversion

    reaper_options = dict(
        db=db,
        auth=auth,
        type_=args.type,
        idle_days=args.idle_days,
        whitelist=whitelist,
        kvm=args.kvm,
        describe=args.action == 'info',
        quiet=args.quiet,
    )
    if not slack:
        remove_count = reaper(**reaper_options)
    else:
        with slack:  # log exceptions
            remove_count = reaper(**reaper_options)

    # Stay silent unless deleting with Slack configured; in quiet mode also
    # stay silent when nothing was removed.
    if slack and deleting and not (args.quiet and not remove_count):
        noun = {
            'ip': 'floating IP',
            'port': 'port',
        }[args.type]
        plural = '' if remove_count == 1 else 's'
        thing = noun + plural

        if remove_count > 0:
            color = '#000000'
            message = (
                'Commanded deletion of *{} {}* ({:.0f} day grace-period)'
                .format(remove_count, thing, args.idle_days))
        else:
            color = '#cccccc'
            message = 'No {} to delete ({:.0f} day grace-period)'.format(
                thing, args.idle_days)

        slack.post('neutron-reaper', message, color=color)
Ejemplo n.º 18
0
def main(argv=None):
    """Warn about, and optionally delete, unutilized leases.

    Parses ``argv`` (``sys.argv`` when omitted) and asks
    ``find_leases_in_violation`` for the leases to warn about and the
    leases to terminate.

    * ``delete``: leases only in the warn list get a warning email, leases
      in the terminate list are deleted and their owners notified; a
      summary is printed and sent to Slack when configured.
    * ``info``: the lease IDs and their node UUIDs are pretty-printed and
      nothing is modified.

    Raises:
        SystemExit: via ``parser.error`` when ``--grace-hours`` is not
            strictly greater than ``--warn-hours``.
        Exception: any failure while processing leases is forwarded to
            Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Unutilized Lease Reaper')
    parser.add_argument('-w',
                        '--warn-hours',
                        type=int,
                        help='Number of hours after which to warn user.',
                        default=DEFAULT_WARN_HOURS)
    parser.add_argument('-r',
                        '--grace-hours',
                        type=int,
                        help='Number of hours after which to remove lease.',
                        default=DEFAULT_GRACE_HOURS)
    parser.add_argument('action',
                        choices=['info', 'delete'],
                        help='Just display info or actually delete them?')
    parser.add_argument('--sender',
                        type=str,
                        help='Email address of sender.',
                        default='*****@*****.**')

    args = parser.parse_args(argv[1:])

    # Validate with parser.error rather than assert: asserts are stripped
    # under ``python -O``, which would let a bad combination through and
    # terminate leases before their owners were ever warned.
    if args.grace_hours <= args.warn_hours:
        parser.error('Grace hours must be greater than warning period.')

    auth = osapi.Auth.from_env_or_args(args=args)

    if args.slack:
        slack = Slackbot(args.slack, script_name='unutilized-leases-reaper')
    else:
        slack = None

    def summarize(leases):
        # Reduce each lease to its ID and node UUIDs for the info listing.
        return [
            dict(lease_id=lease['id'],
                 nodes=[node['uuid'] for node in lease['nodes']])
            for lease in leases
        ]

    try:
        sender = args.sender
        warn_period = args.warn_hours
        grace_period = args.grace_hours
        warn, terminate = find_leases_in_violation(auth, warn_period,
                                                   grace_period)

        if not (warn or terminate):
            print('No leases to warn or delete.')
        elif args.action == 'delete':
            # A lease about to be terminated gets the termination notice
            # only, so it is excluded from the warned set and its count.
            warn_only = [lease for lease in warn if lease not in terminate]

            for lease in warn_only:
                send_notification(
                    auth, lease, sender, warn_period, grace_period,
                    "Your lease {} is idle and may be terminated.".format(
                        lease['name']),
                    _email.IDLE_LEASE_WARNING_EMAIL_BODY)

            for lease in terminate:
                blazar.lease_delete(auth, lease['id'])
                send_notification(
                    auth, lease, sender, warn_period, grace_period,
                    "Your lease {} has been terminated.".format(
                        lease['name']),
                    _email.IDLE_LEASE_TERMINATION_EMAIL_BODY)

            message = ('Warned deletion of *{} idle leases* '
                       'Commanded deletion of *{} idle leases* '
                       '(Unutilized lease violation)'.format(
                           len(warn_only), len(terminate)))

            print(message)

            if slack:
                slack.message(message)
        else:
            pprint(dict(warn=summarize(warn), terminate=summarize(terminate)))
    except Exception:
        if slack:
            slack.exception()
        raise
Ejemplo n.º 19
0
def main(argv=None):
    """Show or fix Ironic nodes bound to nonexistent Nova instances.

    Parses ``argv`` (``sys.argv`` when omitted), builds credentials from
    ``OS_*`` environment variables (optionally overridden by ``--osrc``),
    fetches Ironic nodes and Nova instances and asks
    ``find_unbound_instances`` which nodes reference missing instances.

    * ``info``: prints the affected nodes; changes nothing.
    * ``delete``: after sanity checks (unless disabled), clears the
      instance data of ``available`` nodes and sets every other node to
      ``deleted``, then prints the summary and sends it to Slack when
      configured.

    Returns:
        ``-1`` when required OS variables are missing, otherwise ``None``.

    Raises:
        Exception: failures while fixing nodes are forwarded to Slack
            (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser(
        'Kick Ironic nodes that refer to a deleted/nonexistant Nova instance')

    parser.add_argument(
        'mode',
        choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them')
    # NOTE(review): presumably base_parser does not already register
    # --slack/--osrc for this script (argparse rejects duplicate option
    # strings) -- confirm.
    parser.add_argument(
        '--slack',
        type=str,
        help=
        'JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc',
        type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane',
        action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    parser.add_argument('--force-insane',
                        action='store_true',
                        help=argparse.SUPPRESS)  # for testing

    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack,
                     script_name='undead-instances') if args.slack else None

    os_vars = {
        k: os.environ[k]
        for k in os.environ if k.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        # Values from the rc file override the environment.
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
              file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)

    node_instance_map, unbound_instances = find_unbound_instances(
        auth, nodes, instances)

    if args.mode == 'info':
        # no-op
        if unbound_instances:
            print('ZOMBIE INSTANCES ON NODES')
        else:
            print('No zombies currently.')
        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]

            assert inst_id not in instances, 'contradiction, this should be impossible'

            print('-----')
            print('Ironic Node\n' '  ID:       {}'.format(node['uuid']))
            print('  Instance: {}'.format(node['instance_uuid']))
            print('  State:    {}'.format(node['provision_state']))

    elif args.mode == 'delete':
        if not args.force_sane or args.force_insane:
            # sanity check(s) to avoid doing something stupid
            # Fixed: this used the undefined name ``instance_ids``; the
            # Nova result fetched above is bound to ``instances``.
            if len(instances) == 0 and len(unbound_instances) != 0:
                _thats_crazy('(in)sanity check: 0 running instances(?!)',
                             slack)

            # --force-insane drops the limit to -1 so the check always
            # trips (testing aid).
            ubi_limit = 20 if not args.force_insane else -1
            if len(unbound_instances) > ubi_limit:
                _thats_crazy(
                    '(in)sanity check: it thinks there are {} unbound instances'
                    .format(len(unbound_instances)),
                    slack,
                )

        try:
            for inst_id in unbound_instances:
                node = node_instance_map[inst_id]
                node_id = node['uuid']
                if node['provision_state'] == 'available':
                    clear_node_instance_data(auth, node_id)
                else:
                    osrest.ironic_node_set_state(auth, node_id, 'deleted')

            message = 'Fixed Ironic nodes with nonexistant instances:\n{}'.format(
                '\n'.join(' • node `{}` → instance `{}`'.format(
                    node_instance_map[i]['uuid'], node_instance_map[i]
                    ['instance_uuid']) for i in unbound_instances))

            print(message)

            if slack:
                slack.success(message)
        except Exception:
            if slack:
                slack.exception()
            raise