Esempio n. 1
0
def main(argv=None):
    """Run the node diagnostics and print the ailments found for each node.

    Args:
        argv: Full argument vector, program name first. ``sys.argv`` is used
            when this is ``None``.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Diagnose node(s) for error states.')
    parser.add_argument('--nodes', nargs="+", type=str, default=[])

    # Parse the vector we were handed rather than sys.argv, so a caller that
    # passes its own argv is not silently ignored.
    args = parser.parse_args(argv[1:])
    auth = osapi.Auth.from_env_or_args(args=args)

    # Copy each selected node and attach an empty "ailments" list to it.
    # When --nodes is not given, every node is selected.
    nodes = {
        nid: dict(n, **{"ailments": []}) for nid, n
        in osrest.ironic_nodes(auth, details=True).items()
        if n['name'] in args.nodes or not args.nodes}

    # Every check is handed the same node mapping; presumably each records
    # its findings in the per-node "ailments" list -- confirm in the checks.
    node_in_error_state(nodes)
    node_stuck_deleting(nodes)
    node_maintenance_state_error(auth, nodes)
    node_not_in_freepool(auth, nodes)
    node_undead_instance(auth, nodes)
    resource_provider_failure(auth, nodes)

    for node_id, node in nodes.items():
        print("Checking Node {name} (uuid: {uuid})".format(
            name=node['name'], uuid=node_id))

        ailments = node.get("ailments")
        if ailments:
            for ailment in ailments:
                print("\t{node_name}: {msg}".format(
                    node_name=node.get("name"),
                    msg=NODE_AILMENTS_MESSAGES[ailment]))
        else:
            print("\tNODE PASSED ALL TESTS. EVERYTHING SHOULD BE FINE.")
Esempio n. 2
0
def main(argv=None):
    """Report Ironic nodes in the "error" provision state, not in maintenance.

    The report is printed and, when a Slack configuration was supplied, also
    posted to Slack.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Returns:
        -1 when required OS variables are missing, otherwise ``None``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser(
        'Strange things, as per someone\'s definition of "strange".')
    parser.add_argument('-v', '--verbose', action='store_true')
    options = parser.parse_args(argv[1:])

    slack = Slackbot(options.slack) if options.slack else None

    # Environment first, then the rc file so that it overrides the env.
    os_vars = {
        key: value
        for key, value in os.environ.items()
        if key.startswith(OS_ENV_PREFIX)
    }
    if options.osrc:
        os_vars.update(osapi.load_osrc(options.osrc))

    missing = set(osapi.Auth.required_os_vars) - set(os_vars)
    if missing:
        complaint = 'Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing))
        print(complaint, file=sys.stderr)
        return -1

    auth = osapi.Auth(os_vars)
    all_nodes = osrest.ironic_nodes(auth, details=True)

    errored = []
    for node in all_nodes.values():
        if node['provision_state'] == 'error' and not node['maintenance']:
            errored.append(node)

    if not errored:
        if options.verbose:
            print('All good.')
        return

    lines = ['Ironic nodes in "error" provision state, not in maintenance']
    for node in errored:
        lines.append('• `{}`, last error: {}'.format(
            node['uuid'], node.get('last_error')))
    message = '\n'.join(lines)

    # The console gets plain asterisks; Slack gets the bullet characters.
    print(message.replace('•', '*'))
    if slack:
        slack.post(SUBCOMMAND, message, color='xkcd:red')
def main(argv=None):
    """Inspect or update orphaned resource providers in the database.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever the database connection or the fixer raises; the exception
        is reported to Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Fixes issues with orphaned resource providers.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q',
        '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument('action',
                        choices=['info', 'update'],
                        help='Just display info or actually update them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    slack = Slackbot(
        args.slack,
        script_name='orphan-resource-providers') if args.slack else None

    try:
        db = mysqlargs.connect()

        update_count = resource_providers_fixer(db=db,
                                                describe=args.action == 'info',
                                                quiet=args.quiet)

        if args.action == 'update':
            if update_count > 0:
                message = ('Commanded update of *{} resource providers*'.
                           format(update_count))

                print(message)

                # Slack is optional: without --slack the bot is None and an
                # unguarded call would turn a successful run into a crash.
                if slack:
                    slack.message(message)
            elif not args.quiet:
                print('No resource providers to delete')
    except:
        # Catches everything (KeyboardInterrupt included) so the failure is
        # reported to Slack, then re-raises it unchanged.
        if slack:
            slack.exception()
        raise
Esempio n. 4
0
def main(argv=None):
    """Email users whose reservations are reported as starting the next day.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Check and notify users about their reservation')
    parser.add_argument('--sender',
                        type=str,
                        help='Email address of sender',
                        default='*****@*****.**')

    mysql_defaults = {
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    }
    mysqlargs = MySqlArgs(mysql_defaults)
    mysqlargs.inject(parser)

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    db = mysqlargs.connect()
    db.version = query.ROCKY

    auth = osapi.Auth.from_env_or_args(args=args)
    email_host = _email.get_host()

    # One mail per reservation pack produced by the query helper.
    for pack in get_reservations_start_next_day(db):
        content = pack['content_vars']
        content['site'] = auth.region

        body = _email.render_template(
            _email.RESERVATION_START_EMAIL_BODY, vars=content)
        subject = 'Chameleon lease {} starts tomorrow'.format(
            content['leasename'])

        _email.send(email_host, pack['address'], args.sender, subject, body)
def main(argv=None):
    """Parse the CLI, connect to MySQL and run the retired-node correction.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Retired node state enforcer.')
    mysqlargs = MySqlArgs(
        dict(user='******', password='', host='localhost', port=3306))
    mysqlargs.inject(parser)
    parser.add_argument('--dryrun', help='dryrun mode', action='store_true')

    options = parser.parse_args(argv[1:])
    mysqlargs.extract(options)
    connection = mysqlargs.connect()

    if options.slack:
        slack = Slackbot(options.slack, script_name='enforce-retirement')
    else:
        slack = None

    # Locate retired nodes and make sure Blazar treats them as
    # non-reservable; the actual work lives in correct_state.
    correct_state(connection, slack, dryrun=options.dryrun)
Esempio n. 6
0
def main(argv=None):
    """Find Neutron ports that conflict with Ironic and show or delete them.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Returns:
        -1 when neither ``--ignore-subnet`` nor ``--ignore-from-ironic-conf``
        is supplied (or the mode is unknown); otherwise ``None``.

    Raises:
        RuntimeError: In ``delete`` mode when more than 10 conflicts are
            found and ``--force-sane`` was not passed.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Remove orphan ports in Neutron referring to an '
                         'inactive Ironic instance')
    parser.add_argument('mode', choices=['info', 'delete'],
        help='Just display data on the conflict ports or delete them')
    parser.add_argument('--ignore-subnet', type=str,
        help='Ignore Neutron ports in this subnet (UUID). Must provide either '
             'this or --ignore-from-ironic-conf. This overrides the conf.')
    parser.add_argument('-c', '--ignore-from-ironic-conf', type=str,
        help='Ignore Neutron ports in the subnet(s) under the '
             '"provisioning_network" network in the "neutron" section of '
             'this configuration file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--force-sane', action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')

    args = parser.parse_args(argv[1:])

    # Validate args

    slack = Slackbot(args.slack, SUBCOMMAND) if args.slack else None
    auth = Auth.from_env_or_args(args=args)

    # An explicit subnet wins over the configuration file; with neither
    # option there is nothing to ignore and the run is refused.
    if args.ignore_subnet:
        ignore_subnets = [args.ignore_subnet]
    elif args.ignore_from_ironic_conf:
        ironic_config = configparser.ConfigParser()
        ironic_config.read(args.ignore_from_ironic_conf)
        net_id = ironic_config['neutron']['provisioning_network']
        network = osrest.neutron.network(auth, net_id)
        ignore_subnets = network['subnets']
    else:
        print('Must provide --ignore-subnet or --ignore-from-ironic-conf',
              file=sys.stderr)
        return -1

    # Do actual work
    try:
        conflict_macs = find_conflicts(auth, ignore_subnets)

        if args.mode == 'info':
            show_info(conflict_macs)
        elif args.mode == 'delete':
            # Refuse to delete more than 10 ports at once unless the
            # operator explicitly passed --force-sane.
            if (not args.force_sane) and len(conflict_macs) > 10:
                raise RuntimeError('(in)sanity check: thinks there are {} conflicting MACs'.format(len(conflict_macs)))

            for mac in conflict_macs.values():
                osrest.neutron_port_delete(auth, mac['neutron_port_id'])

            # NOTE(review): the summary is only sent to Slack; nothing is
            # printed locally in delete mode.
            if slack:
                message = 'Fixed Ironic/Neutron MAC conflicts\n{}'.format(
                    '\n'.join(
                        ' • Neutron Port `{neutron_port_id}` → `{mac}` ← Ironic Node `{ironic_node_id}` (Port `{ironic_port}`)'
                        .format(**m) for m in conflict_macs.values()
                    )
                )
                slack.success(message)
        else:
            # Presumably unreachable: the `choices` on the `mode` argument
            # should reject any other value during parsing.
            print('unknown command', file=sys.stderr)
            return -1
    except:
        # Report any failure to Slack (when configured), then re-raise.
        if slack:
            slack.exception()
        raise
Esempio n. 7
0
def main(argv=None):
    """Show or reset Ironic nodes that are in a treatable error state.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Returns:
        -1 when required OS variables are missing, otherwise ``None``.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser(
        'Kick Ironic nodes that are in an common/known error state')
    parser.add_argument(
        'mode',
        choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--dry-run',
                        action='store_true',
                        help='Dry run, don\'t actually do anything')

    args = parser.parse_args(argv[1:])

    slack = Slackbot(
        args.slack,
        script_name='ironic-error-resetter') if args.slack else None

    # Collect OS settings from the environment; values from the rc file,
    # when given, override them.
    os_vars = {
        k: os.environ[k]
        for k in os.environ if k.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
              file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    try:
        nodes = osrest.ironic_nodes(auth, details=True)
        cureable = cureable_nodes(nodes)

        # Info mode: dump a fixed set of fields per treatable node and stop.
        if args.mode == 'info':
            print('{} node(s) in a state that we can treat'.format(
                len(cureable)))
            for nid in cureable:
                print('-' * 40)
                print('\n'.join('{:<25s} {}'.format(key, nodes[nid].get(key))
                                for key in [
                                    'uuid',
                                    'provision_updated_at',
                                    'provision_state',
                                    'last_error',
                                    'instance_uuid',
                                    'extra',
                                    'maintenance',
                                ]))
            return

        if len(cureable) == 0:
            if args.verbose:
                print('Nothing to do.')
            return

        print('To correct: {}'.format(repr(cureable)))

        reset_ok = []
        # NOTE(review): nothing in this function ever appends to `too_many`,
        # so the "Skipped (already at limit)" section below is never emitted.
        too_many = []
        for nid in cureable:
            resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
            resetter.reset()
            reset_ok.append((nid, resetter.tracker.count()))

        message_lines = []
        if reset_ok:
            message_lines.append('Performed reset of nodes')
            message_lines.extend(' • `{}`: {} resets'.format(*r)
                                 for r in reset_ok)
        if too_many:
            message_lines.append('Skipped (already at limit)')
            message_lines.extend(' • `{}`'.format(r) for r in too_many)
        if args.dry_run:
            message_lines.append('dry run, no changes actually made.')

        message = '\n'.join(message_lines)

        print(message)

        # Dry runs are printed locally but never announced on Slack.
        if slack and (not args.dry_run):
            slack.success(message)
    except:
        # Report any failure to Slack (when configured), then re-raise.
        if slack:
            slack.exception()
        raise
Esempio n. 8
0
def main(argv=None):
    """List or delete idle floating IPs / ports found by the reaper query.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever the lookup or deletion raises; the exception is reported to
        Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument('-w', '--whitelist', type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
             'Ignores case and dashes.')
    parser.add_argument('action', choices=['info', 'delete'],
        help='Just display info or actually delete them?')
    parser.add_argument('type', choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?')
    parser.add_argument('idle_days', type=float,
        help='Number of days since last active instance in project was '
        'deleted to consider it idle.')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Authv2.from_env_or_args(args=args)

    slack = Slackbot(args.slack, script_name='neutron-reaper') if args.slack else None

    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as f:
            # Iterating a file yields lines with their trailing newline;
            # drop it so an entry can actually equal a project ID/name.
            whitelist = {line.rstrip('\n') for line in f}

    db = mysqlargs.connect()
    db.version = query.ROCKY

    try:
        to_delete = find_reapable_resources(
            db=db,
            auth=auth,
            type_=args.type,
            idle_days=args.idle_days,
            whitelist=whitelist,
        )

        # Human-readable resource name, pluralised unless exactly one.
        thing = '{}{}'.format(
            {'ip': 'floating IP', 'port': 'port'}[args.type],
            ('' if len(to_delete) == 1 else 's'),
        )

        if to_delete:
            if args.action == 'delete':
                for resource_id in to_delete:
                    RESOURCE_DELETE_COMMAND[args.type](auth, resource_id)
                message = (
                    'Commanded deletion of *{} {}* ({:.0f} day grace-period)'
                    .format(len(to_delete), thing, args.idle_days)
                )

                print(message)

                if slack:
                    slack.message(message)
            else:
                print((
                    'Found *{} {}* to delete ({:.0f} day grace-period):\n{}'
                    .format(len(to_delete), thing, args.idle_days, to_delete)
                ))
        else:
            print('No {} to delete ({:.0f} day grace-period)'.format(thing, args.idle_days))

    except:
        # Report any failure to Slack (when configured), then re-raise.
        if slack:
            slack.exception()
        raise
def main(argv=None):
    """Reserve the given nodes for maintenance and report the lease windows.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever node lookup or reservation raises; the exception is reported
        to Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Reserve nodes for maintenance')
    # Presumably registers the --os-* identity options read below as
    # args.os_* -- confirm in append_global_identity_args.
    append_global_identity_args(parser, argv)

    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument('--operator',
                        type=str,
                        required=True,
                        help='Chameleon account username of the operator')
    parser.add_argument('--nodes',
                        type=str,
                        required=True,
                        help='node ids or node names; comma separated')
    parser.add_argument('--reason',
                        type=str,
                        required=True,
                        help='maintenance reasons')
    parser.add_argument('--dry-run',
                        action="store_true",
                        help='perform a trial run without making reservations')
    parser.add_argument(
        '--start-time',
        type=valid_date,
        default=None,
        help=
        'lease start time (YYYY-mm-DD HH:MM:SS); if not given, start at the earliest possible datetime'
    )
    parser.add_argument(
        '--estimate-hours',
        type=int,
        default=168,
        help=
        'estimated hours required for maintenance; default is 168 hours (1 week)'
    )

    args = parser.parse_args(argv[1:])

    slack = Slackbot(
        args.slack,
        script_name='maintenance-reservation') if args.slack else None

    # connect to database
    mysqlargs.extract(args)
    db = mysqlargs.connect()

    # keystone authentication
    auth_args = {
        'auth_url': args.os_auth_url,
        'username': args.os_username,
        'password': args.os_password,
        'project_name': args.os_project_name,
        'region_name': args.os_region_name,
        'interface': 'public'
    }
    # Domain names are only forwarded when they were actually supplied.
    if args.os_user_domain_name:
        auth_args['user_domain_name'] = args.os_user_domain_name
    if args.os_project_domain_name:
        auth_args['project_domain_name'] = args.os_project_domain_name
    # get admin session for node information
    admin_sess = get_session(**auth_args)
    # get maint session for creating lease: same credentials, but scoped to
    # the 'maintenance' project
    auth_args['project_name'] = 'maintenance'
    maint_sess = get_session(**auth_args)

    try:
        # get node details
        nodes = get_nodes(admin_sess, args.nodes.split(','))

        # node name -> (start_time_str, end_time_str) as returned by reserve
        report_info = {}
        for node in nodes:
            lease_start_time = args.start_time
            if not lease_start_time:
                # find the earliest reservation time for the node
                lease_start_time = get_node_earliest_reserve_time(
                    db, node.uuid, args.estimate_hours)
            else:
                # convert to utc: the supplied time is tagged with the local
                # timezone first, then converted
                lease_start_time = lease_start_time.replace(
                    tzinfo=tz.tzlocal()).astimezone(tz.gettz('UTC'))
            # reserve
            reserve_args = {
                'sess': maint_sess,
                'node': node,
                'start_time': lease_start_time,
                'requested_hours': args.estimate_hours,
                'reason': args.reason,
                'operator': args.operator,
                'dryrun': args.dry_run
            }
            start_time_str, end_time_str = reserve(**reserve_args)
            report_info[node.name] = (start_time_str, end_time_str)

        # summary: one line per reserved node
        report_lines = [('Node {node_name} at {region} is under maintenance '
                         'from {start_time} to {end_time}').format(
                             node_name=key,
                             region=args.os_region_name,
                             start_time=value[0],
                             end_time=value[1])
                        for key, value in report_info.items()]

        if report_lines:
            report = '\n'.join(report_lines)

            print(report)

            if slack:
                slack.message(report)
        else:
            print('nothing reserved!')
    except:
        # Report any failure to Slack (when configured), then re-raise.
        if slack:
            slack.exception()
        raise
Esempio n. 10
0
def main(argv=None):
    """Report orphan instances and, outside KVM mode, orphan leases.

    Reports are printed and, when Slack is configured, sent there as errors.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever the database or Keystone calls raise; the exception is
        reported to Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Detects orphan leases and remove them.')

    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-d',
        '--dbversion',
        type=str,
        help=
        'Version of the database. Schemas differ, pick the appropriate one.',
        choices=[query.LIBERTY, query.ROCKY],
        default=query.ROCKY)
    parser.add_argument('--kvm', help='Run at KVM site', action='store_true')
    osapi.add_arguments(parser)

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    kvm = args.kvm

    slack = Slackbot(args.slack,
                     script_name='orphan-detector') if args.slack else None

    try:
        db = mysqlargs.connect()
        db.version = args.dbversion

        if kvm:
            # at kvm site: build a Keystone client from the OS_* environment
            # variables, with rc-file values overriding them
            os_vars = {
                k: os.environ[k]
                for k in os.environ if k.startswith('OS_')
            }
            if args.osrc:
                os_vars.update(osapi.load_osrc(args.osrc))

            # A missing OS_* key raises KeyError here; it is caught by the
            # handler below, reported and re-raised.
            auth = v2.Password(username=os_vars['OS_USERNAME'],
                               password=os_vars['OS_PASSWORD'],
                               tenant_name=os_vars['OS_TENANT_NAME'],
                               auth_url=os_vars['OS_AUTH_URL'])
            sess = session.Session(auth=auth)
            keystone = client.Client(session=sess)

            orphan_instances = get_orphan_instances_kvm(db, keystone)
        else:
            orphan_instances = get_orphan_instances(db)

        orphan_instances_report = generate_report(
            orphan_instances, "-" * 45 + "ORPHAN INSTANCES" + "-" * 45)

        # A falsy report is treated as "nothing found".
        if orphan_instances_report:
            print(orphan_instances_report)

            if slack:
                slack.error(orphan_instances_report)
        else:
            print('No orphan instances detected')

        # Additionally perform lease report for CHI
        if not kvm:
            orphan_leases_report = generate_report(
                get_orphan_leases(db), "-" * 45 + "ORPHAN LEASES" + "-" * 45)

            if orphan_leases_report:
                print(orphan_leases_report)

                if slack:
                    slack.error(orphan_leases_report)
            else:
                print('No orphan leases detected')
    except:
        # Report any failure to Slack (when configured), then re-raise.
        if slack:
            slack.exception()
        raise
Esempio n. 11
0
def main(argv=None):
    """Run the floating IP reaper and announce what was reclaimed.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever ``reaper`` raises; the exception is reported to Slack (when
        configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('floating IP reaper')
    mysql_defaults = {
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    }
    mysqlargs = MySqlArgs(mysql_defaults)
    mysqlargs.inject(parser)

    parser.add_argument(
        '-w', '--whitelist', type=str,
        help='File of project/tenant IDs to ignore, one per line.')
    parser.add_argument(
        '--grace-days', type=int, required=True,
        help='Number of days since last used to consider to be idle')
    parser.add_argument('--dryrun', help='dryrun mode', action='store_true')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    if args.slack:
        slack = Slackbot(args.slack, script_name='floatingip-reaper')
    else:
        slack = None

    # One project/tenant ID per line, trailing newline removed.
    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as handle:
            for raw_line in handle:
                whitelist.add(raw_line.rstrip('\n'))

    db = mysqlargs.connect()
    db.version = query.ROCKY

    try:
        reclaimed = reaper(db=db,
                           auth=auth,
                           grace_days=args.grace_days,
                           whitelist=whitelist,
                           dryrun=args.dryrun)

        # Nothing is announced for dry runs or empty results.
        if reclaimed and not args.dryrun:
            message = '\n'.join(
                'Reclaimed *{} floating ips* from project {} ({:.0f} day grace-period)'
                .format(str(len(ips)), project, args.grace_days)
                for project, ips in reclaimed.items())
            print(message)

            if slack:
                slack.message(message)
    except:
        if slack:
            slack.exception()
        raise
Esempio n. 12
0
import sys
import os
import itertools
import re
from hammers.slack import Slackbot
from hammers import osapi, osrest
from hammers.osrest.nova import aggregate_delete, _addremove_host
from hammers.util import base_parser
from hammers import MySqlArgs, query

# Append "/v3" to OS_AUTH_URL, if necessary.
# The pattern is a raw string: "\/" in a plain string literal is an invalid
# escape sequence, which newer Python versions warn about at compile time.
auth_url = os.environ["OS_AUTH_URL"]
if not re.search(r"/v3$", auth_url):
    os.environ["OS_AUTH_URL"] = auth_url + "/v3"

parser = base_parser(
    'Clean old Nova aggregates tied to expired Blazar leases.')
# NOTE(review): the default port is the string '3306' here; confirm that
# MySqlArgs accepts a string as well as an int.
mysqlargs = MySqlArgs({
    'user': '******',
    'password': '',
    'host': 'localhost',
    'port': '3306',
})
mysqlargs.inject(parser)
args = parser.parse_args(sys.argv[1:])
auth = osapi.Auth.from_env_or_args(args=args)
mysqlargs.extract(args)
conn = mysqlargs.connect()

# Fetch the Nova aggregates plus the Blazar host allocations and leases.
aggregates = osrest.nova.aggregates(auth)
host_allocs = osrest.blazar.host_allocations(auth)
leases = osrest.blazar.leases(auth)
Esempio n. 13
0
def main(argv=None):
    """Warn about, and optionally delete, leases flagged as unutilized.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        AssertionError: When ``--grace-hours`` is not greater than
            ``--warn-hours`` (only while assertions are enabled).
        Exception: Anything raised during the search, notification or
            deletion is reported to Slack (when configured) and re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser('Unutilized Lease Reaper')
    parser.add_argument('-w',
                        '--warn-hours',
                        type=int,
                        help='Number of hours after which to warn user.',
                        default=DEFAULT_WARN_HOURS)
    parser.add_argument('-r',
                        '--grace-hours',
                        type=int,
                        help='Number of hours after which to remove lease.',
                        default=DEFAULT_GRACE_HOURS)
    parser.add_argument('action',
                        choices=['info', 'delete'],
                        help='Just display info or actually delete them?')
    parser.add_argument('--sender',
                        type=str,
                        help='Email address of sender.',
                        default='*****@*****.**')

    args = parser.parse_args(argv[1:])
    auth = osapi.Auth.from_env_or_args(args=args)

    # NOTE(review): this validates user input with `assert`, which is
    # stripped when Python runs with -O; the check then silently disappears.
    assert args.grace_hours > args.warn_hours, (
        "Grace hours must be greater than warning period.")

    if args.slack:
        slack = Slackbot(args.slack, script_name='unutilized-leases-reaper')
    else:
        slack = None

    try:
        sender = args.sender
        warn_period = args.warn_hours
        grace_period = args.grace_hours
        warn, terminate = find_leases_in_violation(auth, warn_period,
                                                   grace_period)

        if (len(warn) + len(terminate) > 0):
            if args.action == 'delete':
                # A lease that is about to be terminated does not also get a
                # warning mail.
                for lease in warn:
                    if lease not in terminate:
                        send_notification(
                            auth, lease, sender, warn_period, grace_period,
                            "Your lease {} is idle and may be terminated.".
                            format(lease['name']),
                            _email.IDLE_LEASE_WARNING_EMAIL_BODY)

                # Delete first, then tell the owner.
                for lease in terminate:
                    blazar.lease_delete(auth, lease['id'])
                    send_notification(
                        auth, lease, sender, warn_period, grace_period,
                        "Your lease {} has been terminated.".format(
                            lease['name']),
                        _email.IDLE_LEASE_TERMINATION_EMAIL_BODY)

                # NOTE(review): the warned count is len(warn), which also
                # counts leases skipped above because they were terminated.
                message = ('Warned deletion of *{} idle leases* '
                           'Commanded deletion of *{} idle leases* '
                           '(Unutilized lease violation)'.format(
                               len(warn), len(terminate)))

                print(message)

                if slack:
                    slack.message(message)
            else:
                # Info mode: dump lease IDs with their node UUIDs, no changes.
                pprint(
                    dict(warn=[
                        dict(lease_id=l['id'],
                             nodes=[n['uuid'] for n in l['nodes']])
                        for l in warn
                    ],
                         terminate=[
                             dict(lease_id=l['id'],
                                  nodes=[n['uuid'] for n in l['nodes']])
                             for l in terminate
                         ]))
        else:
            print('No leases to warn or delete.')
    except Exception:
        if slack:
            slack.exception()
        raise
Esempio n. 14
0
def main(argv=None):
    """Show or clean Ironic nodes bound to instances Nova no longer has.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Returns:
        -1 when required OS variables are missing, otherwise ``None``.

    Raises:
        Whatever the cleanup calls raise in ``delete`` mode; the exception is
        reported to Slack (when configured) and then re-raised.
    """
    if argv is None:
        argv = sys.argv

    parser = base_parser(
        'Kick Ironic nodes that refer to a deleted/nonexistant Nova instance')

    parser.add_argument(
        'mode',
        choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them')
    parser.add_argument(
        '--slack',
        type=str,
        help=
        'JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc',
        type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane',
        action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    parser.add_argument('--force-insane',
                        action='store_true',
                        help=argparse.SUPPRESS)  # for testing

    args = parser.parse_args(argv[1:])

    slack = Slackbot(args.slack,
                     script_name='undead-instances') if args.slack else None

    # Environment first; values from the rc file, when given, override it.
    os_vars = {
        k: os.environ[k]
        for k in os.environ if k.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print('Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars)),
              file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)

    node_instance_map, unbound_instances = find_unbound_instances(
        auth, nodes, instances)

    if args.mode == 'info':
        # no-op
        if unbound_instances:
            print('ZOMBIE INSTANCES ON NODES')
        else:
            print('No zombies currently.')
        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]

            assert inst_id not in instances, 'contradiction, this should be impossible'

            print('-----')
            print('Ironic Node\n' '  ID:       {}'.format(node['uuid']))
            print('  Instance: {}'.format(node['instance_uuid']))
            print('  State:    {}'.format(node['provision_state']))

    elif args.mode == 'delete':
        # The checks run unless --force-sane was given; --force-insane turns
        # them back on with a limit of -1, so any count trips the second one.
        if not args.force_sane or args.force_insane:
            # sanity check(s) to avoid doing something stupid
            # Compare against the instances fetched above; the previous
            # name `instance_ids` was never defined and raised NameError.
            if len(instances) == 0 and len(unbound_instances) != 0:
                _thats_crazy('(in)sanity check: 0 running instances(?!)',
                             slack)

            ubi_limit = 20 if not args.force_insane else -1
            if len(unbound_instances) > ubi_limit:
                _thats_crazy(
                    '(in)sanity check: it thinks there are {} unbound instances'
                    .format(len(unbound_instances)),
                    slack,
                )

        try:
            for inst_id in unbound_instances:
                node = node_instance_map[inst_id]
                node_id = node['uuid']
                # Available nodes only need their instance data cleared;
                # anything else is moved to the 'deleted' state.
                if node['provision_state'] == 'available':
                    clear_node_instance_data(auth, node_id)
                else:
                    osrest.ironic_node_set_state(auth, node_id, 'deleted')

            message = 'Fixed Ironic nodes with nonexistant instances:\n{}'.format(
                '\n'.join(' • node `{}` → instance `{}`'.format(
                    node_instance_map[i]['uuid'], node_instance_map[i]
                    ['instance_uuid']) for i in unbound_instances))

            print(message)

            if slack:
                slack.success(message)
        except:
            # Report any failure to Slack (when configured), then re-raise.
            if slack:
                slack.exception()
            raise
Esempio n. 15
0
def main(argv=None):
    """List or clean the ports returned by ``identify_dirty_ports``.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Raises:
        Whatever identification or cleaning raises; the exception is reported
        to Slack (when configured) and then re-raised.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser('Floating IP and port reclaimer.')
    mysql_defaults = {
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    }
    mysqlargs = MySqlArgs(mysql_defaults)
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Quiet mode. No output to Slack if there was nothing to do.')
    parser.add_argument(
        '--multiport', action='store_true',
        help='Enable if Ironic nodes may have multiple ports associated.')
    parser.add_argument(
        'action', choices=['info', 'clean'],
        help='Just display info or actually fix them?')

    args = parser.parse_args(argv[1:])
    mysqlargs.extract(args)

    auth = osapi.Auth.from_env_or_args(args=args)
    if args.slack:
        slack = Slackbot(args.slack, script_name='dirty-ports')
    else:
        slack = None

    # The single-port assertion is on unless --multiport was given.
    assert_single = not args.multiport
    take_action = args.action == 'clean'

    db = mysqlargs.connect()

    try:
        bad_ports = identify_dirty_ports(auth, assert_single)
        if not bad_ports:
            return

        port_lines = [
            ' • port `{uuid}` on node `{node_uuid}`'.format(**port)
            for port in bad_ports
        ]
        str_ports = '\n'.join(port_lines)

        if not take_action:
            print("(read-only mode, not cleaning ports):\n{}".format(
                str_ports))
            return

        clean_ports(db, bad_ports)
        message = "Cleaned {} ports with `internal_info` data on `available` nodes:\n{}".format(
            len(bad_ports), str_ports)
        print(message)

        if slack:
            slack.success(message)
    except:
        if slack:
            slack.exception()
        raise
Esempio n. 16
0
def main(argv=None):
    """Compare G5K host data with Blazar and optionally push the differences.

    In ``info`` mode (the default) the pending actions are only printed; in
    ``update`` mode they are sent to Blazar, one request per host.

    Args:
        argv: Full argument vector, program name first; defaults to
            ``sys.argv``.

    Returns:
        1 when any difference was found (UIDs missing on either side or
        per-host property changes), otherwise 0.

    Raises:
        RuntimeError: When ``compare_host`` yields an unknown action.
    """
    argv = sys.argv if argv is None else argv

    parser = base_parser(__doc__)
    parser.add_argument(
        'action',
        choices=['info', 'update'],
        nargs='?',
        default='info',
        help='Info only prints out actions to be taken without doing '
        'anything. Update does them.')
    parser.add_argument('-v', '--verbose', action='store_true')

    args = parser.parse_args(argv[1:])
    auth = osapi.Auth.from_env_or_args(args=args)
    dry_run = args.action == 'info'
    # Per-action lines are printed in dry-run mode and in verbose mode.
    explain = dry_run or args.verbose

    blazar_hosts = get_blazar_hosts(auth)
    grid_hosts = get_g5k_hosts(auth)

    blazar_uids = set(blazar_hosts)
    grid_uids = set(grid_hosts)
    blazar_missing = grid_uids - blazar_uids
    grid_missing = blazar_uids - grid_uids

    if blazar_missing:
        print('Blazar missing node UIDs: {}'.format(blazar_missing))
    if grid_missing:
        print('Grid missing node UIDs: {}'.format(grid_missing))
    any_updates = bool(blazar_missing or grid_missing)

    for uid in sorted(grid_uids & blazar_uids):
        bh = blazar_hosts[uid]

        # Gather all changes for this host so it needs a single request.
        updates = {}
        for action, action_args in compare_host(grid_hosts[uid], bh):
            if action not in {'add', 'replace', 'remove'}:
                raise RuntimeError('unknown action "{}"'.format(action))

            if action == 'remove':
                (key,) = action_args
                updates[key] = None
                if explain:
                    print('{} {}({}) (old value: {})'.format(
                        uid, action, key, bh[key]))
                continue

            key, value = action_args
            updates[key] = str(value)  # blazar likes strings
            if explain:
                if action == 'add':
                    old_value = ''
                else:
                    old_value = ' (old value: {})'.format(bh[key])
                print('{} {}({}=\'{}\'){}'.format(uid, action, key, value,
                                                  old_value))

        if not updates:
            continue
        any_updates = True
        if dry_run:
            continue

        # A failed update is logged and skipped; the other hosts still run.
        try:
            osrest.blazar.host_update(auth, bh['id'], updates)
        except Exception as e:
            print(e)
            print("UPDATE SKIPPED DUE TO ERROR")
            print("\tNODE ID: {}\n\tUpdate Detail: {}".format(
                bh['id'], str(updates)))

    if any_updates:
        return 1

    print("Blazar and G5K repo are synced.")
    return 0