def parse_args():
    """Parse and return command line arguments, validate the host.

    On top of the options provided by ``lib.get_base_parser()`` this accepts
    the ``--rename`` and ``--rename-mgmt`` options plus the ``HOST`` and the
    optional ``MGMT`` positional arguments. When ``MGMT`` is omitted it is
    looked up through ``lib.get_mgmts()`` and stored back into ``args.mgmt``.

    Returns:
        argparse.Namespace: the parsed command line arguments.

    Raises:
        argparse.ArgumentTypeError: if ``--rename`` is combined with
            ``--no-pxe``, if any collected name is not shaped like an FQDN or
            is rejected by ``lib.is_hostname_valid()``, or if a management
            name does not contain ``.mgmt.``.

    """
    parser = lib.get_base_parser('Automated reimaging of a single host')
    parser.add_argument(
        '--rename',
        help='FQDN of the new name to rename this host to while reimaging')
    parser.add_argument(
        '--rename-mgmt',
        help='FQDN of the new name management interface, see --rename')
    parser.add_argument('host',
                        metavar='HOST',
                        action='store',
                        help='FQDN of the host to be reimaged')
    parser.add_argument('mgmt',
                        metavar='MGMT',
                        action='store',
                        nargs='?',
                        default=None,
                        help='FQDN of the management interface for the host')

    args = parser.parse_args()

    # All the names (hosts and management interfaces) that must be validated
    fqdns = [args.host]

    if args.rename is not None:
        fqdns.append(args.rename)
        # args.no_pxe is not defined here, presumably by lib.get_base_parser()
        if args.no_pxe:
            raise argparse.ArgumentTypeError(
                'The --rename option cannot be used in conjunction with --no-pxe.'
            )

    # Gather the management interfaces, if missing
    if args.mgmt is None:
        mgmts = lib.get_mgmts([args.host])
        args.mgmt = mgmts[args.host]
    else:
        mgmts = {args.host: args.mgmt}

    fqdns.append(args.mgmt)
    # NOTE(review): an explicitly passed --rename-mgmt is added neither to
    # fqdns nor to mgmts, hence it is not validated by the checks below.
    if args.rename is not None and args.rename_mgmt is None:
        mgmts.update(lib.get_mgmts([args.rename]))
        fqdns.append(mgmts[args.rename])

    # Perform a quick sanity check on the host and mgmt
    for name in fqdns:
        if '.' not in name or not lib.HOSTS_PATTERN.match(name):
            raise argparse.ArgumentTypeError(
                "Expected FQDN, got '{name}'".format(name=name))

        # This check must live at loop level: nested under the raise above it
        # would be unreachable and no name would ever be checked.
        if not lib.is_hostname_valid(name):
            raise argparse.ArgumentTypeError(
                "Unable to resolve host '{name}'".format(name=name))

    for mgmt in mgmts.values():
        if '.mgmt.' not in mgmt:
            raise argparse.ArgumentTypeError(
                'The MGMT parameter {} does not follow the *.mgmt.* format'.
                format(mgmt))

    return args
# Example n. 2
# 0
def parse_args():
    """Parse and return command line arguments, validate the host.

    Accepted arguments are ``-d/--debug``, ``-s/--sleep``,
    ``-p/--phab-task-id`` and the ``HOST`` positional argument.

    Returns:
        argparse.Namespace: the parsed command line arguments.

    Raises:
        argparse.ArgumentTypeError: if ``HOST`` is not shaped like an FQDN or
            is rejected by ``lib.is_hostname_valid()``.

    """
    parser = argparse.ArgumentParser(
        description=
        'Downtime a host on Icinga, after running Puppet on the Icinga host.')
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help='debug level logging and cumin output')
    parser.add_argument(
        '-s',
        '--sleep',
        type=int,
        help='amount of seconds to sleep before downtiming the host')
    parser.add_argument('-p',
                        '--phab-task-id',
                        action='store',
                        help='the Phabricator task ID, i.e.: T12345)')
    parser.add_argument('host',
                        metavar='HOST',
                        action='store',
                        help='FQDN of the host to be downtimed')

    args = parser.parse_args()

    # Perform a quick sanity check on the host
    if '.' not in args.host or not lib.HOSTS_PATTERN.match(args.host):
        raise argparse.ArgumentTypeError(
            "Expected FQDN, got '{host}'".format(host=args.host))

    # This check must not be nested under the raise above, where it would be
    # unreachable and the host would never be checked.
    if not lib.is_hostname_valid(args.host):
        raise argparse.ArgumentTypeError(
            "Unable to resolve host '{host}'".format(host=args.host))

    return args
def main():
    """Upgrade all the packages of a host and reboot it.

    The sequence is: set an Icinga downtime, disable Puppet, optionally depool
    the host, run a non-interactive dist-upgrade, reboot and wait for the host
    to come back, re-enable and run Puppet, optionally repool the host and
    cancel the Icinga downtime.

    On any early exit the steps already performed (Icinga downtime, Puppet
    disabled, depool) are not reverted.

    Returns:
        int: 0 on success, 1 if the hostname is not valid or if the depool,
        upgrade or repool command fails.

    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # Disable puppet
    lib.disable_puppet([args.host], '{} --{}'.format(NAME, user))

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        # Use the logger configured by setup_logging(), not the root logger,
        # so that informational messages follow the same configuration as the
        # error ones.
        logger.info("Not performing any depool action as requested (empty --depool-cmd)")

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Upgrade all packages, leave config files untouched, do not prompt
    upgrade_cmd = ("DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::='--force-confdef' "
                   "-o Dpkg::Options::='--force-confold' dist-upgrade")
    if not run_cumin(args.host, [upgrade_cmd], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    # Timestamp taken before the reboot, used as lower bound by wait_reboot()
    reboot_time = datetime.utcnow()

    lib.reboot_host(args.host)

    # Timestamp taken after the reboot command, used by wait_puppet_run()
    boot_time = datetime.utcnow()

    lib.wait_reboot(args.host, start=reboot_time)

    # Enable puppet
    lib.enable_puppet([args.host], '{} --{}'.format(NAME, user))

    # Run puppet
    lib.run_puppet([args.host])

    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any repool action as requested (empty --repool-cmd)")

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0
def main():
    """Run the script.

    Remove the host from Puppet, PuppetDB and DebMonitor, downtime the host
    and its management interface on Icinga when the hostname is valid, and
    report the list of performed actions on the Phabricator task.

    Returns:
        int: 1 if the hostname is not valid and --force is not set,
        0 otherwise.

    """
    script_name = os.path.basename(__file__)
    args = parse_args()
    user = lib.get_running_user()
    phab_client = lib.get_phabricator_client()
    resolvable = lib.is_hostname_valid(args.host)

    if not (resolvable or args.force):
        logger.error(
            "{host} is not a valid hostname. Aborting.".format(host=args.host))
        return 1

    def downtime(target, done, skipped):
        """Downtime target on Icinga and return the action message to log."""
        try:
            lib.icinga_downtime(target,
                                user,
                                args.phab_task_id,
                                title=script_name)
        except RuntimeError:
            return skipped
        return done

    # Puppet and PuppetDB cleanup always happens first
    lib.puppet_remove_host(args.host)
    performed = ['Revoked Puppet certificate', 'Removed from PuppetDB']

    # Both the host and its mgmt are downtimed on Icinga, Puppet will take
    # care of removing them later
    if resolvable:
        performed.append(
            downtime(
                args.host, 'Downtimed host on Icinga',
                'Skipped downtime host on Icinga (likely already removed)'))

        mgmts = lib.get_mgmts([args.host])
        performed.append(
            downtime(
                mgmts[args.host], 'Downtimed mgmt interface on Icinga',
                'Skipped downtime mgmt interface on Icinga (likely already removed)'
            ))

    # DebMonitor cleanup
    lib.debmonitor_remove_host(args.host)
    performed.append('Removed from DebMonitor')

    template = (
        '{script} was executed by {user} for {host} and performed the following actions:\n'
        '- {actions}')
    bullet_list = '\n- '.join(performed)
    message = template.format(script=script_name,
                              user=user,
                              host=args.host,
                              actions=bullet_list)
    lib.phabricator_task_update(phab_client, args.phab_task_id, message)

    return 0
# Example n. 5
# 0
def main():
    """Run an apt full-upgrade on a host and reboot it.

    The sequence is: set an Icinga downtime, optionally depool the host, run
    ``apt -y full-upgrade``, reboot, wait for the host to come back and for a
    Puppet run, optionally repool the host and cancel the Icinga downtime.

    On any early exit the steps already performed (Icinga downtime, depool)
    are not reverted.

    Returns:
        int: 0 on success, 1 if the hostname is not valid or if the depool,
        upgrade or repool command fails.

    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        # Use the logger configured by setup_logging(), not the root logger,
        # so that informational messages follow the same configuration as the
        # error ones.
        logger.info(
            "Not performing any depool action as requested (empty --depool-cmd)"
        )

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Run apt full-upgrade
    if not run_cumin(args.host, ['apt -y full-upgrade'], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    # Timestamp taken before the reboot, used as lower bound by wait_reboot()
    reboot_time = datetime.utcnow()

    lib.reboot_host(args.host)

    # Timestamp taken after the reboot command, used by wait_puppet_run()
    boot_time = datetime.utcnow()

    lib.wait_reboot(args.host, start=reboot_time)

    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info(
            "Not performing any repool action as requested (empty --repool-cmd)"
        )

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0
# Example n. 6
# 0
def parse_args():
    """Parse and return command line arguments, validate the hosts.

    On top of the options provided by ``lib.get_base_parser()`` this accepts
    ``--sequential``, ``--sleep``, ``--force`` and one or more ``HOST``
    positional arguments.

    Every validation failure is reported through ``parser.error()``, which
    prints the message and exits the program. The validations are:

    * unless ``--force`` is set, at most 5 hosts with ``--sequential`` and at
      most 3 hosts otherwise;
    * every host must be shaped like an FQDN and be accepted by
      ``lib.is_hostname_valid()``;
    * no host can be listed more than once;
    * the Phabricator task ID, when set, must match
      ``lib.PHAB_TASK_PATTERN``.

    Returns:
        argparse.Namespace: the parsed command line arguments.

    """
    parser = lib.get_base_parser('Automated reimaging of a list of hosts')
    parser.add_argument(
        '--sequential', action='store_true',
        help=('run one reimage at a time, sequentially. By default the reimage for all the hosts '
              'are run in parallel.'))
    parser.add_argument(
        '--sleep', action='store', type=int, default=0,
        help=('amount of seconds to sleep between one reimage and the next when --sequential '
              'is set. Has no effect if --sequential is not set. [default: 0]'))
    parser.add_argument(
        '--force', action='store_true',
        help='override the default limit of that can be reimaged: 3 in parallel, 5 in sequence.')
    parser.add_argument(
        'hosts', metavar='HOST', nargs='+', action='store',
        help='FQDN of the host(s) to be reimaged')

    args = parser.parse_args()

    # Safety limits. The parallel limit must be evaluated only when
    # --sequential is not set, otherwise a sequential run of 4 or 5 hosts
    # would be wrongly rejected by the stricter parallel limit.
    if not args.force:
        if args.sequential:
            if len(args.hosts) > 5:
                parser.error('More than 5 sequential hosts specified and --force not set')
        elif len(args.hosts) > 3:
            parser.error(("More than 3 parallel hosts specified and --force not set. Before using "
                          "the --force parameter, ensure that there aren't too many hosts in the "
                          "same rack."))

    # Perform a quick sanity check on the hosts
    for host in args.hosts:
        if '.' not in host or not lib.HOSTS_PATTERN.match(host):
            parser.error("Expected FQDN of hosts, got '{host}'".format(host=host))

        if not lib.is_hostname_valid(host):
            parser.error("Unable to resolve host '{host}'".format(host=host))

    # Ensure there are no duplicates in the hosts list
    duplicates = {host for host in args.hosts if args.hosts.count(host) > 1}
    if duplicates:
        parser.error("Duplicate hosts detected: {dup}".format(dup=duplicates))

    # Ensure Phab task is properly formatted
    # args.phab_task_id is not defined here, presumably by lib.get_base_parser()
    if (args.phab_task_id is not None and
            lib.PHAB_TASK_PATTERN.search(args.phab_task_id) is None):
        parser.error(("Invalid Phabricator task ID '{task}', expected in "
                      "the form T12345").format(task=args.phab_task_id))

    return args
# Example n. 7
# 0
def main():
    """Upgrade or downgrade Varnish on a host.

    The sequence is: verify the Puppet state (enabled, or disabled with the
    message given to --hiera-merged), set an Icinga downtime, depool the host,
    disable Puppet and wait for the operator to merge the hiera change (both
    skipped with --hiera-merged), run the pre-Puppet steps, enable and run
    Puppet, run the post-Puppet steps, check the HTTP responses, repool the
    host and cancel the Icinga downtime.

    On any early exit the steps already performed (Icinga downtime, depool,
    Puppet disabled) are not reverted.

    Returns:
        int: 0 on success, 1 if the hostname is not valid, if the Puppet state
        is not the expected one, if a remote command (depool, disable Puppet,
        Puppet run, repool) fails or if the operator does not confirm.

    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    action = 'Upgrading'
    if args.downgrade:
        action = 'Downgrading'

    reason = "{} Varnish on {} --{}".format(action, args.host, user)

    logger.info(reason)

    if not args.hiera_merged:
        # Check that puppet is not already disabled. We skip this check if
        # invoked with --hiera-merged because in that case puppet must
        # necessarily be disabled already. If that were not the case, it would
        # fail because of the discrepancy between the hiera setting
        # profile::cache::base::varnish_version and the Varnish version
        # installed on the system.
        if not run_cumin(args.host, ['puppet-enabled']):
            logger.error("puppet is disabled on {}. Exiting.".format(
                args.host))
            return 1
    else:
        logger.info(
            "Not disabling puppet/waiting for puppet merge as requested (--hiera-merged)"
        )

        # On the contrary, if --hiera-merged is specified, make sure puppet
        # is disabled with the given message
        expected_output = "Puppet is disabled. {}".format(args.hiera_merged)
        if not check_cumin_output(args.host, ['puppet-enabled'],
                                  expected_output):
            logger.error(
                "puppet on {} must be disabled with commit message='{}'. Exiting."
                .format(args.host, args.hiera_merged))
            return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, reason, 1200)

    # Depool and wait a bit for the host to be drained. Abort on failure: the
    # upgrade must not proceed on a host that is still pooled.
    if not run_cumin(args.host, ['depool']):
        logger.error("Failed depooling {}. Exiting.".format(args.host))
        return 1

    # Use the logger configured by setup_logging(), not the root logger
    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    if not args.hiera_merged:
        # Disable puppet
        if not run_cumin(
                args.host,
            ['disable-puppet "{message}"'.format(message=reason)]):
            logger.error("Failed to disable puppet on {}. Exiting.".format(
                args.host))
            return 1

        # Wait for admin to merge the puppet patch toggling hiera settings
        if not ask_confirmation(
                "Waiting for you to puppet-merge "
                "the change toggling {}'s hiera settings".format(args.host)):
            return 1

    # Remove old stuff
    pre_puppet(args.host, downgrading=args.downgrade)

    # Enable and run puppet
    if args.hiera_merged:
        # If invoked with --hiera-merged we need to use the reason passed to
        # --hiera-merged itself in order to re-enable puppet
        reason = args.hiera_merged

    cmd = 'run-puppet-agent --enable "{message}"'.format(message=reason)
    if not run_cumin(args.host, [cmd], timeout=300):
        logger.error("Failed to enable and run puppet on {}. Exiting.".format(
            args.host))
        return 1

    # Post upgrade
    post_puppet(args.host)

    # check HTTP response from backend/frontend
    if args.host != "cp1008.wikimedia.org":
        # Skip HTTP check if working on pinkunicorn. PU is firewalled and does
        # not allow us to establish TCP connections to varnish.
        check_http_responses(args.host)

    # Repool. Report the failure to the caller instead of returning success
    # with the host left depooled.
    if not run_cumin(args.host, ['pool']):
        logger.error("Failed repooling {}. Exiting.".format(args.host))
        return 1

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0