def parse_args(): """Parse and return command line arguments, validate the host.""" parser = lib.get_base_parser('Automated reimaging of a single host') parser.add_argument( '--rename', help='FQDN of the new name to rename this host to while reimaging') parser.add_argument( '--rename-mgmt', help='FQDN of the new name management interface, see --rename') parser.add_argument('host', metavar='HOST', action='store', help='FQDN of the host to be reimaged') parser.add_argument('mgmt', metavar='MGMT', action='store', nargs='?', default=None, help='FQDN of the management interface for the host') args = parser.parse_args() fqdns = [args.host] if args.rename is not None: fqdns.append(args.rename) if args.no_pxe: raise argparse.ArgumentTypeError( 'The --rename option cannot be used in conjunction with --no-pxe.' ) # Gather the management interfaces, if missing if args.mgmt is None: mgmts = lib.get_mgmts([args.host]) args.mgmt = mgmts[args.host] else: mgmts = {args.host: args.mgmt} fqdns.append(args.mgmt) if args.rename is not None and args.rename_mgmt is None: mgmts.update(lib.get_mgmts([args.rename])) fqdns.append(mgmts[args.rename]) # Perform a quick sanity check on the host and mgmt for name in fqdns: if '.' not in name or not lib.HOSTS_PATTERN.match(name): raise argparse.ArgumentTypeError( "Expected FQDN, got '{name}'".format(name=name)) if not lib.is_hostname_valid(name): raise argparse.ArgumentTypeError( "Unable to resolve host '{name}'".format(name=name)) for mgmt in mgmts.values(): if '.mgmt.' not in mgmt: raise argparse.ArgumentTypeError( 'The MGMT parameter {} does not follow the *.mgmt.* format'. format(mgmt)) return args
def parse_args(): """Parse and return command line arguments, validate the host.""" parser = argparse.ArgumentParser( description= 'Downtime a host on Icinga, after running Puppet on the Icinga host.') parser.add_argument('-d', '--debug', action='store_true', help='debug level logging and cumin output') parser.add_argument( '-s', '--sleep', type=int, help='amount of seconds to sleep before downtiming the host') parser.add_argument('-p', '--phab-task-id', action='store', help='the Phabricator task ID, i.e.: T12345)') parser.add_argument('host', metavar='HOST', action='store', help='FQDN of the host to be downtimed') args = parser.parse_args() # Perform a quick sanity check on the host if '.' not in args.host or not lib.HOSTS_PATTERN.match(args.host): raise argparse.ArgumentTypeError( "Expected FQDN, got '{host}'".format(host=args.host)) if not lib.is_hostname_valid(args.host): raise argparse.ArgumentTypeError( "Unable to resolve host '{host}'".format(host=args.host)) return args
def main():
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # Disable puppet
    lib.disable_puppet([args.host], '{} --{}'.format(NAME, user))

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any depool action as requested (empty --depool-cmd)")

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Upgrade all packages, leave config files untouched, do not prompt
    upgrade_cmd = ("DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::='--force-confdef' "
                   "-o Dpkg::Options::='--force-confold' dist-upgrade")
    if not run_cumin(args.host, [upgrade_cmd], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    reboot_time = datetime.utcnow()
    lib.reboot_host(args.host)
    boot_time = datetime.utcnow()
    lib.wait_reboot(args.host, start=reboot_time)

    # Enable puppet
    lib.enable_puppet([args.host], '{} --{}'.format(NAME, user))
    # Run puppet
    lib.run_puppet([args.host])
    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any repool action as requested (empty --repool-cmd)")

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)

    return 0
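
# A minimal sketch of the run_cumin() helper used throughout these scripts,
# assuming it shells out to the cumin CLI and treats a zero exit status as
# success. The real helper likely uses the cumin Python API with richer error
# handling and output, so the function below is only an illustrative stand-in.
import subprocess


def run_cumin_sketch(host, commands, timeout=60):
    """Run each command on the target host via the cumin CLI; return True on success."""
    for command in commands:
        try:
            proc = subprocess.run(['cumin', host, command], timeout=timeout)
        except subprocess.TimeoutExpired:
            return False
        if proc.returncode != 0:
            return False
    return True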
def main(): """Run the script.""" script_name = os.path.basename(__file__) args = parse_args() user = lib.get_running_user() phab_client = lib.get_phabricator_client() is_valid_host = lib.is_hostname_valid(args.host) actions = [] if not is_valid_host and not args.force: logger.error( "{host} is not a valid hostname. Aborting.".format(host=args.host)) return 1 # Remove from Puppet and PuppetDB lib.puppet_remove_host(args.host) actions += ['Revoked Puppet certificate', 'Removed from PuppetDB'] # Downtime on Icinga both the host and the mgmt host, they will be removed by Puppet if is_valid_host: try: lib.icinga_downtime(args.host, user, args.phab_task_id, title=script_name) actions.append('Downtimed host on Icinga') except RuntimeError: actions.append( 'Skipped downtime host on Icinga (likely already removed)') mgmts = lib.get_mgmts([args.host]) try: lib.icinga_downtime(mgmts[args.host], user, args.phab_task_id, title=script_name) actions.append('Downtimed mgmt interface on Icinga') except RuntimeError: actions.append( 'Skipped downtime mgmt interface on Icinga (likely already removed)' ) # Remove from DebMonitor lib.debmonitor_remove_host(args.host) actions.append('Removed from DebMonitor') message = ( '{script} was executed by {user} for {host} and performed the following actions:\n' '- {actions}').format(script=script_name, user=user, host=args.host, actions='\n- '.join(actions)) lib.phabricator_task_update(phab_client, args.phab_task_id, message) return 0
def main():
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any depool action as requested (empty --depool-cmd)")

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Run apt full-upgrade
    if not run_cumin(args.host, ['apt -y full-upgrade'], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    reboot_time = datetime.utcnow()
    lib.reboot_host(args.host)
    boot_time = datetime.utcnow()
    lib.wait_reboot(args.host, start=reboot_time)
    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any repool action as requested (empty --repool-cmd)")

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)

    return 0
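
# A minimal sketch of the icinga_downtime(host, reason, duration) call used above,
# assuming downtime is scheduled by writing SCHEDULE_HOST_DOWNTIME and
# SCHEDULE_HOST_SVC_DOWNTIME external commands to the Icinga command file. The
# real helper probably runs this on the Icinga server (e.g. via cumin), and the
# command-file path and author default below are assumptions.
import time

ICINGA_COMMAND_FILE = '/var/lib/icinga/rw/icinga.cmd'  # assumed path


def icinga_downtime_sketch(host, reason, duration, author='automation'):
    """Schedule a fixed downtime for the host and all of its services."""
    start = int(time.time())
    end = start + duration
    with open(ICINGA_COMMAND_FILE, 'w') as command_file:
        for command in ('SCHEDULE_HOST_DOWNTIME', 'SCHEDULE_HOST_SVC_DOWNTIME'):
            command_file.write(
                '[{now}] {cmd};{host};{start};{end};1;0;{duration};{author};{reason}\n'.format(
                    now=start, cmd=command, host=host, start=start, end=end,
                    duration=duration, author=author, reason=reason))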
def parse_args(): """Parse and return command line arguments, validate the hosts.""" parser = lib.get_base_parser('Automated reimaging of a list of hosts') parser.add_argument( '--sequential', action='store_true', help=('run one reimage at a time, sequentially. By default the reimage for all the hosts ' 'are run in parallel.')) parser.add_argument( '--sleep', action='store', type=int, default=0, help=('amount of seconds to sleep between one reimage and the next when --sequential ' 'is set. Has no effect if --sequential is not set. [default: 0]')) parser.add_argument( '--force', action='store_true', help='override the default limit of that can be reimaged: 3 in parallel, 5 in sequence.') parser.add_argument( 'hosts', metavar='HOST', nargs='+', action='store', help='FQDN of the host(s) to be reimaged') args = parser.parse_args() # Safety limits if not args.force: if args.sequential and len(args.hosts) > 5: parser.error('More than 5 sequential hosts specified and --force not set') elif len(args.hosts) > 3: parser.error(("More than 3 parallel hosts specified and --force not set. Before using " "the --force parameter, ensure that there aren't too many hosts in the " "same rack.")) # Perform a quick sanity check on the hosts for host in args.hosts: if '.' not in host or not lib.HOSTS_PATTERN.match(host): parser.error("Expected FQDN of hosts, got '{host}'".format(host=host)) if not lib.is_hostname_valid(host): parser.error("Unable to resolve host '{host}'".format(host=host)) # Ensure there are no duplicates in the hosts list duplicates = {host for host in args.hosts if args.hosts.count(host) > 1} if len(duplicates) > 0: parser.error("Duplicate hosts detected: {dup}".format(dup=duplicates)) # Ensure Phab task is properly formatted if (args.phab_task_id is not None and lib.PHAB_TASK_PATTERN.search(args.phab_task_id) is None): parser.error(("Invalid Phabricator task ID '{task}', expected in " "the form T12345").format(task=args.phab_task_id)) return args
def main():
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    action = 'Upgrading'
    if args.downgrade:
        action = 'Downgrading'

    reason = "{} Varnish on {} --{}".format(action, args.host, user)
    logger.info(reason)

    if not args.hiera_merged:
        # Check that puppet is not already disabled. We skip this check if
        # invoked with --hiera-merged because in that case puppet must
        # necessarily be disabled already. If that were not the case, it would
        # fail because of the discrepancy between the hiera setting
        # profile::cache::base::varnish_version and the Varnish version
        # installed on the system.
        if not run_cumin(args.host, ['puppet-enabled']):
            logger.error("puppet is disabled on {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not disabling puppet/waiting for puppet merge as requested (--hiera-merged)")

        # On the contrary, if --hiera-merged is specified, make sure puppet
        # is disabled with the given message
        expected_output = "Puppet is disabled. {}".format(args.hiera_merged)
        if not check_cumin_output(args.host, ['puppet-enabled'], expected_output):
            logger.error("puppet on {} must be disabled with commit message='{}'. Exiting.".format(
                args.host, args.hiera_merged))
            return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, reason, 1200)

    # Depool and wait a bit for the host to be drained
    if not run_cumin(args.host, ['depool']):
        logger.error("Failed depooling {}. Exiting.".format(args.host))
        return 1

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    if not args.hiera_merged:
        # Disable puppet
        if not run_cumin(args.host, ['disable-puppet "{message}"'.format(message=reason)]):
            logger.error("Failed to disable puppet on {}. Exiting.".format(args.host))
            return 1

        # Wait for admin to merge the puppet patch toggling hiera settings
        if not ask_confirmation("Waiting for you to puppet-merge "
                                "the change toggling {}'s hiera settings".format(args.host)):
            return 1

    # Remove old stuff
    pre_puppet(args.host, downgrading=args.downgrade)

    # Enable and run puppet
    if args.hiera_merged:
        # If invoked with --hiera-merged we need to use the reason passed to
        # --hiera-merged itself in order to re-enable puppet
        reason = args.hiera_merged

    cmd = 'run-puppet-agent --enable "{message}"'.format(message=reason)
    if not run_cumin(args.host, [cmd], timeout=300):
        logger.error("Failed to enable and run puppet on {}. Exiting.".format(args.host))
        return 1

    # Post upgrade
    post_puppet(args.host)

    # Check HTTP response from backend/frontend
    if args.host != "cp1008.wikimedia.org":
        # Skip HTTP check if working on pinkunicorn. PU is firewalled and does
        # not allow us to establish TCP connections to varnish.
        check_http_responses(args.host)

    # Repool
    if not run_cumin(args.host, ['pool']):
        logger.error("Failed repooling {}. Exiting.".format(args.host))
        return 1

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)

    return 0
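
# A minimal sketch of the check_cumin_output() helper used above, assuming it
# runs the command via the cumin CLI and checks the captured output for the
# expected string. The real helper likely uses the cumin Python API and a more
# robust comparison, so treat this as an illustrative stand-in only.
import subprocess


def check_cumin_output_sketch(host, commands, expected_output, timeout=60):
    """Return True if every command succeeds and prints the expected output."""
    for command in commands:
        try:
            proc = subprocess.run(['cumin', host, command], capture_output=True,
                                  text=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            return False
        if proc.returncode != 0 or expected_output not in proc.stdout:
            return False
    return True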