def main():
    """Upgrade all packages on a single host and reboot it.

    Sequence: validate the hostname, set an Icinga downtime, disable Puppet,
    run the optional depool command, upgrade the packages, reboot and wait
    for the host, re-enable and run Puppet, run the optional repool command
    and finally cancel the Icinga downtime.

    Returns:
        int: 0 on success, 1 if the hostname is not valid or if the depool,
        upgrade or repool command fails.
    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # Disable puppet
    lib.disable_puppet([args.host], '{} --{}'.format(NAME, user))

    # NOTE: the early returns below happen before Puppet is re-enabled and
    # before the Icinga downtime is cancelled.

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        # Log through the script's own logger (the one passed to
        # setup_logging) rather than the root logger, like every other
        # message in this function.
        logger.info("Not performing any depool action as requested (empty --depool-cmd)")

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Upgrade all packages, leave config files untouched, do not prompt
    upgrade_cmd = ("DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::='--force-confdef' "
                   "-o Dpkg::Options::='--force-confold' dist-upgrade")
    if not run_cumin(args.host, [upgrade_cmd], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    # reboot_time is taken before issuing the reboot, boot_time right after;
    # they are the reference points for the reboot and Puppet run waits.
    reboot_time = datetime.utcnow()
    lib.reboot_host(args.host)
    boot_time = datetime.utcnow()
    lib.wait_reboot(args.host, start=reboot_time)

    # Enable puppet
    lib.enable_puppet([args.host], '{} --{}'.format(NAME, user))

    # Run puppet
    lib.run_puppet([args.host])
    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info("Not performing any repool action as requested (empty --repool-cmd)")

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0
def main():
    """Entry point for the automated reimaging of a list of hosts.

    Parses the arguments, sets up logging, prints how to follow the log and
    delegates the actual work to run().

    Returns:
        int: whatever run() returns, or 2 if run() raised anything
        (BaseException is caught, so interrupts are included).
    """
    # Setup
    args = parse_args()
    lib.ensure_shell_mode()
    user = lib.get_running_user()
    log_path = setup_logging(user)

    if args.debug:
        logger.setLevel(logging.DEBUG)

    invocation = 'wmf-auto-reimage called with args: {args}'.format(args=args)
    logger.info(invocation)

    tail_hint = 'sudo tail -F {log}'.format(log=log_path)
    lib.print_line('START. To monitor the full log:')
    lib.print_line(tail_hint, skip_time=True)

    try:
        retcode = run(args, user, log_path)
    except BaseException as error:
        message = 'Unable to run wmf-auto-reimage'
        failure = '{message}: {error}'.format(message=message, error=error)
        lib.print_line(failure)
        logger.exception(message)
        retcode = 2
    finally:
        # Always mark the end of the run, whether it succeeded or not
        lib.print_line('END')

    return retcode
def main():
    """Run the automated reimaging of a single host.

    After the setup and the IPMI checks, an optional Phabricator task is
    updated, then run() is executed with stdout/stderr redirected to a
    dedicated cumin output file next to the log file.

    Returns:
        int: 0 if run() completed, 2 if anything raised (BaseException is
        caught, so interrupts are included).
    """
    # Setup
    phab_client = None
    args = parse_args()
    lib.ensure_shell_mode()
    user = lib.get_running_user()
    log_path = setup_logging(user, args.host)
    cumin_output_path = log_path.replace('.log', '_cumin.out')

    if args.debug:
        logger.setLevel(logging.DEBUG)

    logger.info('wmf-auto-reimage-host called with args: {args}'.format(args=args))
    lib.print_line('REIMAGE START | To monitor the full log and cumin output:', host=args.host)
    lib.print_line('sudo tail -F {log}'.format(log=log_path), skip_time=True)
    lib.print_line('sudo tail -F {log}'.format(log=cumin_output_path), skip_time=True)

    try:
        lib.ensure_ipmi_password()
        lib.check_remote_ipmi(args.mgmt)
        if args.rename_mgmt:
            lib.check_remote_ipmi(args.rename_mgmt)

        if args.phab_task_id is not None:
            phab_client = lib.get_phabricator_client()
            lib.phabricator_task_update(
                phab_client, args.phab_task_id, lib.PHAB_COMMENT_PRE.format(
                    user=user, hostname=socket.getfqdn(), hosts=args.host, log=log_path))

        # Save the real streams BEFORE opening the output file: if open()
        # fails there is nothing to restore and the original error is the
        # one reported, instead of an unbound-local error raised while
        # trying to restore streams that were never saved.
        stderr = sys.stderr
        stdout = sys.stdout
        # The redirection is needed due to a bug in tqdm and a limitation in Cumin
        with open(cumin_output_path, 'w', 1) as cumin_output:
            try:
                sys.stderr = cumin_output
                sys.stdout = cumin_output
                run(args, user, log_path)
                retcode = 0
            finally:
                # Restore while the file is still open, so sys.stdout and
                # sys.stderr never point to a closed file
                sys.stderr = stderr
                sys.stdout = stdout
    except BaseException as e:
        message = 'Unable to run wmf-auto-reimage-host'
        lib.print_line('{message}: {error}'.format(message=message, error=e), host=args.host)
        logger.exception(message)
        retcode = 2
    finally:
        lib.print_line('REIMAGE END | retcode={ret}'.format(ret=retcode), host=args.host)

    # Comment on the Phabricator task
    if args.phab_task_id is not None and phab_client is not None:
        phabricator_message = lib.get_phabricator_post_message({retcode: [args.host]})
        lib.phabricator_task_update(phab_client, args.phab_task_id, phabricator_message)

    return retcode
def main():
    """Run the script.

    Removes the host from Puppet/PuppetDB, downtimes the host and its mgmt
    interface on Icinga when the hostname is valid, removes the host from
    DebMonitor and reports the list of performed actions on the Phabricator
    task.

    Returns:
        int: 0 on completion, 1 if the hostname is not valid and --force was
        not given.
    """
    script_name = os.path.basename(__file__)
    args = parse_args()
    user = lib.get_running_user()
    phab_client = lib.get_phabricator_client()
    host_is_valid = lib.is_hostname_valid(args.host)
    performed = []

    # Bail out early on an invalid hostname unless explicitly forced
    if not (host_is_valid or args.force):
        logger.error(
            "{host} is not a valid hostname. Aborting.".format(host=args.host))
        return 1

    # Remove from Puppet and PuppetDB
    lib.puppet_remove_host(args.host)
    performed.extend(('Revoked Puppet certificate', 'Removed from PuppetDB'))

    # Downtime on Icinga both the host and the mgmt host, they will be removed by Puppet
    if host_is_valid:
        try:
            lib.icinga_downtime(args.host, user, args.phab_task_id, title=script_name)
        except RuntimeError:
            performed.append(
                'Skipped downtime host on Icinga (likely already removed)')
        else:
            performed.append('Downtimed host on Icinga')

        mgmts = lib.get_mgmts([args.host])
        try:
            lib.icinga_downtime(mgmts[args.host], user, args.phab_task_id, title=script_name)
        except RuntimeError:
            performed.append(
                'Skipped downtime mgmt interface on Icinga (likely already removed)')
        else:
            performed.append('Downtimed mgmt interface on Icinga')

    # Remove from DebMonitor
    lib.debmonitor_remove_host(args.host)
    performed.append('Removed from DebMonitor')

    # Summarize the actions as a bullet list for the Phabricator task
    template = (
        '{script} was executed by {user} for {host} and performed the following actions:\n'
        '- {actions}')
    bullet_list = '\n- '.join(performed)
    message = template.format(
        script=script_name, user=user, host=args.host, actions=bullet_list)
    lib.phabricator_task_update(phab_client, args.phab_task_id, message)

    return 0
def main():
    """Run a full package upgrade on a single host and reboot it.

    Sequence: validate the hostname, set an Icinga downtime, run the optional
    depool command, run `apt -y full-upgrade`, reboot and wait for the host
    and for a Puppet run, run the optional repool command and finally cancel
    the Icinga downtime.

    Returns:
        int: 0 on success, 1 if the hostname is not valid or if the depool,
        upgrade or repool command fails.
    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, "Software upgrade and reboot", 1200)

    # NOTE: the early returns below happen before the Icinga downtime is
    # cancelled.

    # Depool and wait a bit for the host to be drained
    if args.depool_cmd:
        if not run_cumin(args.host, [args.depool_cmd]):
            logger.error("Failed depooling {}. Exiting.".format(args.host))
            return 1
    else:
        # Log through the script's own logger (the one passed to
        # setup_logging) rather than the root logger, like every other
        # message in this function.
        logger.info(
            "Not performing any depool action as requested (empty --depool-cmd)"
        )

    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    # Run apt full-upgrade
    if not run_cumin(args.host, ['apt -y full-upgrade'], timeout=300):
        logger.error("Failed upgrading {}. Exiting.".format(args.host))
        return 1

    # reboot_time is taken before issuing the reboot, boot_time right after;
    # they are the reference points for the reboot and Puppet run waits.
    reboot_time = datetime.utcnow()
    lib.reboot_host(args.host)
    boot_time = datetime.utcnow()
    lib.wait_reboot(args.host, start=reboot_time)
    lib.wait_puppet_run(args.host, start=boot_time)

    # Repool
    if args.repool_cmd:
        if not run_cumin(args.host, [args.repool_cmd]):
            logger.error("Failed repooling {}. Exiting.".format(args.host))
            return 1
    else:
        logger.info(
            "Not performing any repool action as requested (empty --repool-cmd)"
        )

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0
def main():
    """Downtime a single host on Icinga.

    Optionally sleeps for args.sleep seconds, then runs Puppet on the Icinga
    server and sets the downtime for the host. Unless --debug is given, the
    output of those two steps is discarded by redirecting stdout/stderr to
    os.devnull.

    Returns:
        int: 0 on success, 2 if anything raised (BaseException is caught, so
        interrupts are included).
    """
    args = parse_args()
    user = lib.get_running_user()

    if args.debug:
        logger.setLevel(logging.DEBUG)

    if args.sleep:
        lib.print_line('Sleeping for {s} seconds'.format(s=args.sleep), host=args.host)
        time.sleep(args.sleep)

    def downtime_host():
        """Run Puppet on the Icinga server, then set the downtime for the host."""
        lib.run_puppet([lib.resolve_dns(lib.ICINGA_DOMAIN, 'CNAME')], no_raise=True)
        lib.icinga_downtime(args.host, user, args.phab_task_id, title='wmf-downtime-host')

    lib.print_line('Running Puppet on the Icinga server', host=args.host)
    try:
        if args.debug:
            downtime_host()
        else:
            # Save the real streams before opening the file, so a failing
            # open() leaves nothing to restore.
            stderr = sys.stderr
            stdout = sys.stdout
            # The redirection is needed due to a bug in tqdm and a limitation in Cumin
            with open(os.devnull, 'w', 1) as cumin_output:
                try:
                    sys.stderr = cumin_output
                    sys.stdout = cumin_output
                    downtime_host()
                finally:
                    # Restore while the file is still open and before the
                    # except handler below runs: it reports the failure via
                    # lib.print_line, which presumably writes to sys.stdout
                    # and must not be handed a closed file.
                    sys.stderr = stderr
                    sys.stdout = stdout

        retcode = 0
    except BaseException as e:
        message = 'Unable to run wmf-downtime-host'
        lib.print_line('{message}: {error}'.format(message=message, error=e), host=args.host)
        logger.exception(message)
        retcode = 2

    return retcode
def main():
    """Upgrade (or downgrade, with --downgrade) Varnish on a single host.

    Sequence: validate the hostname, check the Puppet enabled/disabled state
    expected for the invocation mode, set an Icinga downtime, depool the
    host, disable Puppet and wait for the operator to merge the hiera change
    (skipped with --hiera-merged), run the pre-Puppet cleanup, re-enable and
    run Puppet, run the post-Puppet step, check the HTTP responses, repool
    the host and cancel the Icinga downtime.

    Returns:
        int: 0 on success, 1 if any validation or remote step fails or if
        the operator does not confirm the puppet-merge.
    """
    args = parse_args()
    user = lib.get_running_user()
    setup_logging(logger, user)

    if not lib.is_hostname_valid(args.host):
        logger.error("{} is not a valid hostname. Exiting.".format(args.host))
        return 1

    action = 'Downgrading' if args.downgrade else 'Upgrading'
    reason = "{} Varnish on {} --{}".format(action, args.host, user)
    logger.info(reason)

    if not args.hiera_merged:
        # Check that puppet is not already disabled. We skip this check if
        # invoked with --hiera-merged because in that case puppet must
        # necessarily be disabled already. If that were not the case, it would
        # fail because of the discrepancy between the hiera setting
        # profile::cache::base::varnish_version and the Varnish version
        # installed on the system.
        if not run_cumin(args.host, ['puppet-enabled']):
            logger.error("puppet is disabled on {}. Exiting.".format(
                args.host))
            return 1
    else:
        logger.info(
            "Not disabling puppet/waiting for puppet merge as requested (--hiera-merged)"
        )
        # On the contrary, if --hiera-merged is specified, make sure puppet
        # is disabled with the given message
        expected_output = "Puppet is disabled. {}".format(args.hiera_merged)
        if not check_cumin_output(args.host, ['puppet-enabled'],
                                  expected_output):
            logger.error(
                "puppet on {} must be disabled with commit message='{}'. Exiting."
                .format(args.host, args.hiera_merged))
            return 1

    # Set Icinga downtime for the host to be upgraded
    icinga_downtime(args.host, reason, 1200)

    # NOTE: the early returns below happen before the Icinga downtime is
    # cancelled.

    # Depool and wait a bit for the host to be drained
    if not run_cumin(args.host, ['depool']):
        logger.error("Failed depooling {}. Exiting.".format(args.host))
        # Actually exit, as the message says: do not go on working on a host
        # that could not be depooled.
        return 1

    # Log through the script's own logger rather than the root logger
    logger.info("Waiting for {} to be drained.".format(args.host))
    time.sleep(30)

    if not args.hiera_merged:
        # Disable puppet
        if not run_cumin(
                args.host,
                ['disable-puppet "{message}"'.format(message=reason)]):
            logger.error("Failed to disable puppet on {}. Exiting.".format(
                args.host))
            return 1

        # Wait for admin to merge the puppet patch toggling hiera settings
        if not ask_confirmation(
                "Waiting for you to puppet-merge "
                "the change toggling {}'s hiera settings".format(args.host)):
            return 1

    # Remove old stuff
    pre_puppet(args.host, downgrading=args.downgrade)

    # Enable and run puppet
    if args.hiera_merged:
        # If invoked with --hiera-merged we need to use the reason passed to
        # --hiera-merged itself in order to re-enable puppet
        reason = args.hiera_merged

    cmd = 'run-puppet-agent --enable "{message}"'.format(message=reason)
    if not run_cumin(args.host, [cmd], timeout=300):
        logger.error("Failed to enable and run puppet on {}. Exiting.".format(
            args.host))
        return 1

    # Post upgrade
    post_puppet(args.host)

    # check HTTP response from backend/frontend
    if args.host != "cp1008.wikimedia.org":
        # Skip HTTP check if working on pinkunicorn. PU is firewalled and does
        # not allow us to establish TCP connections to varnish.
        check_http_responses(args.host)

    # Repool
    if not run_cumin(args.host, ['pool']):
        logger.error("Failed repooling {}. Exiting.".format(args.host))
        # Report the failure to the caller instead of falling through to a
        # successful exit code.
        return 1

    # Cancel Icinga downtime
    icinga_cancel_downtime(args.host)
    return 0