def __init__(self, host_name, host_args, updater_args):
    """Configure the Jenkins updater and create the Jenkins client.

    Reads the following keys from ``self.updater_args``:

    * ``wait`` -- whether ``update()`` waits for the build (default True).
    * ``wait-timeout`` -- seconds to wait overall (default 500).
    * ``wait-check-interval`` -- seconds between polls (default 10).
    * ``server``, ``username``, ``password``, ``job`` -- each is passed
      through ``jinja()`` before being stored (presumably template
      rendering; the helper is defined outside this block).

    If the Jenkins client cannot be created, the error is logged and
    ``self.jenkins`` is set to ``None`` instead of raising.
    """
    super(JenkinsUpdater, self).__init__(host_name, host_args, updater_args)

    self.wait = self.updater_args.get('wait', True)

    # Fall back to the defaults when the configured values are not integers.
    try:
        self.wait_timeout = int(self.updater_args.get('wait-timeout', 500))
    except (ValueError, TypeError):
        # The message now matches the value that is actually applied.
        log.debug('[jenkins] Default to 500 seconds timeout')
        self.wait_timeout = 500

    try:
        self.wait_check_interval = int(
            self.updater_args.get('wait-check-interval', 10))
    except (ValueError, TypeError):
        log.debug('[jenkins] Default to 10 seconds check interval')
        self.wait_check_interval = 10

    self.server = jinja(self.updater_args.get('server'))
    self.username = jinja(self.updater_args.get('username'))
    self.password = jinja(self.updater_args.get('password'))
    self.job = jinja(self.updater_args.get('job'))

    try:
        self.jenkins = jenkins.Jenkins(
            self.server, self.username, self.password)
    except Exception:
        # `except Exception` keeps this best-effort while letting
        # KeyboardInterrupt / SystemExit propagate.
        log.exception('[{}] [jenkins] Could not connect to {}'.format(
            self.host, self.server))
        self.jenkins = None
def amaltheia(args):
    """Run an amaltheia job described by ``args``.

    Parses the job, loads its ``config`` section (an empty dict when the
    job has none), sets up logging at the configured level, logs the
    loaded variables and config entries, then runs the job's strategy.
    """
    job = parse_job(args)

    job_config = job.get('config', {})
    config.load(job_config)

    log.setup(level=config.log_level)

    variables_message = '[amaltheia] Loaded variables: {}'.format(
        config.variables)
    log.debug(variables_message)

    entries_message = '[amaltheia] Loaded config: {}'.format(config._entries)
    log.debug(entries_message)

    run_strategy(job)
def __init__(self, host_name, host_args, updater_args):
    """Configure the reboot updater from ``updater_args``.

    Recognised keys: ``wait`` (default True), ``wait-timeout`` in seconds
    (default 500) and ``wait-check-interval`` in seconds (default 10).
    Values that cannot be converted to ``int`` fall back to the defaults.
    """
    super(RebootUpdater, self).__init__(host_name, host_args, updater_args)

    options = self.updater_args
    self.wait = options.get('wait', True)

    # Overall time budget for the host to come back.
    try:
        timeout_seconds = int(options.get('wait-timeout', 500))
    except (ValueError, TypeError):
        log.debug('[reboot] Default to 500 seconds timeout')
        timeout_seconds = 500
    self.wait_timeout = timeout_seconds

    # Value handed to each connection attempt while waiting.
    try:
        interval_seconds = int(options.get('wait-check-interval', 10))
    except (ValueError, TypeError):
        log.debug('[reboot] Default to 10 seconds check interval')
        interval_seconds = 10
    self.wait_check_interval = interval_seconds
def update(self):
    """Reboot the host over SSH and optionally wait for it to come back.

    Returns True right away when ``self.wait`` is falsy. Otherwise keeps
    calling ``ssh_try_connect`` until it returns a truthy value or
    ``self.wait_timeout`` seconds have elapsed, and returns the last
    value it produced (False if no attempt was made).
    """
    ssh_cmd(self.host, self.host_args, 'sudo reboot')

    if not self.wait:
        log.debug('[{}] Not waiting for reboot'.format(self.host))
        return True

    deadline = datetime.now() + timedelta(seconds=self.wait_timeout)
    reachable = False
    while datetime.now() <= deadline:
        log.debug('[{}] Waiting for reboot...'.format(self.host))
        # NOTE(review): the check interval is passed as the `timeout`
        # argument of the attempt -- presumably a per-attempt connect
        # timeout; confirm against ssh_try_connect.
        reachable = ssh_try_connect(
            self.host, self.host_args, timeout=self.wait_check_interval)
        if reachable:
            break

    if reachable:
        return reachable

    log.fatal('[{}] Timeout waiting for reboot'.format(self.host))
    return reachable
def evacuate(self):
    """Disable nova-compute on this host and migrate all instances away.

    Steps:

    1. Return True immediately if ``service_args['skip-evacuate']`` is set.
    2. Disable the nova-compute service for the host.
    3. Schedule live migration, then regular migration, recording a
       per-server status from the command output tables.
    4. Abort (return False) if any server was accepted by neither.
    5. Poll the hypervisor's server list every 5 seconds until it is
       empty or the time budget (``timeout`` service argument, default
       40 seconds, multiplied by the initial number of servers) runs out.

    Returns:
        True if evacuation was skipped or every server left the host,
        False if a migration was rejected or the wait timed out.
    """
    if self.service_args.get('skip-evacuate'):
        return True

    host = quote(self.host)

    # Disable nova-compute
    openstack_cmd(
        'openstack compute service set {} nova-compute --disable'.format(
            host))

    # Retrieve list of VMs, indexable by their Instance ID
    server_list = openstack_cmd_table(
        'nova hypervisor-servers {}'.format(host))
    servers = {s['ID']: s for s in server_list}

    # Schedule live migration for running VMs
    result = openstack_cmd_table('nova host-evacuate-live {}'.format(host))
    for server in result:
        # setdefault: tolerate a UUID that was not in the initial listing
        # instead of raising KeyError.
        entry = servers.setdefault(server['Server UUID'], {})
        if server['Live Migration Accepted'] == 'True':
            entry.update({'status': 'OK'})
        else:
            entry.update({
                'status': 'NOTOK',
                'error': server['Error Message']
            })

    # Errors with live migration may occur for VMs that are stopped.
    # Migrate them as well
    result = openstack_cmd_table(
        'nova host-servers-migrate {}'.format(host))
    for server in result:
        entry = servers.setdefault(server['Server UUID'], {})
        if server['Migration Accepted'] == 'True':
            entry.update({'status': 'OK'})
            # There is no 'error' key when the live migration had already
            # been accepted, so a plain `del` would raise KeyError.
            entry.pop('error', None)
        elif entry.get('status', '') != 'OK':
            entry.update({
                'status': 'NOTOK',
                'error': server['Error Message']
            })

    # A server that appeared in neither result table has no status at
    # all; report it as an error rather than crashing on the lookup.
    errors = {k: v for k, v in servers.items() if v.get('status') != 'OK'}
    if errors:
        log.fatal('[{}] {}'.format(self.host, errors))
        return False

    # Wait for migrations to complete
    try:
        timeout_per_server = int(self.service_args.get('timeout', 40))
    except (ValueError, TypeError):
        log.debug('[{}] Defaulting to 40 seconds timeout'.format(
            self.host))
        timeout_per_server = 40

    timeout = len(server_list) * timeout_per_server
    while server_list and timeout > 0:
        timeout -= 5
        sleep(5)
        server_list = openstack_cmd_table(
            'nova hypervisor-servers {}'.format(host))
        log.debug('[{}] Waiting for migrations, {} remaining'.format(
            self.host, len(server_list)))

    if server_list:
        log.fatal('[{}] Some migrations timed-out: \n{}'.format(
            self.host, server_list))
        return False

    log.debug('[{}] All servers migrated successfully'.format(self.host))
    return True
def update(self):
    """Trigger the configured Jenkins job and optionally wait for it.

    The job is queued with ``build-arguments`` from ``updater_args``
    (passed through ``jinja()`` together with the host and its
    arguments) when present, or without parameters otherwise.

    When ``self.wait`` is truthy, waits first for the queue item to be
    assigned a build number and then for that build to report a result.
    Both waits share one deadline of ``self.wait_timeout`` seconds and
    poll every ``self.wait_check_interval`` seconds.

    Returns:
        True if the job was queued and ``self.wait`` is falsy, or if the
        build finished with result ``'SUCCESS'``. False on any Jenkins
        error, an empty job name, a timeout, or any other build result.
    """
    try:
        # Also covers self.jenkins being None after a failed __init__
        # (AttributeError is an Exception).
        self.jenkins.get_whoami()
    except Exception:
        log.exception('[{}] [jenkins] Failed to authenticate'.format(
            self.host))
        return False

    if self.job is None:
        log.fatal('[{}] [jenkins] Empty job name'.format(self.host))
        return False

    raw_args = self.updater_args.get('build-arguments')
    try:
        if raw_args:
            queue_id = self.jenkins.build_job(
                self.job,
                jinja(raw_args, host=self.host, host_args=self.host_args))
        else:
            queue_id = self.jenkins.build_job(self.job)
    except Exception:
        log.exception('[{}] [jenkins] Failed to queue job {}'.format(
            self.host, self.job))
        return False

    log.info('[{}] [jenkins] Queued job {} (queue id {})'.format(
        self.host, self.job, queue_id))

    if not self.wait:
        return True

    timeout = datetime.now() + timedelta(seconds=self.wait_timeout)

    # Phase 1: wait until the queue item has been given a build number.
    while True:
        try:
            queue_item = self.jenkins.get_queue_item(queue_id)
        except Exception:
            log.exception('[{}] [jenkins] Failed to queue job {}'.format(
                self.host, self.job))
            return False

        # A missing, None or number-less 'executable' all mean the build
        # has not started yet; none of them is an error.
        executable = queue_item.get('executable') if queue_item else None
        job_number = executable.get('number') if executable else None
        if job_number is not None:
            break

        sleep(self.wait_check_interval)
        log.debug('[{}] [jenkins] Waiting for job queue {}'.format(
            self.host, self.job))

        if datetime.now() > timeout:
            log.fatal(
                '[{}] [jenkins] Timeout waiting for job queue {}'.format(
                    self.host, self.job))
            return False

    log.info('[{}] [jenkins] Started job {}/{} (queue id {})'.format(
        self.host, self.job, job_number, queue_id))

    # Phase 2: poll the build until it reports a result.
    result = None
    while result is None and datetime.now() <= timeout:
        log.debug('[{}] [jenkins] Waiting for job run {}/{}'.format(
            self.host, self.job, job_number))
        try:
            build_info = self.jenkins.get_build_info(self.job, job_number)
        except Exception:
            # Report failure like every other Jenkins call here, instead
            # of letting the exception escape update().
            log.exception(
                '[{}] [jenkins] Failed to get status of job run {}/{}'.format(
                    self.host, self.job, job_number))
            return False

        result = build_info.get('result')
        if result is None:
            sleep(self.wait_check_interval)

    if result is None:
        log.fatal(
            '[{}] [jenkins] Timeout waiting for job run {}/{}'.format(
                self.host, self.job, job_number))
        return False

    return result == 'SUCCESS'