def _do_evacuate(context, host_name, instance_list, reserved_host=None):
    """Evacuate every instance in ``instance_list`` away from ``host_name``.

    When ``reserved_host`` is supplied it is prepared first: if
    ``CONF.host_failure.add_reserved_host_to_aggregate`` is set, it is added
    to the first aggregate that lists ``host_name``; then its service is
    enabled through novaclient and its ``reserved`` attribute is set to
    ``False`` and saved.  Afterwards each instance is handed to
    ``self._evacuate_and_confirm`` on a green thread pool and the pool is
    drained.

    NOTE(review): ``self`` is not a parameter here; this function presumably
    is defined inside a method and closes over it -- confirm against the
    enclosing scope.

    :param context: passed to the novaclient calls and to each worker.
    :param host_name: name of the host the instances are evacuated from.
    :param instance_list: instances, each passed to
        ``self._evacuate_and_confirm``.
    :param reserved_host: optional object exposing ``name``, ``reserved``
        and ``save()``.
    :raises exception.HostRecoveryFailureException: if the shared failure
        list is non-empty once all workers have finished.
    """
    failures = []

    if reserved_host:
        if CONF.host_failure.add_reserved_host_to_aggregate:
            # Assign reserved_host to an aggregate to which the failed
            # compute host belongs to.
            #
            # A failed compute host can be associated with multiple
            # aggregates but operators will not associate it with multiple
            # aggregates in real deployment, so only the very first
            # matching aggregate from the list is used.
            target_aggregate = next(
                (candidate
                 for candidate in self.novaclient.get_aggregate_list(context)
                 if host_name in candidate.hosts),
                None)

            if target_aggregate is not None:
                try:
                    self.novaclient.add_host_to_aggregate(
                        context, reserved_host.name, target_aggregate)
                except exception.Conflict:
                    already_added = ("Host '%(reserved_host)s' already has "
                                     "been added to aggregate "
                                     "'%(aggregate)s'.")
                    LOG.info(already_added,
                             {'reserved_host': reserved_host.name,
                              'aggregate': target_aggregate.name})

        self.novaclient.enable_disable_service(
            context, reserved_host.name, enable=True)

        # Set reserved property of reserved_host to False
        reserved_host.reserved = False
        reserved_host.save()

    pool = greenpool.GreenPool(CONF.host_failure_recovery_threads)
    for instance in instance_list:
        # Workers append to ``failures`` themselves; it is shared on purpose.
        pool.spawn_n(self._evacuate_and_confirm, context, instance,
                     host_name, failures, reserved_host)
    pool.waitall()

    if not failures:
        return

    msg = _("Failed to evacuate instances %(instances)s from "
            "host %(host_name)s.") % {
        'instances': failures,
        'host_name': host_name
    }
    raise exception.HostRecoveryFailureException(message=msg)
def _execute_rh_workflow(self, novaclient, process_what, reserved_host_list):
    """Build and run the reserved-host recovery flow.

    ``reserved_host_list`` is stored in ``process_what`` under the
    ``'reserved_host_list'`` key, a flow engine is obtained from
    ``host_failure.get_rh_flow`` and it is run while a
    ``base.DynamicLogListener`` is attached to it.

    :param novaclient: client object handed to ``host_failure.get_rh_flow``.
    :param process_what: mapping handed to ``host_failure.get_rh_flow``;
        it is modified in place.
    :param reserved_host_list: reserved hosts to use; must be truthy.
    :raises exception.ReservedHostsUnavailable: if ``reserved_host_list``
        is falsy.
    :raises exception.HostRecoveryFailureException: if running the flow
        raises ``exception.LockAlreadyAcquired``.
    """
    if not reserved_host_list:
        unavailable = _('No reserved_hosts available for evacuation.')
        LOG.info(unavailable)
        raise exception.ReservedHostsUnavailable(message=unavailable)

    process_what['reserved_host_list'] = reserved_host_list
    engine = host_failure.get_rh_flow(novaclient, process_what)

    # The try block stays inside the listener context so the listener is
    # still attached when the lock error is translated.
    with base.DynamicLogListener(engine, logger=LOG):
        try:
            engine.run()
        except exception.LockAlreadyAcquired as lock_error:
            raise exception.HostRecoveryFailureException(lock_error.message)
def execute(self, context, instance_list, host_name):
    """Wait for every instance to be evacuated away from ``host_name``.

    For each instance a fixed-interval looping call (interval
    ``CONF.verify_interval``) re-fetches the server through novaclient.
    Polling finishes once the server's hypervisor hostname differs from
    ``host_name`` and either its vm_state went from ``'error'`` to
    ``'active'`` or its vm_state is unchanged.  Each wait is bounded by
    ``CONF.wait_period_after_evacuation``; instances that hit the timeout
    are collected and reported together.

    :param context: passed to ``self.novaclient.get_server``.
    :param instance_list: server objects exposing ``id`` and the
        ``OS-EXT-STS:vm_state`` attribute.
    :param host_name: hypervisor hostname the instances should leave.
    :raises exception.HostRecoveryFailureException: if at least one
        instance timed out.
    """
    failed_evacuation_instances = []

    for instance in instance_list:
        # ``instance`` is bound as a default argument on purpose: the
        # closure would otherwise look the loop variable up at call time,
        # so a poll still running after a timeout could read the state of
        # a later iteration's instance.
        def _wait_for_evacuation(instance=instance):
            new_instance = self.novaclient.get_server(context, instance.id)
            instance_host = getattr(new_instance,
                                    "OS-EXT-SRV-ATTR:hypervisor_hostname")
            old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
            new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")

            if instance_host != host_name:
                if ((old_vm_state == 'error' and
                        new_vm_state == 'active') or
                        old_vm_state == new_vm_state):
                    raise loopingcall.LoopingCallDone()

        periodic_call = loopingcall.FixedIntervalLoopingCall(
            _wait_for_evacuation)
        try:
            # add a timeout to the periodic call.
            periodic_call.start(interval=CONF.verify_interval)
            etimeout.with_timeout(CONF.wait_period_after_evacuation,
                                  periodic_call.wait)
        except etimeout.Timeout:
            # Instance is not evacuated in the expected time_limit.
            failed_evacuation_instances.append(instance.id)
        finally:
            # stop the periodic call, in case of exceptions or Timeout.
            periodic_call.stop()

    if failed_evacuation_instances:
        msg = _("Failed to evacuate instances %(instances)s from "
                "host %(host_name)s.") % {
            'instances': failed_evacuation_instances,
            'host_name': host_name
        }
        raise exception.HostRecoveryFailureException(message=msg)
def _do_evacuate(context, host_name, instance_list, reserved_host=None):
    """Evacuate the instances in ``instance_list`` away from ``host_name``.

    Progress is reported through ``self.update_details`` at each step.
    When ``reserved_host`` is supplied it is prepared first: if
    ``CONF.host_failure.add_reserved_host_to_aggregate`` is set, it is added
    to the first aggregate that lists ``host_name``; then its service is
    enabled through novaclient and ``self.update_host_method`` is called
    for it.  Each instance id is then resolved with
    ``self.novaclient.get_server`` and handed to
    ``self._evacuate_and_confirm`` on a green thread pool.

    NOTE(review): ``self`` is not a parameter here; this function presumably
    is defined inside a method and closes over it -- confirm against the
    enclosing scope.  Also note that ``self.context`` (not the ``context``
    argument) is what is passed to ``update_host_method`` and
    ``get_server``.

    :param context: passed to the aggregate/service novaclient calls and to
        each worker.
    :param host_name: name of the host the instances are evacuated from.
    :param instance_list: instance ids; they are joined with ``','`` in the
        progress messages, so they are expected to be strings.
    :param reserved_host: optional reserved host, used directly in messages
        and novaclient calls.
    :raises exception.HostRecoveryFailureException: if the shared failure
        list is non-empty once all workers have finished.
    """
    failed_ids = []

    if reserved_host:
        self.update_details(
            "Enabling reserved host: '%s'" % reserved_host, 0.1)

        if CONF.host_failure.add_reserved_host_to_aggregate:
            # Assign reserved_host to an aggregate to which the failed
            # compute host belongs to.
            for aggregate in self.novaclient.get_aggregate_list(context):
                if host_name not in aggregate.hosts:
                    continue

                names = {
                    'reserved_host': reserved_host,
                    'aggregate': aggregate.name
                }
                try:
                    self.update_details(
                        ("Add host %(reserved_host)s to "
                         "aggregate %(aggregate)s") % names, 0.2)
                    self.novaclient.add_host_to_aggregate(
                        context, reserved_host, aggregate)
                    self.update_details(
                        ("Added host %(reserved_host)s to "
                         "aggregate %(aggregate)s") % names, 0.3)
                except exception.Conflict:
                    conflict_msg = ("Host '%(reserved_host)s' already has "
                                    "been added to aggregate "
                                    "'%(aggregate)s'.") % names
                    self.update_details(conflict_msg, 1.0)
                    LOG.info(conflict_msg)

                # A failed compute host can be associated with multiple
                # aggregates but operators will not associate it with
                # multiple aggregates in real deployment so adding
                # reserved_host to the very first aggregate from the list.
                break

        self.novaclient.enable_disable_service(
            context, reserved_host, enable=True)

        # Set reserved property of reserved_host to False
        self.update_host_method(self.context, reserved_host)

    pool = greenpool.GreenPool(CONF.host_failure_recovery_threads)
    for instance_id in instance_list:
        self.update_details(
            "Evacuation of instance started: '%s'" % instance_id, 0.5)
        server = self.novaclient.get_server(self.context, instance_id)
        # Workers append to ``failed_ids`` themselves; it is shared on
        # purpose.
        pool.spawn_n(self._evacuate_and_confirm, context, server,
                     host_name, failed_ids, reserved_host)
    pool.waitall()

    evacuated_ids = sorted(set(instance_list) - set(failed_ids))
    if evacuated_ids:
        self.update_details(
            ("Successfully evacuate instances '%(instance_list)s' "
             "from host '%(host_name)s'") % {
                'instance_list': ','.join(evacuated_ids),
                'host_name': host_name
            }, 0.7)

    if failed_ids:
        failure_msg = ("Failed to evacuate instances "
                       "'%(failed_evacuation_instances)s' from host "
                       "'%(host_name)s'") % {
            'failed_evacuation_instances': ','.join(failed_ids),
            'host_name': host_name
        }
        self.update_details(failure_msg, 0.7)
        raise exception.HostRecoveryFailureException(message=failure_msg)

    self.update_details("Evacuation process completed!", 1.0)