def del_host(self, deregister=True):
    if deregister:
        try:
            self.dbapi.unregister_wampagent(self.host)
            LOG.info(_LI('Successfully stopped wampagent with hostname '
                         '%(hostname)s.'),
                     {'hostname': self.host})
        except exception.WampAgentNotFound:
            pass
    else:
        LOG.info(_LI('Not deregistering wampagent with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})
def del_host(self, deregister=True):
    if deregister:
        try:
            self.dbapi.unregister_conductor(self.host)
            LOG.info(_LI('Successfully stopped conductor with hostname '
                         '%(hostname)s.'),
                     {'hostname': self.host})
        except exception.ConductorNotFound:
            pass
    else:
        LOG.info(_LI('Not deregistering conductor with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})
def init_host(self):
    self.dbapi = dbapi.get_instance()

    self._keepalive_evt = threading.Event()
    """Event for the keepalive thread."""

    self._worker_pool = greenpool.GreenPool(
        size=CONF.conductor.workers_pool_size)
    """GreenPool of background workers for performing tasks async."""

    try:
        # Register this conductor with the cluster
        cdr = self.dbapi.register_conductor({'hostname': self.host})
    except exception.ConductorAlreadyRegistered:
        LOG.warn(_LW("A conductor with hostname %(hostname)s "
                     "was previously registered. Updating registration"),
                 {'hostname': self.host})
        cdr = self.dbapi.register_conductor({'hostname': self.host},
                                            update_existing=True)
    self.conductor = cdr

    # Spawn a dedicated greenthread for the keepalive
    try:
        self._spawn_worker(self._conductor_service_record_keepalive)
        LOG.info(_LI('Successfully started conductor with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})
    except exception.NoFreeConductorWorker:
        with excutils.save_and_reraise_exception():
            LOG.critical(_LC('Failed to start keepalive'))
            self.del_host()
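The _conductor_service_record_keepalive worker spawned above is not shown in this section. A minimal sketch of such a loop follows; it assumes the DB API exposes a touch_conductor() call and that CONF.conductor.heartbeat_interval holds the refresh interval, neither of which appears in the listings here.

def _conductor_service_record_keepalive(self):
    # Hypothetical sketch: periodically refresh this conductor's DB record
    # so the rest of the cluster considers it alive, until del_host()
    # sets the keepalive event.
    while not self._keepalive_evt.is_set():
        try:
            self.dbapi.touch_conductor(self.host)  # assumed DB API method
        except exception.ConductorNotFound:
            # Registration was lost; re-register and carry on.
            self.dbapi.register_conductor({'hostname': self.host},
                                          update_existing=True)
        self._keepalive_evt.wait(CONF.conductor.heartbeat_interval)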
def _handle_signal(self, signo, frame):
    LOG.info(_LI('Got signal SIGUSR1. Not deregistering on next shutdown '
                 'of service %(service)s on host %(host)s.'),
             {'service': self.topic, 'host': self.host})
    self.deregister = False
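The handler above is wired up by the handle_signal() call made in start() below. A likely shape for that helper, assuming the standard-library signal module is used, is:

import signal

def handle_signal(self):
    # Register _handle_signal for SIGUSR1 so an operator can opt out of
    # deregistration before the service shuts down.
    signal.signal(signal.SIGUSR1, self._handle_signal)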
def del_host(self, deregister=True):
    self._keepalive_evt.set()
    if deregister:
        try:
            # Inform the cluster that this conductor is shutting down.
            # Note that rebalancing will not occur immediately, but when
            # the periodic sync takes place.
            self.dbapi.unregister_conductor(self.host)
            LOG.info(_LI('Successfully stopped conductor with hostname '
                         '%(hostname)s.'),
                     {'hostname': self.host})
        except exception.ConductorNotFound:
            pass
    else:
        LOG.info(_LI('Not deregistering conductor with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})

    # Waiting here to give workers the chance to finish. This has the
    # benefit of releasing locks workers placed on nodes, as well as
    # having work complete normally.
    self._worker_pool.waitall()
def stop(self):
    try:
        self.rpcserver.stop()
        self.rpcserver.wait()
    except Exception as e:
        LOG.exception(_LE('Service error occurred when stopping the '
                          'RPC server. Error: %s'), e)

    try:
        self.manager.del_host(deregister=self.deregister)
    except Exception as e:
        LOG.exception(_LE('Service error occurred when cleaning up '
                          'the RPC manager. Error: %s'), e)

    super(RPCService, self).stop(graceful=True)
    LOG.info(_LI('Stopped RPC server for service %(service)s on host '
                 '%(host)s.'),
             {'service': self.topic, 'host': self.host})
def start(self):
    super(RPCService, self).start()
    admin_context = context.RequestContext('admin', 'admin', is_admin=True)

    target = messaging.Target(topic=self.topic, server=self.host)
    endpoints = [self.manager]
    serializer = objects_base.IotronicObjectSerializer()
    self.rpcserver = rpc.get_server(target, endpoints, serializer)
    self.rpcserver.start()

    self.handle_signal()
    self.manager.init_host()
    self.tg.add_dynamic_timer(
        self.manager.periodic_tasks,
        periodic_interval_max=cfg.CONF.periodic_interval,
        context=admin_context)

    LOG.info(_LI('Created RPC server for service %(service)s on host '
                 '%(host)s.'),
             {'service': self.topic, 'host': self.host})
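start() and stop() are normally driven by oslo.service rather than called directly. The following launcher sketch is illustrative only; the RPCService constructor arguments and the 'iotronic.conductor.manager' / 'ConductorManager' names are assumptions not shown in this section.

from oslo_config import cfg
from oslo_service import service

CONF = cfg.CONF

def main():
    # Assumed constructor signature: (host, manager module path, manager class).
    svc = RPCService(CONF.host,
                     'iotronic.conductor.manager',
                     'ConductorManager')
    # oslo.service calls svc.start() now and svc.stop() on shutdown.
    launcher = service.launch(CONF, svc)
    launcher.wait()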
def destroy_node(self, context, node_id):
    """Delete a node.

    :param context: request context.
    :param node_id: node id or uuid.
    :raises: NodeLocked if node is locked by another conductor.
    :raises: NodeNotConnected if the node is not connected.
    """
    with task_manager.acquire(context, node_id) as task:
        node = task.node
        r = WampResponse()
        r.clearConfig()
        response = self.wamp.rpc_call(
            'stack4things.' + node.uuid + '.configure',
            r.getResponse())
        if response['result'] == 0:
            node.destroy()
            LOG.info(_LI('Successfully deleted node %(node)s.'),
                     {'node': node.uuid})
        else:
            raise exception.NodeNotConnected(node=node.uuid)
def node_power_action(task, new_state):
    """Change power state or reset for a node.

    Perform the requested power action if the transition is required.

    :param task: a TaskManager instance containing the node to act on.
    :param new_state: Any power state from iotronic.common.states. If the
        state is 'REBOOT' then a reboot will be attempted, otherwise the
        node power state is directly set to 'state'.
    :raises: InvalidParameterValue when the wrong state is specified or the
        wrong driver info is specified.
    :raises: other exceptions by the node's power driver if something wrong
        occurred during the power action.
    """
    node = task.node
    target_state = states.POWER_ON if new_state == states.REBOOT else new_state

    if new_state != states.REBOOT:
        try:
            curr_state = task.driver.power.get_power_state(task)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                node['last_error'] = _(
                    "Failed to change power state to '%(target)s'. "
                    "Error: %(error)s") % {'target': new_state, 'error': e}
                node['target_power_state'] = states.NOSTATE
                node.save()

        if curr_state == new_state:
            # Neither the iotronic service nor the hardware has erred. The
            # node is, for some reason, already in the requested state,
            # though we don't know why. eg, perhaps the user previously
            # requested the node POWER_ON, the network delayed those IPMI
            # packets, and they are trying again -- but the node finally
            # responds to the first request, and so the second request
            # gets to this check and stops.
            # This isn't an error, so we'll clear last_error field
            # (from previous operation), log a warning, and return.
            node['last_error'] = None
            # NOTE(dtantsur): under rare conditions we can get out of sync here
            node['power_state'] = new_state
            node['target_power_state'] = states.NOSTATE
            node.save()
            LOG.warn(_LW("Not going to change_node_power_state because "
                         "current state = requested state = '%(state)s'."),
                     {'state': curr_state})
            return

        if curr_state == states.ERROR:
            # be optimistic and continue action
            LOG.warn(_LW("Driver returns ERROR power state for node %s."),
                     node.uuid)

    # Set the target_power_state and clear any last_error, if we're
    # starting a new operation. This will expose to other processes
    # and clients that work is in progress.
    if node['target_power_state'] != target_state:
        node['target_power_state'] = target_state
        node['last_error'] = None
        node.save()

    # take power action
    try:
        if new_state != states.REBOOT:
            task.driver.power.set_power_state(task, new_state)
        else:
            task.driver.power.reboot(task)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            node['last_error'] = _(
                "Failed to change power state to '%(target)s'. "
                "Error: %(error)s") % {'target': target_state, 'error': e}
    else:
        # success!
        node['power_state'] = target_state
        LOG.info(_LI('Successfully set node %(node)s power state to '
                     '%(state)s.'),
                 {'node': node.uuid, 'state': target_state})
    finally:
        node['target_power_state'] = states.NOSTATE
        node.save()
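node_power_action() expects a TaskManager that has already been acquired, as in destroy_node() above. A short usage sketch follows; the import paths are assumptions and the power_on() helper is purely illustrative.

from iotronic.common import states
from iotronic.conductor import task_manager  # assumed module path

def power_on(context, node_id):
    # Acquire an exclusive lock on the node, then request power-on;
    # node_power_action handles state bookkeeping and error reporting.
    with task_manager.acquire(context, node_id) as task:
        node_power_action(task, states.POWER_ON)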