def _start_child(self, wrap):
    if len(wrap.forktimes) > wrap.workers:
        # Limit ourselves to one process a second (over the period of
        # number of workers * 1 second). This will allow workers to
        # start up quickly but ensure we don't fork off children that
        # die instantly too quickly.
        if time.time() - wrap.forktimes[0] < wrap.workers:
            LOG.info(_LI('Forking too fast, sleeping'))
            time.sleep(1)

        wrap.forktimes.pop(0)

    wrap.forktimes.append(time.time())

    pid = os.fork()
    if pid == 0:
        launcher = self._child_process(wrap.service)
        while True:
            self._child_process_handle_signal()
            status, signo = self._child_wait_for_exit_or_signal(launcher)
            if not _is_sighup_and_daemon(signo):
                break
            launcher.restart()

        os._exit(status)

    LOG.info(_LI('Started child %d'), pid)

    wrap.children.add(pid)
    self.children[pid] = wrap

    return pid

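# A minimal sketch of the wrapper object _start_child() expects; in the
# oslo-style service module this is a small record holding the service,
# the desired worker count, the fork timestamps and the child pid set.
# The class name and attribute layout here are assumptions based on how
# the functions above and launch_service() below use it.
class ServiceWrapper(object):
    def __init__(self, service, workers):
        self.service = service
        self.workers = workers
        self.children = set()
        self.forktimes = []
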
def _wait_child(self):
    try:
        # Don't block if no child processes have exited
        pid, status = os.waitpid(0, os.WNOHANG)
        if not pid:
            return None
    except OSError as exc:
        if exc.errno not in (errno.EINTR, errno.ECHILD):
            raise
        return None

    if os.WIFSIGNALED(status):
        sig = os.WTERMSIG(status)
        LOG.info(_LI('Child %(pid)d killed by signal %(sig)d'),
                 dict(pid=pid, sig=sig))
    else:
        code = os.WEXITSTATUS(status)
        LOG.info(_LI('Child %(pid)s exited with status %(code)d'),
                 dict(pid=pid, code=code))

    if pid not in self.children:
        LOG.warning(_LW('pid %d not in child list'), pid)
        return None

    wrap = self.children.pop(pid)
    wrap.children.remove(pid)
    return wrap

def wait(self):
    """Loop waiting on children to die and respawning as necessary."""

    systemd.notify_once()
    LOG.debug('Full set of CONF:')
    CONF.log_opt_values(LOG, std_logging.DEBUG)

    try:
        while True:
            self.handle_signal()
            self._respawn_children()
            # No signal means that stop was called. Don't clean up here.
            if not self.sigcaught:
                return

            signame = _signo_to_signame(self.sigcaught)
            LOG.info(_LI('Caught %s, stopping children'), signame)
            if not _is_sighup_and_daemon(self.sigcaught):
                break

            for pid in self.children:
                os.kill(pid, signal.SIGHUP)
            self.running = True
            self.sigcaught = None
    except eventlet.greenlet.GreenletExit:
        LOG.info(_LI("Wait called after thread killed. Cleaning up."))

    self.stop()

def _wait_for_exit_or_signal(self, ready_callback=None):
    status = None
    signo = 0

    LOG.debug('Full set of CONF:')
    CONF.log_opt_values(LOG, std_logging.DEBUG)

    try:
        if ready_callback:
            ready_callback()
        super(ServiceLauncher, self).wait()
    except SignalExit as exc:
        signame = _signo_to_signame(exc.signo)
        LOG.info(_LI('Caught %s, exiting'), signame)
        status = exc.code
        signo = exc.signo
    except SystemExit as exc:
        status = exc.code
    finally:
        self.stop()
        if rpc:
            try:
                rpc.cleanup()
            except Exception:
                # We're shutting down, so it doesn't matter at this point.
                LOG.exception(_LE('Exception during rpc cleanup.'))

    return status, signo

def _do_node_tear_down(self, context, task):
    """Internal RPC method to tear down an existing node deployment."""
    node = task.node
    try:
        task.driver.deploy.clean_up(task)
        new_state = task.driver.deploy.tear_down(task)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            LOG.warning(_('Error in tear_down of node %(node)s: %(err)s'),
                        {'node': task.node.uuid, 'err': e})
            node.last_error = _("Failed to tear down. Error: %s") % e
            node.provision_state = states.ERROR
            node.target_provision_state = states.NOSTATE
    else:
        # NOTE(deva): Some drivers may return states.DELETING
        #             eg. if they are waiting for a callback
        if new_state == states.DELETED:
            node.target_provision_state = states.NOSTATE
            node.provision_state = states.NOSTATE
            LOG.info(_LI('Successfully unprovisioned node %(node)s with '
                         'instance %(instance)s.'),
                     {'node': node.uuid, 'instance': node.instance_uuid})
        else:
            node.provision_state = new_state
    finally:
        # Clean the instance_info
        node.instance_info = {}
        node.save(context)

def acquire(self):
    basedir = os.path.dirname(self.fname)

    if not os.path.exists(basedir):
        fileutils.ensure_tree(basedir)
        LOG.info(_LI('Created lock path: %s'), basedir)

    self.lockfile = open(self.fname, 'w')

    while True:
        try:
            # Using non-blocking locks since green threads are not
            # patched to deal with blocking locking calls.
            # Also upon reading the MSDN docs for locking(), it seems
            # to have a laughable 10 attempts "blocking" mechanism.
            self.trylock()
            LOG.debug('Got file lock "%s"', self.fname)
            return True
        except IOError as e:
            if e.errno in (errno.EACCES, errno.EAGAIN):
                # external locks synchronise things like iptables
                # updates - give it some time to prevent busy spinning
                time.sleep(0.01)
            else:
                raise threading.ThreadError(_("Unable to acquire lock on"
                                              " `%(filename)s` due to"
                                              " %(exception)s") %
                                            {'filename': self.fname,
                                             'exception': e})

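# A minimal usage sketch, assuming this acquire() belongs to the
# file-based external lock class used by this module and that a
# matching release() exists; the class name and lock path below are
# illustrative assumptions.
lock = _FileLock('/var/lock/myservice/iptables.lock')
if lock.acquire():
    try:
        pass  # critical section protected across processes
    finally:
        lock.release()
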
def _set_session_sql_mode(dbapi_con, connection_rec,
                          connection_proxy, sql_mode=None):
    """Set the sql_mode session variable.

    MySQL supports several server modes. The default is None, but
    sessions may choose to enable server modes like TRADITIONAL, ANSI,
    several STRICT_* modes and others.

    Note: passing in '' (empty string) for sql_mode clears
    the SQL mode for the session, overriding a potentially set server
    default. Passing in None (the default) makes this a no-op, meaning
    if a server-side SQL mode is set, it still applies.
    """
    cursor = dbapi_con.cursor()
    if sql_mode is not None:
        cursor.execute("SET SESSION sql_mode = %s", [sql_mode])

    # Check against the real effective SQL mode. Even when unset by
    # our own config, the server may still be operating in a specific
    # SQL mode as set by the server configuration
    cursor.execute("SHOW VARIABLES LIKE 'sql_mode'")
    row = cursor.fetchone()
    if row is None:
        LOG.warning(_LW('Unable to detect effective SQL mode'))
        return
    realmode = row[1]
    LOG.info(_LI('MySQL server mode set to %s') % realmode)
    # 'TRADITIONAL' mode enables several other modes, so
    # we need a substring match here
    if not ('TRADITIONAL' in realmode.upper() or
            'STRICT_ALL_TABLES' in realmode.upper()):
        LOG.warning(_LW("MySQL SQL mode is '%s', "
                        "consider enabling TRADITIONAL or STRICT_ALL_TABLES")
                    % realmode)

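# A minimal sketch of how this hook might be wired up. SQLAlchemy's
# 'checkout' pool event passes exactly the three positional arguments
# the function above expects; the engine object and the
# CONF.database.mysql_sql_mode option name are assumptions here.
import functools

import sqlalchemy

sqlalchemy.event.listen(
    engine, 'checkout',
    functools.partial(_set_session_sql_mode,
                      sql_mode=CONF.database.mysql_sql_mode))
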
def destroy_node(self, context, node_id):
    """Delete a node.

    :param context: request context.
    :param node_id: node id or uuid.
    :raises: NodeLocked if node is locked by another conductor.
    :raises: NodeAssociated if the node contains an instance
             associated with it.
    :raises: NodeInWrongPowerState if the node is not powered off.
    """
    with task_manager.acquire(context, node_id) as task:
        node = task.node
        if node.instance_uuid is not None:
            raise exception.NodeAssociated(node=node.uuid,
                                           instance=node.instance_uuid)
        if node.power_state not in [states.POWER_OFF, states.NOSTATE]:
            msg = (_("Node %s can't be deleted because it's not "
                     "powered off") % node.uuid)
            raise exception.NodeInWrongPowerState(msg)
        # FIXME(comstud): Remove context argument after we ensure
        # every instantiation of Node includes the context
        node.destroy(context)
        LOG.info(_LI('Successfully deleted node %(node)s.'),
                 {'node': node.uuid})

def _do_node_deploy(self, context, task):
    """Prepare the environment and deploy a node."""
    node = task.node
    try:
        task.driver.deploy.prepare(task)
        new_state = task.driver.deploy.deploy(task)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            LOG.warning(_('Error in deploy of node %(node)s: %(err)s'),
                        {'node': task.node.uuid, 'err': e})
            node.last_error = _("Failed to deploy. Error: %s") % e
            node.provision_state = states.DEPLOYFAIL
            node.target_provision_state = states.NOSTATE
    else:
        # NOTE(deva): Some drivers may return states.DEPLOYWAIT
        #             eg. if they are waiting for a callback
        if new_state == states.DEPLOYDONE:
            node.target_provision_state = states.NOSTATE
            node.provision_state = states.ACTIVE
            LOG.info(_LI('Successfully deployed node %(node)s with '
                         'instance %(instance)s.'),
                     {'node': node.uuid, 'instance': node.instance_uuid})
        else:
            node.provision_state = new_state
    finally:
        node.save(context)

def _pipe_watcher(self):
    # This will block until the write end is closed when the parent
    # dies unexpectedly
    self.readpipe.read()

    LOG.info(_LI('Parent process has died unexpectedly, exiting'))

    sys.exit(1)

def del_host(self):
    self._keepalive_evt.set()
    try:
        self.dbapi.unregister_conductor(self.host)
        LOG.info(_LI('Successfully stopped conductor with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})
    except exception.ConductorNotFound:
        pass

def __init__(cls, names, bases, dict_):
    """Metaclass that allows us to collect decorated periodic tasks."""
    super(_PeriodicTasksMeta, cls).__init__(names, bases, dict_)

    # NOTE(sirp): if the attribute is not present then we must be the base
    # class, so, go ahead and initialize it. If the attribute is present,
    # then we're a subclass so make a copy of it so we don't step on our
    # parent's toes.
    try:
        cls._periodic_tasks = cls._periodic_tasks[:]
    except AttributeError:
        cls._periodic_tasks = []

    try:
        cls._periodic_spacing = cls._periodic_spacing.copy()
    except AttributeError:
        cls._periodic_spacing = {}

    for value in cls.__dict__.values():
        if getattr(value, '_periodic_task', False):
            task = value
            name = task.__name__

            if task._periodic_spacing < 0:
                LOG.info(_LI('Skipping periodic task %(task)s because '
                             'its interval is negative'),
                         {'task': name})
                continue
            if not task._periodic_enabled:
                LOG.info(_LI('Skipping periodic task %(task)s because '
                             'it is disabled'),
                         {'task': name})
                continue

            # A periodic spacing of zero indicates that this task should
            # be run on the default interval to avoid running too
            # frequently.
            if task._periodic_spacing == 0:
                task._periodic_spacing = DEFAULT_INTERVAL

            cls._periodic_tasks.append((name, task))
            cls._periodic_spacing[name] = task._periodic_spacing

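# A minimal sketch of how a method ends up with the attributes this
# metaclass inspects. The decorator below is illustrative; the real one
# lives in the periodic_task module, and the task class and method names
# are assumptions.
import six


def periodic_task(spacing=0, enabled=True):
    def decorator(f):
        f._periodic_task = True
        f._periodic_spacing = spacing
        f._periodic_enabled = enabled
        return f
    return decorator


class MyPeriodicTasks(six.with_metaclass(_PeriodicTasksMeta, object)):
    @periodic_task(spacing=60)
    def sync_power_states(self, context):
        pass  # collected by the metaclass, run roughly every 60 seconds
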
def remove_external_lock_file(name, lock_file_prefix=None):
    """Remove an external lock file when it's not used anymore
    This will be helpful when we have a lot of lock files
    """
    with internal_lock(name):
        lock_file_path = _get_lock_path(name, lock_file_prefix)
        try:
            os.remove(lock_file_path)
        except OSError:
            LOG.info(_LI('Failed to remove file %(file)s'),
                     {'file': lock_file_path})

def drop_old_duplicate_entries_from_table(migrate_engine, table_name,
                                          use_soft_delete, *uc_column_names):
    """Drop all old rows having the same values for columns in uc_columns.

    This method drops (or marks as `deleted`, if use_soft_delete is True)
    old duplicate rows from the table with name `table_name`.

    :param migrate_engine:  Sqlalchemy engine
    :param table_name:      Table with duplicates
    :param use_soft_delete: If True - values will be marked as `deleted`,
                            if False - values will be removed from table
    :param uc_column_names: Unique constraint columns
    """
    meta = MetaData()
    meta.bind = migrate_engine

    table = Table(table_name, meta, autoload=True)
    columns_for_group_by = [table.c[name] for name in uc_column_names]

    columns_for_select = [func.max(table.c.id)]
    columns_for_select.extend(columns_for_group_by)

    duplicated_rows_select = select(columns_for_select,
                                    group_by=columns_for_group_by,
                                    having=func.count(table.c.id) > 1)

    for row in migrate_engine.execute(duplicated_rows_select):
        # NOTE(boris-42): Do not remove row that has the biggest ID.
        delete_condition = table.c.id != row[0]
        is_none = None  # workaround for pyflakes
        delete_condition &= table.c.deleted_at == is_none
        for name in uc_column_names:
            delete_condition &= table.c[name] == row[name]

        rows_to_delete_select = select([table.c.id]).where(delete_condition)
        for row in migrate_engine.execute(rows_to_delete_select).fetchall():
            LOG.info(_LI("Deleting duplicated row with id: %(id)s from table: "
                         "%(table)s") % dict(id=row[0], table=table_name))

        if use_soft_delete:
            delete_statement = table.update().\
                where(delete_condition).\
                values({
                    'deleted': literal_column('id'),
                    'updated_at': literal_column('updated_at'),
                    'deleted_at': timeutils.utcnow()
                })
        else:
            delete_statement = table.delete().where(delete_condition)
        migrate_engine.execute(delete_statement)

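# A hypothetical migration sketch using the helper above; the table and
# column names are illustrative assumptions, not a real schema.
def upgrade(migrate_engine):
    # Soft-delete all but the newest row for each (resource_id, key)
    # pair in the 'metadata' table.
    drop_old_duplicate_entries_from_table(migrate_engine, 'metadata',
                                          True, 'resource_id', 'key')
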
def stop(self):
    super(RPCService, self).stop()
    try:
        self.rpcserver.stop()
        self.rpcserver.wait()
    except Exception as e:
        LOG.exception(_("Service error occurred when stopping the "
                        "RPC server. Error: %s"), e)

    try:
        self.manager.del_host()
    except Exception as e:
        LOG.exception(_("Service error occurred when cleaning up "
                        "the RPC manager. Error: %s"), e)

    LOG.info(_LI("Stopped RPC server for service %(service)s on host "
                 "%(host)s."),
             {"service": self.topic, "host": self.host})

def stop(self):
    """Terminate child processes and wait on each."""
    self.running = False
    for pid in self.children:
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError as exc:
            if exc.errno != errno.ESRCH:
                raise

    # Wait for children to die
    if self.children:
        LOG.info(_LI('Waiting on %d children to exit'), len(self.children))
        while self.children:
            self._wait_child()

def start(self):
    super(RPCService, self).start()
    admin_context = context.RequestContext('admin', 'admin', is_admin=True)
    self.tg.add_dynamic_timer(
        self.manager.periodic_tasks,
        periodic_interval_max=cfg.CONF.periodic_interval,
        context=admin_context)

    self.manager.init_host()
    target = messaging.Target(topic=self.topic, server=self.host)
    endpoints = [self.manager]
    serializer = objects_base.IronicObjectSerializer()
    self.rpcserver = rpc.get_server(target, endpoints, serializer)
    self.rpcserver.start()

    LOG.info(_LI('Created RPC server for service %(service)s on host '
                 '%(host)s.'),
             {'service': self.topic, 'host': self.host})

def initialize_if_enabled():
    backdoor_locals = {
        'exit': _dont_use_this,      # So we don't exit the entire process
        'quit': _dont_use_this,      # So we don't exit the entire process
        'fo': _find_objects,
        'pgt': _print_greenthreads,
        'pnt': _print_nativethreads,
    }

    if CONF.backdoor_port is None:
        return None

    start_port, end_port = _parse_port_range(str(CONF.backdoor_port))

    # NOTE(johannes): The standard sys.displayhook will print the value of
    # the last expression and set it to __builtin__._, which overwrites
    # the __builtin__._ that gettext sets. Let's switch to using pprint
    # since it won't interact poorly with gettext, and it's easier to
    # read the output too.
    def displayhook(val):
        if val is not None:
            pprint.pprint(val)
    sys.displayhook = displayhook

    sock = _listen('localhost', start_port, end_port, eventlet.listen)

    # In the case of backdoor port being zero, a port number is assigned by
    # listen(). In any case, pull the port number out here.
    port = sock.getsockname()[1]
    LOG.info(_LI('Eventlet backdoor listening on %(port)s '
                 'for process %(pid)d')
             % {'port': port, 'pid': os.getpid()})
    eventlet.spawn_n(eventlet.backdoor.backdoor_server, sock,
                     locals=backdoor_locals)
    return port

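# A minimal wiring sketch, assuming an oslo.config style CONF object.
# The 'start:end' string follows the port-range form that
# _parse_port_range() consumes above, and 0 would request an ephemeral
# port; the override call itself is illustrative only.
CONF.set_override('backdoor_port', '8000:9000')
port = initialize_if_enabled()
if port is not None:
    LOG.debug('Eventlet backdoor shell available on port %d', port)
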
def _child_wait_for_exit_or_signal(self, launcher):
    status = 0
    signo = 0

    # NOTE(johannes): All exceptions are caught to ensure this
    # doesn't fallback into the loop spawning children. It would
    # be bad for a child to spawn more children.
    try:
        launcher.wait()
    except SignalExit as exc:
        signame = _signo_to_signame(exc.signo)
        LOG.info(_LI('Child caught %s, exiting'), signame)
        status = exc.code
        signo = exc.signo
    except SystemExit as exc:
        status = exc.code
    except BaseException:
        LOG.exception(_LE('Unhandled exception'))
        status = 2
    finally:
        launcher.stop()

    return status, signo

def init_host(self):
    self.dbapi = dbapi.get_instance()

    self.driver_factory = driver_factory.DriverFactory()
    self.drivers = self.driver_factory.names
    """List of driver names which this conductor supports."""

    try:
        self.dbapi.register_conductor({'hostname': self.host,
                                       'drivers': self.drivers})
    except exception.ConductorAlreadyRegistered:
        LOG.warn(_("A conductor with hostname %(hostname)s "
                   "was previously registered. Updating registration")
                 % {'hostname': self.host})
        self.dbapi.unregister_conductor(self.host)
        self.dbapi.register_conductor({'hostname': self.host,
                                       'drivers': self.drivers})

    self.ring_manager = hash.HashRingManager()
    """Consistent hash ring which maps drivers to conductors."""

    self._worker_pool = greenpool.GreenPool(
        size=CONF.conductor.workers_pool_size)
    """GreenPool of background workers for performing tasks async."""

    # Spawn a dedicated greenthread for the keepalive
    try:
        self._keepalive_evt = threading.Event()
        self._spawn_worker(self._conductor_service_record_keepalive)
        LOG.info(_LI('Successfully started conductor with hostname '
                     '%(hostname)s.'),
                 {'hostname': self.host})
    except exception.NoFreeConductorWorker:
        with excutils.save_and_reraise_exception():
            LOG.critical(_('Failed to start keepalive'))
            self.del_host()

def launch_service(self, service, workers=1):
    wrap = ServiceWrapper(service, workers)

    LOG.info(_LI('Starting %d workers'), wrap.workers)
    while self.running and len(wrap.children) < wrap.workers:
        self._start_child(wrap)

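# A minimal usage sketch, assuming the surrounding oslo-style
# ProcessLauncher class and a Service instance named `service`; the
# worker-count option is an assumption.
launcher = ProcessLauncher()
launcher.launch_service(service, workers=CONF.workers)
launcher.wait()
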
def node_power_action(task, state):
    """Change power state or reset for a node.

    Perform the requested power action if the transition is required.

    :param task: a TaskManager instance containing the node to act on.
    :param state: Any power state from ironic.common.states. If the
        state is 'REBOOT' then a reboot will be attempted, otherwise
        the node power state is directly set to 'state'.
    :raises: InvalidParameterValue when the wrong state is specified
             or the wrong driver info is specified.
    :raises: other exceptions by the node's power driver if something
             wrong occurred during the power action.
    """
    node = task.node
    context = task.context
    new_state = states.POWER_ON if state == states.REBOOT else state

    if state != states.REBOOT:
        try:
            curr_state = task.driver.power.get_power_state(task)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                node['last_error'] = _(
                    "Failed to change power state to '%(target)s'. "
                    "Error: %(error)s") % {'target': new_state, 'error': e}
                node.save(context)

        if curr_state == new_state:
            # Neither the ironic service nor the hardware has erred. The
            # node is, for some reason, already in the requested state,
            # though we don't know why. eg, perhaps the user previously
            # requested the node POWER_ON, the network delayed those IPMI
            # packets, and they are trying again -- but the node finally
            # responds to the first request, and so the second request
            # gets to this check and stops.
            # This isn't an error, so we'll clear last_error field
            # (from previous operation), log a warning, and return.
            node['last_error'] = None
            node.save(context)
            LOG.warn(_("Not going to change_node_power_state because "
                       "current state = requested state = '%(state)s'.")
                     % {'state': curr_state})
            return

    # Set the target_power_state and clear any last_error, since we're
    # starting a new operation. This will expose to other processes
    # and clients that work is in progress.
    node['target_power_state'] = new_state
    node['last_error'] = None
    node.save(context)

    # take power action
    try:
        if state != states.REBOOT:
            task.driver.power.set_power_state(task, new_state)
        else:
            task.driver.power.reboot(task)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            node['last_error'] = _(
                "Failed to change power state to '%(target)s'. "
                "Error: %(error)s") % {'target': new_state, 'error': e}
    else:
        # success!
        node['power_state'] = new_state
        LOG.info(_LI('Successfully set node %(node)s power state to '
                     '%(state)s.'),
                 {'node': node.uuid, 'state': new_state})
    finally:
        node['target_power_state'] = states.NOSTATE
        node.save(context)

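# A minimal sketch of how node_power_action() is typically driven from
# the conductor, under an acquired task lock; `context` and `node_id`
# are assumed to come from the surrounding RPC method.
with task_manager.acquire(context, node_id) as task:
    node_power_action(task, states.REBOOT)
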