def _execute_ilo_clean_step(node, step, *args, **kwargs):
    """Run a single clean step through the node's iLO object.

    The step is looked up by name as an attribute of the object returned by
    ``ilo_common.get_ilo_object`` and is then called with the supplied
    arguments.

    :param node: an Ironic node object.
    :param step: name of the clean step to be executed.
    :param args: positional arguments forwarded to the clean step.
    :param kwargs: keyword arguments forwarded to the clean step.
    :raises: NodeCleaningFailure, if the step does not exist on the iLO
        object or if it fails with an IloError.
    """
    ilo = ilo_common.get_ilo_object(node)

    try:
        step_method = getattr(ilo, step)
    except AttributeError:
        # The installed proliantutils package does not expose this step;
        # the operator has to upgrade the package to a newer version.
        not_found = _("Clean step '%s' not found. 'proliantutils' package "
                      "needs to be updated.") % step
        raise exception.NodeCleaningFailure(not_found)

    try:
        step_method(*args, **kwargs)
    except ilo_error.IloCommandNotSupportedError:
        # An unsupported step (noted upstream as the Gen8-and-below case)
        # is not fatal: warn and let cleaning continue.
        LOG.warning("'%(step)s' clean step is not supported on node "
                    "%(uuid)s. Skipping the clean step.",
                    {'step': step, 'uuid': node.uuid})
    except ilo_error.IloError as ilo_exception:
        failure = _("Clean step %(step)s failed "
                    "on node %(node)s with error: %(err)s") % {
            'node': node.uuid, 'step': step, 'err': ilo_exception}
        raise exception.NodeCleaningFailure(failure)
def _validate_clean_steps(steps, node_uuid):
    """Check the structure of clean steps loaded from a clean_steps file.

    Every step needs a non-empty ``name`` and an ``interface``; every
    argument marked ``required`` needs a ``value``. Step names must also be
    unique across the whole list.

    :param steps: list of clean step dictionaries.
    :param node_uuid: UUID of the node, used for error reporting.
    :raises: NodeCleaningFailure, if any field is missing or step names
        are not unique.
    """
    problems = []
    for entry in steps:
        step_name = entry.get('name')
        if not step_name:
            # Without a name nothing else about the step can be reported.
            problems.append({'name': 'undefined', 'field': 'name'})
        else:
            if 'interface' not in entry:
                problems.append({'name': step_name, 'field': 'interface'})
            problems.extend(
                {'name': step_name, 'field': '%s.value' % arg_name}
                for arg_name, arg in entry.get('args', {}).items()
                if arg.get('required', False) and 'value' not in arg)

    if problems:
        err_string = ', '.join(
            'name %(name)s, field %(field)s' % item for item in problems)
        msg = _("Malformed clean_steps file: %s") % err_string
        LOG.error(msg)
        raise exception.NodeCleaningFailure(node=node_uuid, reason=msg)

    unique_names = {entry['name'] for entry in steps}
    if len(unique_names) == len(steps):
        return

    msg = _("Cleaning steps do not have unique names.")
    LOG.error(msg)
    raise exception.NodeCleaningFailure(node=node_uuid, reason=msg)
def delete_cleaning_ports(self, task):
    """Remove this node's Neutron ports from the cleaning network.

    Lists the ports on the configured cleaning network and deletes those
    whose MAC address matches one of the node's Ironic ports.

    :param task: a TaskManager instance.
    :raises: NodeCleaningFailure, if Neutron cannot be reached while
        listing or deleting ports.
    """
    neutron_client = _build_client(task.context.auth_token)
    node_macs = [ironic_port.address for ironic_port in task.ports]
    cleaning_net = CONF.neutron.cleaning_network_uuid

    try:
        listing = neutron_client.list_ports(network_id=cleaning_net)
    except neutron_client_exc.ConnectionFailed as e:
        msg = (_('Could not get cleaning network vif for %(node)s '
                 'from Neutron, possible network issue. %(exc)s') %
               {'node': task.node.uuid, 'exc': e})
        LOG.exception(msg)
        raise exception.NodeCleaningFailure(msg)

    # Walk the Neutron port dicts and drop only the ones that belong to us.
    for candidate in listing.get('ports', []):
        if candidate.get('mac_address') not in node_macs:
            continue
        try:
            neutron_client.delete_port(candidate.get('id'))
        except neutron_client_exc.ConnectionFailed as e:
            msg = (_('Could not remove cleaning ports on network '
                     '%(net)s from %(node)s, possible network issue. '
                     '%(exc)s') %
                   {'net': CONF.neutron.cleaning_network_uuid,
                    'node': task.node.uuid,
                    'exc': e})
            LOG.exception(msg)
            raise exception.NodeCleaningFailure(msg)
def _execute_pre_boot_bios_step(self, task, step, data=None):
    """Run the BIOS operation for a step, then reboot into the ramdisk.

    Applies the given settings (``apply_configuration``) or resets the BIOS
    to defaults (``factory_reset``), prepares the ramdisk, reboots the node,
    and records in ``driver_internal_info`` which operation is pending so
    that it can be finalized after the reboot.

    :param task: a task from TaskManager.
    :param step: name of the step to be performed.
    :param data: settings data; used only by ``apply_configuration``.
    :returns: the value of ``deploy_utils.get_async_step_return_state``
        for the node.
    :raises: NodeCleaningFailure, on failure while the node has a
        clean step set.
    :raises: InstanceDeployFailure, on failure otherwise.
    """
    node = task.node

    def _fail(message):
        # Report against whichever workflow (clean or deploy) is running.
        if node.clean_step:
            raise exception.NodeCleaningFailure(message)
        raise exception.InstanceDeployFailure(reason=message)

    if step not in ('apply_configuration', 'factory_reset'):
        _fail(_('Could not find the step %(step)s for the '
                'node %(node)s.') % {'step': step, 'node': node.uuid})

    try:
        ilo = ilo_common.get_ilo_object(node)
        if step == 'apply_configuration':
            ilo.set_bios_settings(data)
        else:
            ilo.reset_bios_to_default()
    except (exception.MissingParameterValue,
            exception.InvalidParameterValue,
            ilo_error.IloError,
            ilo_error.IloCommandNotSupportedError) as ir_exception:
        _fail(_('Step %(step)s failed '
                'on the node %(node)s with error: %(err)s') %
              {'step': step, 'node': node.uuid, 'err': ir_exception})

    ramdisk_opts = deploy_utils.build_agent_options(node)
    task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
    manager_utils.node_power_action(task, states.REBOOT)
    deploy_utils.set_async_step_flags(node, reboot=True,
                                      skip_current_step=False)

    # Remember which operation has to be completed after the reboot.
    pending_flag = ('apply_bios' if step == 'apply_configuration'
                    else 'reset_bios')
    info = node.driver_internal_info
    info[pending_flag] = True
    node.driver_internal_info = info
    node.save()
    return deploy_utils.get_async_step_return_state(node)
def _refresh_clean_steps(self, task):
    """Re-read the clean steps from the agent and cache them on the node.

    The agent's answer is validated, grouped by interface, and stored in
    ``driver_internal_info`` together with the hardware manager version and
    a refresh timestamp. The node is saved afterwards.

    :param task: a TaskManager instance
    :raises: NodeCleaningFailure if the agent returns invalid results
    """
    node = task.node
    LOG.debug('Refreshing agent clean step cache for node %(node)s. '
              'Previously cached steps: %(steps)s',
              {'node': node.uuid,
               'steps': node.driver_internal_info.get(
                   'agent_cached_clean_steps')})

    response = self._client.get_clean_steps(node, task.ports)
    agent_result = response.get('command_result', {})
    absent = {'clean_steps', 'hardware_manager_version'}.difference(
        agent_result)
    if absent:
        raise exception.NodeCleaningFailure(_(
            'agent get_clean_steps for node %(node)s returned an invalid '
            'result. Keys: %(keys)s are missing from result: %(result)s.')
            % ({'node': node.uuid,
                'keys': absent,
                'result': agent_result}))

    # agent_result['clean_steps'] looks like
    # {'HardwareManager': [{step1},{steps2}...], ...}
    required_keys = ('interface', 'step', 'priority')
    by_interface = collections.defaultdict(list)
    for manager_steps in agent_result['clean_steps'].values():
        for step in manager_steps:
            absent = set(required_keys).difference(step)
            if absent:
                raise exception.NodeCleaningFailure(_(
                    'agent get_clean_steps for node %(node)s returned an '
                    'invalid clean step. Keys: %(keys)s are missing from '
                    'step: %(step)s.') % ({'node': node.uuid,
                                           'keys': absent,
                                           'step': step}))
            by_interface[step['interface']].append(step)

    # Persist hardware manager version, the grouped steps, and the date.
    info = node.driver_internal_info
    info.update({
        'hardware_manager_version': agent_result[
            'hardware_manager_version'],
        'agent_cached_clean_steps': dict(by_interface),
        'agent_cached_clean_steps_refreshed': str(timeutils.utcnow()),
    })
    node.driver_internal_info = info
    node.save()
    LOG.debug('Refreshed agent clean step cache for node %(node)s: '
              '%(steps)s', {'node': node.uuid, 'steps': by_interface})
def _execute_post_boot_bios_step(self, task, step):
    """Finish a BIOS step once the node has rebooted.

    Clears the ``apply_bios``/``reset_bios`` flags from
    ``driver_internal_info`` and saves the node, then asks the iLO for the
    result of the BIOS settings operation and fails the step if it reports
    a ``failed`` status.

    :param task: a task from TaskManager.
    :param step: name of the step to be performed.
    :raises: NodeCleaningFailure, on failure while the node has a
        clean step set.
    :raises: InstanceDeployFailure, on failure otherwise.
    """
    node = task.node

    def _fail(message):
        # Report against whichever workflow (clean or deploy) is running.
        if node.clean_step:
            raise exception.NodeCleaningFailure(message)
        raise exception.InstanceDeployFailure(reason=message)

    # The pending-operation flags are cleared before any validation.
    info = node.driver_internal_info
    for flag in ('apply_bios', 'reset_bios'):
        info.pop(flag, None)
    task.node.driver_internal_info = info
    task.node.save()

    if step not in ('apply_configuration', 'factory_reset'):
        _fail(_('Could not find the step %(step)s for the '
                'node %(node)s.') % {'step': step, 'node': node.uuid})

    try:
        ilo = ilo_common.get_ilo_object(node)
        status = ilo.get_bios_settings_result()
    except (exception.MissingParameterValue,
            exception.InvalidParameterValue,
            ilo_error.IloError,
            ilo_error.IloCommandNotSupportedError) as ir_exception:
        _fail(_('Step %(step)s failed '
                'on the node %(node)s with error: %(err)s') %
              {'step': step, 'node': node.uuid, 'err': ir_exception})

    if status.get('status') == 'failed':
        _fail(_('Step %(step)s failed '
                'on the node %(node)s with error: %(err)s') %
              {'step': step, 'node': node.uuid,
               'err': status.get('results')})
def prepare_cleaning(oneview_client, task):
    """Allocate the Server Hardware to Ironic before cleaning starts.

    Calls ``allocate_server_hardware_to_ironic`` with a Server Profile name
    derived from the node UUID. If that fails with a OneViewError, the
    allocation error marker is stored in the node's
    ``driver_internal_info['oneview_error']``, the node is saved, and the
    failure is re-raised as a cleaning failure.

    :param oneview_client: an instance of the OneView client
    :param task: A TaskManager object
    :raises NodeCleaningFailure: If the allocation raises a OneViewError.
    """
    profile_name = "Ironic Cleaning [%s]" % task.node.uuid
    try:
        allocate_server_hardware_to_ironic(oneview_client, task.node,
                                           profile_name)
    except exception.OneViewError as e:
        info = task.node.driver_internal_info
        info['oneview_error'] = common.SERVER_HARDWARE_ALLOCATION_ERROR
        task.node.driver_internal_info = info
        task.node.save()
        raise exception.NodeCleaningFailure(node=task.node.uuid,
                                            reason=e)
def agent_get_clean_steps(task):
    """Get the list of clean steps from the agent.

    #TODO(JoshNang) move to BootInterface

    Queries the agent for its clean steps, stores the reported
    ``hardware_manager_version`` in the node's ``driver_internal_info``
    (saving the node), and returns only the steps whose ``interface`` is
    ``'deploy'``.

    :param task: a TaskManager object containing the node
    :raises: NodeCleaningFailure if the agent returns invalid results,
        including a response with no (or a null) ``command_result``
    :returns: A list of clean step dictionaries
    """
    client = agent_client.AgentClient()
    ports = objects.Port.list_by_node_id(task.context, task.node.id)
    # A response without 'command_result' (or with a null one) must be
    # reported as an invalid result rather than crash with a TypeError in
    # the membership tests below.
    result = (client.get_clean_steps(task.node, ports)
              .get('command_result') or {})

    if ('clean_steps' not in result or
            'hardware_manager_version' not in result):
        raise exception.NodeCleaningFailure(_(
            'get_clean_steps for node %(node)s returned invalid result:'
            ' %(result)s') % ({'node': task.node.uuid, 'result': result}))

    driver_internal_info = task.node.driver_internal_info
    driver_internal_info['hardware_manager_version'] = result[
        'hardware_manager_version']
    task.node.driver_internal_info = driver_internal_info
    task.node.save()

    # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..}
    # Flatten them into one list, keeping only the deploy-interface steps.
    return [step
            for step_list in result['clean_steps'].values()
            for step in step_list
            if step.get('interface') == 'deploy']
def delete_configuration(self, task):
    """Delete the RAID configuration.

    Works in two passes. On the first call (no
    ``ilo_raid_delete_in_progress`` flag) the deletion is started and the
    async wait state is returned. On the following call the RAID
    configuration is read back to verify that no logical disks remain.

    :param task: a TaskManager instance containing the node to act on.
    :raises: NodeCleaningFailure, on failure to execute clean step.
    :raises: InstanceDeployFailure, on failure to execute deploy step.
    """
    node = task.node
    LOG.debug("OOB RAID delete_configuration invoked for node %s.",
              node.uuid)
    info = node.driver_internal_info
    ilo = ilo_common.get_ilo_object(node)

    def _skip_flag():
        # The flag name depends on whether a clean step is set on the node.
        if node.clean_step:
            return 'skip_current_clean_step'
        return 'skip_current_deploy_step'

    try:
        if not info.get('ilo_raid_delete_in_progress'):
            # First pass: kick off the deletion and wait for the next call.
            ilo.delete_raid_configuration()
            self._prepare_for_read_raid(task, 'delete_raid')
            return deploy_utils.get_async_step_return_state(node)

        # Second pass: the deletion has been requested, verify the outcome.
        raid_conf = ilo.read_raid_configuration()
        self._pop_driver_internal_values(
            task, 'ilo_raid_delete_in_progress', _skip_flag())
        if len(raid_conf['logical_disks']) == 0:
            node.raid_config = {}
            LOG.debug("Node %(uuid)s raid delete clean step is done.",
                      {'uuid': node.uuid})
        else:
            # Some logical disks survived, so the step failed.
            err_msg = (_("Step delete_configuration failed "
                         "on node %(node)s with error: "
                         "Unable to delete these logical disks: "
                         "%(disks)s")
                       % {'node': node.uuid,
                          'disks': raid_conf['logical_disks']})
            if node.clean_step:
                raise exception.NodeCleaningFailure(err_msg)
            raise exception.InstanceDeployFailure(reason=err_msg)
    except ilo_error.IloLogicalDriveNotFoundError:
        LOG.info("No logical drive found to delete on node %(node)s",
                 {'node': node.uuid})
    except ilo_error.IloError as ilo_exception:
        operation = (_("Failed to delete raid configuration on node %s")
                     % node.uuid)
        # NOTE(review): the next call looks redundant with the one after
        # it; both are kept to preserve behavior. Confirm before removing.
        self._pop_driver_internal_values(task,
                                         'ilo_raid_delete_in_progress',
                                         'skip_current_clean_step')
        self._pop_driver_internal_values(
            task, 'ilo_raid_delete_in_progress', _skip_flag())
        self._set_step_failed(task, operation, ilo_exception)
def apply_configuration(self, task, settings):
    """Push the given BIOS settings to the node through its iLO.

    :param task: a TaskManager instance.
    :param settings: iterable of dictionaries, each with a ``name`` and a
        ``value`` key, describing the settings to apply.
    :raises: NodeCleaningFailure when applying the configuration on
        the node fails.
    """
    # Collapse the list of name/value pairs into one mapping; a later
    # entry with the same name overrides an earlier one.
    bios_data = {item['name']: item['value'] for item in settings}
    node = task.node
    failure_template = _('Clean step "apply_configuration" failed '
                         'on node %(node)s with error: %(err)s')

    try:
        ilo_common.get_ilo_object(node).set_bios_settings(bios_data)
    except (exception.MissingParameterValue,
            exception.InvalidParameterValue,
            ilo_error.IloError,
            ilo_error.IloCommandNotSupportedError) as ir_exception:
        details = {'node': node.uuid, 'err': ir_exception}
        raise exception.NodeCleaningFailure(failure_template % details)
def test__validate_user_clean_steps_get_steps_exception(self, mock_steps):
    """A failure while fetching the steps propagates out of validation."""
    node = obj_utils.create_test_node(self.context)
    mock_steps.side_effect = exception.NodeCleaningFailure('bad')

    with task_manager.acquire(self.context, node.uuid) as task:
        with self.assertRaises(exception.NodeCleaningFailure):
            conductor_steps._validate_user_clean_steps(task, [])
        mock_steps.assert_called_once_with(task, enabled=False,
                                           sort=False)
def create_cleaning_ports(self, task):
    """Create one Neutron port per Ironic port on the cleaning network.

    If any creation fails, ``_rollback_cleaning_ports`` is called before
    the error is raised.

    :param task: a TaskManager instance.
    :raises: InvalidParameterValue if the cleaning network is None
    :raises: NodeCleaningFailure if a port cannot be created
    :returns: a dictionary in the form {port.uuid: neutron_port['id']}
    """
    cleaning_net = CONF.neutron.cleaning_network_uuid
    if not cleaning_net:
        raise exception.InvalidParameterValue(_('Valid cleaning network '
                                                'UUID not provided'))

    neutron_client = _build_client(task.context.auth_token)
    # One request body is reused; only the MAC changes per Ironic port.
    request = {
        'port': {
            'network_id': CONF.neutron.cleaning_network_uuid,
            'admin_state_up': True,
        }
    }
    vif_by_port_uuid = {}

    for ironic_port in task.ports:
        request['port']['mac_address'] = ironic_port.address
        try:
            response = neutron_client.create_port(request)
        except neutron_client_exc.ConnectionFailed as e:
            self._rollback_cleaning_ports(task)
            msg = (_('Could not create cleaning port on network %(net)s '
                     'from %(node)s. %(exc)s') %
                   {'net': CONF.neutron.cleaning_network_uuid,
                    'node': task.node.uuid,
                    'exc': e})
            LOG.exception(msg)
            raise exception.NodeCleaningFailure(msg)

        created = response.get('port')
        if not created or not created.get('id'):
            # Neutron answered but did not return a usable port.
            self._rollback_cleaning_ports(task)
            msg = (_('Failed to create cleaning ports for node '
                     '%(node)s') % {'node': task.node.uuid})
            LOG.error(msg)
            raise exception.NodeCleaningFailure(msg)

        # Match return value of get_node_vif_ids()
        vif_by_port_uuid[ironic_port.uuid] = created['id']

    return vif_by_port_uuid
def restore_irmc_bios_config(self, task):
    """Restore the BIOS configuration of a node.

    Thin wrapper over ``_restore_bios_config`` that reports an iRMC
    operation error as a cleaning failure.

    :param task: a task from TaskManager.
    :raises: NodeCleaningFailure, on failure to execute step.
    :returns: None.
    """
    try:
        _restore_bios_config(task)
    except exception.IRMCOperationError as irmc_error:
        raise exception.NodeCleaningFailure(node=task.node.uuid,
                                            reason=irmc_error)
def agent_get_clean_steps(task, interface=None, override_priorities=None):
    """Get the list of clean steps from the agent.

    #TODO(JoshNang) move to BootInterface

    Queries the agent for its clean steps, stores the reported
    ``hardware_manager_version`` in the node's ``driver_internal_info``
    (saving the node), and returns the steps, optionally filtered by
    interface and with priorities overridden. Priority overrides are
    written into the step dictionaries returned by the agent client.

    :param task: a TaskManager object containing the node
    :param interface: The interface for which clean steps
        are to be returned. If this is not provided, it returns the
        clean steps for all interfaces.
    :param override_priorities: a dictionary with keys being step names and
        values being new priorities for them. If a step isn't in this
        dictionary, the step's original priority is used.
    :raises: NodeCleaningFailure if the agent returns invalid results,
        including a response with no (or a null) ``command_result``
    :returns: A list of clean step dictionaries
    """
    override_priorities = override_priorities or {}
    client = agent_client.AgentClient()
    ports = objects.Port.list_by_node_id(task.context, task.node.id)
    # A response without 'command_result' (or with a null one) must be
    # reported as an invalid result rather than crash with a TypeError in
    # the membership tests below.
    result = (client.get_clean_steps(task.node, ports)
              .get('command_result') or {})

    if ('clean_steps' not in result or
            'hardware_manager_version' not in result):
        raise exception.NodeCleaningFailure(
            _('get_clean_steps for node %(node)s returned invalid result:'
              ' %(result)s') % ({'node': task.node.uuid,
                                 'result': result}))

    driver_internal_info = task.node.driver_internal_info
    driver_internal_info['hardware_manager_version'] = result[
        'hardware_manager_version']
    task.node.driver_internal_info = driver_internal_info
    task.node.save()

    # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..}
    # Flatten clean steps into one list
    all_steps = [step
                 for step_list in result['clean_steps'].values()
                 for step in step_list]

    # Kept under its own name so the agent response is not shadowed.
    steps = []
    for step in all_steps:
        if interface and step.get('interface') != interface:
            continue
        new_priority = override_priorities.get(step.get('step'))
        if new_priority is not None:
            step['priority'] = new_priority
        steps.append(step)
    return steps
def delete_configuration(self, task):
    """Delete the RAID configuration.

    Works in two passes. On the first call (no
    ``ilo_raid_delete_in_progress`` flag) the deletion is started and
    CLEANWAIT is returned. On the following call the RAID configuration is
    read back to verify that no logical disks remain.

    :param task: a TaskManager instance containing the node to act on.
    :raises: NodeCleaningFailure, on failure to execute step.
    """
    node = task.node
    LOG.debug("OOB RAID delete_configuration invoked for node %s.",
              node.uuid)
    info = node.driver_internal_info
    ilo = ilo_common.get_ilo_object(node)
    bookkeeping_keys = ('ilo_raid_delete_in_progress',
                        'skip_current_clean_step')

    try:
        if not info.get('ilo_raid_delete_in_progress'):
            # First pass: kick off the deletion and wait for the next call.
            ilo.delete_raid_configuration()
            self._prepare_for_read_raid(task, 'delete_raid')
            return states.CLEANWAIT

        # Second pass: the deletion has been requested, verify the outcome.
        raid_conf = ilo.read_raid_configuration()
        if len(raid_conf['logical_disks']) == 0:
            node.raid_config = {}
            LOG.debug("Node %(uuid)s raid delete clean step is done.",
                      {'uuid': node.uuid})
            self._pop_driver_internal_values(task, *bookkeeping_keys)
        else:
            # Some logical disks survived, so the step failed.
            msg = ("Unable to delete this logical disks: %s" %
                   raid_conf['logical_disks'])
            self._pop_driver_internal_values(task, *bookkeeping_keys)
            raise exception.NodeCleaningFailure(
                "Clean step delete_configuration failed "
                "on node %(node)s with error: %(err)s"
                % {'node': node.uuid, 'err': msg})
    except ilo_error.IloLogicalDriveNotFoundError:
        LOG.info("No logical drive found to delete on node %(node)s",
                 {'node': node.uuid})
    except ilo_error.IloError as ilo_exception:
        operation = (_("Failed to delete raid configuration on node %s")
                     % node.uuid)
        self._pop_driver_internal_values(task, *bookkeeping_keys)
        self._set_clean_failed(task, operation, ilo_exception)
def cache_bios_settings(self, task):
    """Read the current BIOS settings from iLO and sync them to the DB.

    The settings are converted to a list of name/value dictionaries and
    handed to ``BIOSSettingList.sync_node_setting``; the resulting create,
    update and delete lists are then applied.

    :param task: a TaskManager instance.
    :raises: NodeCleaningFailure, on failure while the node has a
        clean step set.
    :raises: InstanceDeployFailure, on failure otherwise.
    """
    node = task.node
    node_id = node.id

    try:
        ilo = ilo_common.get_ilo_object(node)
        current = ilo.get_current_bios_settings()
    except (exception.MissingParameterValue,
            exception.InvalidParameterValue,
            ilo_error.IloError,
            ilo_error.IloCommandNotSupportedError) as ir_exception:
        errmsg = (_("Caching BIOS settings failed "
                    "on node %(node)s with error: %(err)s")
                  % {'node': node.uuid, 'err': ir_exception})
        if node.clean_step:
            raise exception.NodeCleaningFailure(errmsg)
        raise exception.InstanceDeployFailure(reason=errmsg)

    formatted = [{"name": key, "value": current[key]} for key in current]

    to_create, to_update, to_delete, unchanged = (
        objects.BIOSSettingList.sync_node_setting(task.context, node_id,
                                                  formatted))

    if to_create:
        objects.BIOSSettingList.create(task.context, node_id, to_create)
    if to_update:
        objects.BIOSSettingList.save(task.context, node_id, to_update)
    if to_delete:
        # Deletion is done by setting name, not by full setting dict.
        names = [entry.get("name") for entry in to_delete]
        objects.BIOSSettingList.delete(task.context, node_id, names)
def _validate_clean_steps(steps, node_uuid):
    """Check the structure of clean steps loaded from a clean_steps file.

    A step without a ``name`` gets the name ``'unnamed'`` written into it.
    Every step needs an ``interface``, and every argument marked
    ``required`` needs a ``value``.

    :param steps: list of clean step dictionaries; may be modified in
        place (default name).
    :param node_uuid: UUID of the node, used for error reporting.
    :raises: NodeCleaningFailure, if any required field is missing.
    """
    problems = []
    for entry in steps:
        step_name = entry.setdefault('name', 'unnamed')
        if 'interface' not in entry:
            problems.append({'name': step_name, 'field': 'interface'})
        problems.extend(
            {'name': step_name, 'field': '%s.value' % arg_name}
            for arg_name, arg in entry.get('args', {}).items()
            if arg.get('required', False) and 'value' not in arg)

    if not problems:
        return

    err_string = ', '.join(
        'name %(name)s, field %(field)s' % item for item in problems)
    msg = _("Malformed clean_steps file: %s") % err_string
    LOG.error(msg)
    raise exception.NodeCleaningFailure(node=node_uuid, reason=msg)
def tear_down_cleaning(task):
    """Release the Server Hardware from Ironic once cleaning is torn down.

    Thin wrapper over ``deallocate_server_hardware_from_ironic`` that
    reports a OneViewError as a cleaning failure.

    :param task: A TaskManager object
    :raises NodeCleaningFailure: If the deallocation raises a OneViewError.
    """
    try:
        deallocate_server_hardware_from_ironic(task)
    except exception.OneViewError as oneview_error:
        raise exception.NodeCleaningFailure(node=task.node.uuid,
                                            reason=oneview_error)
def _get_clean_steps(node, interface=None, override_priorities=None):
    """Load the node's clean steps from its YAML clean steps file.

    The file name comes from ``driver_info['ansible_clean_steps_config']``
    (or the configured default). Only its base name is used, joined to the
    playbooks path.

    :param node: an Ironic node object.
    :param interface: if given, only steps for this interface are returned.
    :param override_priorities: optional mapping of step name to priority
        that takes precedence over the priority in the file.
    :raises: NodeCleaningFailure, if the file cannot be loaded or its
        content does not validate.
    :returns: a list of clean step dictionaries.
    """
    driver_info = node.driver_info
    steps_file = driver_info.get('ansible_clean_steps_config',
                                 CONF.ansible.default_clean_steps_config)
    playbooks_dir = driver_info.get('ansible_playbooks_path',
                                    CONF.ansible.playbooks_path)
    path = os.path.join(playbooks_dir, os.path.basename(steps_file))

    try:
        with open(path) as steps_fh:
            raw_steps = yaml.safe_load(steps_fh)
    except Exception as e:
        msg = _('Failed to load clean steps from file '
                '%(file)s: %(exc)s') % {'file': path, 'exc': e}
        raise exception.NodeCleaningFailure(node=node.uuid, reason=msg)

    _validate_clean_steps(raw_steps, node.uuid)

    priorities = override_priorities or {}
    result = []
    for params in raw_steps:
        step_name = params['name']
        step_interface = params['interface']
        if interface is not None and interface != step_interface:
            continue

        priority = priorities.get(step_name)
        if priority is None:
            priority = params.get('priority', 0)

        argsinfo = params.get('args', {})
        # pop() moves each 'value' out of argsinfo and into args, so
        # argsinfo is left holding only the argument metadata.
        args = {arg: arg_info.pop('value', None)
                for arg, arg_info in argsinfo.items()}

        result.append({
            'interface': step_interface,
            'step': step_name,
            'priority': priority,
            'abortable': False,
            'argsinfo': argsinfo,
            'args': args,
        })
    return result
def agent_get_clean_steps(task, interface=None, override_priorities=None):
    """Return copies of the clean steps cached on the node.

    #TODO(JoshNang) move to BootInterface

    The steps are read from
    ``driver_internal_info['agent_cached_clean_steps']``, a mapping of
    interface name to list of step dictionaries. Each returned step is a
    shallow copy, so priority overrides do not touch the cache.

    :param task: a TaskManager object containing the node
    :param interface: The interface for which clean steps
        are to be returned. If this is not provided, it returns the
        clean steps for all interfaces.
    :param override_priorities: a dictionary with keys being step names and
        values being new priorities for them. If a step isn't in this
        dictionary, the step's original priority is used.
    :raises NodeCleaningFailure: if the clean steps are not yet cached.
    :returns: A list of clean step dictionaries
    """
    node = task.node
    try:
        cached = node.driver_internal_info['agent_cached_clean_steps']
    except KeyError:
        raise exception.NodeCleaningFailure(_('Cleaning steps are not yet '
                                              'available for node %(node)s')
                                            % {'node': node.uuid})

    if interface:
        selected = cached.get(interface, [])
    else:
        selected = [step
                    for interface_steps in cached.values()
                    for step in interface_steps]
    steps = [step.copy() for step in selected]

    if steps and override_priorities:
        for step in steps:
            overridden = override_priorities.get(step.get('step'))
            if overridden is not None:
                step['priority'] = overridden
    return steps
def execute_clean_step(self, task, step):
    """Execute a clean step by running the cleaning playbook.

    The playbook is run with the tags listed in ``step['args']['tags']``.
    A playbook failure is handed to
    ``manager_utils.cleaning_error_handler`` rather than re-raised.

    :param task: a TaskManager object containing the node
    :param step: a clean step dictionary to execute
    :raises: NodeCleaningFailure, if the node has no
        ``ansible_cleaning_ip`` in its driver_internal_info.
    :returns: None
    """
    node = task.node
    playbook, user, key = _parse_ansible_driver_info(task.node,
                                                     action='clean')
    stepname = step['step']

    try:
        ip_addr = node.driver_internal_info['ansible_cleaning_ip']
    except KeyError:
        raise exception.NodeCleaningFailure(node=node.uuid,
                                            reason='undefined node IP '
                                                   'addresses')

    extra_vars = _prepare_extra_vars(
        [(node.uuid, ip_addr, user, node.extra)])
    log_ctx = {'node': node.uuid, 'step': stepname}

    LOG.debug('Starting cleaning step %(step)s on node %(node)s', log_ctx)
    tags = step['args'].get('tags', [])
    try:
        _run_playbook(playbook, extra_vars, key, tags=tags)
    except exception.InstanceDeployFailure as e:
        LOG.error(_LE("Ansible failed cleaning step %(step)s "
                      "on node %(node)s."), log_ctx)
        manager_utils.cleaning_error_handler(task, six.text_type(e))
    else:
        LOG.info(_LI('Ansible completed cleaning step %(step)s '
                     'on node %(node)s.'), log_ctx)
def agent_execute_clean_step(task, step):
    """Ask the agent to run a clean step; completion is asynchronous.

    #TODO(JoshNang) move to BootInterface

    :param task: a TaskManager object containing the node
    :param step: a clean step dictionary to execute
    :raises: NodeCleaningFailure if the agent does not return a command
        status
    :returns: states.CLEANWAIT to signify the step will be completed async
    """
    node = task.node
    node_ports = objects.Port.list_by_node_id(task.context, node.id)
    response = agent_client.AgentClient().execute_clean_step(
        step, node, node_ports)

    if response.get('command_status'):
        return states.CLEANWAIT

    raise exception.NodeCleaningFailure(_(
        'Agent on node %(node)s returned bad command result: '
        '%(result)s') % {'node': task.node.uuid,
                         'result': response.get('command_error')})
def test_update_firmware_cleans_all_files_if_exc_thrown(
        self, remove_mock, FirmwareProcessor_mock, clean_step_mock,
        LOG_mock):
    """Every processed firmware file is removed if the update step fails."""
    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # | GIVEN |
        ilo_image = {'url': 'any_valid_url',
                     'checksum': 'xxxx',
                     'component': 'ilo'}
        bios_image = {'url': 'any_invalid_url',
                      'checksum': 'xxxx',
                      'component': 'bios'}
        firmware_update_args = {
            'firmware_update_mode': 'ilo',
            'firmware_images': [ilo_image, bios_image],
        }
        location_cls = (
            ilo_management.firmware_processor.FirmwareImageLocation)
        first_location = location_cls('extracted_firmware_url_1',
                                      'filename_1')
        second_location = location_cls('extracted_firmware_url_2',
                                       'filename_2')
        process_fw_on = FirmwareProcessor_mock.return_value.process_fw_on
        process_fw_on.side_effect = [first_location, second_location]
        clean_step_mock.side_effect = exception.NodeCleaningFailure(
            node=self.node.uuid, reason='ilo_exc')

        # | WHEN & THEN |
        with self.assertRaises(exception.NodeCleaningFailure):
            task.driver.management.update_firmware(task,
                                                   **firmware_update_args)

        # The update stops at the first image, yet both files are removed.
        clean_step_mock.assert_called_once_with(
            task.node, 'update_firmware',
            'extracted_firmware_url_1', 'ilo')
        self.assertTrue(LOG_mock.error.called)
        remove_mock.assert_has_calls([mock.call(first_location),
                                      mock.call(second_location)])
def prepare_cleaning_ports(task):
    """Recreate the cleaning ports for every Ironic port of the node.

    If the DHCP provider supports it, leftover cleaning ports are deleted
    and fresh ones are created. The VIF ID of each new port is then stored
    as ``vif_port_id`` in the matching Ironic port's ``extra`` field, and
    the port is saved.

    :param task: a TaskManager object containing the node
    :raises NodeCleaningFailure: if the provider did not return a VIF port
        ID for one of the Ironic ports. Errors raised by the provider's
        delete/create calls are not caught here.
    """
    dhcp_api = dhcp_factory.DHCPFactory().provider

    # If we have left over ports from a previous cleaning, remove them.
    # Failures propagate; they are caught and handled in conductor.
    if getattr(dhcp_api, 'delete_cleaning_ports', None):
        dhcp_api.delete_cleaning_ports(task)

    # Nothing more to do for providers that cannot create cleaning ports.
    if not getattr(dhcp_api, 'create_cleaning_ports', None):
        return

    # Failures propagate; they are caught and handled in conductor.
    vif_ids = dhcp_api.create_cleaning_ports(task)

    # Some boot interfaces need vif_port_id on each port to prepare the
    # ramdisk boot.
    for port in task.ports:
        extra = port.extra
        try:
            vif_id = vif_ids[port.uuid]
        except KeyError:
            # This is an internal error in Ironic. All DHCP providers
            # implementing create_cleaning_ports are supposed to
            # return a VIF port ID for all Ironic ports. But
            # that doesn't seem to be true here.
            error = (_("When creating cleaning ports, DHCP provider "
                       "didn't return VIF port ID for %s") % port.uuid)
            raise exception.NodeCleaningFailure(
                node=task.node.uuid, reason=error)
        extra['vif_port_id'] = vif_id
        port.extra = extra
        port.save()
def factory_reset(self, task):
    """Reset the node's BIOS settings to defaults through its iLO.

    :param task: a TaskManager instance.
    :raises: NodeCleaningFailure when the iLO object cannot be obtained
        or the reset fails with one of the handled errors.
    """
    node = task.node
    failure_template = _('Clean step "factory_reset" failed '
                         'on node %(node)s with error: %(err)s')

    try:
        ilo_common.get_ilo_object(node).reset_bios_to_default()
    except (exception.MissingParameterValue,
            exception.InvalidParameterValue,
            ilo_error.IloError,
            ilo_error.IloCommandNotSupportedError) as ir_exception:
        details = {'node': node.uuid, 'err': ir_exception}
        raise exception.NodeCleaningFailure(failure_template % details)
def __do_node_clean_steps_fail(self, mock_steps, mock_validate,
                               clean_steps=None, invalid_exc=True):
    """Check that a failure while getting steps puts the node in CLEANFAIL.

    :param mock_steps: mock of the step-fetching call; made to raise.
    :param mock_validate: mock expected to be called once with the task.
    :param clean_steps: clean steps passed to do_node_clean; when given,
        the target provision state is MANAGEABLE instead of AVAILABLE.
    :param invalid_exc: raise InvalidParameterValue when True, otherwise
        NodeCleaningFailure.
    """
    failure = (exception.InvalidParameterValue('invalid') if invalid_exc
               else exception.NodeCleaningFailure('failure'))
    mock_steps.side_effect = failure

    if clean_steps:
        tgt_prov_state = states.MANAGEABLE
    else:
        tgt_prov_state = states.AVAILABLE

    node = obj_utils.create_test_node(
        self.context, driver='fake-hardware',
        uuid=uuidutils.generate_uuid(),
        provision_state=states.CLEANING,
        target_provision_state=tgt_prov_state)

    with task_manager.acquire(
            self.context, node.uuid, shared=False) as task:
        cleaning.do_node_clean(task, clean_steps=clean_steps)
        mock_validate.assert_called_once_with(mock.ANY, task)

    node.refresh()
    self.assertEqual(states.CLEANFAIL, node.provision_state)
    self.assertEqual(tgt_prov_state, node.target_provision_state)
    mock_steps.assert_called_once_with(mock.ANY)
def update_firmware(self, task, **kwargs):
    """Process the requested firmware images, then flash each of them.

    The work is done in two phases. First every entry of
    ``kwargs['firmware_images']`` is validated and processed into a
    firmware location object. Then the ``update_firmware`` iLO clean
    step is executed once per processed image. The location objects are
    removed again once the second phase ends (successfully or not), or
    as soon as the first phase fails.

    :param task: a TaskManager object.
    :param kwargs: must contain ``firmware_images`` (an iterable of
        firmware image descriptions) and ``firmware_update_mode``.
    :raises: NodeCleaningFailure, if processing an image raises an
        IronicException or if executing the update step fails.
    :raises: InvalidParameterValue, presumably from argument validation
        done outside this body when the update mode is not 'ilo' (not
        visible here -- confirm against the caller).
    """
    node = task.node
    images = kwargs['firmware_images']
    # (location object, component) pairs for every image processed so far.
    staged = []

    # Note(deray): Processing of firmware images happens here. As part
    # of processing checksum validation is also done for the firmware file.
    # Processing of firmware file essentially means downloading the file
    # on the conductor, validating the checksum of the downloaded content,
    # extracting the raw firmware file from its compact format, if it is,
    # and hosting the file on a web server or a swift store based on the
    # need of the baremetal server iLO firmware update method.
    try:
        for firmware_image_info in images:
            url, checksum, component = (
                firmware_processor.get_and_validate_firmware_image_info(
                    firmware_image_info, kwargs['firmware_update_mode']))
            log_ctx = {'firmware_file': url, 'node': node.uuid}
            LOG.debug("Processing of firmware file: %(firmware_file)s on "
                      "node: %(node)s ... in progress", log_ctx)

            processor = firmware_processor.FirmwareProcessor(url)
            location_obj = processor.process_fw_on(node, checksum)
            staged.append((location_obj, component))

            LOG.debug("Processing of firmware file: %(firmware_file)s on "
                      "node: %(node)s ... done", log_ctx)
    except exception.IronicException as ilo_exc:
        # Remove everything processed so far, then report the failure
        # as a cleaning failure.
        for location_obj, _component in staged:
            location_obj.remove()
        LOG.error("Processing of firmware image: %(firmware_image)s "
                  "on node: %(node)s ... failed",
                  {'firmware_image': firmware_image_info,
                   'node': node.uuid})
        raise exception.NodeCleaningFailure(node=node.uuid, reason=ilo_exc)

    # Updating of firmware images happen here.
    try:
        for location_obj, component in staged:
            fw_location = location_obj.fw_image_location
            log_ctx = {'firmware_file': fw_location, 'node': node.uuid}
            LOG.debug("Firmware update for %(firmware_file)s on "
                      "node: %(node)s ... in progress", log_ctx)
            _execute_ilo_clean_step(
                node, 'update_firmware', fw_location, component)
            LOG.debug("Firmware update for %(firmware_file)s on "
                      "node: %(node)s ... done", log_ctx)
    except exception.NodeCleaningFailure:
        # Log which image failed; the original exception is re-raised
        # when the context manager exits.
        with excutils.save_and_reraise_exception():
            LOG.error("Firmware update for %(firmware_file)s on "
                      "node: %(node)s failed.",
                      {'firmware_file': fw_location, 'node': node.uuid})
    finally:
        # The processed files are no longer needed, whatever the outcome.
        for location_obj, _component in staged:
            location_obj.remove()

    LOG.info("All Firmware update operations completed successfully "
             "for node: %s.", node.uuid)
def create_configuration(self, task, create_root_volume=True,
                         create_nonroot_volumes=True):
    """Create a RAID configuration on a bare metal using agent ramdisk.

    The filtered target RAID configuration is stored in the node's
    ``driver_internal_info`` and saved. What happens next depends on the
    ``ilo_raid_create_in_progress`` flag in that dict:

    * flag not set: RAID creation is started on the iLO, the node is
      prepared for reading the result back, and the async step return
      state is returned;
    * flag set: the RAID configuration is read back, the in-progress and
      skip-current-step flags are popped, and the node's RAID info is
      updated if at least one logical disk is reported.

    :param task: a TaskManager instance.
    :param create_root_volume: If True, a root volume is created
        during RAID configuration. Otherwise, no root volume is
        created. Default is True.
    :param create_nonroot_volumes: If True, non-root volumes are
        created. If False, no non-root volumes are created. Default
        is True.
    :returns: the value of ``deploy_utils.get_async_step_return_state``
        when RAID creation has just been started, otherwise None.
    :raises: MissingParameterValue, if node.target_raid_config is missing
        or was found to be empty after skipping root volume and/or
        non-root volumes.
    :raises: NodeCleaningFailure, on failure to execute clean step.
    :raises: InstanceDeployFailure, on failure to execute deploy step.
    """
    node = task.node
    target_raid_config = raid.filter_target_raid_config(
        node,
        create_root_volume=create_root_volume,
        create_nonroot_volumes=create_nonroot_volumes)

    internal_info = node.driver_internal_info
    internal_info['target_raid_config'] = target_raid_config
    node.driver_internal_info = internal_info
    node.save()

    LOG.debug("Calling OOB RAID create_configuration for node %(node)s "
              "with the following target RAID configuration: %(target)s",
              {'node': node.uuid, 'target': target_raid_config})
    ilo_object = ilo_common.get_ilo_object(node)

    try:
        if not internal_info.get('ilo_raid_create_in_progress'):
            # First pass: start the RAID creation and go asynchronous.
            ilo_object.create_raid_configuration(target_raid_config)
            self._prepare_for_read_raid(task, 'create_raid')
            return deploy_utils.get_async_step_return_state(node)

        # Second pass: RAID creation was started earlier, read it back.
        raid_conf = ilo_object.read_raid_configuration(
            raid_config=target_raid_config)
        skip_flag = ('skip_current_clean_step' if node.clean_step
                     else 'skip_current_deploy_step')
        self._pop_driver_internal_values(
            task, 'ilo_raid_create_in_progress', skip_flag)

        if not len(raid_conf['logical_disks']):
            # Raid configuration failed
            msg = (_("Step create_configuration failed "
                     "on node %(node)s with error: "
                     "Unable to create raid") % {'node': node.uuid})
            if node.clean_step:
                raise exception.NodeCleaningFailure(msg)
            raise exception.InstanceDeployFailure(reason=msg)

        raid.update_raid_info(node, raid_conf)
        LOG.debug("Node %(uuid)s raid create clean step is done.",
                  {'uuid': node.uuid})
    except ilo_error.IloError as ilo_exception:
        operation = (_("Failed to create raid configuration on node %s")
                     % node.uuid)
        skip_flag = ('skip_current_clean_step' if node.clean_step
                     else 'skip_current_deploy_step')
        # Clear the progress flags before marking the step as failed.
        self._pop_driver_internal_values(
            task, 'ilo_raid_create_in_progress', skip_flag)
        self._set_step_failed(task, operation, ilo_exception)