def _get_pxe_dev_nics(self, client, nics, node):
    """Collect the identifiers of the NICs configured for PXE boot.

    When the BIOS "BootMode" setting is "Uefi", the result holds the
    current value of each ``PxeDev<N>Interface`` setting whose matching
    ``PxeDev<N>EnDis`` setting is present and currently "Enabled".
    When it is "Bios", the result holds the id of every NIC whose
    "LegacyBootProto" setting is currently "PXE". For any other boot
    mode the result is an empty list.

    :param client: Dracclient to list the bios settings and nics
    :param nics: list of nics
    :param node: node object; its uuid is reported in error logs.
    :raises: HardwareInspectionFailure if the BIOS settings or the
             settings of a NIC cannot be listed.
    :returns: Returns list of pxe device interfaces.
    """
    # (enable/disable setting, interface setting) for each PXE slot.
    pxe_slots = (
        ("PxeDev1EnDis", "PxeDev1Interface"),
        ("PxeDev2EnDis", "PxeDev2Interface"),
        ("PxeDev3EnDis", "PxeDev3Interface"),
        ("PxeDev4EnDis", "PxeDev4Interface"),
    )

    try:
        bios = client.list_bios_settings()
    except drac_exceptions.BaseClientException as exc:
        LOG.error('DRAC driver failed to list bios settings '
                  'for %(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=exc)

    boot_mode = bios["BootMode"].current_value
    found = []

    if boot_mode == "Uefi":
        for enable_key, interface_key in pxe_slots:
            if enable_key not in bios:
                continue
            if bios[enable_key].current_value == "Enabled":
                found.append(bios[interface_key].current_value)
    elif boot_mode == "Bios":
        for candidate in nics:
            try:
                nic_settings = client.list_nic_settings(
                    nic_id=candidate.id)
            except drac_exceptions.BaseClientException as exc:
                LOG.error('DRAC driver failed to list nic settings '
                          'for %(node_uuid)s. Reason: %(error)s.',
                          {'node_uuid': node.uuid, 'error': exc})
                raise exception.HardwareInspectionFailure(error=exc)

            if "LegacyBootProto" not in nic_settings:
                continue
            if nic_settings['LegacyBootProto'].current_value == "PXE":
                found.append(candidate.id)

    return found
def _inspect_hardware(node):
    """Collect the essential properties and MAC addresses of a node.

    :param node: node object.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: a pair of dictionary and list, the dictionary contains
              keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its
              inspected values, the list contains mac addresses.
    """
    known_failures = (scci.SCCIInvalidInputError,
                      scci.SCCIClientError,
                      exception.SNMPFailure)
    try:
        irmc_report = irmc_common.get_irmc_report(node)
        essential = scci.get_essential_properties(
            irmc_report, IRMCInspect.ESSENTIAL_PROPERTIES)
        mac_addresses = _get_mac_addresses(node)
    except known_failures as exc:
        # Report every expected failure as a single inspection failure
        # that names the node and carries the underlying error.
        message = (_("Inspection failed for node %(node_id)s "
                     "with the following error: %(error)s") %
                   {'node_id': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=message)
    else:
        return (essential, mac_addresses)
def _inspect_hardware(node, **kwargs):
    """Inspect the node and get hardware information.

    Besides the essential properties and MAC addresses, this also
    gathers the capabilities named in CAPABILITIES_PROPERTIES and merges
    them into the node's existing capabilities string.

    :param node: node object.
    :param kwargs: the dictionary of additional parameters; passed
                   through to scci.get_capabilities_properties.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: a pair of dictionary and list, the dictionary contains
              keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its
              inspected values, the list contains mac addresses.
    """
    capabilities_props = set(CAPABILITIES_PROPERTIES)

    # Remove all capabilities item which will be inspected in the
    # existing capabilities of node.
    if 'capabilities' in node.properties:
        existing_cap = node.properties['capabilities'].split(',')
        # Build a new list instead of removing entries from the list
        # being iterated: in-place removal skips the element that
        # follows each removed one, so adjacent entries with the same
        # key could survive.
        kept_cap = [prop for prop in existing_cap
                    if prop.split(':')[0] not in capabilities_props]
        node.properties['capabilities'] = ",".join(kept_cap)

    # get gpu_ids in ironic configuration
    values = [gpu_id.lower() for gpu_id in CONF.irmc.gpu_ids]

    # if gpu_ids = [], pci_gpu_devices will not be inspected.
    # discard() keeps this a no-op when the key is not in the set.
    if not values:
        capabilities_props.discard('pci_gpu_devices')

    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(
            report, IRMCInspect.ESSENTIAL_PROPERTIES)
        d_info = irmc_common.parse_driver_info(node)
        capabilities = scci.get_capabilities_properties(
            d_info, capabilities_props, values, **kwargs)
        if capabilities:
            # A zero GPU count or a False trusted_boot is dropped
            # rather than recorded as a capability.
            if capabilities.get('pci_gpu_devices') == 0:
                capabilities.pop('pci_gpu_devices')

            if capabilities.get('trusted_boot') is False:
                capabilities.pop('trusted_boot')

            capabilities = utils.get_updated_capabilities(
                node.properties.get('capabilities'), capabilities)
            if capabilities:
                props['capabilities'] = capabilities
        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as e:
        error = (_("Inspection failed for node %(node_id)s "
                   "with the following error: %(error)s") %
                 {'node_id': node.uuid, 'error': e})
        raise exception.HardwareInspectionFailure(error=error)

    return (props, macs)
def _inspection_error_handler(task, error, raise_exc=False, clean_up=True):
    """Record an inspection error on the node and signal the failure.

    :param task: task object; its node receives ``last_error``.
    :param error: the error value to store and report.
    :param raise_exc: when true, save the node and raise; otherwise
                      send the 'fail' event to the task.
    :param clean_up: when true, call _tear_down_managed_boot first.
    :raises: HardwareInspectionFailure if raise_exc is true.
    """
    if clean_up:
        _tear_down_managed_boot(task)

    node = task.node
    node.last_error = error

    if not raise_exc:
        task.process_event('fail')
        return

    node.save()
    raise exception.HardwareInspectionFailure(error=error)
def test_inspect_hardware_inspect_exception(self, _inspect_hardware_mock,
                                            port_mock):
    """A failure from _inspect_hardware propagates; port mock is unused."""
    _inspect_hardware_mock.side_effect = (
        exception.HardwareInspectionFailure("fake exception"))

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        inspect_hardware = task.driver.inspect.inspect_hardware
        with self.assertRaises(exception.HardwareInspectionFailure):
            inspect_hardware(task)
        self.assertFalse(port_mock.called)
def _validate(node, data):
    """Check that inspection data has the expected keys and shapes.

    'properties' must be present, be a dictionary and contain every key
    in IloInspect.ESSENTIAL_PROPERTIES; 'macs' must be present and be a
    dictionary. The checks run in that order and the first failure is
    raised.

    :param node: node object.
    :param data: the dictionary received by querying server.
    :raises: HardwareInspectionFailure
    """
    properties = data.get('properties')
    if not properties:
        error = (_("The node %s didn't return 'properties' as the key with "
                   "inspection.") % node.uuid)
        raise exception.HardwareInspectionFailure(error=error)

    if not isinstance(properties, dict):
        error = (_("Essential properties are expected to be in dictionary "
                   "format, received %(properties)s from node "
                   "%(node)s.") %
                 {"properties": properties, 'node': node.uuid})
        raise exception.HardwareInspectionFailure(error=error)

    absent = IloInspect.ESSENTIAL_PROPERTIES - set(properties)
    if absent:
        error = (_("Server didn't return the key(s): %(key)s") %
                 {'key': ', '.join(absent)})
        raise exception.HardwareInspectionFailure(error=error)

    macs = data.get('macs')
    if not macs:
        error = (_("The node %s didn't return 'macs' as the key with "
                   "inspection.") % node.uuid)
        raise exception.HardwareInspectionFailure(error=error)

    if not isinstance(macs, dict):
        error = (_("Node %(node)s didn't return MACs %(macs)s "
                   "in dictionary format.") %
                 {"macs": macs, 'node': node.uuid})
        raise exception.HardwareInspectionFailure(error=error)
def _get_essential_properties(node, ilo_object):
    """Fetch the essential scheduling properties and validate them.

    :param node: node object.
    :param ilo_object: an instance of proliantutils.ilo.IloClient
    :raises: HardwareInspectionFailure if any of the properties values
             are missing.
    :returns: The dictionary containing properties and MAC data.
              The dictionary possible keys are 'properties' and 'macs'.
              The 'properties' should contain keys as in
              IloInspect.ESSENTIAL_PROPERTIES. The 'macs' is a
              dictionary containing key:value pairs of
              <port_numbers:mac_addresses>
    """
    try:
        # Retrieve the mandatory properties from hardware
        inspected = ilo_object.get_essential_properties()
    except ilo_error.IloError as exc:
        raise exception.HardwareInspectionFailure(error=exc)
    else:
        # Validation runs outside the try so its own failures are not
        # intercepted by the IloError handler above.
        _validate(node, inspected)
        return inspected
def inspect_hardware(self, task):
    """Inspect hardware.

    Inspect hardware to obtain the essential & additional hardware
    properties. The discovered properties are merged into the node's
    properties and saved, then a port is created for every NIC the
    client lists.

    :param task: a TaskManager instance containing the node to act on.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: states.MANAGEABLE
    """
    node = task.node
    client = drac_common.get_drac_client(node)
    properties = {}

    try:
        properties['memory_mb'] = sum(
            memory.size_mb for memory in client.list_memory())

        cpus = client.list_cpus()
        # Guard against an empty CPU list: indexing cpus[0] would raise
        # IndexError. Leaving the keys unset lets the missing-keys check
        # below raise the documented HardwareInspectionFailure instead.
        if cpus:
            properties['cpus'] = len(cpus)
            properties['cpu_arch'] = 'x86_64' if cpus[0].arch64 else 'x86'

        # Prefer a virtual disk as the root disk; fall back to the
        # physical disks only when no virtual disk qualifies.
        virtual_disks = client.list_virtual_disks()
        root_disk = self._guess_root_disk(virtual_disks)
        if root_disk:
            properties['local_gb'] = int(root_disk.size_mb / units.Ki)
        else:
            physical_disks = client.list_physical_disks()
            root_disk = self._guess_root_disk(physical_disks)
            if root_disk:
                properties['local_gb'] = int(root_disk.size_mb / units.Ki)
    except drac_exceptions.BaseClientException as exc:
        LOG.error(_LE('DRAC driver failed to introspect node '
                      '%(node_uuid)s. Reason: %(error)s.'),
                  {'node_uuid': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=exc)

    valid_keys = self.ESSENTIAL_PROPERTIES
    missing_keys = valid_keys - set(properties)
    if missing_keys:
        error = (_('Failed to discover the following properties: '
                   '%(missing_keys)s') %
                 {'missing_keys': ', '.join(missing_keys)})
        raise exception.HardwareInspectionFailure(error=error)

    node.properties = dict(node.properties, **properties)
    node.save()

    try:
        nics = client.list_nics()
    except drac_exceptions.BaseClientException as exc:
        LOG.error(_LE('DRAC driver failed to introspect node '
                      '%(node_uuid)s. Reason: %(error)s.'),
                  {'node_uuid': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=exc)

    for nic in nics:
        try:
            port = objects.Port(task.context, address=nic.mac,
                                node_id=node.id)
            port.create()
            LOG.info(_LI('Port created with MAC address %(mac)s '
                         'for node %(node_uuid)s during inspection'),
                     {'mac': nic.mac, 'node_uuid': node.uuid})
        except exception.MACAlreadyExists:
            # An already-registered MAC is not fatal; log and continue.
            LOG.warning(_LW('Failed to create a port with MAC address '
                            '%(mac)s when inspecting the node '
                            '%(node_uuid)s because the address is already '
                            'registered'),
                        {'mac': nic.mac, 'node_uuid': node.uuid})

    LOG.info(_LI('Node %s successfully inspected.'), node.uuid)
    return states.MANAGEABLE
def inspect_hardware(self, task):
    """Inspect hardware to get the hardware properties.

    Inspects hardware to get the essential properties.
    It fails if any of the essential properties
    are not received from the node.

    :param task: a TaskManager instance.
    :raises: HardwareInspectionFailure if essential properties
             could not be retrieved successfully.
    :returns: The resulting state of inspection.
    """
    system = redfish_utils.get_system(task.node)

    # get the essential properties and update the node properties
    # with it.
    inspected_properties = task.node.properties

    if system.memory_summary and system.memory_summary.size_gib:
        inspected_properties['memory_mb'] = str(
            system.memory_summary.size_gib * units.Ki)

    if system.processors and system.processors.summary:
        cpus, arch = system.processors.summary
        if cpus:
            inspected_properties['cpus'] = cpus

        if arch:
            try:
                inspected_properties['cpu_arch'] = CPU_ARCH_MAP[arch]
            except KeyError:
                LOG.warning("Unknown CPU arch %(arch)s discovered "
                            "for node %(node)s",
                            {'node': task.node.uuid, 'arch': arch})

    # TODO(etingof): should we respect root device hints here?
    local_gb = self._detect_local_gb(task, system)

    if local_gb:
        inspected_properties['local_gb'] = str(local_gb)
    else:
        LOG.warning("Could not provide a valid storage size configured "
                    "for node %(node)s. Assuming this is a disk-less node",
                    {'node': task.node.uuid})
        inspected_properties['local_gb'] = '0'

    if system.boot.mode:
        # Only fill in boot_mode when the node does not already carry
        # that capability.
        if not drivers_utils.get_node_capability(task.node, 'boot_mode'):
            capabilities = utils.get_updated_capabilities(
                inspected_properties.get('capabilities', ''),
                {'boot_mode': BOOT_MODE_MAP[system.boot.mode]})
            inspected_properties['capabilities'] = capabilities

    valid_keys = self.ESSENTIAL_PROPERTIES
    missing_keys = valid_keys - set(inspected_properties)
    if missing_keys:
        # Interpolate with ``%``; without it the message would be a
        # (format, mapping) tuple rather than a readable string.
        error = (_('Failed to discover the following properties: '
                   '%(missing_keys)s on node %(node)s') %
                 {'missing_keys': ', '.join(missing_keys),
                  'node': task.node.uuid})
        raise exception.HardwareInspectionFailure(error=error)

    task.node.properties = inspected_properties
    task.node.save()

    LOG.debug("Node properties for %(node)s are updated as "
              "%(properties)s",
              {'properties': inspected_properties,
               'node': task.node.uuid})

    self._create_ports(task, system)

    return states.MANAGEABLE
def inspect_hardware(self, task):
    """Inspect hardware to get the hardware properties.

    Inspects hardware to get the essential properties.
    It fails if any of the essential properties
    are not received from the node.

    :param task: a TaskManager instance.
    :raises: HardwareInspectionFailure if essential properties
             could not be retrieved successfully.
    :returns: The resulting state of inspection.
    """
    system = redfish_utils.get_system(task.node)

    # get the essential properties and update the node properties
    # with it.
    inspected_properties = task.node.properties

    if system.memory_summary and system.memory_summary.size_gib:
        inspected_properties['memory_mb'] = str(
            system.memory_summary.size_gib * units.Ki)

    if system.processors and system.processors.summary:
        cpus, arch = system.processors.summary
        if cpus:
            inspected_properties['cpus'] = cpus

        if arch:
            try:
                inspected_properties['cpu_arch'] = CPU_ARCH_MAP[arch]
            except KeyError:
                LOG.warning("Unknown CPU arch %(arch)s discovered "
                            "for node %(node)s",
                            {'node': task.node.uuid, 'arch': arch})

    # In each discovery step below the first reported size of at least
    # 4 GiB is taken, or 0 when there is none.
    simple_storage_size = 0

    try:
        LOG.debug("Attempting to discover system simple storage size for "
                  "node %(node)s", {'node': task.node.uuid})
        if (system.simple_storage
                and system.simple_storage.disks_sizes_bytes):
            simple_storage_size = [
                size for size in system.simple_storage.disks_sizes_bytes
                if size >= 4 * units.Gi
            ] or [0]

            simple_storage_size = simple_storage_size[0]

    except sushy.exceptions.SushyError as ex:
        LOG.debug("No simple storage information discovered "
                  "for node %(node)s: %(err)s",
                  {'node': task.node.uuid, 'err': ex})

    storage_size = 0

    try:
        LOG.debug("Attempting to discover system storage volume size for "
                  "node %(node)s", {'node': task.node.uuid})
        if system.storage and system.storage.volumes_sizes_bytes:
            storage_size = [
                size for size in system.storage.volumes_sizes_bytes
                if size >= 4 * units.Gi
            ] or [0]

            storage_size = storage_size[0]

    except sushy.exceptions.SushyError as ex:
        LOG.debug("No storage volume information discovered "
                  "for node %(node)s: %(err)s",
                  {'node': task.node.uuid, 'err': ex})

    try:
        # Drives are consulted only when no usable volume size was found.
        if not storage_size:
            LOG.debug("Attempting to discover system storage drive size "
                      "for node %(node)s", {'node': task.node.uuid})
            if system.storage and system.storage.drives_sizes_bytes:
                storage_size = [
                    size for size in system.storage.drives_sizes_bytes
                    if size >= 4 * units.Gi
                ] or [0]

                storage_size = storage_size[0]

    except sushy.exceptions.SushyError as ex:
        LOG.debug("No storage drive information discovered "
                  "for node %(node)s: %(err)s",
                  {'node': task.node.uuid, 'err': ex})

    # NOTE(etingof): pick the smallest disk larger than 4G among available
    if simple_storage_size and storage_size:
        local_gb = min(simple_storage_size, storage_size)
    else:
        local_gb = max(simple_storage_size, storage_size)

    # Note(deray): Convert the received size to GiB and reduce the
    # value by 1 GB as consumers like Ironic requires the ``local_gb``
    # to be returned 1 less than actual size.
    local_gb = max(0, int(local_gb / units.Gi - 1))

    # TODO(etingof): should we respect root device hints here?

    if local_gb:
        inspected_properties['local_gb'] = str(local_gb)
    else:
        LOG.warning("Could not provide a valid storage size configured "
                    "for node %(node)s. Assuming this is a disk-less node",
                    {'node': task.node.uuid})
        inspected_properties['local_gb'] = '0'

    if system.boot.mode:
        # Only fill in boot_mode when the node does not already carry
        # that capability.
        if not drivers_utils.get_node_capability(task.node, 'boot_mode'):
            capabilities = utils.get_updated_capabilities(
                inspected_properties.get('capabilities', ''),
                {'boot_mode': BOOT_MODE_MAP[system.boot.mode]})
            inspected_properties['capabilities'] = capabilities

    valid_keys = self.ESSENTIAL_PROPERTIES
    missing_keys = valid_keys - set(inspected_properties)
    if missing_keys:
        # Interpolate with ``%``; without it the message would be a
        # (format, mapping) tuple rather than a readable string.
        error = (_('Failed to discover the following properties: '
                   '%(missing_keys)s on node %(node)s') %
                 {'missing_keys': ', '.join(missing_keys),
                  'node': task.node.uuid})
        raise exception.HardwareInspectionFailure(error=error)

    task.node.properties = inspected_properties
    task.node.save()

    LOG.debug("Node properties for %(node)s are updated as "
              "%(properties)s",
              {'properties': inspected_properties,
               'node': task.node.uuid})

    if (system.ethernet_interfaces
            and system.ethernet_interfaces.summary):
        macs = system.ethernet_interfaces.summary

        # Create ports for the discovered NICs being in 'enabled' state
        enabled_macs = {nic_mac: nic_state
                        for nic_mac, nic_state in macs.items()
                        if nic_state == sushy.STATE_ENABLED}
        if enabled_macs:
            inspect_utils.create_ports_if_not_exist(
                task, enabled_macs, get_mac_address=lambda x: x[0])
        else:
            LOG.warning("Not attempting to create any port as no NICs "
                        "were discovered in 'enabled' state for node "
                        "%(node)s: %(mac_data)s",
                        {'mac_data': macs, 'node': task.node.uuid})
    else:
        LOG.warning("No NIC information discovered "
                    "for node %(node)s", {'node': task.node.uuid})

    return states.MANAGEABLE
def inspect_hardware(self, task):
    """Inspect hardware.

    Inspect hardware to obtain the essential & additional hardware
    properties. The discovered properties, including the boot_mode and
    pci_gpu_devices capabilities, are merged into the node's properties
    and saved. A port is then created for every NIC the client lists,
    flagged as PXE-enabled when its id is among the PXE device NICs.

    :param task: a TaskManager instance containing the node to act on.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: states.MANAGEABLE
    """
    node = task.node
    client = drac_common.get_drac_client(node)
    properties = {}

    try:
        properties['memory_mb'] = sum(
            memory.size_mb for memory in client.list_memory())

        cpus = client.list_cpus()
        # With no CPUs reported the keys stay unset, so the missing-keys
        # check below fails the inspection.
        if cpus:
            properties['cpus'] = sum(
                self._calculate_cpus(cpu) for cpu in cpus)
            properties['cpu_arch'] = 'x86_64' if cpus[0].arch64 else 'x86'

        bios_settings = client.list_bios_settings()
        video_controllers = client.list_video_controllers()
        current_capabilities = node.properties.get('capabilities', '')
        new_capabilities = {
            'boot_mode': bios_settings["BootMode"].current_value.lower(),
            'pci_gpu_devices': self._calculate_gpus(video_controllers)}

        capabilities = utils.get_updated_capabilities(
            current_capabilities, new_capabilities)
        properties['capabilities'] = capabilities

        # Prefer a virtual disk as the root disk; fall back to the
        # physical disks only when no virtual disk qualifies.
        virtual_disks = client.list_virtual_disks()
        root_disk = self._guess_root_disk(virtual_disks)
        if root_disk:
            properties['local_gb'] = int(root_disk.size_mb / units.Ki)
        else:
            physical_disks = client.list_physical_disks()
            root_disk = self._guess_root_disk(physical_disks)
            if root_disk:
                properties['local_gb'] = int(root_disk.size_mb / units.Ki)
    except drac_exceptions.BaseClientException as exc:
        LOG.error('DRAC driver failed to introspect node '
                  '%(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=exc)

    valid_keys = self.ESSENTIAL_PROPERTIES
    missing_keys = valid_keys - set(properties)
    if missing_keys:
        error = (_('Failed to discover the following properties: '
                   '%(missing_keys)s') %
                 {'missing_keys': ', '.join(missing_keys)})
        raise exception.HardwareInspectionFailure(error=error)

    node.properties = dict(node.properties, **properties)
    node.save()

    try:
        nics = client.list_nics()
    except drac_exceptions.BaseClientException as exc:
        LOG.error('DRAC driver failed to introspect node '
                  '%(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': exc})
        raise exception.HardwareInspectionFailure(error=exc)

    pxe_dev_nics = self._get_pxe_dev_nics(client, nics, node)
    # _get_pxe_dev_nics returns a list, never None, so test for an
    # empty result; an ``is None`` check would never emit the warning.
    if not pxe_dev_nics:
        pxe_dev_nics = []
        LOG.warning('No PXE enabled NIC was found for node '
                    '%(node_uuid)s.', {'node_uuid': node.uuid})

    for nic in nics:
        try:
            port = objects.Port(task.context, address=nic.mac,
                                node_id=node.id,
                                pxe_enabled=(nic.id in pxe_dev_nics))
            port.create()

            LOG.info('Port created with MAC address %(mac)s '
                     'for node %(node_uuid)s during inspection',
                     {'mac': nic.mac, 'node_uuid': node.uuid})
        except exception.MACAlreadyExists:
            # An already-registered MAC is not fatal; log and continue.
            LOG.warning('Failed to create a port with MAC address '
                        '%(mac)s when inspecting the node '
                        '%(node_uuid)s because the address is already '
                        'registered',
                        {'mac': nic.mac, 'node_uuid': node.uuid})

    LOG.info('Node %s successfully inspected.', node.uuid)
    return states.MANAGEABLE
def inspect_hardware(self, task):
    """Inspect hardware to get the hardware properties.

    Inspects hardware to get the essential properties.
    It fails if any of the essential properties
    are not received from the node.

    :param task: a TaskManager instance.
    :raises: HardwareInspectionFailure if essential properties
             could not be retrieved successfully.
    :returns: The resulting state of inspection.
    """
    system = redfish_utils.get_system(task.node)

    # get the essential properties and update the node properties
    # with it.
    inspected_properties = task.node.properties

    if system.memory_summary and system.memory_summary.size_gib:
        inspected_properties['memory_mb'] = str(
            system.memory_summary.size_gib * units.Ki)

    if system.processors and system.processors.summary:
        cpus, arch = system.processors.summary
        if cpus:
            inspected_properties['cpus'] = cpus

        if arch:
            try:
                inspected_properties['cpu_arch'] = CPU_ARCH_MAP[arch]
            except KeyError:
                LOG.warning(
                    _("Unknown CPU arch %(arch)s discovered "
                      "for Node %(node)s"),
                    {'node': task.node.uuid, 'arch': arch})

    # In each discovery step below the first reported size of at least
    # 4 GiB is taken, or 0 when there is none.
    simple_storage_size = 0

    try:
        if (system.simple_storage
                and system.simple_storage.disks_sizes_bytes):
            simple_storage_size = [
                size for size in system.simple_storage.disks_sizes_bytes
                if size >= 4 * units.Gi
            ] or [0]

            simple_storage_size = simple_storage_size[0]

    # SushyError is referenced through the ``sushy.exceptions`` module.
    except sushy.exceptions.SushyError:
        LOG.info(
            _("No simple storage information discovered "
              "for Node %(node)s"), {'node': task.node.uuid})

    storage_size = 0

    try:
        if system.storage and system.storage.volumes_sizes_bytes:
            storage_size = [
                size for size in system.storage.volumes_sizes_bytes
                if size >= 4 * units.Gi
            ] or [0]

            storage_size = storage_size[0]

    except sushy.exceptions.SushyError:
        LOG.info(
            _("No storage volume information discovered "
              "for Node %(node)s"), {'node': task.node.uuid})

    local_gb = max(simple_storage_size, storage_size)

    # Note(deray): Convert the received size to GiB and reduce the
    # value by 1 GB as consumers like Ironic requires the ``local_gb``
    # to be returned 1 less than actual size.
    local_gb = max(0, int(local_gb / units.Gi - 1))

    if local_gb:
        inspected_properties['local_gb'] = str(local_gb)
    else:
        # local_gb is left untouched here, so it only passes the
        # essential-keys check if the node already had a value.
        LOG.warning(
            _("Could not provide a valid storage size configured "
              "for Node %(node)s"), {'node': task.node.uuid})

    valid_keys = self.ESSENTIAL_PROPERTIES
    missing_keys = valid_keys - set(inspected_properties)
    if missing_keys:
        # Interpolate with ``%``; without it the message would be a
        # (format, mapping) tuple rather than a readable string.
        error = (_('Failed to discover the following properties: '
                   '%(missing_keys)s on node %(node)s') %
                 {'missing_keys': ', '.join(missing_keys),
                  'node': task.node.uuid})
        raise exception.HardwareInspectionFailure(error=error)

    task.node.properties = inspected_properties
    task.node.save()

    LOG.debug(
        _("Node properties for %(node)s are updated as "
          "%(properties)s"),
        {'properties': inspected_properties,
         'node': task.node.uuid})

    if (system.ethernet_interfaces
            and system.ethernet_interfaces.eth_summary):
        macs = system.ethernet_interfaces.eth_summary

        # Create ports for the nics detected.
        deploy_utils.create_ports_if_not_exist(task, macs)
    else:
        LOG.info(_("No NIC information discovered "
                   "for Node %(node)s"), {'node': task.node.uuid})

    LOG.info(_("Node %(node)s inspected."), {'node': task.node.uuid})
    return states.MANAGEABLE