Exemple #1
0
def _inspect_hardware(node):
    """Gather the essential hardware properties and MACs of a node.

    The iRMC report of the node is fetched, the properties listed in
    IRMCInspect.ESSENTIAL_PROPERTIES are extracted from it, and the MAC
    addresses are collected through ``_get_mac_addresses``.

    :param node: node object.
    :raises: HardwareInspectionFailure, if an SCCI input/client error or an
             SNMP failure occurs while collecting the data.
    :returns: a tuple ``(props, macs)``: the essential properties extracted
              from the report and the result of ``_get_mac_addresses``.
    """
    try:
        irmc_report = irmc_common.get_irmc_report(node)
        essential_props = scci.get_essential_properties(
            irmc_report, IRMCInspect.ESSENTIAL_PROPERTIES)
        mac_addresses = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as exc:
        # Fold the low-level error into a single inspection failure that
        # names the affected node.
        template = _("Inspection failed for node %(node_id)s "
                     "with the following error: %(error)s")
        message = template % {'node_id': node.uuid, 'error': exc}
        raise exception.HardwareInspectionFailure(error=message)

    return (essential_props, mac_addresses)
Exemple #2
0
def _inspect_hardware(node, **kwargs):
    """Inspect the node and get hardware information.

    Besides returning the inspected data, this rewrites
    ``node.properties['capabilities']`` in place: every existing entry whose
    key is one of CAPABILITIES_PROPERTIES is stripped before inspection so
    that it can be replaced by the freshly inspected value.

    :param node: node object.
    :param kwargs: the dictionary of additional parameters, passed through
                   to ``scci.get_capabilities_properties``.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: a pair of dictionary and list, the dictionary contains
              keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its inspected
              values (plus 'capabilities' when any were found), the list
              contains mac addresses.
    """
    capabilities_props = set(CAPABILITIES_PROPERTIES)

    # Remove all capabilities item which will be inspected in the existing
    # capabilities of node. A new list is built instead of calling
    # list.remove() while iterating, which would skip the entry that
    # follows each removed one.
    if 'capabilities' in node.properties:
        existing_cap = node.properties['capabilities'].split(',')
        kept_cap = [prop for prop in existing_cap
                    if prop.split(':')[0] not in capabilities_props]
        node.properties['capabilities'] = ",".join(kept_cap)

    # get gpu_ids in ironic configuration
    values = [gpu_id.lower() for gpu_id in CONF.irmc.gpu_ids]

    # if gpu_ids = [], pci_gpu_devices will not be inspected.
    # discard() keeps this safe even if the key is not in the set.
    if not values:
        capabilities_props.discard('pci_gpu_devices')

    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(report,
                                              IRMCInspect.ESSENTIAL_PROPERTIES)
        d_info = irmc_common.parse_driver_info(node)
        capabilities = scci.get_capabilities_properties(
            d_info, capabilities_props, values, **kwargs)
        if capabilities:
            # A zero GPU count or a False trusted_boot is not recorded as a
            # capability at all.
            if capabilities.get('pci_gpu_devices') == 0:
                capabilities.pop('pci_gpu_devices')
            if capabilities.get('trusted_boot') is False:
                capabilities.pop('trusted_boot')
            capabilities = utils.get_updated_capabilities(
                node.properties.get('capabilities'), capabilities)
            if capabilities:
                props['capabilities'] = capabilities
        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError, scci.SCCIClientError,
            exception.SNMPFailure) as e:
        error = (_("Inspection failed for node %(node_id)s "
                   "with the following error: %(error)s") % {
                       'node_id': node.uuid,
                       'error': e
                   })
        raise exception.HardwareInspectionFailure(error=error)

    return (props, macs)
Exemple #3
0
def _get_sensors_data(task):
    """Fetch the node's sensor records via SCCI and reshape them into a dict.

    The iRMC report of ``task.node`` is retrieved and its sensor records are
    extracted. Every record that carries a sensor type name, sensor type
    number, entity name and entity ID becomes one entry of the result;
    records missing any of those four elements are skipped.

    :param task: A TaskManager instance.
    :raises: FailedToGetSensorData when getting the sensor data fails.
    :returns: A dict keyed by ``"<type name> (<type number>)"``. Each value
              is a dict keyed by ``"<entity name> (<entity id>)"`` holding
              the 'Sensor Reading', 'Sensor ID' and 'Units' of that sensor.
              The original documentation states this format can be
              processed by Ceilometer.
    """

    def _text_or_none(element):
        # A missing element is reported as the literal string "None".
        return "None" if element is None else str(element.text)

    try:
        irmc_report = irmc_common.get_irmc_report(task.node)
        sensor_records = scci.get_sensor_data(irmc_report)
    except (exception.InvalidParameterValue,
            exception.MissingParameterValue,
            scci.SCCIInvalidInputError,
            scci.SCCIClientError) as exc:
        LOG.error(_LE("SCCI get sensor data failed for node %(node_id)s "
                      "with the following error: %(error)s"),
                  {'node_id': task.node.uuid, 'error': exc})
        raise exception.FailedToGetSensorData(
            node=task.node.uuid, error=exc)

    sensors_data = {}
    for record in sensor_records:
        type_name = record.find('./Data/Decoded/Sensor/TypeName')
        type_number = record.find('./Data/Decoded/Sensor/Type')
        entity_name = record.find('./Data/Decoded/Entity/Name')
        entity_id = record.find('./Data/Decoded/Entity/ID')

        identifying = (type_name, type_number, entity_name, entity_id)
        if any(element is None for element in identifying):
            continue

        sensor_type = '%s (%s)' % (type_name.text, type_number.text)
        sensor_id = '%s (%s)' % (entity_name.text, entity_id.text)
        value_text = _text_or_none(
            record.find('./Data/Decoded/Sensor/Thresholds/*/Normalized'))
        units_text = _text_or_none(
            record.find('./Data/Decoded/Sensor/BaseUnitName'))

        sensors_of_type = sensors_data.setdefault(sensor_type, {})
        sensors_of_type[sensor_id] = {
            'Sensor Reading': '%s %s' % (value_text, units_text),
            'Sensor ID': sensor_id,
            'Units': units_text,
        }

    return sensors_data
Exemple #4
0
 def test_get_irmc_report(self, mock_scci):
     # Check that get_irmc_report calls scci.get_report with the values
     # held in self.info and hands back whatever scci.get_report returns.
     # mock_scci is presumably injected by a patch decorator that is not
     # visible in this snippet.
     overrides = (('irmc_port', 80),
                  ('irmc_auth_method', 'digest'),
                  ('irmc_client_timeout', 60))
     for key, value in overrides:
         self.info[key] = value
     mock_scci.get_report.return_value = 'get_report'

     report = irmc_common.get_irmc_report(self.node)

     expected_args = [self.info[key] for key in ('irmc_address',
                                                 'irmc_username',
                                                 'irmc_password')]
     expected_kwargs = {
         'port': self.info['irmc_port'],
         'auth_method': self.info['irmc_auth_method'],
         'client_timeout': self.info['irmc_client_timeout'],
     }
     mock_scci.get_report.assert_called_with(*expected_args,
                                             **expected_kwargs)
     self.assertEqual('get_report', report)
 def test_get_irmc_report(self, mock_scci):
     # Arrange: pin the connection settings and the mocked report value.
     self.info['irmc_port'] = 80
     self.info['irmc_auth_method'] = 'digest'
     self.info['irmc_client_timeout'] = 60
     get_report = mock_scci.get_report
     get_report.return_value = 'get_report'

     # Act.
     actual = irmc_common.get_irmc_report(self.node)

     # Assert: address and credentials go positionally, the rest by keyword,
     # and the mocked value comes back unchanged.
     info = self.info
     get_report.assert_called_with(
         info['irmc_address'], info['irmc_username'], info['irmc_password'],
         port=info['irmc_port'],
         auth_method=info['irmc_auth_method'],
         client_timeout=info['irmc_client_timeout'])
     self.assertEqual('get_report', actual)
Exemple #6
0
 def test_get_irmc_report(self, mock_scci):
     # get_irmc_report must call scci.get_report with the address and
     # credentials positionally and the port, auth method and client timeout
     # as keywords, and must return scci.get_report's result.
     for option, setting in (
         ("irmc_port", 80),
         ("irmc_auth_method", "digest"),
         ("irmc_client_timeout", 60),
     ):
         self.info[option] = setting
     mock_scci.get_report.return_value = "get_report"

     result = irmc_common.get_irmc_report(self.node)

     positional = (
         self.info["irmc_address"],
         self.info["irmc_username"],
         self.info["irmc_password"],
     )
     keywords = dict(
         port=self.info["irmc_port"],
         auth_method=self.info["irmc_auth_method"],
         client_timeout=self.info["irmc_client_timeout"],
     )
     mock_scci.get_report.assert_called_with(*positional, **keywords)
     self.assertEqual("get_report", result)
Exemple #7
0
def _get_sensors_data(task):
    """Read the node's sensor data over SCCI and return it as nested dicts.

    The sensor records are taken from the iRMC report of ``task.node``.
    A record is included only when its sensor type name, sensor type number,
    entity name and entity ID elements are all present.

    :param task: A TaskManager instance.
    :raises: FailedToGetSensorData when fetching the report or extracting
             the sensor data raises any exception.
    :returns: A dict grouped by sensor type (``"<name> (<number>)"``), each
              group mapping a sensor ID (``"<entity name> (<entity id>)"``)
              to its "Sensor Reading", "Sensor ID" and "Units".
    """
    # All four of these elements must be present for a record to be reported.
    required_paths = (
        "./Data/Decoded/Sensor/TypeName",
        "./Data/Decoded/Sensor/Type",
        "./Data/Decoded/Entity/Name",
        "./Data/Decoded/Entity/ID",
    )

    try:
        irmc_report = irmc_common.get_irmc_report(task.node)
        records = scci.get_sensor_data(irmc_report)
    except Exception as exc:
        LOG.error(
            _LE("SCCI get sensor data failed for node %(node_id)s with the following error: %(error)s"),
            {"node_id": task.node.uuid, "error": exc},
        )
        raise exception.FailedToGetSensorData(node=task.node.uuid, error=exc)

    sensors_data = {}
    for record in records:
        required = [record.find(path) for path in required_paths]
        if any(found is None for found in required):
            continue
        type_name, type_number, entity_name, entity_id = required

        sensor_type = "%s (%s)" % (type_name.text, type_number.text)
        sensor_id = "%s (%s)" % (entity_name.text, entity_id.text)

        # Optional elements: a missing one is rendered as the string "None".
        value_elem = record.find("./Data/Decoded/Sensor/Thresholds/*/Normalized")
        if value_elem is None:
            value_text = "None"
        else:
            value_text = str(value_elem.text)

        units_elem = record.find("./Data/Decoded/Sensor/BaseUnitName")
        if units_elem is None:
            units_text = "None"
        else:
            units_text = str(units_elem.text)

        entry = {
            "Sensor Reading": "%s %s" % (value_text, units_text),
            "Sensor ID": sensor_id,
            "Units": units_text,
        }
        sensors_data.setdefault(sensor_type, {})[sensor_id] = entry

    return sensors_data
Exemple #8
0
def _inspect_hardware(node):
    """Return the essential properties and MAC addresses of a node.

    :param node: node object.
    :raises: HardwareInspectionFailure, if the iRMC report, the essential
             properties or the MAC addresses cannot be obtained because of
             an SCCI input/client error or an SNMP failure.
    :returns: a pair ``(props, macs)``: the properties named in
              IRMCInspect.ESSENTIAL_PROPERTIES as extracted from the iRMC
              report, and the result of ``_get_mac_addresses``.
    """
    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(
            report, IRMCInspect.ESSENTIAL_PROPERTIES)
        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as inspect_error:
        details = {'node_id': node.uuid, 'error': inspect_error}
        failure_text = _("Inspection failed for node %(node_id)s "
                         "with the following error: %(error)s") % details
        raise exception.HardwareInspectionFailure(error=failure_text)
    else:
        return (props, macs)
Exemple #9
0
def _inspect_hardware(node, existing_traits=None, **kwargs):
    """Inspect the node and get hardware information.

    Besides returning the inspected data, this rewrites
    ``node.properties['capabilities']`` in place: every existing entry whose
    key is one of CAPABILITIES_PROPERTIES is stripped before inspection so
    that it can be replaced by the freshly inspected value.

    :param node: node object.
    :param existing_traits: existing traits list. It is copied, not mutated.
    :param kwargs: the dictionary of additional parameters, passed through
                   to ``scci.get_capabilities_properties``.
    :raises: HardwareInspectionFailure, if unable to get essential
             hardware properties.
    :returns: a tuple ``(props, macs, new_traits)``: the dictionary contains
              keys as in IRMCInspect.ESSENTIAL_PROPERTIES and its inspected
              values (plus 'capabilities' when any were found), ``macs``
              contains mac addresses, and ``new_traits`` is the copy of
              ``existing_traits`` with 'CUSTOM_CPU_FPGA' added or removed
              according to the inspected ``cpu_fpga`` value.
    """
    capabilities_props = set(CAPABILITIES_PROPERTIES)
    new_traits = list(existing_traits) if existing_traits else []

    # Remove all capabilities item which will be inspected in the existing
    # capabilities of node. A new sequence is built instead of calling
    # list.remove() while iterating, which would skip the entry that
    # follows each removed one.
    if 'capabilities' in node.properties:
        existing_cap = node.properties['capabilities'].split(',')
        kept_cap = [prop for prop in existing_cap
                    if prop.split(':')[0] not in capabilities_props]
        node.properties['capabilities'] = ",".join(kept_cap)

    # get gpu_ids, fpga_ids in ironic configuration
    gpu_ids = [gpu_id.lower() for gpu_id in CONF.irmc.gpu_ids]
    fpga_ids = [fpga_id.lower() for fpga_id in CONF.irmc.fpga_ids]

    # if gpu_ids = [], pci_gpu_devices will not be inspected.
    # discard() keeps this safe even if the key is not in the set.
    if not gpu_ids:
        capabilities_props.discard('pci_gpu_devices')

    # if fpga_ids = [], cpu_fpga will not be inspected
    if not fpga_ids:
        capabilities_props.discard('cpu_fpga')

    try:
        report = irmc_common.get_irmc_report(node)
        props = scci.get_essential_properties(
            report, IRMCInspect.ESSENTIAL_PROPERTIES)
        d_info = irmc_common.parse_driver_info(node)
        capabilities = scci.get_capabilities_properties(
            d_info,
            capabilities_props,
            gpu_ids,
            fpga_ids=fpga_ids,
            **kwargs)
        if capabilities:
            # A zero GPU count is not recorded as a capability.
            if capabilities.get('pci_gpu_devices') == 0:
                capabilities.pop('pci_gpu_devices')

            # cpu_fpga is reported through a trait rather than a capability,
            # so it is always removed from the capabilities dict here.
            cpu_fpga = capabilities.pop('cpu_fpga', 0)
            if cpu_fpga == 0 and 'CUSTOM_CPU_FPGA' in new_traits:
                new_traits.remove('CUSTOM_CPU_FPGA')
            elif cpu_fpga != 0 and 'CUSTOM_CPU_FPGA' not in new_traits:
                new_traits.append('CUSTOM_CPU_FPGA')

            # A False trusted_boot is not recorded as a capability either.
            if capabilities.get('trusted_boot') is False:
                capabilities.pop('trusted_boot')
            capabilities = utils.get_updated_capabilities(
                node.properties.get('capabilities'), capabilities)
            if capabilities:
                props['capabilities'] = capabilities

        macs = _get_mac_addresses(node)
    except (scci.SCCIInvalidInputError,
            scci.SCCIClientError,
            exception.SNMPFailure) as e:
        error = (_("Inspection failed for node %(node_id)s "
                   "with the following error: %(error)s") %
                 {'node_id': node.uuid, 'error': e})
        raise exception.HardwareInspectionFailure(error=error)

    return props, macs, new_traits
Exemple #10
0
    def _query_raid_config_fgi_status(self, manager, context):
        """Periodic tasks to check the progress of running RAID config.

        Walks the unreserved, non-maintenance nodes in CLEANWAIT yielded by
        ``manager.iter_nodes``. For each node whose RAID interface is an
        IRMCRAID, that has a target RAID config and whose ``raid_config``
        carries no ``fgi_status`` yet, the iRMC report is read and the node
        either has its cleaning failed, has its cleaning resumed, or is left
        untouched for a later run.

        :param manager: object providing ``iter_nodes(fields=, filters=)``.
        :param context: context handed to ``task_manager.acquire``.
        """

        def _mark_failed(task, node, raid_config, reason):
            # Record the failure in the RAID info, then fail the clean step.
            raid_config.update({'fgi_status': RAID_FAILED})
            raid_common.update_raid_info(node, raid_config)
            self._set_clean_failed(task, reason)

        wanted = {
            'reserved': False,
            'provision_state': states.CLEANWAIT,
            'maintenance': False,
        }
        rows = manager.iter_nodes(fields=['raid_config'], filters=wanted)
        for node_uuid, _driver, _conductor_group, raid_config in rows:
            try:
                with task_manager.acquire(
                        context,
                        node_uuid,
                        purpose='checking async RAID configuration tasks',
                        shared=True) as task:
                    node = task.node
                    node_uuid = node.uuid
                    # These checks run before the lock is upgraded.
                    if (not isinstance(task.driver.raid, IRMCRAID)
                            or node.target_raid_config is None
                            or not raid_config
                            or raid_config.get('fgi_status')):
                        continue

                    task.upgrade_lock()
                    # The state is checked again after the lock upgrade.
                    if node.provision_state != states.CLEANWAIT:
                        continue
                    # Avoid hitting clean_callback_timeout expiration
                    node.touch_provisioning()

                    try:
                        report = irmc_common.get_irmc_report(node)
                    except (client.scci.SCCIInvalidInputError,
                            client.scci.SCCIClientError):
                        _mark_failed(task, node, raid_config, RAID_FAILED)
                        continue

                    statuses = _get_fgi_status(report, node_uuid)
                    # Note(trungnv): Allow to check until RAID mechanism to be
                    # completed with RAID information in report.
                    if statuses == 'completing':
                        continue
                    if not statuses:
                        # The (falsy) status value itself is passed on as
                        # the failure argument, as in the original code.
                        _mark_failed(task, node, raid_config, statuses)
                        continue
                    if all(state == 'Idle' for state in statuses.values()):
                        raid_config.update({'fgi_status': RAID_COMPLETED})
                        LOG.info(
                            'RAID configuration has completed on '
                            'node %(node)s with fgi_status is %(fgi)s',
                            {'node': node_uuid, 'fgi': RAID_COMPLETED})
                        self._resume_cleaning(task)

            except exception.NodeNotFound:
                LOG.info(
                    'During query_raid_config_job_status, node '
                    '%(node)s was not found raid_config and presumed '
                    'deleted by another process.', {'node': node_uuid})
            except exception.NodeLocked:
                LOG.info(
                    'During query_raid_config_job_status, node '
                    '%(node)s was already locked by another process. '
                    'Skip.', {'node': node_uuid})
Exemple #11
0
    def _query_raid_config_fgi_status(self, manager, context):
        """Periodic tasks to check the progress of running RAID config.

        For every node returned by ``manager.iter_nodes`` (unreserved, not in
        maintenance, provision state CLEANWAIT) a shared task lock is
        acquired. Nodes that do not use IRMCRAID, have no target RAID config,
        or whose ``raid_config`` is empty or already has an ``fgi_status``
        are skipped. Otherwise the lock is upgraded, the iRMC report is read
        and the FGI status decides the outcome: cleaning is failed when the
        report cannot be read or no status is found, cleaning is resumed when
        every status is 'Idle', and nothing is done in the remaining cases.

        :param manager: object providing ``iter_nodes(fields=, filters=)``.
        :param context: context handed to ``task_manager.acquire``.
        """
        lock_purpose = 'checking async RAID configuration tasks'
        candidates = manager.iter_nodes(
            fields=['raid_config'],
            filters={'reserved': False,
                     'provision_state': states.CLEANWAIT,
                     'maintenance': False})

        for node_uuid, _driver, _group, raid_config in candidates:
            try:
                with task_manager.acquire(context, node_uuid,
                                          purpose=lock_purpose,
                                          shared=True) as task:
                    node = task.node
                    node_uuid = node.uuid

                    uses_irmc_raid = isinstance(task.driver.raid, IRMCRAID)
                    if not uses_irmc_raid:
                        continue
                    if node.target_raid_config is None:
                        continue
                    if not raid_config:
                        continue
                    if raid_config.get('fgi_status'):
                        continue

                    task.upgrade_lock()
                    if node.provision_state != states.CLEANWAIT:
                        continue
                    # Avoid hitting clean_callback_timeout expiration
                    node.touch_provisioning()

                    try:
                        report = irmc_common.get_irmc_report(node)
                    except (client.scci.SCCIInvalidInputError,
                            client.scci.SCCIClientError):
                        # Either SCCI error fails the clean step.
                        raid_config.update({'fgi_status': RAID_FAILED})
                        raid_common.update_raid_info(node, raid_config)
                        self._set_clean_failed(task, RAID_FAILED)
                        continue

                    fgi_status_dict = _get_fgi_status(report, node_uuid)
                    # Note(trungnv): Allow to check until RAID mechanism to be
                    # completed with RAID information in report.
                    if fgi_status_dict == 'completing':
                        continue

                    if not fgi_status_dict:
                        raid_config.update({'fgi_status': RAID_FAILED})
                        raid_common.update_raid_info(node, raid_config)
                        self._set_clean_failed(task, fgi_status_dict)
                        continue

                    every_fgi_idle = all(
                        status == 'Idle'
                        for status in fgi_status_dict.values())
                    if every_fgi_idle:
                        raid_config.update({'fgi_status': RAID_COMPLETED})
                        completed_info = {'node': node_uuid,
                                          'fgi': RAID_COMPLETED}
                        LOG.info('RAID configuration has completed on '
                                 'node %(node)s with fgi_status is %(fgi)s',
                                 completed_info)
                        self._resume_cleaning(task)

            except exception.NodeNotFound:
                LOG.info('During query_raid_config_job_status, node '
                         '%(node)s was not found raid_config and presumed '
                         'deleted by another process.', {'node': node_uuid})
            except exception.NodeLocked:
                LOG.info('During query_raid_config_job_status, node '
                         '%(node)s was already locked by another process. '
                         'Skip.', {'node': node_uuid})