Example #1
 def del_host(self, deregister=True):
     # Conductor deregistration fails if called on non-initialized
     # conductor (e.g. when rpc server is unreachable).
     if not hasattr(self, 'conductor'):
         return
     self._keepalive_evt.set()
     if deregister:
         try:
             # Inform the cluster that this conductor is shutting down.
             # Note that rebalancing will not occur immediately, but when
             # the periodic sync takes place.
             self.dbapi.unregister_conductor(self.host)
             LOG.info(_LI('Successfully stopped conductor with hostname '
                          '%(hostname)s.'),
                      {'hostname': self.host})
         except exception.ConductorNotFound:
             pass
     else:
         LOG.info(_LI('Not deregistering conductor with hostname '
                      '%(hostname)s.'),
                  {'hostname': self.host})
     # Waiting here to give workers the chance to finish. This has the
     # benefit of releasing locks workers placed on nodes, as well as
     # having work complete normally.
     self._periodic_tasks.stop()
     self._periodic_tasks.wait()
     self._executor.shutdown(wait=True)
     self._started = False
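
A minimal sketch of the keepalive loop that the _keepalive_evt event above is meant to stop; the method name, the dbapi call and the config option are assumptions for illustration, not taken from this excerpt.

 def _conductor_service_record_keepalive(self):
     # Hypothetical heartbeat loop: refresh this conductor's DB record until
     # del_host() sets the event, then exit.
     while not self._keepalive_evt.is_set():
         try:
             self.dbapi.touch_conductor(self.host)
         except Exception:
             LOG.exception(_LE('Keepalive failed for conductor %s'), self.host)
         # wait() returns early as soon as the event is set
         self._keepalive_evt.wait(CONF.conductor.heartbeat_interval)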
Example #2
    def inspect_hardware(self, task):
        """Inspect hardware.

        Inspect hardware to obtain the essential hardware properties and
        mac addresses.

        :param task: a task from TaskManager.
        :raises: HardwareInspectionFailure, if hardware inspection failed.
        :returns: states.MANAGEABLE, if hardware inspection succeeded.
        """
        node = task.node
        (props, macs) = _inspect_hardware(node)
        node.properties = dict(node.properties, **props)
        node.save()

        for mac in macs:
            try:
                new_port = objects.Port(task.context,
                                        address=mac, node_id=node.id)
                new_port.create()
                LOG.info(_LI("Port created for MAC address %(address)s "
                             "for node %(node_uuid)s during inspection"),
                         {'address': mac, 'node_uuid': node.uuid})
            except exception.MACAlreadyExists:
                LOG.warning(_LW("Port already existed for MAC address "
                                "%(address)s for node %(node_uuid)s "
                                "during inspection"),
                            {'address': mac, 'node_uuid': node.uuid})

        LOG.info(_LI("Node %s inspected"), node.uuid)
        return states.MANAGEABLE
Example #3
def set_boot_mode(node, boot_mode):
    """Sets the node to boot using boot_mode for the next boot.

    :param node: an ironic node object.
    :param boot_mode: Next boot mode.
    :raises: IloOperationError if setting boot mode failed.
    """
    ilo_object = get_ilo_object(node)

    try:
        p_boot_mode = ilo_object.get_pending_boot_mode()
    except ilo_error.IloCommandNotSupportedError:
        p_boot_mode = DEFAULT_BOOT_MODE

    if BOOT_MODE_ILO_TO_GENERIC[p_boot_mode.lower()] == boot_mode:
        LOG.info(_LI("Node %(uuid)s pending boot mode is %(boot_mode)s."),
                 {'uuid': node.uuid, 'boot_mode': boot_mode})
        return

    try:
        ilo_object.set_pending_boot_mode(
            BOOT_MODE_GENERIC_TO_ILO[boot_mode].upper())
    except ilo_error.IloError as ilo_exception:
        operation = _("Setting %s as boot mode") % boot_mode
        raise exception.IloOperationError(
            operation=operation, error=ilo_exception)

    LOG.info(_LI("Node %(uuid)s boot mode is set to %(boot_mode)s."),
             {'uuid': node.uuid, 'boot_mode': boot_mode})
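
For reference, set_boot_mode() above depends on two module-level translation tables between generic and iLO boot-mode names, plus a default. A plausible shape for them, given here only as an assumption about the surrounding module:

BOOT_MODE_GENERIC_TO_ILO = {'bios': 'legacy', 'uefi': 'uefi'}
BOOT_MODE_ILO_TO_GENERIC = {
    ilo_mode: generic
    for generic, ilo_mode in BOOT_MODE_GENERIC_TO_ILO.items()}
DEFAULT_BOOT_MODE = 'LEGACY'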
Example #4
def _check_option_support(options):
    """Checks if the specific ipmitool options are supported on host.

    This method updates the module-level variables indicating whether
    an option is supported so that it is accessible by any driver
    interface class in this module. It is intended to be called from
    the __init__ method of such classes only.

    :param options: list of ipmitool options to be checked
    :raises: OSError
    """
    for opt in options:
        if _is_option_supported(opt) is None:
            try:
                cmd = ipmitool_command_options[opt]
                # NOTE(cinerama): use subprocess.check_call to
                # check options & suppress ipmitool output to
                # avoid alarming people
                with open(os.devnull, 'wb') as nullfile:
                    subprocess.check_call(cmd, stdout=nullfile,
                                          stderr=nullfile)
            except subprocess.CalledProcessError:
                LOG.info(_LI("Option %(opt)s is not supported by ipmitool"),
                         {'opt': opt})
                _is_option_supported(opt, False)
            else:
                LOG.info(_LI("Option %(opt)s is supported by ipmitool"),
                         {'opt': opt})
                _is_option_supported(opt, True)
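
_check_option_support() relies on a module-level cache behind _is_option_supported(); a minimal sketch of such a getter/setter helper, assuming a plain dict cache (the variable name is hypothetical):

_OPTION_SUPPORT = {}


def _is_option_supported(option, is_supported=None):
    # With one argument: report the cached answer (None means "not checked
    # yet"). With two arguments: record the answer for later callers.
    if is_supported is not None:
        _OPTION_SUPPORT[option] = is_supported
    return _OPTION_SUPPORT.get(option)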
Example #5
    def continue_cleaning(self, task, **kwargs):
        """Start the next cleaning step if the previous one is complete.

        In order to avoid errors and make agent upgrades painless, cleaning
        will check the version of all hardware managers during get_clean_steps
        at the beginning of cleaning and before executing each step in the
        agent. If the version has changed between steps, the agent is unable
        to tell if an ordering change will cause a cleaning issue. Therefore,
        we restart cleaning.
        """
        command = self._get_completed_cleaning_command(task)
        LOG.debug(
            "Cleaning command status for node %(node)s on step %(step)s:" " %(command)s",
            {"node": task.node.uuid, "step": task.node.clean_step, "command": command},
        )

        if not command:
            # Command is not done yet
            return

        if command.get("command_status") == "FAILED":
            msg = _("Agent returned error for clean step %(step)s on node " "%(node)s : %(err)s.") % {
                "node": task.node.uuid,
                "err": command.get("command_error"),
                "step": task.node.clean_step,
            }
            LOG.error(msg)
            return manager.cleaning_error_handler(task, msg)
        elif command.get("command_status") == "CLEAN_VERSION_MISMATCH":
            # Restart cleaning, agent must have rebooted to new version
            LOG.info(
                _LI("Node %s detected a clean version mismatch, " "resetting clean steps and rebooting the node."),
                task.node.uuid,
            )
            try:
                manager.set_node_cleaning_steps(task)
            except exception.NodeCleaningFailure:
                msg = _("Could not restart cleaning on node %(node)s: " "%(err)s.") % {
                    "node": task.node.uuid,
                    "err": command.get("command_error"),
                    "step": task.node.clean_step,
                }
                LOG.exception(msg)
                return manager.cleaning_error_handler(task, msg)
            self._notify_conductor_resume_clean(task)

        elif command.get("command_status") == "SUCCEEDED":
            LOG.info(
                _LI("Agent on node %s returned cleaning command success, " "moving to next clean step"), task.node.uuid
            )
            self._notify_conductor_resume_clean(task)
        else:
            msg = _("Agent returned unknown status for clean step %(step)s " "on node %(node)s : %(err)s.") % {
                "node": task.node.uuid,
                "err": command.get("command_status"),
                "step": task.node.clean_step,
            }
            LOG.error(msg)
            return manager.cleaning_error_handler(task, msg)
Example #6
    def continue_cleaning(self, task, **kwargs):
        """Start the next cleaning step if the previous one is complete.

        In order to avoid errors and make agent upgrades painless, cleaning
        will check the version of all hardware managers during get_clean_steps
        at the beginning of cleaning and before executing each step in the
        agent. If the version has changed between steps, the agent is unable
        to tell if an ordering change will cause a cleaning issue. Therefore,
        we restart cleaning.
        """
        command = self._get_completed_cleaning_command(task)
        LOG.debug('Cleaning command status for node %(node)s on step %(step)s:'
                  ' %(command)s', {'node': task.node.uuid,
                                   'step': task.node.clean_step,
                                   'command': command})

        if not command:
            # Command is not done yet
            return

        if command.get('command_status') == 'FAILED':
            msg = (_('Agent returned error for clean step %(step)s on node '
                     '%(node)s : %(err)s.') %
                   {'node': task.node.uuid,
                    'err': command.get('command_error'),
                    'step': task.node.clean_step})
            LOG.error(msg)
            return manager.cleaning_error_handler(task, msg)
        elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
            # Restart cleaning, agent must have rebooted to new version
            LOG.info(_LI('Node %s detected a clean version mismatch, '
                         'resetting clean steps and rebooting the node.'),
                     task.node.uuid)
            try:
                manager.set_node_cleaning_steps(task)
            except exception.NodeCleaningFailure:
                msg = (_('Could not restart cleaning on node %(node)s: '
                         '%(err)s.') %
                       {'node': task.node.uuid,
                        'err': command.get('command_error'),
                        'step': task.node.clean_step})
                LOG.exception(msg)
                return manager.cleaning_error_handler(task, msg)
            self._notify_conductor_resume_clean(task)

        elif command.get('command_status') == 'SUCCEEDED':
            LOG.info(_LI('Agent on node %s returned cleaning command success, '
                         'moving to next clean step'), task.node.uuid)
            self._notify_conductor_resume_clean(task)
        else:
            msg = (_('Agent returned unknown status for clean step %(step)s '
                     'on node %(node)s : %(err)s.') %
                   {'node': task.node.uuid,
                    'err': command.get('command_status'),
                    'step': task.node.clean_step})
            LOG.error(msg)
            return manager.cleaning_error_handler(task, msg)
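
Both versions of continue_cleaning() above rely on _get_completed_cleaning_command(); a hedged sketch of what such a helper plausibly looks like (the agent-client call and key names are assumptions):

    def _get_completed_cleaning_command(self, task):
        # Ask the agent for its command history and return the last
        # execute_clean_step command only once it has stopped running;
        # otherwise return None so the caller keeps polling.
        commands = self._client.get_commands_status(task.node)
        if not commands:
            return None
        last = commands[-1]
        if last.get('command_name') != 'execute_clean_step':
            return None
        if last.get('command_status') == 'RUNNING':
            return None
        return last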
 def v2_runner_on_ok(self, result):
     msg_dict = self.runner_msg_dict(result)
     if msg_dict['name'] == 'setup':
         LOG.info(i18n._LI(
             "Ansible task 'setup' complete on node %(node)s"),
             msg_dict)
     else:
         LOG.info(i18n._LI(
             "Ansible task %(name)s complete on node %(node)s: %(res)s"),
             msg_dict)
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node."""

        node = task.node
        task.process_event('resume')
        err_msg = _('Failed to continue deployment with Fuel Agent.')

        agent_status = kwargs.get('status')
        if agent_status != 'ready':
            LOG.error(_LE('Deploy failed for node %(node)s. Fuel Agent is not '
                          'in ready state, error: %(error)s'),
                      {'node': node.uuid,
                       'error': kwargs.get('error_message')})
            deploy_utils.set_failed_state(task, err_msg)
            return

        params = _parse_driver_info(node)
        params['host'] = kwargs.get('address')
        cmd = ('%s --data_driver ironic  --config-file '
               '/etc/fuel-agent/fuel-agent.conf' % params.pop('script'))
        if CONF.debug:
            cmd += ' --debug'
        instance_info = node.instance_info

        try:
            deploy_data = _get_deploy_data(task.context,
                                           instance_info['image_source'])

            image_data = {"/": {"uri": instance_info['image_url'],
                                "format": "raw",
                                "container": "raw"}}

            deploy_data['ks_meta']['image_data'] = image_data

            ssh = utils.ssh_connect(params)
            sftp = ssh.open_sftp()
            _sftp_upload(sftp, json.dumps(deploy_data), '/tmp/provision.json')

            # swift configdrive store should be disabled
            configdrive = instance_info.get('configdrive')
            if configdrive is not None:
                _sftp_upload(sftp, configdrive, '/tmp/config-drive.img')

            _ssh_execute(ssh, cmd, params)
            LOG.info(_LI('Fuel Agent pass on node %s'), node.uuid)
            manager_utils.node_set_boot_device(task, boot_devices.DISK,
                                               persistent=True)
            manager_utils.node_power_action(task, states.REBOOT)
        except Exception as e:
            msg = (_('Deploy failed for node %(node)s. Error: %(error)s') %
                   {'node': node.uuid, 'error': e})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
        else:
            task.process_event('done')
            LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
Example #9
def _set_power_state(node, target_state):
    """Set power state of the AMT Client.

    :param node: a node object.
    :param target_state: desired power state.
    :raises: AMTFailure
    :raises: AMTConnectFailure
    """
    client = amt_common.get_wsman_client(node)

    method = 'RequestPowerStateChange'
    options = pywsman.ClientOptions()
    options.add_selector('Name', 'Intel(r) AMT Power Management Service')

    doc = _generate_power_action_input(AMT_POWER_MAP[target_state])
    try:
        client.wsman_invoke(options, resource_uris.CIM_PowerManagementService,
                            method, doc)
    except (exception.AMTFailure, exception.AMTConnectFailure) as e:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Failed to set power state %(state)s for "
                              "node %(node_id)s with error: %(error)s."),
                          {'state': target_state, 'node_id': node.uuid,
                           'error': e})
    else:
        LOG.info(_LI("Power state set to %(state)s for node %(node_id)s"),
                 {'state': target_state, 'node_id': node.uuid})
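
AMT_POWER_MAP above translates ironic power states into the numeric CIM power-state values that RequestPowerStateChange expects. A plausible definition, given purely as an assumption:

AMT_POWER_MAP = {
    states.POWER_ON: '2',   # CIM "Power On"
    states.POWER_OFF: '8',  # CIM "Power Off - Soft"
}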
def _deploy(task, node_address):
    """Internal function for deployment to a node."""
    notags = ['wait'] if CONF.ansible.use_ramdisk_callback else []
    node = task.node
    LOG.debug('IP of node %(node)s is %(ip)s',
              {'node': node.uuid, 'ip': node_address})
    iwdi = node.driver_internal_info.get('is_whole_disk_image')
    variables = _prepare_variables(task)
    if iwdi:
        notags.append('parted')
    else:
        variables.update(_parse_partitioning_info(task.node))
    playbook, user, key = _parse_ansible_driver_info(task.node)
    node_list = [(node.uuid, node_address, user, node.extra)]
    extra_vars = _prepare_extra_vars(node_list, variables=variables)

    LOG.debug('Starting deploy on node %s', node.uuid)
    # any caller should manage exceptions raised from here
    _run_playbook(playbook, extra_vars, key, notags=notags)
    LOG.info(_LI('Ansible deploy is complete on node %s'), node.uuid)

    LOG.debug('Rebooting node %s to instance', node.uuid)
    manager_utils.node_set_boot_device(task, 'disk', persistent=True)
    _reboot_and_finish_deploy(task)

    task.driver.boot.clean_up_ramdisk(task)
Example #11
    def _init_extension_manager(cls):
        # NOTE(deva): In case multiple greenthreads queue up on this lock
        #             before _extension_manager is initialized, prevent
        #             creation of multiple NameDispatchExtensionManagers.
        if cls._extension_manager:
            return

        # NOTE(deva): Drivers raise "DriverLoadError" if they are unable to be
        #             loaded, eg. due to missing external dependencies.
        #             We capture that exception, and, only if it is for an
        #             enabled driver, raise it from here. If enabled driver
        #             raises other exception type, it is wrapped in
        #             "DriverLoadError", providing the name of the driver that
        #             caused it, and raised. If the exception is for a
        #             non-enabled driver, we suppress it.
        def _catch_driver_not_found(mgr, ep, exc):
            # NOTE(deva): stevedore loads plugins *before* evaluating
            #             _check_func, so we need to check here, too.
            if ep.name in CONF.enabled_drivers:
                if not isinstance(exc, exception.DriverLoadError):
                    raise exception.DriverLoadError(driver=ep.name, reason=exc)
                raise exc

        def _check_func(ext):
            return ext.name in CONF.enabled_drivers

        cls._extension_manager = dispatch.NameDispatchExtensionManager(
            "ironic.drivers", _check_func, invoke_on_load=True, on_load_failure_callback=_catch_driver_not_found
        )
        LOG.info(_LI("Loaded the following drivers: %s"), cls._extension_manager.names())
Example #12
 def _on_periodic_tasks_stop(self, fut):
     try:
         fut.result()
     except Exception as exc:
         LOG.critical(_LC('Periodic tasks worker has failed: %s'), exc)
     else:
         LOG.info(_LI('Successfully shut down periodic tasks'))
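
A hedged sketch of how such a callback is typically wired up, assuming the periodic worker is started on a futures executor (the method name below is hypothetical):

 def _spawn_periodic_tasks(self):
     # Run the periodic worker on the executor; the returned future invokes
     # _on_periodic_tasks_stop() once the worker exits, whether it finished
     # cleanly or raised.
     self._periodic_tasks_worker = self._executor.submit(
         self._periodic_tasks.start, allow_empty=True)
     self._periodic_tasks_worker.add_done_callback(
         self._on_periodic_tasks_stop)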
Example #13
    def add_cleaning_network(self, task):
        """Add the cleaning network to a node.

        :param task: A TaskManager instance.
        :returns: a dictionary in the form {port.uuid: neutron_port['id']}
        :raises: NetworkError, InvalidParameterValue
        """
        if not uuidutils.is_uuid_like(CONF.neutron.cleaning_network_uuid):
            raise exception.InvalidParameterValue(
                _(
                    "You must provide a valid cleaning network UUID in "
                    "[neutron]cleaning_network_uuid configuration option."
                )
            )
        # If we have left over ports from a previous cleaning, remove them
        neutron.rollback_ports(task, CONF.neutron.cleaning_network_uuid)
        LOG.info(_LI("Adding cleaning network to node %s"), task.node.uuid)
        vifs = neutron.add_ports_to_network(task, CONF.neutron.cleaning_network_uuid, is_flat=True)
        for port in task.ports:
            if port.uuid in vifs:
                internal_info = port.internal_info
                internal_info["cleaning_vif_port_id"] = vifs[port.uuid]
                port.internal_info = internal_info
                port.save()
        return vifs
Example #14
def _setup_vmedia_for_boot(task, bootable_iso_filename, parameters=None):
    """Sets up the node to boot from the boot ISO image.

    This method attaches a boot_iso on the node and passes
    the required parameters to it via a virtual floppy image.

    :param task: a TaskManager instance containing the node to act on.
    :param bootable_iso_filename: a bootable ISO image to attach to.
        The iso file should be present in NFS/CIFS server.
    :param parameters: the parameters to pass in a virtual floppy image
        in a dictionary.  This is optional.
    :raises: ImageCreationFailed, if it failed while creating a floppy image.
    :raises: IRMCOperationError, if attaching a virtual media failed.
    """
    LOG.info(_LI("Setting up node %s to boot from virtual media"),
             task.node.uuid)

    _detach_virtual_cd(task.node)
    _detach_virtual_fd(task.node)

    if parameters:
        floppy_image_filename = _prepare_floppy_image(task, parameters)
        _attach_virtual_fd(task.node, floppy_image_filename)

    _attach_virtual_cd(task.node, bootable_iso_filename)
Example #15
def main():
    # Parse config file and command line options, then start logging
    ironic_service.prepare_service(sys.argv)

    # Enable object backporting via the conductor
    base.IronicObject.indirection_api = base.IronicObjectIndirectionAPI()

    # Build and start the WSGI app
    host = CONF.api.host_ip
    port = CONF.api.port
    wsgi = simple_server.make_server(
        host, port,
        app.VersionSelectorApplication(),
        server_class=ThreadedSimpleServer)

    LOG = log.getLogger(__name__)
    LOG.info(_LI("Serving on http://%(host)s:%(port)s"),
             {'host': host, 'port': port})
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    try:
        wsgi.serve_forever()
    except KeyboardInterrupt:
        pass
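
main() above passes a ThreadedSimpleServer class that is not shown in this excerpt; a minimal sketch of what such a server class typically looks like (an assumption, using the Python 3 module name; on Python 2 the mixin lives in SocketServer):

import socketserver
from wsgiref import simple_server


class ThreadedSimpleServer(socketserver.ThreadingMixIn,
                           simple_server.WSGIServer):
    """WSGI server that handles each request in its own thread."""
    daemon_threads = True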
    def execute_clean_step(self, task, step):
        """Execute a clean step.

        :param task: a TaskManager object containing the node
        :param step: a clean step dictionary to execute
        :returns: None
        """
        node = task.node
        playbook, user, key = _parse_ansible_driver_info(
            task.node, action='clean')
        stepname = step['step']
        try:
            ip_addr = node.driver_internal_info['ansible_cleaning_ip']
        except KeyError:
            raise exception.NodeCleaningFailure(node=node.uuid,
                                                reason='undefined node IP '
                                                'addresses')
        node_list = [(node.uuid, ip_addr, user, node.extra)]
        extra_vars = _prepare_extra_vars(node_list)

        LOG.debug('Starting cleaning step %(step)s on node %(node)s',
                  {'node': node.uuid, 'step': stepname})
        step_tags = step['args'].get('tags', [])
        try:
            _run_playbook(playbook, extra_vars, key,
                          tags=step_tags)
        except exception.InstanceDeployFailure as e:
            LOG.error(_LE("Ansible failed cleaning step %(step)s "
                          "on node %(node)s."), {
                              'node': node.uuid, 'step': stepname})
            manager_utils.cleaning_error_handler(task, six.text_type(e))
        else:
            LOG.info(_LI('Ansible completed cleaning step %(step)s '
                         'on node %(node)s.'),
                     {'node': node.uuid, 'step': stepname})
Example #17
    def set_power_state(self, task, pstate):
        """Wakes the task's node on power on. Powering off is not supported.

        Wakes the task's node on. Wake-On-Lan does not support powering
        the task's node off so, just log it.

        :param task: a TaskManager instance containing the node to act on.
        :param pstate: The desired power state, one of ironic.common.states
            POWER_ON, POWER_OFF.
        :raises: InvalidParameterValue if parameters are invalid.
        :raises: MissingParameterValue if required parameters are missing.
        :raises: WolOperationError if an error occur when sending the
            magic packets

        """
        node = task.node
        params = _parse_parameters(task)
        if pstate == states.POWER_ON:
            _send_magic_packets(task, params['host'], params['port'])
        elif pstate == states.POWER_OFF:
            LOG.info(_LI('Power off called for node %s. Wake-On-Lan does not '
                         'support this operation. Manual intervention '
                         'required to perform this action.'), node.uuid)
        else:
            raise exception.InvalidParameterValue(_(
                "set_power_state called for Node %(node)s with invalid "
                "power state %(pstate)s.") % {'node': node.uuid,
                                              'pstate': pstate})
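
_send_magic_packets() is not shown in this excerpt; a standalone sketch of how a Wake-On-Lan magic packet is typically built and sent, one packet per port on the node (the helper signature is an assumption):

import binascii
import socket


def _send_magic_packets(task, dest_host, dest_port):
    # A magic packet is 6 bytes of 0xFF followed by the target MAC address
    # repeated 16 times, sent over UDP (usually as a broadcast).
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    try:
        for port in task.ports:
            mac_bytes = binascii.unhexlify(port.address.replace(':', ''))
            packet = b'\xff' * 6 + mac_bytes * 16
            sock.sendto(packet, (dest_host, dest_port))
    finally:
        sock.close()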
Example #18
def _attach_virtual_cd(node, bootable_iso_filename):
    """Attaches the given url as virtual media on the node.

    :param node: an ironic node object.
    :param bootable_iso_filename: a bootable ISO image to attach to.
        The iso file should be present in NFS/CIFS server.
    :raises: IRMCOperationError if attaching virtual media failed.
    """
    try:
        irmc_client = irmc_common.get_irmc_client(node)

        cd_set_params = scci.get_virtual_cd_set_params_cmd(
            CONF.irmc.remote_image_server,
            CONF.irmc.remote_image_user_domain,
            scci.get_share_type(CONF.irmc.remote_image_share_type),
            CONF.irmc.remote_image_share_name,
            bootable_iso_filename,
            CONF.irmc.remote_image_user_name,
            CONF.irmc.remote_image_user_password,
        )

        irmc_client(cd_set_params, async=False)
        irmc_client(scci.MOUNT_CD, async=False)

    except scci.SCCIClientError as irmc_exception:
        LOG.exception(
            _LE("Error while inserting virtual cdrom " "into node %(uuid)s. Error: %(error)s"),
            {"uuid": node.uuid, "error": irmc_exception},
        )
        operation = _("Inserting virtual cdrom")
        raise exception.IRMCOperationError(operation=operation, error=irmc_exception)

    LOG.info(_LI("Attached virtual cdrom successfully" " for node %s"), node.uuid)
Example #19
def _enable_boot_config(node):
    """Enable boot configuration of AMT Client.

    :param node: a node object
    :raises: AMTFailure
    :raises: AMTConnectFailure
    """
    client = amt_common.get_wsman_client(node)
    config = pywsman.EndPointReference(resource_uris.CIM_BootConfigSetting,
                                       None)
    config.add_selector('InstanceID', 'Intel(r) AMT: Boot Configuration 0')

    method = 'SetBootConfigRole'

    options = pywsman.ClientOptions()
    options.add_selector('Name', 'Intel(r) AMT Boot Service')

    options.add_property('Role', '1')
    options.add_property('BootConfigSetting', config)
    try:
        client.wsman_invoke(options, resource_uris.CIM_BootService, method)
    except (exception.AMTFailure, exception.AMTConnectFailure) as e:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Failed to enable boot config for node "
                              "%(node_id)s with error: %(error)s."),
                          {'node_id': node.uuid, 'error': e})
    else:
        LOG.info(_LI("Successfully enabled boot config for node %(node_id)s."),
                 {'node_id': node.uuid})
Example #20
def _check_status(task):
    """Check inspection status for node given by a task."""
    node = task.node
    if node.provision_state != states.INSPECTING:
        return
    if not isinstance(task.driver.inspect, Inspector):
        return

    LOG.debug('Calling to inspector to check status of node %s',
              task.node.uuid)

    # NOTE(dtantsur): periodic tasks do not have proper tokens in context
    task.context.auth_token = keystone.get_admin_auth_token()
    try:
        status = _call_inspector(client.get_status, node.uuid, task.context)
    except Exception:
        # NOTE(dtantsur): get_status should not normally raise
        # let's assume it's a transient failure and retry later
        LOG.exception(_LE('Unexpected exception while getting '
                          'inspection status for node %s, will retry later'),
                      node.uuid)
        return

    if status.get('error'):
        LOG.error(_LE('Inspection failed for node %(uuid)s '
                      'with error: %(err)s'),
                  {'uuid': node.uuid, 'err': status['error']})
        node.last_error = (_('ironic-inspector inspection failed: %s')
                           % status['error'])
        task.process_event('fail')
    elif status.get('finished'):
        LOG.info(_LI('Inspection finished successfully for node %s'),
                 node.uuid)
        task.process_event('done')
Example #21
def finish_deploy(task, address):
    """Notifies the ramdisk to reboot the node and makes the instance active.

    This method notifies the ramdisk to proceed to reboot and then
    makes the instance active.

    :param task: a TaskManager object.
    :param address: The IP address of the bare metal node.
    :raises: InstanceDeployFailure, if notifying ramdisk failed.
    """
    node = task.node
    try:
        deploy_utils.notify_ramdisk_to_proceed(address)
    except Exception as e:
        LOG.error(
            _LE("Deploy failed for instance %(instance)s. " "Error: %(error)s"),
            {"instance": node.instance_uuid, "error": e},
        )
        msg = _("Failed to notify ramdisk to reboot after bootloader " "installation. Error: %s") % e
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(msg)

    # TODO(lucasagomes): When deploying a node with the DIB ramdisk
    # Ironic will not power control the node at the end of the deployment,
    # it's the DIB ramdisk that reboots the node. But, for the SSH driver
    # some changes like setting the boot device only gets applied when the
    # machine is powered off and on again. So the code below is enforcing
    # it. For Liberty we need to change the DIB ramdisk so that Ironic
    # always controls the power state of the node for all drivers.
    if deploy_utils.get_boot_option(node) == "local" and "ssh" in node.driver:
        manager_utils.node_power_action(task, states.REBOOT)

    LOG.info(_LI("Deployment to node %s done"), node.uuid)
    task.process_event("done")
Example #22
    def reboot_to_instance(self, task, **kwargs):
        task.process_event('resume')
        node = task.node
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.info(_LI('Image successfully written to node %s'), node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        self.reboot_and_finish_deploy(task)

        # NOTE(TheJulia): If we deployed a whole disk image, we should
        # clean up the tftp files on disk in case the node is
        # disregarding the boot preference.
        # TODO(rameshg87): Not all in-tree drivers using reboot_to_instance
        # have a boot interface. So include a check for now. Remove this
        # check once all in-tree drivers have a boot interface.
        if task.driver.boot:
            task.driver.boot.clean_up_ramdisk(task)
Example #23
    def reboot_to_instance(self, task, **kwargs):
        task.process_event('resume')
        node = task.node
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.info(_LI('Image successfully written to node %s'), node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        self.reboot_and_finish_deploy(task)
        # NOTE(TheJulia): If we deployed a whole disk image, we should
        # clean up the tftp files on disk in case the node is
        # disregarding the boot preference.
        # TODO(rameshg87): This shouldn't get called for virtual media deploy
        # drivers (iLO and iRMC).  This is just a hack, but it will be taken
        # care in boot/deploy interface separation.
        if (_driver_uses_pxe(task.driver) and
                node.driver_internal_info.get('is_whole_disk_image')):
            _clean_up_pxe(task)
Example #24
def _attach_virtual_fd(node, floppy_image_filename):
    """Attaches virtual floppy on the node.

    :param node: an ironic node object.
    :param floppy_image_filename: the filename of the virtual floppy image
        to attach. The file should be present in the NFS/CIFS server.
    :raises: IRMCOperationError if inserting the virtual floppy failed.
    """
    try:
        irmc_client = irmc_common.get_irmc_client(node)

        fd_set_params = scci.get_virtual_fd_set_params_cmd(
            CONF.irmc.remote_image_server,
            CONF.irmc.remote_image_user_domain,
            scci.get_share_type(CONF.irmc.remote_image_share_type),
            CONF.irmc.remote_image_share_name,
            floppy_image_filename,
            CONF.irmc.remote_image_user_name,
            CONF.irmc.remote_image_user_password,
        )

        irmc_client(fd_set_params, async=False)
        irmc_client(scci.MOUNT_FD, async=False)

    except scci.SCCIClientError as irmc_exception:
        LOG.exception(
            _LE("Error while inserting virtual floppy " "into node %(uuid)s. Error: %(error)s"),
            {"uuid": node.uuid, "error": irmc_exception},
        )
        operation = _("Inserting virtual floppy")
        raise exception.IRMCOperationError(operation=operation, error=irmc_exception)

    LOG.info(_LI("Attached virtual floppy successfully" " for node %s"), node.uuid)
 def _new_token(self):
     LOG.info(_LI("Using OneView credentials specified in synch.conf"))
     LOG.info(("Using OneView credentials specified in synch.conf"))
     url = '%s%s' % (self.oneview_conf.get_manager_url(),
                     oneview_uri.AUTHENTICATION_URL)
     body = {
         'password': self.oneview_conf.get_password(),
         'userName': self.oneview_conf.get_username()
     }
     headers = {'content-type': 'application/json'}
     verify_status = self._get_verify_connection_option()
     if verify_status is False:
         LOG.warning('Using insecure connection')
     json_response = None
     repeat = True
     while repeat:
         r = self._try_execute_request(url, 'POST', body, headers,
                                       verify_status)
         # NOTE: Workaround to fix JsonDecode problems
         try:
             json_response = r.json()
             repeat = self._check_request_status(r)
         except ValueError:
             repeat = True
     return json_response.get('sessionID')
Example #26
def _set_boot_device_order(node, boot_device):
    """Set boot device order configuration of AMT Client.

    :param node: a node object
    :param boot_device: the boot device
    :raises: AMTFailure
    :raises: AMTConnectFailure
    """
    amt_common.awake_amt_interface(node)
    client = amt_common.get_wsman_client(node)
    device = amt_common.BOOT_DEVICES_MAPPING[boot_device]
    doc = _generate_change_boot_order_input(device)

    method = 'ChangeBootOrder'

    options = pywsman.ClientOptions()
    options.add_selector('InstanceID', 'Intel(r) AMT: Boot Configuration 0')

    try:
        client.wsman_invoke(options, resource_uris.CIM_BootConfigSetting,
                            method, doc)
    except (exception.AMTFailure, exception.AMTConnectFailure) as e:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Failed to set boot device %(boot_device)s for "
                              "node %(node_id)s with error: %(error)s."),
                          {'boot_device': boot_device, 'node_id': node.uuid,
                           'error': e})
    else:
        LOG.info(_LI("Successfully set boot device %(boot_device)s for "
                     "node %(node_id)s"),
                 {'boot_device': boot_device, 'node_id': node.uuid})
Example #27
def _delete_master_path_if_stale(master_path, href, ctx):
    """Delete image from cache if it is not up to date with href contents.

    :param master_path: path to an image in master cache
    :param href: image href
    :param ctx: context to use
    :returns: True if master_path is up to date with href contents,
        False if master_path was stale and was deleted or it didn't exist
    """
    if service_utils.is_glance_image(href):
        # Glance image contents cannot be updated without changing image's UUID
        return os.path.exists(master_path)
    if os.path.exists(master_path):
        img_service = image_service.get_image_service(href, context=ctx)
        img_mtime = img_service.show(href).get('updated_at')
        if not img_mtime:
            # This means that href is not a glance image and doesn't have an
            # updated_at attribute
            LOG.warn(_LW("Image service couldn't determine last "
                         "modification time of %(href)s, considering "
                         "cached image up to date."), {'href': href})
            return True
        master_mtime = utils.unix_file_modification_datetime(master_path)
        if img_mtime <= master_mtime:
            return True
        # Delete image from cache as it is outdated
        LOG.info(_LI('Image %(href)s was last modified at %(remote_time)s. '
                     'Deleting the cached copy "%(cached_file)s since it was '
                     'last modified at %(local_time)s and may be outdated.'),
                 {'href': href, 'remote_time': img_mtime,
                  'local_time': master_mtime, 'cached_file': master_path})

        os.unlink(master_path)
    return False
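
The helper utils.unix_file_modification_datetime() used above is not shown; a minimal sketch of what it plausibly does (an assumption, not the actual library code):

import datetime
import os


def unix_file_modification_datetime(file_name):
    # mtime of the file as a naive UTC datetime
    return datetime.datetime.utcfromtimestamp(os.path.getmtime(file_name))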
Example #28
    def lookup(self, context, **kwargs):
        """Find a matching node for the agent.

        Method to be called the first time a ramdisk agent checks in. This
        can be because this is a node just entering decom or a node that
        rebooted for some reason. We will use the mac addresses listed in the
        kwargs to find the matching node, then return the node object to the
        agent. The agent can then use that UUID to call the node vendor
        passthru method.

        Currently, we don't handle the instance where the agent doesn't have
        a matching node (i.e. a brand new, never been in Ironic node).

        kwargs should have the following format::

         {
             "version": "2"
             "inventory": {
                 "interfaces": [
                     {
                         "name": "eth0",
                         "mac_address": "00:11:22:33:44:55",
                         "switch_port_descr": "port24",
                         "switch_chassis_descr": "tor1"
                     }, ...
                 ], ...
             },
             "node_uuid": "ab229209-0139-4588-bbe5-64ccec81dd6e"
         }

        The interfaces list should include the non-IPMI MAC addresses
        in the form aa:bb:cc:dd:ee:ff.

        node_uuid argument is optional. If it's provided (e.g. as a result of
        inspection run before lookup), this method will just return a node and
        options.

        This method will also return the timeout for heartbeats. The driver
        will expect the agent to heartbeat before that timeout, or it will be
        considered down. This will be in a root level key called
        'heartbeat_timeout'

        :raises: NotFound if no matching node is found.
        :raises: InvalidParameterValue with unknown payload version
        """
        LOG.debug("Agent lookup using data %s", kwargs)
        uuid = kwargs.get("node_uuid")
        if uuid:
            node = objects.Node.get_by_uuid(context, uuid)
        else:
            inventory = kwargs.get("inventory")
            interfaces = self._get_interfaces(inventory)
            mac_addresses = self._get_mac_addresses(interfaces)

            node = self._find_node_by_macs(context, mac_addresses)

        LOG.info(_LI("Initial lookup for node %s succeeded, agent is running " "and waiting for commands"), node.uuid)

        return {"heartbeat_timeout": CONF.agent.heartbeat_timeout, "node": node.as_dict()}
Example #29
    def fetch_image(self, uuid, dest_path, ctx=None, force_raw=True):
        """Fetch image with given uuid to the destination path.

        Does nothing if destination path exists.
        Only creates a link if master image for this UUID is already in cache.
        Otherwise downloads an image and also stores it in cache.

        :param uuid: image UUID or href to fetch
        :param dest_path: destination file path
        :param ctx: context
        :param force_raw: boolean value, whether to convert the image to raw
                          format
        """
        img_download_lock_name = 'download-image'
        if self.master_dir is None:
            # NOTE(ghe): We don't share images between instances/hosts
            if not CONF.parallel_image_downloads:
                with lockutils.lock(img_download_lock_name, 'ironic-'):
                    _fetch(ctx, uuid, dest_path, self._image_service,
                           force_raw)
            else:
                _fetch(ctx, uuid, dest_path, self._image_service, force_raw)
            return

        # TODO(ghe): have hard links and counts the same behaviour in all fs

        master_file_name = service_utils.parse_image_ref(uuid)[0]
        master_path = os.path.join(self.master_dir, master_file_name)

        if CONF.parallel_image_downloads:
            img_download_lock_name = 'download-image:%s' % master_file_name

        # TODO(dtantsur): lock expiration time
        with lockutils.lock(img_download_lock_name, 'ironic-'):
            if os.path.exists(dest_path):
                LOG.debug("Destination %(dest)s already exists for "
                            "image %(uuid)s" %
                          {'uuid': uuid,
                           'dest': dest_path})
                return

            try:
                # NOTE(dtantsur): ensure we're not in the middle of clean up
                with lockutils.lock('master_image', 'ironic-'):
                    os.link(master_path, dest_path)
            except OSError:
                LOG.info(_LI("Master cache miss for image %(uuid)s, "
                             "starting download"),
                         {'uuid': uuid})
            else:
                LOG.debug("Master cache hit for image %(uuid)s",
                          {'uuid': uuid})
                return

            self._download_image(
                uuid, master_path, dest_path, ctx=ctx, force_raw=force_raw)

        # NOTE(dtantsur): we increased cache size - time to clean up
        self.clean_up()
Example #30
    def process_fw_on(self, node, expected_checksum):
        """Processes the firmware file from the url

        This is the template method which downloads the firmware file from
        url, verifies checksum and extracts the firmware and makes it ready
        for firmware update operation. ``_download_fw_to`` method is set in
        the firmware processor object creation factory method,
        ``get_fw_processor()``, based on the url type.

        :param node: a single Node.
        :param expected_checksum: checksum to be checked against.
        :returns: wrapper object of raw firmware image location
        :raises: IloOperationError, on failure to process firmware file.
        :raises: ImageDownloadFailed, on failure to download the original file.
        :raises: ImageRefValidationFailed, on failure to verify the checksum.
        :raises: SwiftOperationError, if upload to Swift fails.
        :raises: ImageUploadFailed, if upload to web server fails.
        """
        filename = os.path.basename(self.parsed_url.path)
        # create a temp directory where firmware file will be downloaded
        temp_dir = tempfile.mkdtemp()
        target_file = os.path.join(temp_dir, filename)

        # Note(deray): Operations performed in here:
        #
        #    1. Download the firmware file to the target file.
        #    2. Verify the checksum of the downloaded file.
        #    3. Extract the raw firmware file from its compact format
        #
        try:
            LOG.debug("For firmware update, downloading firmware file "
                      "%(src_file)s to: %(target_file)s ...",
                      {'src_file': self.parsed_url.geturl(),
                       'target_file': target_file})
            self._download_fw_to(target_file)
            LOG.debug("For firmware update, verifying checksum of file: "
                      "%(target_file)s ...", {'target_file': target_file})
            ilo_common.verify_image_checksum(target_file, expected_checksum)
            # Extracting raw firmware file from target_file ...
            fw_image_location_obj, is_different_file = (_extract_fw_from_file(
                node, target_file))
        except exception.IronicException:
            with excutils.save_and_reraise_exception():
                # delete the target file along with temp dir and
                # re-raise the exception
                shutil.rmtree(temp_dir, ignore_errors=True)

        # Note(deray): In case of raw (no need for extraction) firmware files,
        # the same firmware file is returned from the extract method.
        # Hence, don't blindly delete the firmware file which gets passed on
        # to extraction operation after successful extract. Check whether the
        # file is same or not and then go ahead deleting it.
        if is_different_file:
            # delete the entire downloaded content along with temp dir.
            shutil.rmtree(temp_dir, ignore_errors=True)

        LOG.info(_LI("Final processed firmware location: %s"),
                 fw_image_location_obj.fw_image_location)
        return fw_image_location_obj
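
The docstring above says ``_download_fw_to`` is bound in the ``get_fw_processor()`` factory based on the URL scheme; a hedged sketch of that idea (the class name and downloader helpers here are hypothetical, not the driver's actual symbols):

import functools


def get_fw_processor(fw_location_url):
    # Pick a download strategy from the URL scheme and bind it to the
    # processor instance as its _download_fw_to method.
    fw_processor = FirmwareProcessor(fw_location_url)
    downloaders = {
        'file': _download_file_based_fw_to,
        'http': _download_http_based_fw_to,
        'https': _download_http_based_fw_to,
        'swift': _download_swift_based_fw_to,
    }
    scheme = fw_processor.parsed_url.scheme
    if scheme not in downloaders:
        raise exception.InvalidParameterValue(
            _('Unsupported firmware URL scheme: %s') % scheme)
    # Each downloader is assumed to take (processor, target_file).
    fw_processor._download_fw_to = functools.partial(
        downloaders[scheme], fw_processor)
    return fw_processor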
Example #31
    def start(self):
        super(RPCService, self).start()
        admin_context = context.RequestContext('admin', 'admin', is_admin=True)
        self.manager.init_host()
        self.tg.add_dynamic_timer(
                self.manager.periodic_tasks,
                periodic_interval_max=cfg.CONF.periodic_interval,
                context=admin_context)

        target = messaging.Target(topic=self.topic, server=self.host)
        endpoints = [self.manager]
        serializer = objects_base.IronicObjectSerializer()
        self.rpcserver = rpc.get_server(target, endpoints, serializer)
        self.rpcserver.start()
        LOG.info(_LI('Created RPC server for service %(service)s on host '
                     '%(host)s.'),
                 {'service': self.topic, 'host': self.host})
Example #32
def _check_status(task):
    """Check inspection status for node given by a task."""
    node = task.node
    if node.provision_state != states.INSPECTING:
        return
    if not isinstance(task.driver.inspect, Inspector):
        return

    LOG.debug('Calling to inspector to check status of node %s',
              task.node.uuid)

    # NOTE(dtantsur): periodic tasks do not have proper tokens in context
    if CONF.auth_strategy == 'keystone':
        task.context.auth_token = keystone.get_admin_auth_token()

    try:
        status = _call_inspector(client.get_status, node.uuid, task.context)
    except Exception:
        # NOTE(dtantsur): get_status should not normally raise
        # let's assume it's a transient failure and retry later
        LOG.exception(_LE('Unexpected exception while getting '
                          'inspection status for node %s, will retry later'),
                      node.uuid)
        return

    error = status.get('error')
    finished = status.get('finished')
    if not error and not finished:
        return

    # If the inspection has finished or failed, we need to update the node, so
    # upgrade our lock to an exclusive one.
    task.upgrade_lock()
    node = task.node

    if error:
        LOG.error(_LE('Inspection failed for node %(uuid)s '
                      'with error: %(err)s'),
                  {'uuid': node.uuid, 'err': error})
        node.last_error = (_('ironic-inspector inspection failed: %s')
                           % error)
        task.process_event('fail')
    elif finished:
        LOG.info(_LI('Inspection finished successfully for node %s'),
                 node.uuid)
        task.process_event('done')
Example #33
def continue_deploy(task, **kwargs):
    """Resume a deployment upon getting POST data from deploy ramdisk.

    This method raises no exceptions because it is intended to be
    invoked asynchronously as a callback from the deploy ramdisk.

    :param task: a TaskManager instance containing the node to act on.
    :param kwargs: the kwargs to be passed to deploy.
    :raises: InvalidState if the event is not allowed by the associated
             state machine.
    :returns: UUID of the root partition or None on error.
    """
    node = task.node

    params = get_deploy_info(node, **kwargs)
    ramdisk_error = kwargs.get('error')

    if ramdisk_error:
        LOG.error(_LE('Error returned from deploy ramdisk: %s'), ramdisk_error)
        deploy_utils.set_failed_state(task, _('Failure in deploy ramdisk.'))
        destroy_images(node.uuid)
        return

    LOG.info(_LI('Continuing deployment for node %(node)s, params %(params)s'),
             {
                 'node': node.uuid,
                 'params': params
             })

    root_uuid = None
    try:
        root_uuid = deploy_utils.deploy(**params)
    except Exception as e:
        LOG.error(
            _LE('Deploy failed for instance %(instance)s. '
                'Error: %(error)s'), {
                    'instance': node.instance_uuid,
                    'error': e
                })
        deploy_utils.set_failed_state(
            task, _('Failed to continue '
                    'iSCSI deployment.'))

    destroy_images(node.uuid)
    return root_uuid
Example #34
def _start_inspection(node_uuid, context):
    """Call to discoverd to start inspection."""
    try:
        _call_discoverd(client.introspect, node_uuid, context)
    except Exception as exc:
        LOG.exception(
            _LE('Exception during contacting ironic-discoverd '
                'for inspection of node %(node)s: %(err)s'), {
                    'node': node_uuid,
                    'err': exc
                })
        # NOTE(dtantsur): if acquire fails our last option is to rely on
        # timeout
        with task_manager.acquire(context, node_uuid) as task:
            task.node.last_error = _('Failed to start inspection: %s') % exc
            task.process_event('fail')
    else:
        LOG.info(_LI('Node %s was sent to ironic-discoverd for inspection'),
                 node_uuid)
Example #35
    def reboot_and_finish_deploy(self, task):
        """Helper method to trigger reboot on the node and finish deploy.

        This method initiates a reboot on the node. On success, it
        marks the deploy as complete. On failure, it logs the error
        and marks deploy as failure.

        :param task: a TaskManager object containing the node
        :raises: InstanceDeployFailure, if node reboot failed.
        """
        try:
            manager_utils.node_power_action(task, states.REBOOT)
        except Exception as e:
            msg = (_('Error rebooting node %(node)s. Error: %(error)s') %
                   {'node': task.node.uuid, 'error': e})
            self._log_and_raise_deployment_error(task, msg)

        task.process_event('done')
        LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
Example #36
def _start_inspection(node_uuid, context):
    """Call to inspector to start inspection."""
    context.ensure_thread_contain_context()
    try:
        _call_inspector(client.introspect, node_uuid, context)
    except Exception as exc:
        LOG.exception(_LE('Exception during contacting ironic-inspector '
                          'for inspection of node %(node)s: %(err)s'),
                      {'node': node_uuid, 'err': exc})
        # NOTE(dtantsur): if acquire fails our last option is to rely on
        # timeout
        lock_purpose = 'recording hardware inspection error'
        with task_manager.acquire(context, node_uuid,
                                  purpose=lock_purpose) as task:
            task.node.last_error = _('Failed to start inspection: %s') % exc
            task.process_event('fail')
    else:
        LOG.info(_LI('Node %s was sent to ironic-inspector for inspection'),
                 node_uuid)
Example #37
def setup_vmedia_for_boot(task, boot_iso, parameters=None):
    """Sets up the node to boot from the given ISO image.

    This method attaches the given boot_iso on the node and passes
    the required parameters to it via virtual floppy image.

    :param task: a TaskManager instance containing the node to act on.
    :param boot_iso: a bootable ISO image to attach to. Should be either
        of below:
        * A Swift object - It should be of format 'swift:<object-name>'.
          It is assumed that the image object is present in
          CONF.ilo.swift_ilo_container;
        * A Glance image - It should be of format 'glance://<glance-image-uuid>'
          or just <glance-image-uuid>;
        * An HTTP(S) URL.
    :param parameters: the parameters to pass in the virtual floppy image
        in a dictionary.  This is optional.
    :raises: ImageCreationFailed, if it failed while creating the floppy image.
    :raises: SwiftOperationError, if any operation with Swift fails.
    :raises: IloOperationError, if attaching virtual media failed.
    """
    LOG.info(_LI("Setting up node %s to boot from virtual media"),
             task.node.uuid)

    if parameters:
        floppy_image_temp_url = _prepare_floppy_image(task, parameters)
        attach_vmedia(task.node, 'FLOPPY', floppy_image_temp_url)

    boot_iso_url = None
    parsed_ref = urlparse.urlparse(boot_iso)
    if parsed_ref.scheme == 'swift':
        swift_api = swift.SwiftAPI()
        container = CONF.ilo.swift_ilo_container
        object_name = parsed_ref.path
        timeout = CONF.ilo.swift_object_expiry_timeout
        boot_iso_url = swift_api.get_temp_url(container, object_name,
                                              timeout)
    elif service_utils.is_glance_image(boot_iso):
        boot_iso_url = images.get_temp_url_for_glance_image(task.context,
                                                            boot_iso)

    attach_vmedia(task.node, 'CDROM', boot_iso_url or boot_iso)
Example #38
    def reset_ilo_credential(self, task):
        """Resets the iLO password.

        :param task: a task from TaskManager.
        :raises: NodeCleaningFailure, on failure to execute step.
        """
        info = task.node.driver_info
        password = info.pop('ilo_change_password', None)

        if not password:
            LOG.info(_LI("Missing 'ilo_change_password' parameter in "
                         "driver_info. Clean step 'reset_ilo_credential' is "
                         "not performed on node %s."), task.node.uuid)
            return

        _execute_ilo_clean_step(task.node, 'reset_ilo_credential', password)

        info['ilo_password'] = password
        task.node.driver_info = info
        task.node.save()
def set_node_power_state(driver_info, state, press_type='MomentaryPress'):
    LOG.debug('Setting power state of %(sh_uri)s to %(state)s',
              {'sh_uri': driver_info.get('server_hardware_uri'),
               'state': state})

    body = {'powerState': state, 'powerControl': press_type}
    power_state_uri = driver_info.get('server_hardware_uri') + '/powerState'
    prepare_and_do_request(uri=power_state_uri, body=body, request_type='PUT')

    current_state = get_node_power_state(driver_info)
    while current_state not in [POWER_STATE_ONEVIEW_TO_IRONIC.get(state)]:
        if current_state is states.ERROR:
            raise exception.OneViewErrorStateSettingPowerState()
        time.sleep(10)
        current_state = get_node_power_state(driver_info)

    return get_node_power_state(driver_info)
Example #40
def remove_neutron_ports(task, params):
    """Deletes the neutron ports matched by params.

    :param task: a TaskManager instance.
    :param params: Dict of params to filter ports.
    :raises: NetworkError
    """
    client = get_client()
    node_uuid = task.node.uuid

    try:
        response = client.list_ports(**params)
    except neutron_exceptions.NeutronClientException as e:
        msg = (_('Could not get given network VIF for %(node)s '
                 'from neutron, possible network issue. %(exc)s') %
               {'node': node_uuid, 'exc': e})
        LOG.exception(msg)
        raise exception.NetworkError(msg)

    ports = response.get('ports', [])
    if not ports:
        LOG.debug('No ports to remove for node %s', node_uuid)
        return

    for port in ports:
        LOG.debug('Deleting neutron port %(vif_port_id)s of node '
                  '%(node_id)s.',
                  {'vif_port_id': port['id'], 'node_id': node_uuid})

        try:
            client.delete_port(port['id'])
        except neutron_exceptions.NeutronClientException as e:
            msg = (_('Could not remove VIF %(vif)s of node %(node)s, possibly '
                     'a network issue: %(exc)s') %
                   {'vif': port['id'], 'node': node_uuid, 'exc': e})
            LOG.exception(msg)
            raise exception.NetworkError(msg)

    LOG.info(_LI('Successfully removed node %(node_uuid)s neutron ports.'),
             {'node_uuid': node_uuid})
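
For context, a hedged usage sketch of how a caller such as rollback_ports() might drive remove_neutron_ports() above, filtering by network and by the node's MAC addresses (the filter keys are assumptions about how list_ports is queried here):

def rollback_ports(task, network_uuid):
    """Remove any ports the node may have left on the given network."""
    macs = [p.address for p in task.ports]
    if not macs:
        return
    remove_neutron_ports(task, {'network_id': network_uuid,
                                'mac_address': macs})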
Example #41
    def reboot_to_instance(self, task, **kwargs):
        task.process_event('resume')
        node = task.node
        iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') % {
                'node': node.uuid,
                'error': error
            })
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return
        if not iwdi:
            root_uuid = self._get_uuid_from_result(task, 'root_uuid')
            if deploy_utils.get_boot_mode_for_deploy(node) == 'uefi':
                efi_sys_uuid = (self._get_uuid_from_result(
                    task, 'efi_system_partition_uuid'))
            else:
                efi_sys_uuid = None
            task.node.driver_internal_info['root_uuid_or_disk_id'] = root_uuid
            task.node.save()
            self.prepare_instance_to_boot(task, root_uuid, efi_sys_uuid)
        LOG.info(_LI('Image successfully written to node %s'), node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)
        if iwdi:
            manager_utils.node_set_boot_device(task, 'disk', persistent=True)

        self.reboot_and_finish_deploy(task)

        # NOTE(TheJulia): If we deployed a whole disk image, we should
        # clean up the tftp files on disk in case the node is
        # disregarding the boot preference.
        # TODO(rameshg87): Not all in-tree drivers using reboot_to_instance
        # have a boot interface. So include a check for now. Remove this
        # check once all in-tree drivers have a boot interface.
        if task.driver.boot and iwdi:
            task.driver.boot.clean_up_ramdisk(task)