Example #1
    def rescue(self, task):
        """Boot a rescue ramdisk on the node.

        :param task: a TaskManager instance.
        :raises: NetworkError if the tenant ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: other exceptions from the node's power driver if something
             goes wrong during the power action.
        :raises: any boot interface's prepare_ramdisk exceptions.
        :returns: states.RESCUEWAIT
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        # NOTE(TheJulia): Revealing that the power is off at any time can
        # cause external power sync to decide that the node must be off.
        # This may result in a post-rescued instance being turned off
        # unexpectedly after rescue has started.
        # TODO(TheJulia): Once we have power/state callbacks to nova,
        # the reset of the power_state can be removed.
        task.node.power_state = states.POWER_ON
        task.node.save()

        task.driver.boot.clean_up_instance(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.unconfigure_tenant_networks(task)
            task.driver.network.add_rescuing_network(task)
        if CONF.agent.manage_agent_boot:
            ramdisk_opts = deploy_utils.build_agent_options(task.node)
            # prepare_ramdisk will set the boot device
            task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
        manager_utils.node_power_action(task, states.POWER_ON)

        return states.RESCUEWAIT
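
Every example on this page wraps network reconfiguration in manager_utils.power_state_for_network_configuration(task), whose implementation is not shown. The following is a minimal sketch of the pattern it represents, assuming a hypothetical need_power_off attribute on the network driver; it is an illustration, not Ironic's actual code.

import contextlib

from ironic.common import states


@contextlib.contextmanager
def power_state_for_network_configuration(task):
    """Sketch: power the node off for network changes, then restore.

    Some network drivers can only reconfigure ports while the node is
    powered off, so the previous power state is restored on exit.
    """
    # Assumption: the network driver advertises whether power-off is
    # required; this illustration defaults to True for safety.
    needs_off = getattr(task.driver.network, 'need_power_off', True)
    previous = task.driver.power.get_power_state(task)
    if needs_off and previous != states.POWER_OFF:
        task.driver.power.set_power_state(task, states.POWER_OFF)
    try:
        yield
    finally:
        if needs_off and previous != states.POWER_OFF:
            task.driver.power.set_power_state(task, previous)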
Example #2
def _tear_down_managed_boot(task):
    errors = []

    ironic_manages_boot = utils.pop_node_nested_field(
        task.node, 'driver_internal_info', _IRONIC_MANAGES_BOOT)
    if not ironic_manages_boot:
        return errors

    try:
        task.driver.boot.clean_up_ramdisk(task)
    except Exception as exc:
        errors.append(_('unable to clean up ramdisk boot: %s') % exc)
        LOG.exception('Unable to clean up ramdisk boot for node %s',
                      task.node.uuid)
    try:
        with cond_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_inspection_network(task)
    except Exception as exc:
        errors.append(_('unable to remove inspection ports: %s') % exc)
        LOG.exception('Unable to remove inspection network for node %s',
                      task.node.uuid)

    if CONF.inspector.power_off:
        try:
            cond_utils.node_power_action(task, states.POWER_OFF)
        except Exception as exc:
            errors.append(_('unable to power off the node: %s') % exc)
            LOG.exception('Unable to power off node %s', task.node.uuid)

    return errors
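
Note that _tear_down_managed_boot collects error strings rather than raising, so every clean-up step is attempted even if an earlier one fails. A hedged sketch of how a caller might consume the returned list (the aggregation format is illustrative, not taken from Ironic):

def _handle_teardown_errors(task):
    # Illustrative caller: run all tear-down steps, then report every
    # collected failure at once instead of stopping at the first one.
    errors = _tear_down_managed_boot(task)
    if errors:
        msg = 'Managed boot tear-down failed: %s' % '; '.join(errors)
        LOG.error('%s (node %s)', msg, task.node.uuid)
        return msg
    return None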
Example #3
    def unrescue(self, task):
        """Attempt to move a rescued node back to active state.

        :param task: a TaskManager instance.
        :raises: NetworkError if the rescue ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: other exceptions from the node's power driver if something
             goes wrong during the power action.
        :raises: any boot interface's prepare_instance exceptions.
        :returns: states.ACTIVE
        """
        manager_utils.node_power_action(task, states.POWER_OFF)

        # NOTE(TheJulia): Revealing that the power is off at any time can
        # cause external power sync to decide that the node must be off.
        # This may result in a post-rescued instance being turned off
        # unexpectedly after unrescue.
        # TODO(TheJulia): Once we have power/state callbacks to nova,
        # the reset of the power_state can be removed.
        task.node.power_state = states.POWER_ON
        task.node.save()

        self.clean_up(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.configure_tenant_networks(task)
        task.driver.boot.prepare_instance(task)
        manager_utils.node_power_action(task, states.POWER_ON)

        return states.ACTIVE
Example #4
    def _finalize_rescue(self, task):
        """Call ramdisk to prepare rescue mode and verify result.

        :param task: A TaskManager instance
        :raises: InstanceRescueFailure, if rescuing failed
        """
        node = task.node
        try:
            result = self._client.finalize_rescue(node)
        except exception.IronicException as e:
            raise exception.InstanceRescueFailure(node=node.uuid,
                                                  instance=node.instance_uuid,
                                                  reason=e)
        if result.get('command_status') != 'SUCCEEDED':
            # NOTE(mariojv) Caller will clean up failed rescue in exception
            # handler.
            fail_reason = (_('Agent returned bad result for command '
                             'finalize_rescue: %(result)s') %
                           {'result': result.get('command_error')})
            raise exception.InstanceRescueFailure(node=node.uuid,
                                                  instance=node.instance_uuid,
                                                  reason=fail_reason)
        task.process_event('resume')
        task.driver.rescue.clean_up(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.configure_tenant_networks(task)
        task.process_event('done')
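
Because anything other than command_status == 'SUCCEEDED' raises InstanceRescueFailure, the failure branch is easy to exercise with a stubbed agent client. A minimal sketch using unittest.mock, assuming a rescue interface instance and a task fixture exist:

from unittest import mock

from ironic.common import exception


def test_finalize_rescue_failure(rescue_iface, task):
    # Stub the agent client so finalize_rescue reports a failed command.
    rescue_iface._client = mock.Mock()
    rescue_iface._client.finalize_rescue.return_value = {
        'command_status': 'FAILED',
        'command_error': 'rescue ramdisk never came up',  # made-up output
    }
    try:
        rescue_iface._finalize_rescue(task)
        assert False, 'InstanceRescueFailure was expected'
    except exception.InstanceRescueFailure:
        pass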
Example #5
def _start_managed_inspection(task):
    """Start inspection managed by ironic."""
    try:
        client = _get_client(task.context)
        endpoint = _get_callback_endpoint(client)
        params = dict(_parse_kernel_params(),
                      **{'ipa-inspection-callback-url': endpoint})
        if CONF.deploy.fast_track:
            params['ipa-api-url'] = deploy_utils.get_ironic_api_url()

        cond_utils.node_power_action(task, states.POWER_OFF)
        with cond_utils.power_state_for_network_configuration(task):
            task.driver.network.add_inspection_network(task)
        task.driver.boot.prepare_ramdisk(task, ramdisk_params=params)
        client.start_introspection(task.node.uuid, manage_boot=False)
        cond_utils.node_power_action(task, states.POWER_ON)
    except Exception as exc:
        LOG.exception(
            'Unable to start managed inspection for node %(uuid)s: '
            '%(err)s', {
                'uuid': task.node.uuid,
                'err': exc
            })
        error = _('unable to start inspection: %s') % exc
        _inspection_error_handler(task, error, raise_exc=True)
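
The params = dict(_parse_kernel_params(), **{...}) idiom above layers the callback URL over the parsed kernel parameters without mutating the original mapping. A quick illustration of the merge semantics, with made-up values:

# Keyword arguments win over the base mapping, and the base is copied,
# so the parsed defaults stay untouched.
base = {'ipa-debug': '1', 'console': 'ttyS0'}
params = dict(base, **{'ipa-inspection-callback-url': 'http://example.test/cb'})
assert params['ipa-inspection-callback-url'] == 'http://example.test/cb'
assert base == {'ipa-debug': '1', 'console': 'ttyS0'}  # unchanged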
Example #6
    def deploy(self, task):
        if ('configdrive' in task.node.instance_info
                and 'ramdisk_boot_configdrive' not in
                task.driver.boot.capabilities):
            # TODO(dtantsur): make it an actual error?
            LOG.warning('A configuration drive is present in the ramdisk '
                        'deployment request of node %(node)s with boot '
                        'interface %(drv)s. The configuration drive will be '
                        'ignored for this deployment.',
                        {'node': task.node, 'drv': task.node.boot_interface})
        manager_utils.node_power_action(task, states.POWER_OFF)
        # Tenant networks must enable connectivity to the boot
        # location, as reboot() can otherwise be very problematic.
        # IDEA(TheJulia): Maybe a "trusted environment" mode flag
        # that we otherwise fail validation on for drivers that
        # require explicit security postures.
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.configure_tenant_networks(task)

        # calling boot.prepare_instance will also set the node
        # to PXE boot, and update PXE templates accordingly
        task.driver.boot.prepare_instance(task)

        # Power-on the instance, with PXE prepared, we're done.
        manager_utils.node_power_action(task, states.POWER_ON)
        LOG.info('Deployment setup for node %s done', task.node.uuid)
        return None
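
The configdrive warning above fires only when the boot interface does not advertise ramdisk_boot_configdrive in its capabilities list. A hedged sketch of a boot interface opting in; the attribute name comes from the check above, while the class itself is illustrative and omits the abstract methods a real interface must implement:

from ironic.drivers import base


class ConfigDriveAwareBoot(base.BootInterface):
    # Advertising configdrive support for ramdisk deploys means the
    # deploy interface above will not warn and ignore the configdrive.
    # (Abstract BootInterface methods omitted for brevity.)
    capabilities = ['ramdisk_boot_configdrive']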
Example #7
    def deploy(self, task):
        manager_utils.node_power_action(task, states.POWER_OFF)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.configure_tenant_networks(task)

        # calling boot.prepare_instance will also set the node
        # to PXE boot, and update PXE templates accordingly
        task.driver.boot.prepare_instance(task)

        # Power-on the instance, with PXE prepared, we're done.
        manager_utils.node_power_action(task, states.POWER_ON)
        LOG.info('Deployment setup for node %s done', task.node.uuid)
        return None
Example #8
    def deploy(self, task):
        """Start deployment of the task's node.

        Fetches instance image, updates the DHCP port options for next boot,
        and issues a reboot request to the power driver.
        This causes the node to boot into the deployment ramdisk and triggers
        the next phase of PXE-based deployment via agent heartbeats.

        :param task: a TaskManager instance containing the node to act on.
        :returns: deploy state DEPLOYWAIT.
        """
        node = task.node
        if manager_utils.is_fast_track(task):
            LOG.debug('Performing a fast track deployment for %(node)s.',
                      {'node': task.node.uuid})
            deploy_utils.cache_instance_image(task.context, node)
            check_image_size(task)
            # Update the database for the API; processing the 'wait' event
            # resumes the state machine, moving from DEPLOYWAIT -> DEPLOYING.
            task.process_event('wait')
            self.continue_deploy(task)
        elif task.driver.storage.should_write_image(task):
            # Standard deploy process
            deploy_utils.cache_instance_image(task.context, node)
            check_image_size(task)
            # Check if the driver has already performed a reboot in a previous
            # deploy step.
            if not task.node.driver_internal_info.get('deployment_reboot',
                                                      False):
                manager_utils.node_power_action(task, states.REBOOT)
            info = task.node.driver_internal_info
            info.pop('deployment_reboot', None)
            task.node.driver_internal_info = info
            task.node.save()

            return states.DEPLOYWAIT
        else:
            # Boot from a Storage Volume

            # TODO(TheJulia): At some point, we should de-dupe this code
            # as it is nearly identical to the agent deploy interface.
            # This is not being done now as it is expected to be
            # refactored in the near future.
            manager_utils.node_power_action(task, states.POWER_OFF)
            with manager_utils.power_state_for_network_configuration(task):
                task.driver.network.remove_provisioning_network(task)
                task.driver.network.configure_tenant_networks(task)
            task.driver.boot.prepare_instance(task)
            manager_utils.node_power_action(task, states.POWER_ON)

            return None
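
This example and Example #10 below share the same copy-pop-reassign dance on driver_internal_info: reassigning the whole dict is what marks the field dirty so node.save() persists the change. A small helper capturing the pattern (a sketch; recent Ironic node objects ship similar set/del helpers):

def _pop_internal_flag(node, key):
    # Pop a flag from driver_internal_info and persist its removal.
    # Reassigning the dict, rather than only mutating it in place,
    # ensures the object layer records the field as changed.
    info = node.driver_internal_info
    value = info.pop(key, None)
    node.driver_internal_info = info
    node.save()
    return value

With it, the block above reduces to _pop_internal_flag(task.node, 'deployment_reboot').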
Example #9
    def clean_up(self, task):
        """Clean up after RESCUEWAIT timeout/failure or finishing rescue.

        The rescue password is removed from the node, and the ramdisk boot
        environment is cleaned up if Ironic is managing the ramdisk boot.

        :param task: a TaskManager instance with the node.
        :raises: NetworkError if the rescue ports cannot be removed.
        """
        manager_utils.remove_node_rescue_password(task.node, save=True)
        if CONF.agent.manage_agent_boot:
            task.driver.boot.clean_up_ramdisk(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_rescuing_network(task)
Example #10
    def deploy(self, task):
        """Perform a deployment to a node.

        Perform the necessary work to deploy an image onto the specified node.
        This method will be called after prepare(), which may have already
        performed any preparatory steps, such as pre-caching some data for the
        node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        if manager_utils.is_fast_track(task):
            LOG.debug('Performing a fast track deployment for %(node)s.',
                      {'node': task.node.uuid})
            # Update the database for the API; processing the 'wait' event
            # resumes the state machine, moving from DEPLOYWAIT -> DEPLOYING.
            task.process_event('wait')
            self.continue_deploy(task)
        elif task.driver.storage.should_write_image(task):
            # Check if the driver has already performed a reboot in a previous
            # deploy step.
            if not task.node.driver_internal_info.get('deployment_reboot'):
                manager_utils.node_power_action(task, states.REBOOT)
            info = task.node.driver_internal_info
            info.pop('deployment_reboot', None)
            task.node.driver_internal_info = info
            task.node.save()
            return states.DEPLOYWAIT
        else:
            # TODO(TheJulia): At some point, we should de-dupe this code
            # as it is nearly identical to the iscsi deploy interface.
            # This is not being done now as it is expected to be
            # refactored in the near future.
            manager_utils.node_power_action(task, states.POWER_OFF)
            with manager_utils.power_state_for_network_configuration(task):
                task.driver.network.remove_provisioning_network(task)
                task.driver.network.configure_tenant_networks(task)
            task.driver.boot.prepare_instance(task)
            manager_utils.node_power_action(task, states.POWER_ON)
            LOG.info('Deployment to node %s done', task.node.uuid)
            return None
Example #11
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        :raises: NetworkError if the cleaning ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: StorageError when the storage interface attached volumes fail
             to detach.
        :raises: other exceptions from the node's power driver if something
             goes wrong during the power action.
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        task.driver.storage.detach_volumes(task)
        deploy_utils.tear_down_storage_configuration(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.unconfigure_tenant_networks(task)
            # NOTE(mgoddard): If the deployment was unsuccessful the node may
            # have ports on the provisioning network which were not deleted.
            task.driver.network.remove_provisioning_network(task)
        return states.DELETED
Example #12
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        Power off the node. All actual clean-up is done in the clean_up()
        method which should be called separately.

        :param task: a TaskManager instance containing the node to act on.
        :returns: deploy state DELETED.
        :raises: NetworkError if the cleaning ports cannot be removed.
        :raises: InvalidParameterValue when the wrong state is specified
             or the wrong driver info is specified.
        :raises: StorageError when volume detachment fails.
        :raises: other exceptions from the node's power driver if something
             goes wrong during the power action.
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        task.driver.storage.detach_volumes(task)
        deploy_utils.tear_down_storage_configuration(task)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.unconfigure_tenant_networks(task)
            # NOTE(mgoddard): If the deployment was unsuccessful the node may
            # have ports on the provisioning network which were not deleted.
            task.driver.network.remove_provisioning_network(task)
        return states.DELETED
Example #13
    def reboot_and_finish_deploy(self, task):
        """Helper method to trigger reboot on the node and finish deploy.

        This method initiates a reboot on the node. On success, it
        marks the deploy as complete. On failure, it logs the error
        and marks the deploy as failed.

        :param task: a TaskManager object containing the node
        :raises: InstanceDeployFailure, if node reboot failed.
        """
        wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000
        attempts = CONF.agent.post_deploy_get_power_state_retries + 1

        @retrying.retry(
            stop_max_attempt_number=attempts,
            retry_on_result=lambda state: state != states.POWER_OFF,
            wait_fixed=wait
        )
        def _wait_until_powered_off(task):
            return task.driver.power.get_power_state(task)

        node = task.node

        if CONF.agent.deploy_logs_collect == 'always':
            driver_utils.collect_ramdisk_logs(node)

        # Whether ironic should power off the node via out-of-band or
        # in-band methods
        oob_power_off = strutils.bool_from_string(
            node.driver_info.get('deploy_forces_oob_reboot', False))

        try:
            if not oob_power_off:
                try:
                    self._client.power_off(node)
                    _wait_until_powered_off(task)
                except Exception as e:
                    LOG.warning('Failed to soft power off node %(node_uuid)s '
                                'in at least %(timeout)d seconds. '
                                '%(cls)s: %(error)s',
                                {'node_uuid': node.uuid,
                                 'timeout': (wait * (attempts - 1)) / 1000,
                                 'cls': e.__class__.__name__, 'error': e},
                                exc_info=not isinstance(
                                    e, exception.IronicException))
                    manager_utils.node_power_action(task, states.POWER_OFF)
            else:
                # Flush the file system prior to hard rebooting the node
                result = self._client.sync(node)
                error = result.get('faultstring')
                if error:
                    if 'Unknown command' in error:
                        error = _('The version of the IPA ramdisk used in '
                                  'the deployment does not support the '
                                  'command "sync"')
                    LOG.warning(
                        'Failed to flush the file system prior to hard '
                        'rebooting the node %(node)s. Error: %(error)s',
                        {'node': node.uuid, 'error': error})

                manager_utils.node_power_action(task, states.POWER_OFF)
        except Exception as e:
            msg = (_('Error rebooting node %(node)s after deploy. '
                     '%(cls)s: %(error)s') %
                   {'node': node.uuid, 'cls': e.__class__.__name__,
                    'error': e})
            log_and_raise_deployment_error(task, msg, exc=e)

        try:
            with manager_utils.power_state_for_network_configuration(task):
                task.driver.network.remove_provisioning_network(task)
                task.driver.network.configure_tenant_networks(task)
            manager_utils.node_power_action(task, states.POWER_ON)
        except Exception as e:
            msg = (_('Error rebooting node %(node)s after deploy. '
                     '%(cls)s: %(error)s') %
                   {'node': node.uuid, 'cls': e.__class__.__name__,
                    'error': e})
            # NOTE(mgoddard): Don't collect logs since the node has been
            # powered off.
            log_and_raise_deployment_error(task, msg, collect_logs=False,
                                           exc=e)

        if not node.deploy_step:
            # TODO(rloo): delete this 'if' part after deprecation period, when
            # we expect all (out-of-tree) drivers to support deploy steps.
            # After which we will always notify_conductor_resume_deploy().
            task.process_event('done')
            LOG.info('Deployment to node %s done', task.node.uuid)
        else:
            manager_utils.notify_conductor_resume_deploy(task)
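
The retrying decorator in this example retries on the returned value rather than on exceptions: _wait_until_powered_off polls until the power state reads POWER_OFF or the attempts run out. A self-contained sketch of the same pattern with the retrying package, where the polled function is a stand-in for the power driver call:

import random

import retrying


@retrying.retry(stop_max_attempt_number=10,
                retry_on_result=lambda state: state != 'power off',
                wait_fixed=500)  # 500 ms between attempts
def poll_power_state():
    # Stand-in for task.driver.power.get_power_state(task).
    return random.choice(['power on', 'power off'])


# Returns 'power off', or raises retrying.RetryError after 10 attempts.
print(poll_power_state())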