Example #1
def _power_off(driver_info):
    """Turn the power to this node OFF."""

    # use mutable objects so the looped method can change them
    state = [None]
    retries = [0]

    def _wait_for_power_off(state, retries):
        """Called at an interval until the node's power is off."""

        state[0] = _power_status(driver_info)
        if state[0] == states.POWER_OFF:
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.ipmi_power_retry:
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()
        try:
            retries[0] += 1
            _exec_ipmitool(driver_info, "power off")
        except Exception:
            # Log failures but keep trying
            LOG.warning(
                _("IPMI power off failed for node %s.") % driver_info['uuid'])

    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_power_off,
                                                 state=state,
                                                 retries=retries)
    timer.start(interval=1).wait()
    return state[0]
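Every example on this page follows the same shape: wrap a polling callback in FixedIntervalLoopingCall, hand it any state it must report back inside a mutable container, and raise LoopingCallDone from the callback to stop the loop. The following is a minimal, self-contained sketch of that pattern, assuming the oslo.service package (which provides the loopingcall module used in these examples); wait_until, poll_fn and expected are hypothetical names used only for illustration.

from oslo_service import loopingcall


def wait_until(poll_fn, expected, max_retries=10, interval=1.0):
    """Poll poll_fn at a fixed interval until it returns `expected`.

    The mutable dict lets the inner callback report its last observation
    back to the caller, mirroring the state/retries lists in the examples.
    """
    result = {'value': None, 'retries': 0}

    def _check(result):
        result['value'] = poll_fn()
        if result['value'] == expected:
            # Goal reached: stop the looping call.
            raise loopingcall.LoopingCallDone()
        if result['retries'] >= max_retries:
            # Retry budget exhausted: stop and let the caller inspect
            # the last observed value.
            raise loopingcall.LoopingCallDone()
        result['retries'] += 1

    timer = loopingcall.FixedIntervalLoopingCall(_check, result)
    timer.start(interval=interval).wait()
    return result['value']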
Example #2
def _wait_for_state_change(node, target_state):
    """Wait for the power state change to get reflected."""
    state = [None]
    retries = [0]

    def _wait(state):

        state[0] = _get_power_state(node)

        # NOTE(rameshg87): For reboot operations, the state will initially
        # be the same as the final state, so defer the check by one retry.
        if retries[0] != 0 and state[0] == target_state:
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.ilo.power_retry:
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()

        retries[0] += 1

    # Start a timer and wait for the operation to complete.
    timer = loopingcall.FixedIntervalLoopingCall(_wait, state)
    timer.start(interval=CONF.ilo.power_wait).wait()

    return state[0]
Example #3
File: snmp.py Project: erhan-ekici/ironic
    def _snmp_wait_for_state(self, goal_state):
        """Wait for the power state of the PDU outlet to change.

        :param goal_state: The power state to wait for, one of
            :class:`ironic.common.states`.
        :raises: SNMPFailure if an SNMP request fails.
        :returns: power state. One of :class:`ironic.common.states`.
        """

        def _poll_for_state(mutable):
            """Called at an interval until the node's power is consistent.

            :param mutable: dict object containing "state" and "next_time"
            :raises: SNMPFailure if an SNMP request fails.
            """
            mutable["state"] = self._snmp_power_state()
            if mutable["state"] == goal_state:
                raise loopingcall.LoopingCallDone()

            mutable["next_time"] += self.retry_interval
            if mutable["next_time"] >= CONF.snmp.power_timeout:
                mutable["state"] = states.ERROR
                raise loopingcall.LoopingCallDone()

        # Pass state to the looped function call in a mutable form.
        state = {"state": None, "next_time": 0}
        timer = loopingcall.FixedIntervalLoopingCall(_poll_for_state,
                                                     state)
        timer.start(interval=self.retry_interval).wait()
        LOG.debug("power state '%s'", state["state"])
        return state["state"]
Example #4
    def add_timer(self,
                  interval,
                  callback,
                  initial_delay=None,
                  *args,
                  **kwargs):
        pulse = loopingcall.FixedIntervalLoopingCall(callback, *args, **kwargs)
        pulse.start(interval=interval, initial_delay=initial_delay)
        self.timers.append(pulse)
Example #5
def _ssh_execute(ssh, cmd, ssh_params):
    # NOTE(yuriyz): this ugly code is a work-around for paramiko issues
    # with eventlet
    LOG.debug('Running cmd (SSH): %s', cmd)
    stdin_stream, stdout_stream, stderr_stream = ssh.exec_command(cmd)
    paramiko_channel = stdout_stream.channel
    paramiko_channel.setblocking(0)
    stdout_io = six.moves.StringIO()
    stderr_io = six.moves.StringIO()

    def _wait_execution(mutable, channel):
        try:
            stdout_data = channel.recv(1048576)
        except Exception:
            LOG.debug('No data from SSH stdout.')
        else:
            LOG.debug('Got %d from SSH stdout.', len(stdout_data))
            stdout_io.write(stdout_data)

        try:
            stderr_data = channel.recv_stderr(1048576)
        except Exception:
            LOG.debug('No data from SSH stderr.')
        else:
            LOG.debug('Got %d from SSH stderr.', len(stderr_data))
            stderr_io.write(stderr_data)

        if channel.exit_status_ready():
            raise loopingcall.LoopingCallDone()

        try:
            ssh = utils.ssh_connect(ssh_params)
        except exception.SSHConnectFailed:
            mutable['error'] = True
            raise loopingcall.LoopingCallDone()
        else:
            ssh.close()

    error = {'error': False}
    timer = loopingcall.FixedIntervalLoopingCall(_wait_execution, error,
                                                 paramiko_channel)
    timer.start(interval=60).wait()
    stdout = stdout_io.getvalue()
    stderr = stderr_io.getvalue()
    LOG.debug('SSH stdout is: "%s"', stdout)
    LOG.debug('SSH stderr is: "%s"', stderr)

    if error['error']:
        message = _('connection to the node lost')
        raise exception.SSHCommandFailed(cmd=message)

    exit_status = paramiko_channel.recv_exit_status()
    if exit_status != 0:
        message = _('wrong exit status %d') % exit_status
        raise exception.SSHCommandFailed(cmd=message)

    return stdout, stderr
Example #6
    def commit(self):
        """Write to the disk."""
        LOG.debug("Committing partitions to disk.")
        cmd_args = ['mklabel', self._disk_label]
        # NOTE(lucasagomes): Lead in with 1MiB to allow room for the
        #                    partition table itself.
        start = 1
        for num, part in self.get_partitions():
            end = start + part['size']
            cmd_args.extend([
                'mkpart', part['type'], part['fs_type'],
                str(start),
                str(end)
            ])
            if part['bootable']:
                cmd_args.extend(['set', str(num), 'boot', 'on'])
            start = end

        self._exec(*cmd_args)

        retries = [0]
        pids = ['']
        fuser_err = ['']
        interval = CONF.disk_partitioner.check_device_interval
        max_retries = CONF.disk_partitioner.check_device_max_retries

        timer = loopingcall.FixedIntervalLoopingCall(
            self._wait_for_disk_to_become_available, retries, max_retries,
            pids, fuser_err)
        timer.start(interval=interval).wait()

        if retries[0] > max_retries:
            if pids[0]:
                raise exception.InstanceDeployFailure(
                    _('Disk partitioning failed on device %(device)s. '
                      'Processes with the following PIDs are holding it: '
                      '%(pids)s. Time out waiting for completion.') % {
                          'device': self._device,
                          'pids': pids[0]
                      })
            else:
                raise exception.InstanceDeployFailure(
                    _('Disk partitioning failed on device %(device)s. Fuser '
                      'exited with "%(fuser_err)s". Time out waiting for '
                      'completion.') % {
                          'device': self._device,
                          'fuser_err': fuser_err[0]
                      })
Example #7
File: xcat_pxe.py Project: zjqac/xcat-core
    def _wait_for_node_deploy(self, task):
        """Wait for xCAT node deployment to complete."""
        locals = {'errstr': ''}
        driver_info = _parse_deploy_info(task.node)
        node_mac_addresses = driver_utils.get_node_mac_addresses(task)
        i_info = task.node.instance_info

        def _wait_for_deploy():
            out, err = xcat_util.exec_xcatcmd(driver_info, 'nodels',
                                              'nodelist.status')
            if err:
                locals['errstr'] = _(
                    "Error returned when quering node status"
                    " for node %s:%s") % (driver_info['xcat_node'], err)
                LOG.warning(locals['errstr'])
                raise loopingcall.LoopingCallDone()

            if out:
                node, status = out.split(": ")
                status = status.strip()
                if status == "booted":
                    LOG.info(
                        _("Deployment for node %s completed.") %
                        driver_info['xcat_node'])
                    raise loopingcall.LoopingCallDone()

            if (CONF.xcat.deploy_timeout and timeutils.utcnow() > expiration):
                locals['errstr'] = _(
                    "Timeout while waiting for"
                    " deployment of node %s.") % driver_info['xcat_node']
                LOG.warning(locals['errstr'])
                raise loopingcall.LoopingCallDone()

        expiration = timeutils.utcnow() + datetime.timedelta(
            seconds=CONF.xcat.deploy_timeout)
        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_deploy)
        # default check every 10 seconds
        timer.start(interval=CONF.xcat.deploy_checking_interval).wait()

        if locals['errstr']:
            raise xcat_exception.xCATDeploymentFailure(locals['errstr'])
        # deploy end, delete the dhcp rule for xcat
        self._ssh_delete_dhcp_rule(CONF.xcat.network_node_ip,
                                   CONF.xcat.ssh_port, CONF.xcat.ssh_user,
                                   CONF.xcat.ssh_password,
                                   i_info['network_id'], node_mac_addresses[0])
Example #8
File: pxe.py Project: schatt/ironic
    def activate_node(self, context, node, instance):
        """Wait for PXE deployment to complete."""

        locals = {'error': '', 'started': False}

        def _wait_for_deploy():
            """Called at an interval until the deployment completes."""
            try:
                row = db.bm_node_get(context, node['id'])
                if instance['uuid'] != row.get('instance_uuid'):
                    locals['error'] = _("Node associated with another instance"
                                        " while waiting for deploy of %s")
                    raise loopingcall.LoopingCallDone()

                status = row.get('task_state')
                if (status == states.DEPLOYING and locals['started'] is False):
                    LOG.info(
                        _("PXE deploy started for instance %s") %
                        instance['uuid'])
                    locals['started'] = True
                elif status in (states.DEPLOYDONE, states.ACTIVE):
                    LOG.info(
                        _("PXE deploy completed for instance %s") %
                        instance['uuid'])
                    raise loopingcall.LoopingCallDone()
                elif status == states.DEPLOYFAIL:
                    locals['error'] = _("PXE deploy failed for instance %s")
            except exception.NodeNotFound:
                locals['error'] = _("Baremetal node deleted while waiting "
                                    "for deployment of instance %s")

            if (CONF.pxe_deploy_timeout and timeutils.utcnow() > expiration):
                locals['error'] = _("Timeout reached while waiting for "
                                    "PXE deploy of instance %s")
            if locals['error']:
                raise loopingcall.LoopingCallDone()

        expiration = timeutils.utcnow() + datetime.timedelta(
            seconds=CONF.pxe_deploy_timeout)
        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_deploy)
        timer.start(interval=1).wait()

        if locals['error']:
            raise exception.InstanceDeployFailure(locals['error'] %
                                                  instance['uuid'])
Example #9
File: power.py Project: johalee/Ironic
def _wait_for_state_change(target_state, ucs_power_handle):
    """Wait and check for the power state change."""
    state = [None]
    retries = [0]

    def _wait(state, retries):
        state[0] = ucs_power_handle.get_power_state()
        if ((retries[0] != 0)
                and (UCS_TO_IRONIC_POWER_STATE.get(state[0]) == target_state)):
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.cisco_ucs.max_retry:
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()

        retries[0] += 1

    timer = loopingcall.FixedIntervalLoopingCall(_wait, state, retries)
    timer.start(interval=CONF.cisco_ucs.action_interval).wait()
    return UCS_TO_IRONIC_POWER_STATE.get(state[0], states.ERROR)
Example #10
File: pxe.py Project: varunarya10/ironic
def _get_image(ctx, path, uuid, master_path=None, image_service=None):
    # TODO(ghe): Revise this logic and document the process. Bug #1199665
    # When master_path is defined, we save the images in that dir using the
    # image uuid as the file name. Deployments that use these images create a
    # hard link to keep track of them. When the link count of a master image
    # is equal to 1, it can be deleted.
    # TODO(ghe): make hard links and link counting behave the same on all
    # filesystems

    #TODO(ghe): timeout and retry for downloads
    def _wait_for_download():
        if not os.path.exists(lock_file):
            raise loopingcall.LoopingCallDone()

    # If the download of the needed image is in progress (lock file present),
    # we wait until the lock disappears and then create the link.

    if master_path is None:
        #NOTE(ghe): We don't share images between instances/hosts
        images.fetch_to_raw(ctx, uuid, path, image_service)

    else:
        master_uuid = os.path.join(master_path,
                                   service_utils.parse_image_ref(uuid)[0])
        lock_file = os.path.join(master_path, master_uuid + '.lock')
        _link_master_image(master_uuid, path)
        if not os.path.exists(path):
            fileutils.ensure_tree(master_path)
            if not _download_in_progress(lock_file):
                with fileutils.remove_path_on_error(lock_file):
                    #TODO(ghe): logging when image cannot be created
                    fd, tmp_path = tempfile.mkstemp(dir=master_path)
                    os.close(fd)
                    images.fetch_to_raw(ctx, uuid, tmp_path, image_service)
                    _create_master_image(tmp_path, master_uuid, path)
                _remove_download_in_progress_lock(lock_file)
            else:
                #TODO(ghe): expiration time
                timer = loopingcall.FixedIntervalLoopingCall(
                    _wait_for_download)
                timer.start(interval=1).wait()
                _link_master_image(master_uuid, path)
Example #11
File: ipmi.py Project: schatt/ironic
    def _power_off(self):
        """Turn the power to this node OFF."""
        def _wait_for_power_off():
            """Called at an interval until the node's power is off."""

            self._update_state()
            if self.state == states.POWER_OFF:
                raise loopingcall.LoopingCallDone()

            if self.retries > CONF.ipmi_power_retry:
                self.state = states.ERROR
                raise loopingcall.LoopingCallDone()
            try:
                self.retries += 1
                self._exec_ipmitool("power off")
            except Exception:
                LOG.exception(_("IPMI power off failed"))

        self.retries = 0
        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_power_off)
        timer.start(interval=1).wait()
Example #12
File: seamicro.py Project: froyobin/ironic
def _reboot(node, timeout=None):
    """Reboot this node.

    :param node: An Ironic node, one of :class:`ironic.db.models.Node`.
    :param timeout: Time in seconds to wait till the reboot is complete.
    :raises: InvalidParameterValue if a seamicro parameter is invalid.
    :raises: MissingParameterValue if required seamicro parameters are
        missing.
    :returns: Power state of the given node
    """
    if timeout is None:
        timeout = CONF.seamicro.action_timeout
    state = [None]
    retries = [0]
    seamicro_info = _parse_driver_info(node)
    server = _get_server(seamicro_info)

    def _wait_for_reboot(state, retries):
        """Called at an interval until the node is rebooted successfully."""

        state[0] = _get_power_status(node)
        if state[0] == states.POWER_ON:
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.seamicro.max_retry:
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()

        try:
            retries[0] += 1
            server.reset()
        except seamicro_client_exception.ClientException:
            LOG.warning(_LW("Reboot failed for node %s."),
                        node.uuid)

    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot,
                                                 state, retries)
    server.reset()
    timer.start(interval=timeout).wait()
    return state[0]
Example #13
def _power_off(driver_info):
    """Turn the power OFF for this node.

    :param driver_info: the ipmitool parameters for accessing a node.
    :returns: one of ironic.common.states POWER_OFF or ERROR.
    :raises: IPMIFailure on an error from ipmitool (from _power_status call).

    """
    # use mutable objects so the looped method can change them
    state = [None]
    retries = [0]

    def _wait_for_power_off(state, retries):
        """Called at an interval until the node's power is off."""

        state[0] = _power_status(driver_info)
        if state[0] == states.POWER_OFF:
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.ipmi.retry_timeout:
            LOG.error(_('IPMI power off timed out after %(tries)s retries.'),
                      {'tries': retries[0]})
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()
        try:
            # only issue "power off" once
            if retries[0] == 0:
                _exec_ipmitool(driver_info, "power off")
            retries[0] += 1
        except Exception:
            # Log failures but keep trying
            LOG.warning(
                _("IPMI power off failed for node %s.") % driver_info['uuid'])

    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_power_off,
                                                 state=state,
                                                 retries=retries)
    timer.start(interval=1.0).wait()
    return state[0]
Example #14
File: seamicro.py Project: froyobin/ironic
def _power_on(node, timeout=None):
    """Power ON this node

    :param node: An Ironic node object.
    :param timeout: Time in seconds to wait till power on is complete.
    :raises: InvalidParameterValue if a seamicro parameter is invalid.
    :raises: MissingParameterValue if required seamicro parameters are
        missing.
    :returns: Power state of the given node.
    """
    if timeout is None:
        timeout = CONF.seamicro.action_timeout
    state = [None]
    retries = [0]
    seamicro_info = _parse_driver_info(node)
    server = _get_server(seamicro_info)

    def _wait_for_power_on(state, retries):
        """Called at an interval until the node is powered on."""

        state[0] = _get_power_status(node)
        if state[0] == states.POWER_ON:
            raise loopingcall.LoopingCallDone()

        if retries[0] > CONF.seamicro.max_retry:
            state[0] = states.ERROR
            raise loopingcall.LoopingCallDone()
        try:
            retries[0] += 1
            server.power_on()
        except seamicro_client_exception.ClientException:
            LOG.warning(_LW("Power-on failed for node %s."),
                        node.uuid)

    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_power_on,
                                                 state, retries)
    timer.start(interval=timeout).wait()
    return state[0]
Example #15
def start_shellinabox_console(node_uuid, port, console_cmd):
    """Open the serial console for a node.

    :param node_uuid: the uuid for the node.
    :param port: the terminal port for the node.
    :param console_cmd: the shell command that gets the console.
    :raises: ConsoleError if the directory for the PID file cannot be created.
    :raises: ConsoleSubprocessFailed when invoking the subprocess failed.
    """

    # make sure that the old console for this node is stopped
    # and the files are cleared
    try:
        _stop_console(node_uuid)
    except exception.NoConsolePid:
        pass
    except processutils.ProcessExecutionError as exc:
        LOG.warning(_LW("Failed to kill the old console process "
                "before starting a new shellinabox console "
                "for node %(node)s. Reason: %(err)s"),
                {'node': node_uuid, 'err': exc})

    _ensure_console_pid_dir_exists()
    pid_file = _get_console_pid_file(node_uuid)

    # put together the command and arguments for invoking the console
    args = []
    args.append(CONF.console.terminal)
    if CONF.console.terminal_cert_dir:
        args.append("-c")
        args.append(CONF.console.terminal_cert_dir)
    else:
        args.append("-t")
    args.append("-p")
    args.append(str(port))
    args.append("--background=%s" % pid_file)
    args.append("-s")
    args.append(console_cmd)

    # run the command as a subprocess
    try:
        LOG.debug('Running subprocess: %s', ' '.join(args))
        # use pipe here to catch the error in case shellinaboxd
        # failed to start.
        obj = subprocess.Popen(args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    except (OSError, ValueError) as e:
        error = _("%(exec_error)s\n"
                  "Command: %(command)s") % {'exec_error': str(e),
                                             'command': ' '.join(args)}
        LOG.warning(error)
        raise exception.ConsoleSubprocessFailed(error=error)

    def _wait(node_uuid, popen_obj):
        locals['returncode'] = popen_obj.poll()

        # check if the console pid is created.
        # if it is, then the shellinaboxd is invoked successfully as a daemon.
        # otherwise check the error.
        if locals['returncode'] is not None:
            if locals['returncode'] == 0 and os.path.exists(pid_file):
                raise loopingcall.LoopingCallDone()
            else:
                (stdout, stderr) = popen_obj.communicate()
                locals['errstr'] = _("Command: %(command)s.\n"
                        "Exit code: %(return_code)s.\n"
                        "Stdout: %(stdout)r\n"
                        "Stderr: %(stderr)r") % {'command': ' '.join(args),
                                'return_code': locals['returncode'],
                                'stdout': stdout,
                                'stderr': stderr}
                LOG.warning(locals['errstr'])
                raise loopingcall.LoopingCallDone()

        if (time.time() > expiration):
            locals['errstr'] = _("Timeout while waiting for console"
                    " subprocess to start for node %s.") % node_uuid
            LOG.warning(locals['errstr'])
            raise loopingcall.LoopingCallDone()

    locals = {'returncode': None, 'errstr': ''}
    expiration = time.time() + CONF.console.subprocess_timeout
    timer = loopingcall.FixedIntervalLoopingCall(_wait, node_uuid, obj)
    timer.start(interval=CONF.console.subprocess_checking_interval).wait()

    if locals['errstr']:
        raise exception.ConsoleSubprocessFailed(error=locals['errstr'])
Example #16
def _set_and_wait(task, target_state):
    """Helper function for DynamicLoopingCall.

    This method changes the power state and polls AMT until the desired
    power state is reached.

    :param task: a TaskManager instance contains the target node.
    :param target_state: desired power state.
    :returns: one of ironic.common.states.
    :raises: PowerStateFailure if the node cannot be set to target_state.
    :raises: AMTFailure.
    :raises: AMTConnectFailure.
    :raises: InvalidParameterValue.
    """
    node = task.node
    driver = task.driver
    if target_state not in (states.POWER_ON, states.POWER_OFF):
        raise exception.InvalidParameterValue(
            _('Unsupported target_state: %s') % target_state)
    elif target_state == states.POWER_ON:
        boot_device = node.driver_internal_info.get('amt_boot_device')
        if boot_device and boot_device != amt_common.DEFAULT_BOOT_DEVICE:
            driver.management.ensure_next_boot_device(node, boot_device)

    def _wait(status):
        status['power'] = _power_status(node)
        if status['power'] == target_state:
            raise loopingcall.LoopingCallDone()

        if status['iter'] >= CONF.amt.max_attempts:
            status['power'] = states.ERROR
            LOG.warning(
                _LW("AMT failed to set power state %(state)s after "
                    "%(tries)s retries on node %(node_id)s."), {
                        'state': target_state,
                        'tries': status['iter'],
                        'node_id': node.uuid
                    })
            raise loopingcall.LoopingCallDone()

        try:
            _set_power_state(node, target_state)
        except Exception:
            # Log failures but keep trying
            LOG.warning(
                _LW("AMT failed to set power state %(state)s for node "
                    "%(node)s - attempt %(attempt)s of %(max_attempt)s."), {
                        'state': target_state,
                        'node': node.uuid,
                        'attempt': status['iter'] + 1,
                        'max_attempt': CONF.amt.max_attempts
                    })
        status['iter'] += 1

    status = {'power': None, 'iter': 0}

    timer = loopingcall.FixedIntervalLoopingCall(_wait, status)
    timer.start(interval=CONF.amt.action_wait).wait()

    if status['power'] != target_state:
        raise exception.PowerStateFailure(pstate=target_state)

    return status['power']
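A closing note on the mutable lists and dicts used throughout these examples: they exist because the polled callback cannot return a value to the caller directly. When only a single result is needed, LoopingCallDone also accepts a retvalue argument, which start().wait() hands back, so the container can often be dropped. Below is a hedged sketch of that variant, again assuming oslo.service; get_final_state, poll_fn and goal are hypothetical names.

from oslo_service import loopingcall


def get_final_state(poll_fn, goal, max_retries=10, interval=1.0):
    """Poll until poll_fn() returns goal or the retry budget is spent."""
    retries = [0]

    def _check():
        state = poll_fn()
        if state == goal or retries[0] >= max_retries:
            # wait() below returns whatever retvalue carries.
            raise loopingcall.LoopingCallDone(retvalue=state)
        retries[0] += 1

    timer = loopingcall.FixedIntervalLoopingCall(_check)
    return timer.start(interval=interval).wait()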