Example #1
    def _poll_and_check(self, osc, bay):
        poller = HeatPoller(osc, bay)
        lc = loopingcall.FixedIntervalLoopingCall(f=poller.poll_and_check)
        lc.start(cfg.CONF.bay_heat.wait_interval, True)
Example #2
    def test_repeat(self):
        self.useFixture(fixture.SleepFixture())
        self.num_runs = 2

        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_zero)
        self.assertFalse(timer.start(interval=0.5).wait())
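The fragments in this listing omit imports and the surrounding class context. For reference, the short sketch below shows the same polling pattern end to end; it assumes the oslo.service library (imported as oslo_service) is installed, and the _poll function and its state dict are illustrative names only, not code from any of the projects quoted here.

# Minimal, self-contained sketch of the FixedIntervalLoopingCall pattern,
# assuming oslo.service is installed (pip install oslo.service).
from oslo_service import loopingcall

state = {'attempts': 0}

def _poll():
    # Called every `interval` seconds until LoopingCallDone is raised.
    state['attempts'] += 1
    if state['attempts'] >= 3:
        # The retvalue passed here becomes the return value of wait().
        raise loopingcall.LoopingCallDone(retvalue=state['attempts'])

timer = loopingcall.FixedIntervalLoopingCall(_poll)
result = timer.start(interval=0.1).wait()
print(result)  # -> 3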
Example #3
def start_shellinabox_console(node_uuid, port, console_cmd):
    """Open the serial console for a node.

    :param node_uuid: the uuid for the node.
    :param port: the terminal port for the node.
    :param console_cmd: the shell command that gets the console.
    :raises: ConsoleError if the directory for the PID file cannot be created.
    :raises: ConsoleSubprocessFailed when invoking the subprocess failed.
    """

    # make sure that the old console for this node is stopped
    # and the files are cleared
    try:
        _stop_console(node_uuid)
    except exception.NoConsolePid:
        pass
    except processutils.ProcessExecutionError as exc:
        LOG.warning(
            _LW("Failed to kill the old console process "
                "before starting a new shellinabox console "
                "for node %(node)s. Reason: %(err)s"), {
                    'node': node_uuid,
                    'err': exc
                })

    _ensure_console_pid_dir_exists()
    pid_file = _get_console_pid_file(node_uuid)

    # put together the command and arguments for invoking the console
    args = []
    args.append(CONF.console.terminal)
    if CONF.console.terminal_cert_dir:
        args.append("-c")
        args.append(CONF.console.terminal_cert_dir)
    else:
        args.append("-t")
    args.append("-p")
    args.append(str(port))
    args.append("--background=%s" % pid_file)
    args.append("-s")
    args.append(console_cmd)

    # run the command as a subprocess
    try:
        LOG.debug('Running subprocess: %s', ' '.join(args))
        # use pipe here to catch the error in case shellinaboxd
        # fails to start.
        obj = subprocess.Popen(args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    except (OSError, ValueError) as e:
        error = _("%(exec_error)s\n"
                  "Command: %(command)s") % {
                      'exec_error': str(e),
                      'command': ' '.join(args)
                  }
        LOG.warning(error)
        raise exception.ConsoleSubprocessFailed(error=error)

    def _wait(node_uuid, popen_obj):
        wait_state['returncode'] = popen_obj.poll()

        # check if the console pid is created.
        # if it is, then the shellinaboxd is invoked successfully as a daemon.
        # otherwise check the error.
        if wait_state['returncode'] is not None:
            if wait_state['returncode'] == 0 and os.path.exists(pid_file):
                raise loopingcall.LoopingCallDone()
            else:
                (stdout, stderr) = popen_obj.communicate()
                wait_state['errstr'] = _(
                    "Command: %(command)s.\n"
                    "Exit code: %(return_code)s.\n"
                    "Stdout: %(stdout)r\n"
                    "Stderr: %(stderr)r") % {
                        'command': ' '.join(args),
                        'return_code': wait_state['returncode'],
                        'stdout': stdout,
                        'stderr': stderr}
                LOG.warning(wait_state['errstr'])
                raise loopingcall.LoopingCallDone()

        if time.time() > expiration:
            wait_state['errstr'] = (_("Timeout while waiting for console "
                                      "subprocess to start for node %s.")
                                    % node_uuid)
            LOG.warning(wait_state['errstr'])
            raise loopingcall.LoopingCallDone()

    wait_state = {'returncode': None, 'errstr': ''}
    expiration = time.time() + CONF.console.subprocess_timeout
    timer = loopingcall.FixedIntervalLoopingCall(_wait, node_uuid, obj)
    timer.start(interval=CONF.console.subprocess_checking_interval).wait()

    if wait_state['errstr']:
        raise exception.ConsoleSubprocessFailed(error=wait_state['errstr'])
Example #4
    def add_task(self, call_function, interval, initial_delay=0):
        looping_call = loopingcall.FixedIntervalLoopingCall(call_function)
        task = LoopingTask(looping_call, interval, initial_delay)
        self.tasks.append(task)
Example #5
    def backup(self, backup, volume_file, backup_metadata=True):
        """Backup the given volume.

           If backup['parent_id'] is given, then an incremental backup
           is performed.
        """
        if self.chunk_size_bytes % self.sha_block_size_bytes:
            err = _('Chunk size is not multiple of '
                    'block size for creating hash.')
            raise exception.InvalidBackup(reason=err)

        # Read the shafile of the parent backup if backup['parent_id']
        # is given.
        parent_backup_shafile = None
        parent_backup = None
        if backup.parent_id:
            parent_backup = objects.Backup.get_by_id(self.context,
                                                     backup.parent_id)
            parent_backup_shafile = self._read_sha256file(parent_backup)
            parent_backup_shalist = parent_backup_shafile['sha256s']
            if (parent_backup_shafile['chunk_size'] !=
                    self.sha_block_size_bytes):
                err = (_('Hash block size has changed since the last '
                         'backup. New hash block size: %(new)s. Old hash '
                         'block size: %(old)s. Do a full backup.')
                       % {'old': parent_backup_shafile['chunk_size'],
                          'new': self.sha_block_size_bytes})
                raise exception.InvalidBackup(reason=err)
            # If the volume size increased since the last backup, fail
            # the incremental backup and ask user to do a full backup.
            if backup.size > parent_backup.size:
                err = _('Volume size increased since the last '
                        'backup. Do a full backup.')
                raise exception.InvalidBackup(reason=err)

        if sys.platform == 'win32':
            # When dealing with Windows physical disks, we need the exact
            # size of the disk. Attempting to read past this boundary will
            # lead to an IOError exception. At the same time, we cannot
            # seek to the end of file.
            win32_disk_size = self._get_win32_phys_disk_size(volume_file.name)

        (object_meta, object_sha256, extra_metadata, container,
         volume_size_bytes) = self._prepare_backup(backup)

        counter = 0
        total_block_sent_num = 0

        # There are two mechanisms to send the progress notification.
        # 1. The notifications are periodically sent in a certain interval.
        # 2. The notifications are sent after a certain number of chunks.
        # Both of them are working simultaneously during the volume backup,
        # when "chunked" backup drivers are deployed.
        def _notify_progress():
            self._send_progress_notification(self.context, backup,
                                             object_meta,
                                             total_block_sent_num,
                                             volume_size_bytes)
        timer = loopingcall.FixedIntervalLoopingCall(
            _notify_progress)
        if self.enable_progress_timer:
            timer.start(interval=self.backup_timer_interval)

        sha256_list = object_sha256['sha256s']
        shaindex = 0
        is_backup_canceled = False
        while True:
            # First of all, we check the status of this backup. If it
            # has been changed to deleting or has been deleted, we cancel
            # the backup process so the forced delete can proceed.
            with backup.as_read_deleted():
                backup.refresh()
            if backup.status in (fields.BackupStatus.DELETING,
                                 fields.BackupStatus.DELETED):
                is_backup_canceled = True
                # To avoid leaving chunks behind once the deletion completes,
                # clean up the chunk objects again.
                self.delete_backup(backup)
                LOG.debug('Cancel the backup process of %s.', backup.id)
                break
            data_offset = volume_file.tell()

            if sys.platform == 'win32':
                read_bytes = min(self.chunk_size_bytes,
                                 win32_disk_size - data_offset)
            else:
                read_bytes = self.chunk_size_bytes
            data = volume_file.read(read_bytes)

            if data == b'':
                break

            # Calculate new shas with the datablock.
            shalist = eventlet.tpool.execute(self._calculate_sha, data)
            sha256_list.extend(shalist)

            # If parent_backup is not None, that means an incremental
            # backup will be performed.
            if parent_backup:
                # Find the extent that needs to be backed up.
                extent_off = -1
                for idx, sha in enumerate(shalist):
                    if sha != parent_backup_shalist[shaindex]:
                        if extent_off == -1:
                            # Start of new extent.
                            extent_off = idx * self.sha_block_size_bytes
                    else:
                        if extent_off != -1:
                            # We've reached the end of extent.
                            extent_end = idx * self.sha_block_size_bytes
                            segment = data[extent_off:extent_end]
                            self._backup_chunk(backup, container, segment,
                                               data_offset + extent_off,
                                               object_meta,
                                               extra_metadata)
                            extent_off = -1
                    shaindex += 1

                # The last extent extends to the end of data buffer.
                if extent_off != -1:
                    extent_end = len(data)
                    segment = data[extent_off:extent_end]
                    self._backup_chunk(backup, container, segment,
                                       data_offset + extent_off,
                                       object_meta, extra_metadata)
                    extent_off = -1
            else:  # Do a full backup.
                self._backup_chunk(backup, container, data, data_offset,
                                   object_meta, extra_metadata)

            # Notifications
            total_block_sent_num += self.data_block_num
            counter += 1
            if counter == self.data_block_num:
                # Send the notification to Ceilometer when the chunk
                # number reaches the data_block_num.  The backup percentage
                # is put in the metadata as the extra information.
                self._send_progress_notification(self.context, backup,
                                                 object_meta,
                                                 total_block_sent_num,
                                                 volume_size_bytes)
                # Reset the counter
                counter = 0

        # Stop the timer.
        timer.stop()
        # If backup has been cancelled we have nothing more to do
        # but timer.stop().
        if is_backup_canceled:
            return
        # All the data has been sent, so the backup percentage reaches 100.
        self._send_progress_end(self.context, backup, object_meta)

        object_sha256['sha256s'] = sha256_list
        if backup_metadata:
            try:
                self._backup_metadata(backup, object_meta)
            # Whatever goes wrong, we want to log, cleanup, and re-raise.
            except Exception:
                with excutils.save_and_reraise_exception():
                    LOG.exception("Backup volume metadata failed.")
                    self.delete_backup(backup)

        self._finalize_backup(backup, container, object_meta, object_sha256)
Example #6
    def _unlink_volume(self,
                       array,
                       source_device_id,
                       target_device_id,
                       snap_name,
                       extra_specs,
                       snap_id=None,
                       list_volume_pairs=None,
                       loop=True):
        """Unlink a target volume from its source volume.

        :param array: the array serial number
        :param source_device_id: the source device id
        :param target_device_id: the target device id
        :param snap_name: the snap name
        :param extra_specs: extra specifications
        :param snap_id: the unique snap id of the SnapVX
        :param list_volume_pairs: list of volume pairs, optional
        :param loop: if looping call is required for handling retries
        :returns: return code
        """
        def _unlink_vol():
            """Called at an interval until the synchronization is finished.

            :raises: loopingcall.LoopingCallDone
            """
            retries = kwargs['retries']
            try:
                kwargs['retries'] = retries + 1
                if not kwargs['modify_vol_success']:
                    self.rest.modify_volume_snap(
                        array,
                        source_device_id,
                        target_device_id,
                        snap_name,
                        extra_specs,
                        snap_id=snap_id,
                        unlink=True,
                        list_volume_pairs=list_volume_pairs)
                    kwargs['modify_vol_success'] = True
            except exception.VolumeBackendAPIException:
                pass

            if kwargs['retries'] > UNLINK_RETRIES:
                LOG.error("_unlink_volume failed after %(retries)d "
                          "tries.", {'retries': retries})
                raise loopingcall.LoopingCallDone(retvalue=30)
            if kwargs['modify_vol_success']:
                raise loopingcall.LoopingCallDone()

        if not loop:
            self.rest.modify_volume_snap(array,
                                         source_device_id,
                                         target_device_id,
                                         snap_name,
                                         extra_specs,
                                         snap_id=snap_id,
                                         unlink=True,
                                         list_volume_pairs=list_volume_pairs)
        else:
            kwargs = {'retries': 0, 'modify_vol_success': False}
            timer = loopingcall.FixedIntervalLoopingCall(_unlink_vol)
            rc = timer.start(interval=UNLINK_INTERVAL).wait()
            return rc
Example #7
    def _wait_vdisk_copy_completed(self, vdisk_name):
        timer = loopingcall.FixedIntervalLoopingCall(
            self._is_vdisk_copy_in_progress, vdisk_name)
        timer.start(interval=self._check_lock_interval).wait()
Example #8
    def _init_periodic_resync(self):
        self.resync_thread = loopingcall.FixedIntervalLoopingCall(
            self._periodic_resync)
        self.resync_thread.start(interval=self.RESYNC_TRY_INTERVAL)
Example #9
    def create_cloned_volume(self, volume, src_vref):
        """Create a clone of the specified volume."""
        LOG.debug("Creating clone of volume: %s.", src_vref['id'])
        name = self.configuration.volume_name_prefix, volume['id']
        vol_name = ''.join(name)
        vol_size = volume['size'] * units.Ki
        src_vol_id = src_vref['provider_location']
        LOG.debug(
            "Clone volume : "
            "[name] %(name)s - [source] %(source)s - [size] %(size)s.", {
                'name': vol_name,
                'source': src_vol_id,
                'size': six.text_type(vol_size)
            })
        reply = self.client.service.volumeClone(src_vol_id, vol_name)
        status = reply['status']
        result = reply['result']
        LOG.debug("Clone volume : [status] %(stat)s - [result] %(res)s.", {
            'stat': six.text_type(status),
            'res': result
        })

        if status != 0:
            msg = (_("Error while creating volume "
                     "[status] %(stat)s - [result] %(res)s.") % {
                         'stat': six.text_type(status),
                         'res': result
                     })
            LOG.error(msg)
            raise exception.VolumeBackendAPIException(data=msg)

        # Monitor the status until it becomes
        # either success, fail or timeout
        params = {'clone_id': int(result), 'vol_name': vol_name}
        start_time = int(time.time())

        timer = loopingcall.FixedIntervalLoopingCall(
            self._retry_get_detail, start_time,
            self.configuration.clone_check_timeout, 'clone_detail', params)
        reply = timer.start(interval=self.configuration.retry_interval).wait()

        reply = self.client.service.volumeDetailByName(vol_name)
        status = reply['status']
        new_vol_id = reply['volumeInfoResult']['volumeId']

        if status != 0:
            msg = (_("Error[%(stat)s - %(res)s] "
                     "while getting volume id."), {
                         'stat': six.text_type(status),
                         'res': result
                     })
            LOG.error(msg)
            raise exception.VolumeBackendAPIException(data=msg)

        LOG.debug(
            "clone done : "
            "[status] %(stat)s - [volume id] %(vol_id)s.", {
                'stat': status,
                'vol_id': six.text_type(new_vol_id)
            })
        return {'provider_location': new_vol_id}
Example #10
def _set_and_wait(task, target_state):
    """Helper function for DynamicLoopingCall.

    This method changes the power state and polls AMT until the desired
    power state is reached.

    :param task: a TaskManager instance containing the target node.
    :param target_state: desired power state.
    :returns: one of ironic.common.states.
    :raises: PowerStateFailure if cannot set the node to target_state.
    :raises: AMTFailure.
    :raises: AMTConnectFailure
    :raises: InvalidParameterValue
    """
    node = task.node
    driver = task.driver
    if target_state not in (states.POWER_ON, states.POWER_OFF):
        raise exception.InvalidParameterValue(
            _('Unsupported target_state: %s') % target_state)
    elif target_state == states.POWER_ON:
        boot_device = node.driver_internal_info.get('amt_boot_device')
        if boot_device and boot_device != amt_common.DEFAULT_BOOT_DEVICE:
            driver.management.ensure_next_boot_device(node, boot_device)

    def _wait(status):
        status['power'] = _power_status(node)
        if status['power'] == target_state:
            raise loopingcall.LoopingCallDone()

        if status['iter'] >= CONF.amt.max_attempts:
            status['power'] = states.ERROR
            LOG.warning(
                _LW("AMT failed to set power state %(state)s after "
                    "%(tries)s retries on node %(node_id)s."), {
                        'state': target_state,
                        'tries': status['iter'],
                        'node_id': node.uuid
                    })
            raise loopingcall.LoopingCallDone()

        try:
            _set_power_state(node, target_state)
        except Exception:
            # Log failures but keep trying
            LOG.warning(
                _LW("AMT set power state %(state)s for node %(node)s "
                    "- Attempt %(attempt)s times of %(max_attempt)s "
                    "failed."), {
                        'state': target_state,
                        'node': node.uuid,
                        'attempt': status['iter'] + 1,
                        'max_attempt': CONF.amt.max_attempts
                    })
        status['iter'] += 1

    status = {'power': None, 'iter': 0}

    timer = loopingcall.FixedIntervalLoopingCall(_wait, status)
    timer.start(interval=CONF.amt.action_wait).wait()

    if status['power'] != target_state:
        raise exception.PowerStateFailure(pstate=target_state)

    return status['power']
Example #11
    def create_volume_from_snapshot(self, volume, snapshot):
        """Create a volume from a snapshot."""
        name = self.configuration.volume_name_prefix, volume['id']
        snap_id = snapshot['provider_location']
        vol_name = ''.join(name)
        # Trigger an asynchronous restore operation
        LOG.debug(
            "[start] Create volume from snapshot : "
            "%(snap_id)s - name : %(vol_name)s.", {
                'snap_id': snap_id,
                'vol_name': vol_name
            })
        reply = self.client.service.restoreFromSnapshot(snap_id, vol_name)
        status = reply['status']
        result = reply['result']
        LOG.debug(
            "Restore  volume from snapshot "
            "[status] %(stat)s - [result] %(res)s.", {
                'stat': six.text_type(status),
                'res': result
            })

        if status != 0:
            msg = (_("Error[%(stat)s - %(res)s] while restoring snapshot "
                     "[%(snap_id)s] into volume [%(vol)s].") % {
                         'stat': six.text_type(status),
                         'res': result,
                         'snap_id': snap_id,
                         'vol': vol_name
                     })
            LOG.error(msg)
            raise exception.VolumeBackendAPIException(data=msg)

        # Monitor the status until it becomes
        # either success, fail or timeout
        params = {'restore_id': int(result)}
        start_time = int(time.time())

        timer = loopingcall.FixedIntervalLoopingCall(
            self._retry_get_detail, start_time,
            self.configuration.restore_check_timeout, 'restore_detail', params)
        reply = timer.start(interval=self.configuration.retry_interval).wait()

        reply = self.client.service.volumeDetailByName(vol_name)
        status = reply['status']
        new_vol_id = reply['volumeInfoResult']['volumeId']

        if status != 0:
            msg = (_("Error[status] %(stat)s - [result] %(res)s] "
                     "while getting volume id.") % {
                         'stat': six.text_type(status),
                         'res': result
                     })
            LOG.error(msg)
            raise exception.VolumeBackendAPIException(data=msg)
        LOG.debug(
            "Restore done [status] %(stat)s - "
            "[volume id] %(vol_id)s.", {
                'stat': status,
                'vol_id': six.text_type(new_vol_id)
            })
        return {'provider_location': new_vol_id}
Example #12
    def __init__(self):
        self.timer = loopingcall.FixedIntervalLoopingCall(self.execute_ops)
        self.maintenance_interval = cfg.CONF.ml2_odl.maintenance_interval
        self.maintenance_ops = []
Example #13
    def _start_periodic_tasks(self):
        self.loop = loopingcall.FixedIntervalLoopingCall(self.process_services)
        self.loop.start(interval=self.conf.cfg_agent.rpc_loop_interval)
Example #14
    def test_return_false(self):
        def _raise_it():
            raise loopingcall.LoopingCallDone(False)

        timer = loopingcall.FixedIntervalLoopingCall(_raise_it)
        self.assertFalse(timer.start(interval=0.5).wait())
Example #15
    def execute(self, instance_uuid):
        """Stop the instance for recovery."""
        instance = self.novaclient.get_server(self.context, instance_uuid)

        ha_enabled_key = CONF.instance_failure.ha_enabled_instance_metadata_key

        # If an instance is not HA_Enabled and "process_all_instances" config
        # option is also disabled, then there is no need to take any recovery
        # action.
        if not CONF.instance_failure.process_all_instances and not (
                strutils.bool_from_string(
                    instance.metadata.get(ha_enabled_key, False))):
            msg = ("Skipping recovery for instance: %(instance_uuid)s as it is"
                   " not Ha_Enabled") % {
                       'instance_uuid': instance_uuid
                   }
            LOG.info(msg)
            self.update_details(msg, 1.0)
            raise exception.SkipInstanceRecoveryException()

        vm_state = getattr(instance, 'OS-EXT-STS:vm_state')
        if vm_state in ['paused', 'rescued']:
            msg = ("Recovery of instance '%(instance_uuid)s' is ignored as it "
                   "is in '%(vm_state)s' state.") % {
                       'instance_uuid': instance_uuid,
                       'vm_state': vm_state
                   }
            LOG.warning(msg)
            self.update_details(msg, 1.0)
            raise exception.IgnoreInstanceRecoveryException(msg)

        if vm_state != 'stopped':
            if vm_state == 'resized':
                self.novaclient.reset_instance_state(self.context, instance.id,
                                                     'active')

            msg = "Stopping instance: %s" % instance_uuid
            self.update_details(msg)

            self.novaclient.stop_server(self.context, instance.id)

        def _wait_for_power_off():
            new_instance = self.novaclient.get_server(self.context,
                                                      instance_uuid)
            vm_state = getattr(new_instance, 'OS-EXT-STS:vm_state')
            if vm_state == 'stopped':
                raise loopingcall.LoopingCallDone()

        periodic_call = loopingcall.FixedIntervalLoopingCall(
            _wait_for_power_off)

        try:
            # add a timeout to the periodic call.
            periodic_call.start(interval=CONF.verify_interval)
            etimeout.with_timeout(CONF.wait_period_after_power_off,
                                  periodic_call.wait)
            msg = "Stopped instance: '%s'" % instance_uuid
            self.update_details(msg, 1.0)
        except etimeout.Timeout:
            msg = "Failed to stop instance %(instance)s" % {
                'instance': instance.id
            }
            self.update_details(msg, 1.0)
            raise exception.InstanceRecoveryFailureException(message=msg)
        finally:
            # stop the periodic call, in case of exceptions or Timeout.
            periodic_call.stop()
Example #16
    def test_terminate_on_exception(self):
        def _raise_it():
            raise RuntimeError()

        timer = loopingcall.FixedIntervalLoopingCall(_raise_it)
        self.assertRaises(RuntimeError, timer.start(interval=0.5).wait)
Example #17
    def _setup_backlog_handling(self):
        LOG.debug('Activating periodic backlog processor')
        self._heartbeat = loopingcall.FixedIntervalLoopingCall(
            self._process_backlogged_routers)
        self._heartbeat.start(
            interval=cfg.CONF.general.backlog_processing_interval)
Example #18
    def _setup_backlog_handling(self):
        self._heartbeat = loopingcall.FixedIntervalLoopingCall(
            self._process_backlogged_routers)
        self._heartbeat.start(
            interval=cfg.CONF.general.backlog_processing_interval)
Example #19
File: service.py  Project: ikhere/cinder
    def start(self):
        version_string = version.version_string()
        LOG.info('Starting %(topic)s node (version %(version_string)s)', {
            'topic': self.topic,
            'version_string': version_string
        })
        self.model_disconnected = False

        if self.coordination:
            coordination.COORDINATOR.start()

        self.manager.init_host(added_to_cluster=self.added_to_cluster,
                               service_id=Service.service_id)

        LOG.debug("Creating RPC server for service %s", self.topic)

        ctxt = context.get_admin_context()
        endpoints = [self.manager]
        endpoints.extend(self.manager.additional_endpoints)
        obj_version_cap = objects.Service.get_minimum_obj_version(ctxt)
        LOG.debug("Pinning object versions for RPC server serializer to %s",
                  obj_version_cap)
        serializer = objects_base.CinderObjectSerializer(obj_version_cap)

        target = messaging.Target(topic=self.topic, server=self.host)
        self.rpcserver = rpc.get_server(target, endpoints, serializer)
        self.rpcserver.start()

        # NOTE(dulek): Kids, don't do that at home. We're relying here on
        # oslo.messaging implementation details to keep backward compatibility
        # with pre-Ocata services. This will not matter once we drop
        # compatibility with them.
        if self.topic == constants.VOLUME_TOPIC:
            target = messaging.Target(
                topic='%(topic)s.%(host)s' % {'topic': self.topic,
                                              'host': self.host},
                server=vol_utils.extract_host(self.host, 'host'))
            self.backend_rpcserver = rpc.get_server(target, endpoints,
                                                    serializer)
            self.backend_rpcserver.start()

        if self.cluster:
            LOG.info(
                'Starting %(topic)s cluster %(cluster)s (version '
                '%(version)s)', {
                    'topic': self.topic,
                    'version': version_string,
                    'cluster': self.cluster
                })
            target = messaging.Target(
                topic='%s.%s' % (self.topic, self.cluster),
                server=vol_utils.extract_host(self.cluster, 'host'))
            serializer = objects_base.CinderObjectSerializer(obj_version_cap)
            self.cluster_rpcserver = rpc.get_server(target, endpoints,
                                                    serializer)
            self.cluster_rpcserver.start()

        self.manager.init_host_with_rpc()

        if self.report_interval:
            pulse = loopingcall.FixedIntervalLoopingCall(self.report_state)
            pulse.start(interval=self.report_interval,
                        initial_delay=self.report_interval)
            self.timers.append(pulse)

        if self.periodic_interval:
            if self.periodic_fuzzy_delay:
                initial_delay = random.randint(0, self.periodic_fuzzy_delay)
            else:
                initial_delay = None

            periodic = loopingcall.FixedIntervalLoopingCall(
                self.periodic_tasks)
            periodic.start(interval=self.periodic_interval,
                           initial_delay=initial_delay)
            self.timers.append(periodic)
Example #20
    def execute(self, stack_id, heat_client):
        LOG.info(_LI('Syncing Heat stack status, stack_id: %s'), stack_id)
        sync_status_loop = loopingcall.FixedIntervalLoopingCall(
            self._sync_status, heat_client, stack_id)
        sync_status_loop.start(interval=CONF.sync_status_interval)
        sync_status_loop.wait()
Example #21
    def rebuild(self, context, instance, image_meta, injected_files,
                admin_password, bdms, detach_block_devices,
                attach_block_devices, network_info=None,
                recreate=False, block_device_info=None,
                preserve_ephemeral=False):
        """Rebuild/redeploy an instance.

        This version of rebuild() allows for supporting the option to
        preserve the ephemeral partition. We cannot call spawn() from
        here because it will attempt to set the instance_uuid value
        again, which is not allowed by the Ironic API. It also requires
        the instance to not have an 'active' provision state, but we
        cannot safely change that. Given that, we implement only the
        portions of spawn() we need within rebuild().

        :param context: The security context.
        :param instance: The instance object.
        :param image_meta: Image object returned by nova.image.glance
            that defines the image from which to boot this instance. Ignored
            by this driver.
        :param injected_files: User files to inject into instance. Ignored
            by this driver.
        :param admin_password: Administrator password to set in
            instance. Ignored by this driver.
        :param bdms: block-device-mappings to use for rebuild. Ignored
            by this driver.
        :param detach_block_devices: function to detach block devices. See
            nova.compute.manager.ComputeManager:_rebuild_default_impl for
            usage. Ignored by this driver.
        :param attach_block_devices: function to attach block devices. See
            nova.compute.manager.ComputeManager:_rebuild_default_impl for
            usage. Ignored by this driver.
        :param network_info: Instance network information. Ignored by
            this driver.
        :param recreate: Boolean value; if True the instance is
            recreated on a new hypervisor - all the cleanup of old state is
            skipped. Ignored by this driver.
        :param block_device_info: Instance block device
            information. Ignored by this driver.
        :param preserve_ephemeral: Boolean value; if True the ephemeral
            must be preserved on rebuild.

        """
        LOG.debug('Rebuild called for instance', instance=instance)

        instance.task_state = task_states.REBUILD_SPAWNING
        instance.save(expected_task_state=[task_states.REBUILDING])

        node_uuid = instance.node
        node = self._get_node(node_uuid)

        self._add_driver_fields(node, instance, image_meta, instance.flavor,
                                preserve_ephemeral)

        # Trigger the node rebuild/redeploy.
        try:
            self.ironicclient.call("node.set_provision_state",
                              node_uuid, ironic_states.REBUILD)
        except (exception.NovaException,         # Retry failed
                ironic.exc.InternalServerError,  # Validations
                ironic.exc.BadRequest) as e:     # Maintenance
            msg = (_("Failed to request Ironic to rebuild instance "
                     "%(inst)s: %(reason)s") % {'inst': instance.uuid,
                                                'reason': six.text_type(e)})
            raise exception.InstanceDeployFailure(msg)

        # Although the target provision state is REBUILD, it will actually go
        # to ACTIVE once the redeploy is finished.
        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
                                                     instance)
        timer.start(interval=CONF.ironic.api_retry_interval).wait()
        LOG.info(_LI('Instance was successfully rebuilt'), instance=instance)
Example #22
def start_socat_console(node_uuid, port, console_cmd):
    """Open the serial console for a node.

    :param node_uuid: the uuid of the node
    :param port: the terminal port for the node
    :param console_cmd: the shell command that will be executed by socat to
        establish console to the node
    :raises ConsoleError: if the directory for the PID file or the PID file
        cannot be created
    :raises ConsoleSubprocessFailed: when invoking the subprocess failed
    """
    # Make sure that the old console for this node is stopped.
    # If no console is running, we may get exception NoConsolePid.
    try:
        _stop_console(node_uuid)
    except exception.NoConsolePid:
        pass

    _ensure_console_pid_dir_exists()
    pid_file = _get_console_pid_file(node_uuid)

    # put together the command and arguments for invoking the console
    args = ['socat']
    # set timeout check for user's connection. If the timeout value
    # is not 0, after timeout seconds of inactivity on the client side,
    # the connection will be closed.
    if CONF.console.terminal_timeout > 0:
        args.append('-T%d' % CONF.console.terminal_timeout)
    args.append('-L%s' % pid_file)

    console_host = CONF.console.socat_address
    if netutils.is_valid_ipv6(console_host):
        arg = 'TCP6-LISTEN:%(port)s,bind=[%(host)s],reuseaddr'
    else:
        arg = 'TCP4-LISTEN:%(port)s,bind=%(host)s,reuseaddr'
    args.append(arg % {'host': console_host,
                       'port': port})

    args.append('EXEC:"%s",pty,stderr' % console_cmd)

    # run the command as a subprocess
    try:
        LOG.debug('Running subprocess: %s', ' '.join(args))
        # Use pipe here to catch the error in case socat
        # fails to start. Note that socat uses stdout as transferring
        # data, so we only capture stderr for checking if it fails.
        obj = subprocess.Popen(args, stderr=subprocess.PIPE)
    except (OSError, ValueError) as e:
        error = _("%(exec_error)s\n"
                  "Command: %(command)s") % {'exec_error': str(e),
                                             'command': ' '.join(args)}
        LOG.exception('Unable to start socat console')
        raise exception.ConsoleSubprocessFailed(error=error)

    # NOTE: we need to check if socat fails to start here.
    # If it starts successfully, it will run in non-daemon mode and
    # will not return until the console session is stopped.

    def _wait(node_uuid, popen_obj):
        wait_state['returncode'] = popen_obj.poll()

        # socat runs in non-daemon mode, so it should not return now
        if wait_state['returncode'] is None:
            # If the pid file is created and the process is running,
            # we stop checking it periodically.
            if (os.path.exists(pid_file)
                    and psutil.pid_exists(_get_console_pid(node_uuid))):
                raise loopingcall.LoopingCallDone()
        else:
            # socat returned, it failed to start.
            # We get the error (out should be None in this case).
            (_out, err) = popen_obj.communicate()
            wait_state['errstr'] = _(
                "Command: %(command)s.\n"
                "Exit code: %(return_code)s.\n"
                "Stderr: %(error)r") % {
                    'command': ' '.join(args),
                    'return_code': wait_state['returncode'],
                    'error': err}
            LOG.error(wait_state['errstr'])
            raise loopingcall.LoopingCallDone()

        if time.time() > expiration:
            wait_state['errstr'] = (_("Timeout while waiting for console "
                                      "subprocess to start for node %s.") %
                                    node_uuid)
            LOG.error(wait_state['errstr'])
            raise loopingcall.LoopingCallDone()

    wait_state = {'returncode': None, 'errstr': ''}
    expiration = time.time() + CONF.console.subprocess_timeout
    timer = loopingcall.FixedIntervalLoopingCall(_wait, node_uuid, obj)
    timer.start(interval=CONF.console.subprocess_checking_interval).wait()

    if wait_state['errstr']:
        raise exception.ConsoleSubprocessFailed(error=wait_state['errstr'])
Example #23
    def spawn(self, context, instance, image_meta, injected_files,
              admin_password, network_info=None, block_device_info=None):
        """Deploy an instance.

        :param context: The security context.
        :param instance: The instance object.
        :param image_meta: Image dict returned by nova.image.glance
            that defines the image from which to boot this instance.
        :param injected_files: User files to inject into instance.
        :param admin_password: Administrator password to set in
            instance.
        :param network_info: Instance network information.
        :param block_device_info: Instance block device
            information. Ignored by this driver.
        """
        LOG.debug('Spawn called for instance', instance=instance)

        # The compute manager is meant to know the node uuid, so missing uuid
        # is a significant issue. It may mean we've been passed the wrong data.
        node_uuid = instance.get('node')
        if not node_uuid:
            raise ironic.exc.BadRequest(
                _("Ironic node uuid not supplied to "
                  "driver for instance %s.") % instance.uuid)

        node = self._get_node(node_uuid)
        flavor = instance.flavor

        self._add_driver_fields(node, instance, image_meta, flavor)

        # NOTE(Shrews): The default ephemeral device needs to be set for
        # services (like cloud-init) that depend on it being returned by the
        # metadata server. Addresses bug https://launchpad.net/bugs/1324286.
        if flavor.ephemeral_gb:
            instance.default_ephemeral_device = '/dev/sda1'
            instance.save()

        # validate we are ready to do the deploy
        validate_chk = self.ironicclient.call("node.validate", node_uuid)
        if (not validate_chk.deploy.get('result')
                or not validate_chk.power.get('result')):
            # something is wrong. undo what we have done
            self._cleanup_deploy(node, instance, network_info)
            raise exception.ValidationError(_(
                "Ironic node: %(id)s failed to validate."
                " (deploy: %(deploy)s, power: %(power)s)")
                % {'id': node.uuid,
                   'deploy': validate_chk.deploy,
                   'power': validate_chk.power})

        # prepare for the deploy
        try:
            self._plug_vifs(node, instance, network_info)
            self._start_firewall(instance, network_info)
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Error preparing deploy for instance "
                              "%(instance)s on baremetal node %(node)s."),
                          {'instance': instance.uuid,
                           'node': node_uuid})
                self._cleanup_deploy(node, instance, network_info)

        # Config drive
        configdrive_value = None
        if configdrive.required_by(instance):
            extra_md = {}
            if admin_password:
                extra_md['admin_pass'] = admin_password

            try:
                configdrive_value = self._generate_configdrive(
                    instance, node, network_info, extra_md=extra_md,
                    files=injected_files)
            except Exception as e:
                with excutils.save_and_reraise_exception():
                    msg = (_LE("Failed to build configdrive: %s") %
                           six.text_type(e))
                    LOG.error(msg, instance=instance)
                    self._cleanup_deploy(node, instance, network_info)

            LOG.info(_LI("Config drive for instance %(instance)s on "
                         "baremetal node %(node)s created."),
                     {'instance': instance['uuid'], 'node': node_uuid})

        # trigger the node deploy
        try:
            self.ironicclient.call("node.set_provision_state", node_uuid,
                                   ironic_states.ACTIVE,
                                   configdrive=configdrive_value)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                msg = (_LE("Failed to request Ironic to provision instance "
                           "%(inst)s: %(reason)s"),
                           {'inst': instance.uuid,
                            'reason': six.text_type(e)})
                LOG.error(msg)
                self._cleanup_deploy(node, instance, network_info)

        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
                                                     instance)
        try:
            timer.start(interval=CONF.ironic.api_retry_interval).wait()
            LOG.info(_LI('Successfully provisioned Ironic node %s'),
                     node.uuid, instance=instance)
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Error deploying instance %(instance)s on "
                              "baremetal node %(node)s."),
                          {'instance': instance.uuid,
                           'node': node_uuid})
Example #24
    def execute(self, stack_id):
        LOG.info(_("Syncing stack status, stack_id: %s"), stack_id)
        sync_status_loop = loopingcall.FixedIntervalLoopingCall(
            self._sync_status, self._checkpoint, stack_id)
        sync_status_loop.start(interval=CONF.sync_status_interval)
Example #25
    def start(self):
        super(AgentStatusCheckWorker, self).start()
        if self._loop is None:
            self._loop = loopingcall.FixedIntervalLoopingCall(self._check_func)
            self._loop.start(interval=self._interval,
                             initial_delay=self._initial_delay)
Example #26
    def connect_volume(self, connection_properties):
        """Attach the volume to instance_name.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: dict

        connection_properties for Fibre Channel must include:
        target_wwn - World Wide Name
        target_lun - LUN id of the volume
        """
        device_info = {'type': 'block'}

        connection_properties = self._add_targets_to_connection_properties(
            connection_properties)

        hbas = self._linuxfc.get_fc_hbas_info()
        if not hbas:
            LOG.warning("We are unable to locate any Fibre Channel devices.")
            raise exception.NoFibreChannelHostsFound()

        host_devices = self._get_possible_volume_paths(connection_properties,
                                                       hbas)

        # The /dev/disk/by-path/... node is not always present immediately
        # We only need to find the first device.  Once we see the first device
        # multipath will have any others.
        def _wait_for_device_discovery(host_devices):
            for device in host_devices:
                LOG.debug("Looking for Fibre Channel dev %(device)s",
                          {'device': device})
                if os.path.exists(device) and self.check_valid_device(device):
                    self.host_device = device
                    # get the /dev/sdX device.  This is used
                    # to find the multipath device.
                    self.device_name = os.path.realpath(device)
                    raise loopingcall.LoopingCallDone()

            if self.tries >= self.device_scan_attempts:
                LOG.error("Fibre Channel volume device not found.")
                raise exception.NoFibreChannelVolumeDeviceFound()

            LOG.info(
                "Fibre Channel volume device not yet found. "
                "Will rescan & retry.  Try number: %(tries)s.",
                {'tries': self.tries})

            self._linuxfc.rescan_hosts(hbas, connection_properties)
            self.tries = self.tries + 1

        self.host_device = None
        self.device_name = None
        self.tries = 0
        timer = loopingcall.FixedIntervalLoopingCall(
            _wait_for_device_discovery, host_devices)
        timer.start(interval=2).wait()

        LOG.debug(
            "Found Fibre Channel volume %(name)s "
            "(after %(tries)s rescans.)", {
                'name': self.device_name,
                'tries': self.tries
            })

        # find out the WWN of the device
        device_wwn = self._linuxscsi.get_scsi_wwn(self.host_device)
        LOG.debug("Device WWN = '%(wwn)s'", {'wwn': device_wwn})
        device_info['scsi_wwn'] = device_wwn

        # see if the new drive is part of a multipath
        # device.  If so, we'll use the multipath device.
        if self.use_multipath:
            (device_path, multipath_id) = (super(FibreChannelConnector,
                                                 self)._discover_mpath_device(
                                                     device_wwn,
                                                     connection_properties,
                                                     self.device_name))
            if multipath_id:
                # only set the multipath_id if we found one
                device_info['multipath_id'] = multipath_id

        else:
            device_path = self.host_device

        device_info['path'] = device_path
        return device_info
Example #27
    def _wait_for_export_state(self,
                               volume_name,
                               snapshot_name=None,
                               state=False):
        """Polls backend to verify volume's export state.

        XG set/query requests that follow a request to create or delete
        a LUN export may fail on the backend if vshared is still processing
        the export action (or times out).  We can check whether it is
        done by polling the export binding for a LUN to ensure it is
        created or deleted.

        This function will try to verify the creation or removal of
        export state on both gateway nodes of the array every 5
        seconds.

        Arguments:
            volume_name   -- name of volume
            snapshot_name -- name of volume's snapshot
            state         -- True to poll for existence, False for lack of

        Returns:
            True if the export state was correctly added or removed
            (depending on 'state' param)
        """
        if not snapshot_name:
            bn = "/vshare/state/local/container/%s/lun/%s/usn_id" \
                % (self.container, volume_name)
        else:
            bn = "/vshare/state/snapshot/container/%s/lun/%s/snap/%s/usn_id" \
                % (self.container, volume_name, snapshot_name)

        def _loop_func(state):
            status = [False, False]
            mg_conns = [self.mga, self.mgb]

            LOG.debug("Entering _wait_for_export_state loop: state=%s.", state)

            # TODO(rlucio): May need to handle situations where export
            # fails, i.e., HBAs go offline and the array is in
            # degraded mode.
            #
            for node_id in range(2):
                resp = mg_conns[node_id].basic.get_node_values(bn)

                if state:
                    # Verify export was added.  Validates when the usn_id is
                    # altered to a non-default binding string.
                    #
                    if resp[bn] != "(not exported)":
                        status[node_id] = True
                else:
                    # Verify export was removed.  Validates when the usn_id is
                    # reset to the default binding string.
                    #
                    if resp[bn] == "(not exported)":
                        status[node_id] = True

            if status[0] and status[1]:
                LOG.debug("_wait_for_export_state loopingcall complete.")
                raise loopingcall.LoopingCallDone(retvalue=True)

        timer = loopingcall.FixedIntervalLoopingCall(_loop_func, state)
        success = timer.start(interval=5).wait()

        return success