Esempio n. 1
0
    def stop(self, worker_context):
        """Destroy the instance and wait for it to disappear.

        Asks nova to destroy the instance, then polls nova until the
        instance can no longer be looked up or ``cfg.CONF.boot_timeout``
        seconds have passed.

        :param worker_context: context object providing ``nova_client``
        :returns: the resulting state once the instance is gone; None when
                  there was no instance to destroy or the wait timed out
        """
        self._ensure_cache(worker_context)
        self.log.info(_LI('Destroying instance'))

        if not self.instance_info:
            self.log.info(_LI('Instance already destroyed.'))
            return

        try:
            worker_context.nova_client.destroy_instance(self.instance_info)
        except Exception:
            # Best effort: the failure is logged and we still poll below.
            self.log.exception(_LE('Error deleting router instance'))

        started_at = time.time()
        while time.time() - started_at < cfg.CONF.boot_timeout:
            still_present = worker_context.nova_client.get_instance_by_id(
                self.instance_info.id_)
            if still_present:
                self.log.debug('Router has not finished stopping')
                time.sleep(cfg.CONF.retry_delay)
                continue

            # A GONE state is preserved; anything else becomes DOWN.
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        self.log.error(_LE(
            'Router failed to stop within %d secs'),
            cfg.CONF.boot_timeout)
Esempio n. 2
0
    def start(self, interval, initial_delay=None):
        """Prepare the fixed-interval loop that repeatedly calls ``self.f``.

        :param interval: target number of seconds between the start of one
                         call and the start of the next
        :param initial_delay: optional number of seconds to sleep before
                              the first call
        """
        # NOTE(review): nothing in the visible body schedules ``_inner`` or
        # returns ``done``; presumably that happens in code not shown here.
        self._running = True
        done = event.Event()

        def _inner():
            if initial_delay:
                greenthread.sleep(initial_delay)

            try:
                while self._running:
                    start = timeutils.utcnow()
                    self.f(*self.args, **self.kw)
                    end = timeutils.utcnow()
                    if not self._running:
                        break
                    # Deduct the time the call itself took from the sleep.
                    delay = interval - timeutils.delta_seconds(start, end)
                    if delay <= 0:
                        LOG.warning(_LW(
                            'task run outlasted interval by %s sec'), -delay
                        )
                    greenthread.sleep(delay if delay > 0 else 0)
            except LoopingCallDone as e:
                # The callable asked to stop; pass its value to the event.
                self.stop()
                done.send(e.retvalue)
            except Exception:
                LOG.exception(_LE('in looping call'))
                done.send_exception(*sys.exc_info())
                return
Esempio n. 3
0
    def run_periodic_tasks(self, context, raise_on_error=False):
        """Run each registered periodic task that is due on this tick.

        A task whose skip counter is still positive has the counter
        decremented and is skipped. Otherwise the counter is reset from the
        task's ``_ticks_between_runs`` and the task is called.

        :param context: passed to every task
        :param raise_on_error: re-raise a task's exception instead of only
                               logging it
        """
        owner = self.__class__.__name__
        for task_name, task in self._periodic_tasks:
            full_task_name = '.'.join((owner, task_name))

            remaining = self._ticks_to_skip[task_name]
            if remaining > 0:
                LOG.debug("Skipping %(full_task_name)s, %(ticks_to_skip)s"
                          " ticks left until next run",
                          {'full_task_name': full_task_name,
                           'ticks_to_skip': remaining})
                self._ticks_to_skip[task_name] -= 1
                continue

            self._ticks_to_skip[task_name] = task._ticks_between_runs
            LOG.debug("Running periodic task %(full_task_name)s",
                      {'full_task_name': full_task_name})

            try:
                task(self, context)
            except Exception:
                if raise_on_error:
                    raise
                LOG.exception(_LE("Error during %(full_task_name)s:"),
                              {'full_task_name': full_task_name})
Esempio n. 4
0
def get_bridge_for_iface(root_helper, iface):
    """Look up the bridge for *iface* via ``ovs-vsctl iface-to-br``.

    :returns: the stripped command output, or None when the command failed
              (the failure is logged)
    """
    command = ["ovs-vsctl", "--timeout=2", "iface-to-br", iface]
    try:
        bridge = utils.execute(command, root_helper=root_helper).strip()
    except Exception:
        LOG.exception(_LE("Interface %s not found."), iface)
        bridge = None
    return bridge
Esempio n. 5
0
def get_bridges(root_helper):
    """Return the bridge names printed by ``ovs-vsctl list-br``.

    :param root_helper: passed through to ``utils.execute``
    :returns: list of non-empty output lines; an empty list when the command
              produced no output or failed (the failure is logged)
    """
    args = ["ovs-vsctl", "--timeout=2", "list-br"]
    try:
        output = utils.execute(args, root_helper=root_helper).strip()
        # "".split("\n") yields [''], which would look like one bridge with
        # an empty name; drop blank entries so no output means no bridges.
        return [name for name in output.split("\n") if name]
    except Exception:
        LOG.exception(_LE("Unable to retrieve bridges."))
        return []
Esempio n. 6
0
    def __call__(self, req):
        """Run the control command named by the request path.

        Only ``PUT`` is accepted. The non-empty path segments are used as
        the command line, which is run with ``--debug`` prepended.

        :param req: request object exposing ``method`` and ``path``
        :returns: ``str()`` of the command result on success, otherwise a
                  ``webob.exc`` HTTP error response
        """
        try:
            if req.method != 'PUT':
                return webob.exc.HTTPMethodNotAllowed()

            # Build a real list: it is tested for emptiness and concatenated
            # below, neither of which works on a lazy filter object.
            args = [segment for segment in req.path.split('/') if segment]
            if not args:
                return webob.exc.HTTPNotFound()

            command, _, _ = self.ctl.command_manager.find_command(args)
            if command.interactive:
                return webob.exc.HTTPNotImplemented()

            return str(self.ctl.run(['--debug'] + args))
        except SystemExit:
            # cliff invokes -h (help) on argparse failure
            # (which in turn results in sys.exit call)
            return webob.exc.HTTPBadRequest()
        except ValueError:
            return webob.exc.HTTPNotFound()
        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = _('An unknown error has occurred. '
                    'Please try your request again.')
            return webob.exc.HTTPInternalServerError(explanation=unicode(msg))
Esempio n. 7
0
    def update(self, worker_context):
        """Called when the router config should be changed.

        Repeatedly executes the current state for the current action and
        then transitions to the next state, until the machine is flagged as
        deleted, lands on a ``CalcAction`` state, or lands on an ``Exit``
        state.

        :param worker_context: passed through to the state's ``execute()``
                               and ``transition()``
        :returns: None
        """
        # Nothing runs when the queue is empty. The inner loop only ever
        # leaves via ``return``, so this condition is evaluated once per call.
        while self._queue:
            while True:
                if self.deleted:
                    self.log.debug("skipping update because the router is being deleted")
                    return

                try:
                    self.log.debug("%s.execute(%s) instance.state=%s", self.state, self.action, self.instance.state)
                    self.action = self.state.execute(self.action, worker_context)
                    self.log.debug("%s.execute -> %s instance.state=%s", self.state, self.action, self.instance.state)
                except:
                    # Failures are only logged: self.action keeps whatever
                    # value it had and the transition below still runs.
                    self.log.exception(_LE("%s.execute() failed for action: %s"), self.state, self.action)

                # Remember the state we are leaving for the debug message.
                old_state = self.state
                self.state = self.state.transition(self.action, worker_context)
                self.log.debug(
                    "%s.transition(%s) -> %s instance.state=%s", old_state, self.action, self.state, self.instance.state
                )

                # Yield control each time we stop to figure out what
                # to do next.
                if isinstance(self.state, CalcAction):
                    return  # yield

                # We have reached the exit state, so the router has
                # been deleted somehow.
                if isinstance(self.state, Exit):
                    self._do_delete()
                    return
Esempio n. 8
0
    def start(self, interval, initial_delay=None):
        """Prepare the fixed-interval loop that repeatedly calls ``self.f``.

        :param interval: target number of seconds between the start of one
                         call and the start of the next
        :param initial_delay: optional number of seconds to sleep before
                              the first call
        """
        # NOTE(review): nothing in the visible body schedules ``_inner`` or
        # returns ``done``; presumably that happens in code not shown here.
        self._running = True
        done = event.Event()

        def _inner():
            if initial_delay:
                greenthread.sleep(initial_delay)

            try:
                while self._running:
                    start = timeutils.utcnow()
                    self.f(*self.args, **self.kw)
                    end = timeutils.utcnow()
                    if not self._running:
                        break
                    # Deduct the time the call itself took from the sleep.
                    delay = interval - timeutils.delta_seconds(start, end)
                    if delay <= 0:
                        LOG.warning(
                            _LW('task run outlasted interval by %s sec'),
                            -delay)
                    greenthread.sleep(delay if delay > 0 else 0)
            except LoopingCallDone as e:
                # The callable asked to stop; pass its value to the event.
                self.stop()
                done.send(e.retvalue)
            except Exception:
                LOG.exception(_LE('in looping call'))
                done.send_exception(*sys.exc_info())
                return
Esempio n. 9
0
def get_bridge_for_iface(root_helper, iface):
    """Look up the bridge for *iface* via ``ovs-vsctl iface-to-br``.

    :returns: the stripped command output, or None when the command failed
              (the failure is logged)
    """
    command = ["ovs-vsctl", "--timeout=2", "iface-to-br", iface]
    try:
        bridge = utils.execute(command, root_helper=root_helper).strip()
    except Exception:
        LOG.exception(_LE("Interface %s not found."), iface)
        bridge = None
    return bridge
Esempio n. 10
0
    def run_periodic_tasks(self, context, raise_on_error=False):
        """Run each registered periodic task that is due on this tick.

        A task whose skip counter is still positive has the counter
        decremented and is skipped. Otherwise the counter is reset from the
        task's ``_ticks_between_runs`` and the task is called.

        :param context: passed to every task
        :param raise_on_error: re-raise a task's exception instead of only
                               logging it
        """
        owner = self.__class__.__name__
        for task_name, task in self._periodic_tasks:
            full_task_name = '.'.join((owner, task_name))

            remaining = self._ticks_to_skip[task_name]
            if remaining > 0:
                LOG.debug(
                    "Skipping %(full_task_name)s, %(ticks_to_skip)s"
                    " ticks left until next run",
                    {'full_task_name': full_task_name,
                     'ticks_to_skip': remaining})
                self._ticks_to_skip[task_name] -= 1
                continue

            self._ticks_to_skip[task_name] = task._ticks_between_runs
            LOG.debug("Running periodic task %(full_task_name)s",
                      {'full_task_name': full_task_name})

            try:
                task(self, context)
            except Exception:
                if raise_on_error:
                    raise
                LOG.exception(_LE("Error during %(full_task_name)s:"),
                              {'full_task_name': full_task_name})
Esempio n. 11
0
def get_bridges(root_helper):
    """Return the bridge names printed by ``ovs-vsctl list-br``.

    :param root_helper: passed through to ``utils.execute``
    :returns: list of non-empty output lines; an empty list when the command
              produced no output or failed (the failure is logged)
    """
    args = ["ovs-vsctl", "--timeout=2", "list-br"]
    try:
        output = utils.execute(args, root_helper=root_helper).strip()
        # "".split("\n") yields [''], which would look like one bridge with
        # an empty name; drop blank entries so no output means no bridges.
        return [name for name in output.split("\n") if name]
    except Exception:
        LOG.exception(_LE("Unable to retrieve bridges."))
        return []
Esempio n. 12
0
 def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
     """Delete the link of the named device.

     ``bridge`` and ``prefix`` are accepted but not used by this body.
     A ``RuntimeError`` raised while deleting is logged, not propagated.
     """
     target = ip_lib.IPDevice(device_name, self.root_helper, namespace)
     try:
         target.link.delete()
         LOG.debug("Unplugged interface '%s'", device_name)
     except RuntimeError:
         LOG.exception(
             _LE("Failed unplugging interface '%s'"),
             device_name)
Esempio n. 13
0
 def shutdown(self):
     """Call ``service_shutdown()`` on every tracked state machine.

     A failure in one machine is logged and the remaining ones are still
     processed.
     """
     LOG.info('shutting down')
     for resource_id, machine in self.state_machines.items():
         try:
             machine.service_shutdown()
         except Exception:
             LOG.exception(
                 _LE('Failed to shutdown state machine for %s'),
                 resource_id)
Esempio n. 14
0
 def shutdown(self):
     """Call ``service_shutdown()`` on every tracked state machine.

     A failure in one machine is logged and the remaining ones are still
     processed.
     """
     LOG.info('shutting down')
     for resource_id, machine in self.state_machines.items():
         try:
             machine.service_shutdown()
         except Exception:
             LOG.exception(
                 _LE('Failed to shutdown state machine for %s'),
                 resource_id)
Esempio n. 15
0
 def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
     """Delete the link of the named device.

     ``bridge`` and ``prefix`` are accepted but not used by this body.
     A ``RuntimeError`` raised while deleting is logged, not propagated.
     """
     target = ip_lib.IPDevice(device_name, self.root_helper, namespace)
     try:
         target.link.delete()
         LOG.debug("Unplugged interface '%s'", device_name)
     except RuntimeError:
         LOG.exception(
             _LE("Failed unplugging interface '%s'"),
             device_name)
Esempio n. 16
0
 def run_ofctl(self, cmd, args):
     """Run ``ovs-ofctl <cmd> <bridge> <args...>`` for this bridge.

     :param cmd: the ovs-ofctl sub-command
     :param args: list of extra arguments appended after the bridge name
     :returns: whatever ``utils.execute`` returns; None when it raised
               (the failure is logged, not propagated)
     """
     full_args = ["ovs-ofctl", cmd, self.br_name] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': full_args,
                 'exception': e
             })
Esempio n. 17
0
 def run_vsctl(self, args):
     """Run ``ovs-vsctl --timeout=2 <args...>``.

     :param args: list of arguments appended to the base command
     :returns: whatever ``utils.execute`` returns; None when it raised
               (the failure is logged, not propagated)
     """
     full_args = ["ovs-vsctl", "--timeout=2"] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': full_args,
                 'exception': e
             })
Esempio n. 18
0
def notify(context, publisher_id, event_type, priority, payload):
    """Build a notification message and hand it to every driver.

    :param context:      passed unchanged to each driver
    :param publisher_id: the source worker_type.host of the message
    :param event_type:   the literal type of event (ex. Instance Creation)
    :param priority:     must be one of the entries of ``log_levels``
    :param payload:      A python dictionary of attributes
    :raises BadPriorityException: when ``priority`` is not in ``log_levels``

    The message sent to the drivers is a dictionary made of the parameters
    above plus two generated fields:

    message_id
      a UUID representing the id for this notification

    timestamp
      the ``timeutils.utcnow()`` value at build time, as a string

    Message example::

        {'message_id': str(uuid.uuid4()),
         'publisher_id': 'compute.host1',
         'timestamp': timeutils.utcnow(),
         'priority': 'WARN',
         'event_type': 'compute.create_instance',
         'payload': {'instance_id': 12, ... }}

    A driver that raises is logged and does not stop the remaining drivers.
    """
    if priority not in log_levels:
        raise BadPriorityException(
            _('%s not in valid priorities') % priority)

    # Ensure everything is JSON serializable.
    payload = jsonutils.to_primitive(payload, convert_instances=True)

    msg = {
        'message_id': str(uuid.uuid4()),
        'publisher_id': publisher_id,
        'event_type': event_type,
        'priority': priority,
        'payload': payload,
        'timestamp': str(timeutils.utcnow()),
    }

    for driver in _get_drivers():
        try:
            driver.notify(context, msg)
        except Exception as e:
            LOG.exception(_LE("Problem '%(e)s' attempting to "
                              "send to notification system. "
                              "Payload=%(payload)s"),
                          {'e': e, 'payload': payload})
Esempio n. 19
0
def notify(context, publisher_id, event_type, priority, payload):
    """Build a notification message and hand it to every driver.

    :param context:      passed unchanged to each driver
    :param publisher_id: the source worker_type.host of the message
    :param event_type:   the literal type of event (ex. Instance Creation)
    :param priority:     must be one of the entries of ``log_levels``
    :param payload:      A python dictionary of attributes
    :raises BadPriorityException: when ``priority`` is not in ``log_levels``

    The message sent to the drivers is a dictionary made of the parameters
    above plus two generated fields:

    message_id
      a UUID representing the id for this notification

    timestamp
      the ``timeutils.utcnow()`` value at build time, as a string

    Message example::

        {'message_id': str(uuid.uuid4()),
         'publisher_id': 'compute.host1',
         'timestamp': timeutils.utcnow(),
         'priority': 'WARN',
         'event_type': 'compute.create_instance',
         'payload': {'instance_id': 12, ... }}

    A driver that raises is logged and does not stop the remaining drivers.
    """
    if priority not in log_levels:
        raise BadPriorityException(_('%s not in valid priorities') % priority)

    # Ensure everything is JSON serializable.
    payload = jsonutils.to_primitive(payload, convert_instances=True)

    msg = {
        'message_id': str(uuid.uuid4()),
        'publisher_id': publisher_id,
        'event_type': event_type,
        'priority': priority,
        'payload': payload,
        'timestamp': str(timeutils.utcnow()),
    }

    for driver in _get_drivers():
        try:
            driver.notify(context, msg)
        except Exception as e:
            LOG.exception(
                _LE("Problem '%(e)s' attempting to "
                    "send to notification system. "
                    "Payload=%(payload)s"), {'e': e, 'payload': payload})
    def stop(self, worker_context):
        """Destroy the router instance and wait for it to disappear.

        Asks nova to destroy the instance, then polls until nova no longer
        returns it or ``cfg.CONF.boot_timeout`` seconds have passed. When the
        instance is gone the state becomes ``DOWN`` unless it is ``GONE``.

        :param worker_context: context object providing ``nova_client``
        :returns: None
        """
        self._ensure_cache(worker_context)
        if self.state == GONE:
            self.log.info(_LI("Destroying router neutron has deleted"))
        else:
            self.log.info(_LI("Destroying router"))

        # Resolve the client outside the try block: the polling loop below
        # needs it even when destroy_instance() fails, and binding it inside
        # the try could leave the name unbound.
        nova_client = worker_context.nova_client
        try:
            nova_client.destroy_instance(self.instance_info)
        except Exception:
            # Best effort: the failure is logged and we still poll below.
            self.log.exception(_LE("Error deleting router instance"))

        start = time.time()
        while time.time() - start < cfg.CONF.boot_timeout:
            if not nova_client.get_instance_by_id(self.instance_info.id_):
                if self.state != GONE:
                    self.state = DOWN
                return
            self.log.debug("Router has not finished stopping")
            time.sleep(cfg.CONF.retry_delay)
        self.log.error(_LE("Router failed to stop within %d secs"), cfg.CONF.boot_timeout)
Esempio n. 21
0
 def get_xapi_iface_id(self, xs_vif_uuid):
     """Read the ``nicira-iface-id`` other-config key of a VIF via ``xe``.

     :param xs_vif_uuid: value substituted into the ``uuid=`` argument
     :returns: the stripped command output; None when the command raised
               (the failure is logged, not propagated)
     """
     args = [
         "xe", "vif-param-get", "param-name=other-config",
         "param-key=nicira-iface-id",
         "uuid=%s" % xs_vif_uuid
     ]
     try:
         return utils.execute(args, root_helper=self.root_helper).strip()
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': args,
                 'exception': e
             })
Esempio n. 22
0
    def __call__(self, req):
        """Proxy the request for the instance it resolves to.

        :returns: the result of ``_proxy_request`` when an instance id is
                  found, ``HTTPNotFound`` when it is not, and
                  ``HTTPInternalServerError`` when anything raises
        """
        try:
            LOG.debug("Request: %s", req)

            instance_id = self._get_instance_id(req)
            if not instance_id:
                return webob.exc.HTTPNotFound()
            return self._proxy_request(instance_id, req)

        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = (
                "An unknown error has occurred. "
                "Please try your request again."
            )
            return webob.exc.HTTPInternalServerError(explanation=unicode(msg))
Esempio n. 23
0
    def __call__(self, req):
        """Proxy the request for the instance it resolves to.

        :returns: the result of ``_proxy_request`` when an instance id is
                  found, ``HTTPNotFound`` when it is not, and
                  ``HTTPInternalServerError`` when anything raises
        """
        try:
            LOG.debug("Request: %s", req)

            instance_id = self._get_instance_id(req)
            if not instance_id:
                return webob.exc.HTTPNotFound()
            return self._proxy_request(instance_id, req)

        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = (
                'An unknown error has occurred. '
                'Please try your request again.'
            )
            return webob.exc.HTTPInternalServerError(explanation=unicode(msg))
Esempio n. 24
0
def add_driver(notification_driver):
    """Register a notification driver at runtime.

    A string is treated as a module path: the module is imported and stored
    under that string. Any other object is stored under itself. An
    ``ImportError`` is logged and leaves the registry unchanged.
    """
    # Make sure the driver list is initialized.
    _get_drivers()

    if not isinstance(notification_driver, basestring):
        # Driver is already loaded; just add the object.
        _drivers[notification_driver] = notification_driver
        return

    # Load and add
    try:
        loaded = importutils.import_module(notification_driver)
        _drivers[notification_driver] = loaded
    except ImportError:
        LOG.exception(_LE("Failed to load notifier %s. "
                          "These notifications will not be sent."),
                      notification_driver)
Esempio n. 25
0
def add_driver(notification_driver):
    """Register a notification driver at runtime.

    A string is treated as a module path: the module is imported and stored
    under that string. Any other object is stored under itself. An
    ``ImportError`` is logged and leaves the registry unchanged.
    """
    # Make sure the driver list is initialized.
    _get_drivers()

    if not isinstance(notification_driver, basestring):
        # Driver is already loaded; just add the object.
        _drivers[notification_driver] = notification_driver
        return

    # Load and add
    try:
        loaded = importutils.import_module(notification_driver)
        _drivers[notification_driver] = loaded
    except ImportError:
        LOG.exception(
            _LE("Failed to load notifier %s. "
                "These notifications will not be sent."),
            notification_driver)
    def boot(self, worker_context, router_image_uuid):
        """Start booting a router instance through nova.

        Does nothing for a router whose state is ``GONE``. Otherwise the
        state is set to ``DOWN``, the boot counter is started and nova is
        asked to boot an instance. On success the state becomes ``BOOTING``
        and the returned instance info is stored.

        :param worker_context: context providing ``neutron`` and
                               ``nova_client``
        :param router_image_uuid: passed through to ``boot_instance``
        :returns: None
        """
        self._ensure_cache(worker_context)
        if self.state == GONE:
            self.log.info(_LI("Not booting deleted router"))
            return

        self.log.info(_LI("Booting router"))
        self.state = DOWN
        self._boot_counter.start()

        def make_vrrp_ports():
            # Callback given to boot_instance: creates the management port
            # first, then one VRRP port per network of the router's ports.
            router_id = self.router_obj.id
            mgt_port = worker_context.neutron.create_management_port(router_id)

            # FIXME(mark): ideally this should be ordered and de-duped
            instance_ports = [
                worker_context.neutron.create_vrrp_port(
                    self.router_obj.id, port.network_id)
                for port in self.router_obj.ports
            ]

            return mgt_port, instance_ports

        try:
            # TODO(mark): make this pluggable
            self._ensure_provider_ports(self.router_obj, worker_context)

            # TODO(mark): make this handle errors more gracefully on cb fail
            # TODO(mark): checkout from a pool - boot on demand for now
            instance_info = worker_context.nova_client.boot_instance(
                self.instance_info,
                self.router_obj.id,
                router_image_uuid,
                make_vrrp_ports,
            )
            if not instance_info:
                self.log.info(_LI("Previous router is deleting"))
                # Reset the VM manager, causing the state machine to start
                # again with a new VM.
                self.reset_boot_counter()
                self.instance_info = None
                return
        except:
            self.log.exception(_LE("Router failed to start boot"))
            # TODO(mark): attempt clean-up of failed ports
            return

        # We have successfully started a (re)boot attempt so
        # record the timestamp so we can report how long it takes.
        self.state = BOOTING
        self.instance_info = instance_info
Esempio n. 27
0
    def _start_child(self, wrap):
        """Fork one child process that runs ``wrap.service``.

        In the child, ``self._child_process(wrap.service)`` is run, the
        service is stopped, and the process exits through ``os._exit`` with
        a status derived from how the run ended. In the parent, the new pid
        is recorded in ``wrap.children`` and ``self.children``.

        :param wrap: object carrying ``forktimes``, ``workers``, ``service``
                     and ``children`` for the service being launched
        :returns: the pid of the forked child (parent side only)
        """
        if len(wrap.forktimes) > wrap.workers:
            # Limit ourselves to one process a second (over the period of
            # number of workers * 1 second). This will allow workers to
            # start up quickly but ensure we don't fork off children that
            # die instantly too quickly.
            if time.time() - wrap.forktimes[0] < wrap.workers:
                LOG.info(_LI('Forking too fast, sleeping'))
                time.sleep(1)

            # Drop the oldest recorded fork time.
            wrap.forktimes.pop(0)

        wrap.forktimes.append(time.time())

        pid = os.fork()
        if pid == 0:
            # NOTE(johannes): All exceptions are caught to ensure this
            # doesn't fallback into the loop spawning children. It would
            # be bad for a child to spawn more children.
            status = 0
            try:
                self._child_process(wrap.service)
            except SignalExit as exc:
                # Only SIGTERM and SIGINT have a name in this mapping.
                signame = {
                    signal.SIGTERM: 'SIGTERM',
                    signal.SIGINT: 'SIGINT'
                }[exc.signo]
                LOG.info(_LI('Caught %s, exiting'), signame)
                status = exc.code
            except SystemExit as exc:
                status = exc.code
            except BaseException:
                LOG.exception(_LE('Unhandled exception'))
                status = 2
            finally:
                wrap.service.stop()

            # The child never returns from this method.
            os._exit(status)

        LOG.info(_LI('Started child %d'), pid)

        wrap.children.add(pid)
        self.children[pid] = wrap

        return pid
Esempio n. 28
0
 def _send(self, ready):
     """Publish queued notifications until a ``None`` sentinel is received.

     :param ready: event-like object; ``set()`` is called on it once the
                   notifier has been prepared
     """
     # setup notifier driver ahead a time
     self.get_notifier()
     # Signal the caller that setup is done and sending can begin.
     ready.set()

     msg = self._q.get()
     while msg is not None:
         LOG.debug('sending notification %r', msg)
         try:
             self.send(event_type=msg['event_type'], message=msg['payload'])
         except Exception:
             # A failed publish is logged; the loop moves to the next one.
             LOG.exception(_LE('could not publish notification'))
         msg = self._q.get()
Esempio n. 29
0
 def _send(self, ready):
     """Publish queued notifications until a ``None`` sentinel is received.

     :param ready: event-like object; ``set()`` is called on it once the
                   notifier has been prepared
     """
     # setup notifier driver ahead a time
     self.get_notifier()
     # Signal the caller that setup is done and sending can begin.
     ready.set()

     msg = self._q.get()
     while msg is not None:
         LOG.debug('sending notification %r', msg)
         try:
             self.send(event_type=msg['event_type'], message=msg['payload'])
         except Exception:
             # A failed publish is logged; the loop moves to the next one.
             LOG.exception(_LE('could not publish notification'))
         msg = self._q.get()
Esempio n. 30
0
    def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
        """Remove the device's tap port from the OVS bridge.

        Falls back to ``self.conf.ovs_integration_bridge`` when no bridge is
        given. When ``ovs_use_veth`` is set, the device's link is deleted as
        well. A ``RuntimeError`` is logged, not propagated.
        """
        bridge = bridge or self.conf.ovs_integration_bridge

        tap_name = self._get_tap_name(device_name, prefix)
        self.check_bridge_exists(bridge)
        ovs_bridge = ovs_lib.OVSBridge(bridge, self.root_helper)

        try:
            ovs_bridge.delete_port(tap_name)
            if self.conf.ovs_use_veth:
                veth = ip_lib.IPDevice(
                    device_name, self.root_helper, namespace)
                veth.link.delete()
                LOG.debug(_("Unplugged interface '%s'"), device_name)
        except RuntimeError:
            LOG.exception(
                _LE("Failed unplugging interface '%s'"), device_name)
Esempio n. 31
0
    def update(self, worker_context):
        """Called when the router config should be changed.

        Repeatedly executes the current state for the current action and
        then transitions to the next state, until the machine is flagged as
        deleted, lands on a ``CalcAction`` state, or lands on an ``Exit``
        state.

        :param worker_context: passed through to the state's ``execute()``
                               and ``transition()``
        :returns: None
        """
        # Nothing runs when the queue is empty. The inner loop only ever
        # leaves via ``return``, so this condition is evaluated once per call.
        while self._queue:
            while True:
                if self.deleted:
                    self.driver.log.debug(
                        'skipping update because the router is being deleted')
                    return

                try:
                    self.driver.log.debug('%s.execute(%s) instance.state=%s',
                                          self.state, self.action,
                                          self.instance.state)
                    self.action = self.state.execute(
                        self.action,
                        worker_context,
                    )
                    self.driver.log.debug('%s.execute -> %s instance.state=%s',
                                          self.state, self.action,
                                          self.instance.state)
                except:
                    # Failures are only logged: self.action keeps whatever
                    # value it had and the transition below still runs.
                    self.driver.log.exception(
                        _LE('%s.execute() failed for action: %s'), self.state,
                        self.action)

                # Remember the state we are leaving for the debug message.
                old_state = self.state
                self.state = self.state.transition(
                    self.action,
                    worker_context,
                )
                self.driver.log.debug(
                    '%s.transition(%s) -> %s instance.state=%s', old_state,
                    self.action, self.state, self.instance.state)

                # Yield control each time we stop to figure out what
                # to do next.
                if isinstance(self.state, CalcAction):
                    return  # yield

                # We have reached the exit state, so the router has
                # been deleted somehow.
                if isinstance(self.state, Exit):
                    self._do_delete()
                    return
Esempio n. 32
0
def shuffle_notifications(notification_queue, sched):
    """Feed messages from the notification queue to the scheduler.

    Runs until an item whose target is ``None`` is received or a
    ``KeyboardInterrupt`` is caught. Any other error is logged and the loop
    keeps going.
    """
    running = True
    while running:
        try:
            target, message = notification_queue.get()
            if target is None:
                running = False
            else:
                sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrive during an IO operation
            # an IOError is raised. We catch the exceptions in
            # meantime waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            running = False
        except:
            LOG.exception(_LE('unhandled exception processing message'))
Esempio n. 33
0
    def _start_child(self, wrap):
        """Fork one child process that runs ``wrap.service``.

        In the child, ``self._child_process(wrap.service)`` is run, the
        service is stopped, and the process exits through ``os._exit`` with
        a status derived from how the run ended. In the parent, the new pid
        is recorded in ``wrap.children`` and ``self.children``.

        :param wrap: object carrying ``forktimes``, ``workers``, ``service``
                     and ``children`` for the service being launched
        :returns: the pid of the forked child (parent side only)
        """
        if len(wrap.forktimes) > wrap.workers:
            # Limit ourselves to one process a second (over the period of
            # number of workers * 1 second). This will allow workers to
            # start up quickly but ensure we don't fork off children that
            # die instantly too quickly.
            if time.time() - wrap.forktimes[0] < wrap.workers:
                LOG.info(_LI('Forking too fast, sleeping'))
                time.sleep(1)

            # Drop the oldest recorded fork time.
            wrap.forktimes.pop(0)

        wrap.forktimes.append(time.time())

        pid = os.fork()
        if pid == 0:
            # NOTE(johannes): All exceptions are caught to ensure this
            # doesn't fallback into the loop spawning children. It would
            # be bad for a child to spawn more children.
            status = 0
            try:
                self._child_process(wrap.service)
            except SignalExit as exc:
                # Only SIGTERM and SIGINT have a name in this mapping.
                signame = {signal.SIGTERM: 'SIGTERM',
                           signal.SIGINT: 'SIGINT'}[exc.signo]
                LOG.info(_LI('Caught %s, exiting'), signame)
                status = exc.code
            except SystemExit as exc:
                status = exc.code
            except BaseException:
                LOG.exception(_LE('Unhandled exception'))
                status = 2
            finally:
                wrap.service.stop()

            # The child never returns from this method.
            os._exit(status)

        LOG.info(_LI('Started child %d'), pid)

        wrap.children.add(pid)
        self.children[pid] = wrap

        return pid
Esempio n. 34
0
def shuffle_notifications(notification_queue, sched):
    """Feed messages from the notification queue to the scheduler.

    Runs until an item whose target is ``None`` is received or a
    ``KeyboardInterrupt`` is caught. Any other error is logged and the loop
    keeps going.
    """
    running = True
    while running:
        try:
            target, message = notification_queue.get()
            if target is None:
                running = False
            else:
                sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrive during an IO operation
            # an IOError is raised. We catch the exceptions in
            # meantime waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            running = False
        except:
            LOG.exception(_LE('unhandled exception processing message'))
Esempio n. 35
0
    def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
        """Remove the device's tap port from the OVS bridge.

        Falls back to ``self.conf.ovs_integration_bridge`` when no bridge is
        given. When ``ovs_use_veth`` is set, the device's link is deleted as
        well. A ``RuntimeError`` is logged, not propagated.
        """
        bridge = bridge or self.conf.ovs_integration_bridge

        tap_name = self._get_tap_name(device_name, prefix)
        self.check_bridge_exists(bridge)
        ovs_bridge = ovs_lib.OVSBridge(bridge, self.root_helper)

        try:
            ovs_bridge.delete_port(tap_name)
            if self.conf.ovs_use_veth:
                veth = ip_lib.IPDevice(
                    device_name, self.root_helper, namespace)
                veth.link.delete()
                LOG.debug(_("Unplugged interface '%s'"), device_name)
        except RuntimeError:
            LOG.exception(
                _LE("Failed unplugging interface '%s'"), device_name)
Esempio n. 36
0
    def boot(self, worker_context):
        """Boot the instance, wrapped by the driver's pre/post boot hooks.

        The state is set to ``DOWN`` and the boot counter started before the
        attempt. When nova accepts the boot the state becomes ``BOOTING``,
        the instance info is stored and the post-boot hook runs. When nova
        returns nothing, or the attempt raises, the method returns early and
        the post-boot hook is skipped.

        :param worker_context: context providing ``nova_client``
        :returns: None
        """
        self._ensure_cache(worker_context)

        self.log.info('Booting %s' % self.driver.RESOURCE_NAME)
        self.state = states.DOWN
        self._boot_counter.start()

        # Let the driver prepare before the instance is requested.
        self.driver.pre_boot(worker_context)

        try:
            instance_info = worker_context.nova_client.boot_instance(
                self.instance_info,
                self.driver.name,
                self.driver.image_uuid,
                self.driver.flavor,
                self.driver.make_ports(worker_context)
            )
            if not instance_info:
                self.log.info(_LI('Previous instance is still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                self.instance_info = None
                return
        except:
            self.log.exception(_LE('Instance failed to start boot'))
            return

        # We have successfully started a (re)boot attempt so
        # record the timestamp so we can report how long it takes.
        self.state = states.BOOTING
        self.instance_info = instance_info

        # Let the driver react now that the boot is under way.
        self.driver.post_boot(worker_context)
Esempio n. 37
0
def save_and_reraise_exception():
    """Save current exception, run some code and then re-raise.

    In some cases the exception context can be cleared, resulting in None
    being attempted to be re-raised after an exception handler is run. This
    can happen when eventlet switches greenthreads or when running an
    exception handler, code raises and catches an exception. In both
    cases the exception context will be cleared.

    To work around this, we save the exception state, run handler code, and
    then re-raise the original exception. If another exception occurs, the
    saved exception is logged and the new exception is re-raised.

    This is a generator that yields exactly once.
    NOTE(review): presumably it is wrapped by ``contextlib.contextmanager``
    so it can be used in a ``with`` block; the decorator is not visible
    here -- confirm.
    """
    # Capture the exception being handled right now, before anything that
    # runs later gets a chance to clear the interpreter's exception state.
    type_, value, tb = sys.exc_info()
    try:
        # Hand control to the caller's handler code.
        yield
    except Exception:
        # The handler code itself raised: log the saved exception (it is
        # about to be lost) and let the new exception propagate.
        logging.error(_LE('Original exception being dropped: %s'),
                      traceback.format_exception(type_, value, tb))
        raise
    # Python 2 three-argument raise: re-raise the saved exception together
    # with its original traceback.
    raise type_, value, tb
Esempio n. 38
0
def save_and_reraise_exception():
    """Save current exception, run some code and then re-raise.

    In some cases the exception context can be cleared, resulting in None
    being attempted to be re-raised after an exception handler is run. This
    can happen when eventlet switches greenthreads or when running an
    exception handler, code raises and catches an exception. In both
    cases the exception context will be cleared.

    To work around this, we save the exception state, run handler code, and
    then re-raise the original exception. If another exception occurs, the
    saved exception is logged and the new exception is re-raised.

    This is a generator that yields exactly once.
    NOTE(review): presumably it is wrapped by ``contextlib.contextmanager``
    so it can be used in a ``with`` block; the decorator is not visible
    here -- confirm.
    """
    # Snapshot the in-flight exception up front; later code may clear the
    # interpreter's exception state.
    type_, value, tb = sys.exc_info()
    try:
        # Run the caller's handler code.
        yield
    except Exception:
        # A second exception escaped the handler code: record the one we
        # saved, then propagate the new one unchanged.
        logging.error(_LE('Original exception being dropped: %s'),
                      traceback.format_exception(type_, value, tb))
        raise
    # Python 2 three-argument raise: restore the saved exception with its
    # original traceback.
    raise type_, value, tb
Esempio n. 39
0
def _worker(inq, worker_factory):
    """Main loop of a scheduler worker process.

    Pulls ``(target, message)`` pairs off ``inq`` and hands each one to the
    worker built by ``worker_factory``.  A ``None`` item -- or an IOError
    while reading the queue -- is forwarded to the worker as
    ``(None, None)`` and then ends the loop.

    :param inq: queue-like object whose ``get()`` returns the next item
    :param worker_factory: zero-argument callable returning an object with
                           a ``handle_message(target, message)`` method
    """
    daemon.ignore_signals()
    LOG.debug('starting worker process')
    worker = worker_factory()

    running = True
    while running:
        try:
            data = inq.get()
        except IOError:
            # NOTE(dhellmann): Likely caused by a signal arriving
            # during processing, especially SIGCHLD.
            data = None

        # A missing payload is still delivered to the worker (as a pair of
        # Nones) before the loop terminates.
        running = data is not None
        target, message = data if running else (None, None)

        try:
            worker.handle_message(target, message)
        except Exception:
            LOG.exception(_LE('Error processing data %s'), unicode(data))

    LOG.debug('exiting')
Esempio n. 40
0
def _worker(inq, worker_factory):
    """Main loop of a scheduler worker process.

    Reads ``(target, message)`` pairs from ``inq`` and passes each to the
    worker created by ``worker_factory``.  When the queue yields ``None``
    (or reading it raises IOError) the worker is called once more with
    ``(None, None)`` and the loop stops.

    :param inq: queue-like object whose ``get()`` returns the next item
    :param worker_factory: zero-argument callable returning an object with
                           a ``handle_message(target, message)`` method
    """
    daemon.ignore_signals()
    LOG.debug('starting worker process')
    worker = worker_factory()

    keep_running = True
    while keep_running:
        try:
            data = inq.get()
        except IOError:
            # NOTE(dhellmann): Likely caused by a signal arriving
            # during processing, especially SIGCHLD.
            data = None

        # No payload means this is the final iteration; the worker still
        # gets a (None, None) call first.
        keep_running = data is not None
        target, message = data if keep_running else (None, None)

        try:
            worker.handle_message(target, message)
        except Exception:
            LOG.exception(_LE('Error processing data %s'), unicode(data))

    LOG.debug('exiting')
Esempio n. 41
0
    def __call__(self, req):
        """Run the CLI command encoded in the path of a PUT request.

        The non-empty ``/``-separated segments of ``req.path`` are used as
        the command-line arguments handed to ``self.ctl``.

        :param req: incoming request; only ``method`` and ``path`` are read
        :returns: ``str`` of the result of ``self.ctl.run`` on success,
                  otherwise a ``webob.exc`` error response:
                  HTTPMethodNotAllowed for non-PUT requests, HTTPNotFound
                  for an empty path or a ValueError, HTTPNotImplemented for
                  interactive commands, HTTPBadRequest when the command
                  line triggers ``SystemExit`` and HTTPInternalServerError
                  for any other exception
        """
        try:
            if req.method != "PUT":
                return webob.exc.HTTPMethodNotAllowed()

            # Build a real list so the emptiness check and the list
            # concatenation below do not depend on ``filter`` returning one.
            args = [segment for segment in req.path.split("/") if segment]
            if not args:
                return webob.exc.HTTPNotFound()

            # The two unused values must NOT be unpacked into ``_``: that
            # would make ``_`` a local name for the whole method and break
            # the ``_()`` translation call in the generic error handler.
            command, _name, _remaining = \
                self.ctl.command_manager.find_command(args)
            if command.interactive:
                return webob.exc.HTTPNotImplemented()

            return str(self.ctl.run(["--debug"] + args))
        except SystemExit:
            # cliff invokes -h (help) on argparse failure
            # (which in turn results in sys.exit call)
            return webob.exc.HTTPBadRequest()
        except ValueError:
            return webob.exc.HTTPNotFound()
        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = _("An unknown error has occurred. "
                    "Please try your request again.")
            return webob.exc.HTTPInternalServerError(explanation=unicode(msg))
Esempio n. 42
0
    def configure(self, worker_context,
                  failure_state=states.RESTART, attempts=None):
        """Push the driver-built configuration to the instance.

        The state is set to UP first.  Nothing more happens when the driver
        reports the resource as GONE.  When the interfaces reported by the
        instance fail verification against the driver's ports the state
        becomes REPLUG.  Otherwise the config is built and pushed, retrying
        on failure; the state ends as CONFIGURED on success or
        ``failure_state`` once every attempt has failed.

        :param worker_context: context passed through to the driver
        :param failure_state: state to set when all attempts fail
        :param attempts: number of update attempts; a falsy value means
                         ``cfg.CONF.max_retries``
        :returns: None
        """
        self.log.debug('Begin instance config')
        self.state = states.UP
        if not attempts:
            attempts = cfg.CONF.max_retries

        self._ensure_cache(worker_context)
        if self.driver.get_state(worker_context) == states.GONE:
            return

        mgt_address = self.instance_info.management_address
        interfaces = self.driver.get_interfaces(mgt_address)

        interfaces_ok = self._verify_interfaces(self.driver.ports, interfaces)
        if not interfaces_ok:
            # The instance's interfaces do not match the driver's ports;
            # flag the instance for replugging.
            self.log.debug("Interfaces aren't plugged as expected.")
            self.state = states.REPLUG
            return

        # TODO(mark): We're in the first phase of VRRP, so we need
        # map the interface to the network ID.
        # Eventually we'll send VRRP data and real interface data
        port_mac_to_net = dict(
            (port.mac_address, port.network_id)
            for port in self.instance_info.ports
        )
        # Add in the management port
        mgt_port = self.instance_info.management_port
        port_mac_to_net[mgt_port.mac_address] = mgt_port.network_id

        # network id -> logical interface name, limited to interfaces whose
        # MAC address belongs to one of the known ports
        iface_map = {}
        for iface in interfaces:
            mac = iface['lladdr']
            if mac in port_mac_to_net:
                iface_map[port_mac_to_net[mac]] = iface['ifname']

        # sending all the standard config over to the driver for final updates
        config = self.driver.build_config(worker_context, mgt_port, iface_map)
        self.log.debug('preparing to update config to %r', config)

        final_attempt = attempts - 1
        for attempt in xrange(attempts):
            try:
                self.driver.update_config(
                    self.instance_info.management_address,
                    config)
            except Exception:
                if attempt == final_attempt:
                    # Only log the traceback if we encounter it many times.
                    self.log.exception(_LE('failed to update config'))
                else:
                    self.log.debug(
                        'failed to update config, attempt %d',
                        attempt
                    )
                time.sleep(cfg.CONF.retry_delay)
                continue

            # The update went through.
            self.state = states.CONFIGURED
            self.log.info('Instance config updated')
            return

        # Every attempt failed.
        self.state = failure_state
Esempio n. 43
0
    def _dispatch_command(self, target, message):
        """Act on a command message.

        ``message.body`` is a dict of instructions whose ``'command'`` key
        selects the action: reporting worker status, putting a resource or
        tenant into (or taking it out of) debug mode, forwarding an event
        command to a resource's state machine via ``handle_message()``,
        toggling global debug mode, or reloading the configuration.
        Unrecognized commands are logged and ignored.

        :param target: unused by this method
        :param message: message whose ``body`` holds the instructions
        :returns: None
        """
        instructions = message.body
        if instructions['command'] == commands.WORKERS_DEBUG:
            self.report_status()

        # NOTE(adam_g): Drop 'router-debug' compat in M.
        elif (instructions['command'] == commands.RESOURCE_DEBUG or
              instructions['command'] == commands.ROUTER_DEBUG):

            # Accept the older 'router_id' key as a fallback.
            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to debug resource with no id'))
                return
            reason = instructions.get('reason')
            if resource_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all resources with %r'),
                    resource_id)
            else:
                LOG.info(_LI('Placing router %s in debug mode (reason: %s)'),
                         resource_id, reason)
                self.db_api.enable_resource_debug(resource_id, reason)

        elif (instructions['command'] == commands.RESOURCE_MANAGE or
              instructions['command'] == commands.ROUTER_MANAGE):
            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to manage resource with no id'))
                return
            try:
                self.db_api.disable_resource_debug(resource_id)
                LOG.info(_LI('Resuming management of resource %s'),
                         resource_id)
            except KeyError:
                pass
            try:
                self._resource_locks[resource_id].release()
                LOG.info(_LI('Unlocked resource %s'), resource_id)
            except KeyError:
                # No lock is tracked for this resource.
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass

        elif instructions['command'] in EVENT_COMMANDS:
            resource_id = instructions.get('resource_id')
            sm = self._find_state_machine_by_resource_id(resource_id)
            if not sm:
                LOG.debug(
                    'Will not process command, no managed state machine '
                    'found for resource %s', resource_id)
                return
            new_res = event.Resource(
                id=resource_id,
                driver=sm.driver.RESOURCE_NAME,
                tenant_id=sm.tenant_id)
            new_msg = event.Event(
                resource=new_res,
                crud=EVENT_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_res)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_res)

        # NOTE(adam_g): This is here to support the deprecated old format of
        #               sending commands to specific routers and can be
        #               removed once the CLI component is dropped in M.
        elif instructions['command'] in DEPRECATED_ROUTER_COMMANDS:
            # Report deprecated usage through the logger rather than a
            # stray debug ``print`` to stdout.
            LOG.warning(_LW(
                'Handling command %s sent in the deprecated router format'),
                instructions['command'])
            new_rsc = event.Resource(
                driver=drivers.router.Router.RESOURCE_NAME,
                id=message.body.get('router_id'),
                tenant_id=message.body.get('tenant_id'),
            )
            new_msg = event.Event(
                resource=new_rsc,
                crud=DEPRECATED_ROUTER_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_rsc)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_rsc)

        elif instructions['command'] == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            else:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)

        elif instructions['command'] == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass

        elif instructions['command'] == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info(_LI('Enabling global debug mode (reason: %s)'),
                         reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info(_LI('Disabling global debug mode'))
                self.db_api.disable_global_debug()
            else:
                LOG.warning(_LW('Unrecognized global debug command: %s'),
                            instructions)
        elif instructions['command'] == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                # Log the option values only after a successful reload.
                cfg.CONF.log_opt_values(LOG, INFO)

        else:
            LOG.warning(_LW('Unrecognized command: %s'), instructions)
Esempio n. 44
0
    def _thread_target(self):
        """This method runs in each worker thread.

        Repeatedly takes a state machine off ``self.work_queue`` and calls
        its ``update()`` until ``self._keep_going`` becomes false or a
        ``None`` item (the stop message) is received.  The thread's current
        activity is recorded in ``self._thread_status`` under the thread's
        name.

        :returns: the ``WorkerContext`` created for this thread
        """
        my_id = threading.current_thread().name
        LOG.debug('starting thread')
        # Use a separate context from the one we use when receiving
        # messages and talking to the tenant router manager because we
        # are in a different thread and the clients are not
        # thread-safe.
        context = WorkerContext()
        while self._keep_going:
            try:
                # Try to get a state machine from the work queue. If
                # there's nothing to do, we will block for a while.
                self._thread_status[my_id] = 'waiting for task'
                sm = self.work_queue.get(timeout=10)
            except Queue.Empty:
                # Nothing arrived before the timeout; loop around so
                # ``_keep_going`` is checked again.
                continue
            if sm is None:
                # A None item is the signal to stop this thread.
                LOG.info(_LI('received stop message'))
                break

            # Make sure we didn't already have some updates under way
            # for a router we've been told to ignore for debug mode.
            should_ignore, reason = \
                self.db_api.resource_in_debug(sm.resource_id)
            if should_ignore:
                LOG.debug('Skipping update of resource %s in debug mode. '
                          '(reason: %s)', sm.resource_id, reason)
                # NOTE(review): this path skips the ``finally`` block
                # below, so neither ``task_done()`` nor the resource lock
                # release runs for this item -- confirm that is intended.
                continue
            # FIXME(dhellmann): Need to look at the router to see if
            # it belongs to a tenant which is in debug mode, but we
            # don't have that data in the sm, yet.
            LOG.debug('performing work on %s for tenant %s',
                      sm.resource_id, sm.tenant_id)
            try:
                self._thread_status[my_id] = 'updating %s' % sm.resource_id
                sm.update(context)
            except:
                # Any failure in the update is logged and the thread
                # keeps serving the queue.
                LOG.exception(_LE('could not complete update for %s'),
                              sm.resource_id)
            finally:
                self._thread_status[my_id] = (
                    'finalizing task for %s' % sm.resource_id
                )
                self.work_queue.task_done()
                with self.lock:
                    # Release the lock that prevents us from adding
                    # the state machine back into the queue. If we
                    # find more work, we will re-acquire it. If we do
                    # not find more work, we hold the primary work
                    # queue lock so the main thread cannot put the
                    # state machine back into the queue until we
                    # release that lock.
                    self._release_resource_lock(sm)
                    # The state machine has indicated that it is done
                    # by returning. If there is more work for it to
                    # do, reschedule it by placing it at the end of
                    # the queue.
                    if sm.has_more_work():
                        LOG.debug('%s has more work, returning to work queue',
                                  sm.resource_id)
                        self._add_resource_to_work_queue(sm)
                    else:
                        LOG.debug('%s has no more work', sm.resource_id)
        # Return the context object so tests can look at it
        self._thread_status[my_id] = 'exiting'
        return context
Esempio n. 45
0
    def get_state_machines(self, message, worker_context):
        """Return the state machines for the logical resource being
        addressed by the message.

        ``message.resource.id`` selects the targets: ``'*'`` means every
        state machine, ``'error'`` means those reporting an error, an
        unknown id creates (and registers) a new state machine, and a known
        id selects the existing one.  Messages for deleted resources are
        dropped.

        :param message: message whose ``resource`` identifies the target
        :param worker_context: context handed to the driver and to any
                               newly created state machine
        :returns: list of state machines that are not deleted; empty when
                  the message is dropped or no driver can be loaded
        :raises InvalidIncomingMessage: if the message has no resource or
                                        the resource has no id
        """
        if not message.resource or not message.resource.id:
            LOG.error(
                _LE('Cannot get state machine for message with '
                    'no message.resource'))
            raise InvalidIncomingMessage()

        state_machines = []

        # Send to all of our resources.
        if message.resource.id == '*':
            LOG.debug('routing to all state machines')
            state_machines = self.state_machines.values()

        # Ignore messages to deleted resources.
        elif self.state_machines.has_been_deleted(message.resource.id):
            LOG.debug('dropping message for deleted resource')
            return []

        # Send to resources that have an ERROR status
        elif message.resource.id == 'error':
            state_machines = [
                sm for sm in self.state_machines.values() if sm.has_error()
            ]
            LOG.debug('routing to %d errored state machines',
                      len(state_machines))

        # Create a new state machine for this router.
        elif message.resource.id not in self.state_machines:
            LOG.debug('creating state machine for %s', message.resource.id)

            # load the driver
            if not message.resource.driver:
                # The adjacent literals are concatenated, so the first one
                # needs its trailing space.
                LOG.error(
                    _LE('cannot create state machine without specifying '
                        'a driver.'))
                return []

            driver_obj = \
                drivers.get(message.resource.driver)(worker_context,
                                                     message.resource.id)

            if not driver_obj:
                # this means the driver didn't load for some reason..
                # this might not be needed at all.
                LOG.debug('for some reason loading the driver failed')
                return []

            def deleter():
                self._delete_resource(message.resource.id)

            new_state_machine = state.Automaton(
                driver=driver_obj,
                resource_id=message.resource.id,
                tenant_id=self.tenant_id,
                delete_callback=deleter,
                bandwidth_callback=self._report_bandwidth,
                worker_context=worker_context,
                queue_warning_threshold=self._queue_warning_threshold,
                reboot_error_threshold=self._reboot_error_threshold,
            )
            self.state_machines[message.resource.id] = new_state_machine
            state_machines = [new_state_machine]

        # Send directly to an existing router.
        elif message.resource.id:
            state_machines = [self.state_machines[message.resource.id]]

        # Filter out any deleted state machines.
        return [
            machine for machine in state_machines
            if (not machine.deleted and
                not self.state_machines.has_been_deleted(machine.resource_id))
        ]
Esempio n. 46
0
 def run_vsctl(self, args):
     """Run ``ovs-vsctl --timeout=2`` with the given arguments.

     :param args: list of arguments appended to the command line
     :returns: the result of ``utils.execute``, or None when execution
               raised (the failure is logged, not propagated)
     """
     full_args = ["ovs-vsctl", "--timeout=2"] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     except Exception as e:
         # Best effort: log the failure and fall through to return None.
         LOG.error(_LE("Unable to execute %(cmd)s. Exception: %(exception)s"),
                   {"cmd": full_args, "exception": e})
    def replug(self, worker_context):
        """Try to bring the instance's interfaces in line with the router.

        Compares the MAC addresses cached for the instance with those the
        appliance reports, attaches a new port for every router network the
        instance lacks, detaches ports for networks the router no longer
        has, and then polls the appliance until the interfaces verify or
        ``cfg.CONF.hotplug_timeout`` polls (one second apart) have passed.
        ``self.state`` is set to RESTART when the cache is out of sync, an
        attach/detach call fails or the interfaces never verify.

        :param worker_context: context providing the ``nova_client`` and
                               ``neutron`` clients
        :returns: None
        """
        self.log.debug("Attempting to replug...")
        self._ensure_provider_ports(self.router_obj, worker_context)

        interfaces = router_api.get_interfaces(
            self.instance_info.management_address,
            cfg.CONF.akanda_mgt_service_port)
        actual_macs = set(iface["lladdr"] for iface in interfaces)
        instance_macs = set(p.mac_address for p in self.instance_info.ports)
        instance_macs.add(self.instance_info.management_port.mac_address)

        if instance_macs != actual_macs:
            # our cached copy of the ports is wrong reboot and clean up
            self.log.warning(
                _LW("Instance macs(%s) do not match actual macs (%s). "
                    "Instance cache appears out-of-sync"),
                instance_macs,
                actual_macs,
            )
            self.state = RESTART
            return

        instance_ports = {p.network_id: p for p in self.instance_info.ports}
        instance_networks = set(instance_ports)

        logical_networks = set(p.network_id for p in self.router_obj.ports)

        if logical_networks != instance_networks:
            instance = worker_context.nova_client.get_instance_by_id(
                self.instance_info.id_)

            # For each port that doesn't have a mac address on the instance...
            for network_id in logical_networks - instance_networks:
                port = worker_context.neutron.create_vrrp_port(
                    self.router_obj.id, network_id)
                self.log.debug(
                    "Net %s is missing from the router, plugging: %s",
                    network_id, port.id)

                try:
                    instance.interface_attach(port.id, None, None)
                except Exception:
                    # Catch real errors only, so KeyboardInterrupt and
                    # SystemExit are not turned into a RESTART.
                    self.log.exception(_LE("Interface attach failed"))
                    self.state = RESTART
                    return
                self.instance_info.ports.append(port)

            # Ports on networks the router no longer has get detached.
            for network_id in instance_networks - logical_networks:
                port = instance_ports[network_id]
                self.log.debug(
                    "Net %s is detached from the router, unplugging: %s",
                    network_id, port.id)

                try:
                    instance.interface_detach(port.id)
                except Exception:
                    self.log.exception(_LE("Interface detach failed"))
                    self.state = RESTART
                    return

                self.instance_info.ports.remove(port)

        # The action of attaching/detaching interfaces in Nova happens via the
        # message bus and is *not* blocking.  We need to wait a few seconds to
        # see if the list of tap devices on the appliance actually changed.  If
        # not, assume the hotplug failed, and reboot the Instance.
        replug_seconds = cfg.CONF.hotplug_timeout
        while replug_seconds > 0:
            self.log.debug(
                "Waiting for interface attachments to take effect...")
            interfaces = router_api.get_interfaces(
                self.instance_info.management_address,
                cfg.CONF.akanda_mgt_service_port
            )
            if self._verify_interfaces(self.router_obj, interfaces):
                # replugging was successful
                # TODO(mark) update port states
                return
            time.sleep(1)
            replug_seconds -= 1

        self.log.debug("Interfaces aren't plugged as expected, rebooting.")
        self.state = RESTART
Esempio n. 48
0
 def get_xapi_iface_id(self, xs_vif_uuid):
     """Read the ``nicira-iface-id`` other-config value of a VIF via ``xe``.

     :param xs_vif_uuid: uuid passed to ``xe vif-param-get``
     :returns: the stripped command output, or None when execution raised
               (the failure is logged, not propagated)
     """
     args = ["xe", "vif-param-get", "param-name=other-config",
             "param-key=nicira-iface-id", "uuid=%s" % xs_vif_uuid]
     try:
         return utils.execute(args, root_helper=self.root_helper).strip()
     except Exception as e:
         # Best effort: log the failure and fall through to return None.
         LOG.error(_LE("Unable to execute %(cmd)s. Exception: %(exception)s"),
                   {"cmd": args, "exception": e})
Esempio n. 49
0
 def run_ofctl(self, cmd, args):
     """Run an ``ovs-ofctl`` sub-command against this bridge.

     :param cmd: the ``ovs-ofctl`` sub-command
     :param args: list of arguments appended after ``self.br_name``
     :returns: the result of ``utils.execute``, or None when execution
               raised (the failure is logged, not propagated)
     """
     full_args = ["ovs-ofctl", cmd, self.br_name] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     except Exception as e:
         # Best effort: log the failure and fall through to return None.
         LOG.error(_LE("Unable to execute %(cmd)s. Exception: %(exception)s"),
                   {"cmd": full_args, "exception": e})
Esempio n. 50
0
    def _dispatch_command(self, target, message):
        """Act on a command message.

        The ``'command'`` entry of ``message.body`` selects the action:
        reporting worker status, putting a router or tenant into (or taking
        it out of) debug mode, forwarding an event command through
        ``handle_message()``, toggling global debug mode, or reloading the
        configuration.  Anything else is logged as unrecognized.

        :param target: unused by this method
        :param message: message whose ``body`` holds the instructions
        :returns: None
        """
        instructions = message.body
        command = instructions['command']

        if command == commands.WORKERS_DEBUG:
            self.report_status()
            return

        if command == commands.ROUTER_DEBUG:
            router_id = instructions['router_id']
            reason = instructions.get('reason')
            if router_id not in commands.WILDCARDS:
                LOG.info(_LI('Placing router %s in debug mode (reason: %s)'),
                         router_id, reason)
                self.db_api.enable_router_debug(router_id, reason)
            else:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all routers with %r'),
                    router_id)
            return

        if command == commands.ROUTER_MANAGE:
            router_id = instructions['router_id']
            try:
                self.db_api.disable_router_debug(router_id)
                LOG.info(_LI('Resuming management of router %s'), router_id)
            except KeyError:
                pass
            try:
                self._router_locks[router_id].release()
                LOG.info(_LI('Unlocked router %s'), router_id)
            except KeyError:
                # No lock is tracked for this router.
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass
            return

        if command in self._EVENT_COMMANDS:
            new_msg = event.Event(
                tenant_id=message.tenant_id,
                router_id=message.router_id,
                crud=self._EVENT_COMMANDS[command],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     command, message.tenant_id)
            self.handle_message(new_msg.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     command, message.tenant_id)
            return

        if command == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id not in commands.WILDCARDS:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)
            else:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            return

        if command == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass
            return

        if command == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info('Enabling global debug mode (reason: %s)', reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info('Disabling global debug mode')
                self.db_api.disable_global_debug()
            else:
                LOG.warning('Unrecognized global debug command: %s',
                            instructions)
            return

        if command == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                # Log the option values only after a successful reload.
                cfg.CONF.log_opt_values(LOG, INFO)
            return

        LOG.warning(_LW('Unrecognized command: %s'), instructions)
Esempio n. 51
0
    def get_state_machines(self, message, worker_context):
        """Return the state machines for the router addressed by the message.

        ``message.router_id`` selects the targets: ``'*'`` means every
        state machine, ``'error'`` means those reporting an error, a known
        id selects the existing state machine and an unknown id creates
        (and registers) a new one.  Messages for deleted routers are
        dropped.

        :param message: message whose ``router_id`` identifies the target
        :param worker_context: context handed to a newly created state
                               machine
        :returns: list of state machines that are not deleted
        :raises InvalidIncomingMessage: if the message has no router_id
        """
        router_id = message.router_id
        if not router_id:
            LOG.error(_LE('Cannot get state machine for message with '
                          'no router_id'))
            raise InvalidIncomingMessage()

        # Ignore messages to deleted routers.
        if self.state_machines.has_been_deleted(router_id):
            LOG.debug('dropping message for deleted router')
            return []

        if router_id == '*':
            # Send to all of our routers.
            LOG.debug('routing to all state machines')
            targets = self.state_machines.values()
        elif router_id == 'error':
            # Send to routers that have an ERROR status
            targets = [
                candidate
                for candidate in self.state_machines.values()
                if candidate.has_error()
            ]
            LOG.debug('routing to %d errored state machines',
                      len(targets))
        elif router_id in self.state_machines:
            # Send directly to an existing router.
            targets = [self.state_machines[router_id]]
        else:
            # Create a new state machine for this router.
            LOG.debug('creating state machine for %s', router_id)

            def deleter():
                self._delete_router(router_id)

            created = state.Automaton(
                router_id=router_id,
                tenant_id=self.tenant_id,
                delete_callback=deleter,
                bandwidth_callback=self._report_bandwidth,
                worker_context=worker_context,
                queue_warning_threshold=self._queue_warning_threshold,
                reboot_error_threshold=self._reboot_error_threshold,
            )
            self.state_machines[router_id] = created
            targets = [created]

        # Filter out any deleted state machines.
        live = []
        for machine in targets:
            if machine.deleted:
                continue
            if self.state_machines.has_been_deleted(machine.router_id):
                continue
            live.append(machine)
        return live