Example #1
0
    def delete_instances_and_wait(self, instance_infos):
        """Deletes the nova instances and waits for deletion to complete.

        :param instance_infos: iterable of instance-info objects exposing
            an ``id_`` attribute.
        :raises NovaInstanceDeleteTimeout: if any instance is still present
            after ``cfg.CONF.boot_timeout`` seconds.
        """
        to_poll = list(instance_infos)

        for inst in instance_infos:
            try:
                self.destroy_instance(inst)
            except novaclient_exceptions.NotFound:
                # Already gone; just confirm via polling below.
                pass
            except Exception:
                # Pass the id as a lazy log argument instead of eagerly
                # %-interpolating it into the (translated) message.
                LOG.exception(_LE('Error deleting instance %s'), inst.id_)
                to_poll.remove(inst)

        # XXX parallelize this
        timed_out = []
        for inst in to_poll:
            start = time.time()
            while time.time() - start < cfg.CONF.boot_timeout:
                if not self.get_instance_by_id(inst.id_):
                    LOG.debug('Instance %s has been deleted', inst.id_)
                    break
                LOG.debug('Instance %s has not finished stopping', inst.id_)
                time.sleep(cfg.CONF.retry_delay)
            else:
                # while loop exhausted without break -> deletion timed out.
                timed_out.append(inst)
                LOG.error(_LE('Instance %s failed to stop within %d secs'),
                          inst.id_, cfg.CONF.boot_timeout)

        if timed_out:
            raise NovaInstanceDeleteTimeout()
Example #2
0
    def stop(self, worker_context):
        """Attempts to destroy the instance with configured timeout.

        Deletes the resource's ports, asks nova to destroy the instance,
        then polls until the instance disappears or boot_timeout elapses.

        :param worker_context: context exposing ``nova_client``
        :returns: the resulting state (DOWN unless already GONE), or None
            when the instance failed to stop within the timeout.
        """
        self.log.info(_LI('Destroying instance'))

        self.driver.delete_ports(worker_context)

        if not self.instance_info:
            self.log.info(_LI('Instance already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            worker_context.nova_client.destroy_instance(self.instance_info)
        except Exception:
            # Best effort: keep polling below even if the delete call failed.
            self.log.exception(_LE('Error deleting router instance'))

        start = time.time()
        while time.time() - start < cfg.CONF.boot_timeout:
            if not worker_context.nova_client.\
                    get_instance_by_id(self.instance_info.id_):
                if self.state != states.GONE:
                    self.state = states.DOWN
                return self.state
            self.log.debug('Router has not finished stopping')
            time.sleep(cfg.CONF.retry_delay)
        self.log.error(_LE(
            'Router failed to stop within %d secs'),
            cfg.CONF.boot_timeout)
Example #3
0
    def delete_instances_and_wait(self, instance_infos):
        """Deletes the nova instances and waits for deletion to complete.

        :param instance_infos: iterable of instance-info objects exposing
            an ``id_`` attribute.
        :raises NovaInstanceDeleteTimeout: if any instance is still present
            after ``cfg.CONF.boot_timeout`` seconds.
        """
        to_poll = list(instance_infos)

        for inst in instance_infos:
            try:
                self.destroy_instance(inst)
            except novaclient_exceptions.NotFound:
                # Already gone; just confirm via polling below.
                pass
            except Exception:
                # Lazy log arguments instead of eager % interpolation
                # inside the translated message.
                LOG.exception(
                    _LE('Error deleting instance %s'), inst.id_)
                to_poll.remove(inst)

        # XXX parallelize this
        timed_out = []
        for inst in to_poll:
            start = time.time()
            while time.time() - start < cfg.CONF.boot_timeout:
                if not self.get_instance_by_id(inst.id_):
                    LOG.debug('Instance %s has been deleted', inst.id_)
                    break
                LOG.debug(
                    'Instance %s has not finished stopping', inst.id_)
                time.sleep(cfg.CONF.retry_delay)
            else:
                # Loop exhausted without break -> deletion timed out.
                timed_out.append(inst)
                LOG.error(_LE(
                    'Instance %s failed to stop within %d secs'),
                    inst.id_, cfg.CONF.boot_timeout)

        if timed_out:
            raise NovaInstanceDeleteTimeout()
Example #4
0
def get_bridge_for_iface(root_helper, iface):
    """Return the name of the OVS bridge containing *iface*, or None."""
    cmd = ["ovs-vsctl", "--timeout=2", "iface-to-br", iface]
    try:
        output = utils.execute(cmd, root_helper=root_helper)
    except Exception:
        LOG.exception(_LE("Interface %s not found."), iface)
        return None
    return output.strip()
Example #5
0
    def _check_del_instances(self, pools):
        """Scans the pool for deleted instances and checks deletion timers.

        :param pools: mapping of resource -> list of instances
        :returns: list of instances stuck in DELETING longer than
            ``self.delete_timeout``.
        """
        # XXX: What do we do with instances stuck in deleting?
        # For now, just return stuck instances to caller and we can figure
        # out what to do with them later.
        stuck_instances = []
        del_instances = []
        for resource, pool in pools.items():
            del_instances += [i for i in pool if i.status == DELETING]

        # clean out counters for old instances that have been deleted entirely
        if self._delete_counters:
            del_instance_ids = [i.id for i in del_instances]
            for inst_id in copy.copy(self._delete_counters):
                if inst_id not in del_instance_ids:
                    self._delete_counters.pop(inst_id)

        for del_inst in del_instances:
            if del_inst.id not in self._delete_counters:
                self._delete_counters[del_inst.id] = timeutils.utcnow()
            else:
                if timeutils.is_older_than(self._delete_counters[del_inst.id],
                                           self.delete_timeout):
                    # Bug fix: previously logged `i.id`, a variable leaked
                    # from the comprehensions above; log the stuck
                    # instance's own id.
                    LOG.error(_LE(
                        'Instance %s is stuck in %s for more than %s '
                        'seconds.'), del_inst.id, DELETING,
                        self.delete_timeout)
                    stuck_instances.append(del_inst)
        return stuck_instances
Example #6
0
    def update(self, worker_context):
        "Called when the router config should be changed"
        while self._queue:
            while True:
                if self.deleted:
                    self.driver.log.debug("skipping update because the router is being deleted")
                    return

                try:
                    self.driver.log.debug(
                        "%s.execute(%s) instance.state=%s", self.state, self.action, self.instance.state
                    )
                    self.action = self.state.execute(self.action, worker_context)
                    self.driver.log.debug(
                        "%s.execute -> %s instance.state=%s", self.state, self.action, self.instance.state
                    )
                # except Exception (not a bare except) so SystemExit and
                # KeyboardInterrupt still propagate out of the worker.
                except Exception:
                    self.driver.log.exception(_LE("%s.execute() failed for action: %s"), self.state, self.action)

                old_state = self.state
                self.state = self.state.transition(self.action, worker_context)
                self.driver.log.debug(
                    "%s.transition(%s) -> %s instance.state=%s", old_state, self.action, self.state, self.instance.state
                )

                # Yield control each time we stop to figure out what
                # to do next.
                if isinstance(self.state, CalcAction):
                    return  # yield

                # We have reached the exit state, so the router has
                # been deleted somehow.
                if isinstance(self.state, Exit):
                    self._do_delete()
                    return
Example #7
0
    def __call__(self, req):
        """Handle an incoming PUT command request, mapping failures to HTTP errors."""
        try:
            if req.method != 'PUT':
                return webob.exc.HTTPMethodNotAllowed()

            # Drop empty path components.
            parts = [p for p in req.path.split('/') if p]
            if not parts:
                return webob.exc.HTTPNotFound()

            command, _, _ = self.ctl.command_manager.find_command(parts)
            if command.interactive:
                return webob.exc.HTTPNotImplemented()

            return str(self.ctl.run(['--debug'] + parts))
        except SystemExit:
            # cliff invokes -h (help) on argparse failure
            # (which in turn results in sys.exit call)
            return webob.exc.HTTPBadRequest()
        except ValueError:
            return webob.exc.HTTPNotFound()
        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = _('An unknown error has occurred. '
                    'Please try your request again.')
            return webob.exc.HTTPInternalServerError(
                explanation=six.text_type(msg))
Example #8
0
def get_bridges(root_helper):
    """List all OVS bridge names; return an empty list if the query fails."""
    cmd = ["ovs-vsctl", "--timeout=2", "list-br"]
    try:
        output = utils.execute(cmd, root_helper=root_helper)
    except Exception:
        LOG.exception(_LE("Unable to retrieve bridges."))
        return []
    return output.strip().split("\n")
    def boot(self, worker_context):
        """Boots the instances with driver pre/post boot hooks.

        :param worker_context: context passed to the driver hooks
        :returns: None
        """
        # Lazy log args instead of eager % interpolation.
        self.log.info('Booting %s', self.resource.RESOURCE_NAME)

        if self.state != states.DEGRADED:
            self.state = states.DOWN
            self._boot_counter.start()

        # driver preboot hook
        self.resource.pre_boot(worker_context)

        try:
            self.instances.create(worker_context)
            if not self.instances:
                self.log.info(_LI('Previous instances are still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                return
        # except Exception (not a bare except) so SystemExit and
        # KeyboardInterrupt are not swallowed.
        except Exception:
            self.log.exception(_LE('Instances failed to start boot'))
        else:
            self.state = states.BOOTING

        # driver post boot hook
        self.resource.post_boot(worker_context)
    def boot(self, worker_context):
        """Boots the instances with driver pre/post boot hooks.

        :param worker_context: context passed to the driver hooks
        :returns: None
        """
        # Lazy log args instead of eager % interpolation.
        self.log.info('Booting %s', self.resource.RESOURCE_NAME)

        if self.state != states.DEGRADED:
            self.state = states.DOWN
            self._boot_counter.start()

        # driver preboot hook
        self.resource.pre_boot(worker_context)

        try:
            self.instances.create(worker_context)
            if not self.instances:
                self.log.info(_LI('Previous instances are still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                return
        # except Exception (not a bare except) so SystemExit and
        # KeyboardInterrupt are not swallowed.
        except Exception:
            self.log.exception(_LE('Instances failed to start boot'))
        else:
            self.state = states.BOOTING

        # driver post boot hook
        self.resource.post_boot(worker_context)
Example #11
0
def get_bridges(root_helper):
    """Return the names of all OVS bridges, or [] when listing fails."""
    try:
        out = utils.execute(["ovs-vsctl", "--timeout=2", "list-br"],
                            root_helper=root_helper)
    except Exception:
        LOG.exception(_LE("Unable to retrieve bridges."))
        return []
    return out.strip().split("\n")
Example #12
0
def get_bridge_for_iface(root_helper, iface):
    """Resolve which OVS bridge *iface* is attached to; None on failure."""
    try:
        bridge = utils.execute(
            ["ovs-vsctl", "--timeout=2", "iface-to-br", iface],
            root_helper=root_helper)
    except Exception:
        LOG.exception(_LE("Interface %s not found."), iface)
        return None
    return bridge.strip()
 def _update_config(self, instance, config):
     """Push *config* to *instance*, retrying up to max_retries times.

     :returns: True once an update succeeds, False if every attempt fails.
     """
     self.log.debug(
         'Updating config for instance %s on resource %s',
         instance.id_, self.resource.id)
     self.log.debug('New config: %r', config)
     attempts = cfg.CONF.max_retries
     for attempt in six.moves.range(attempts):
         try:
             self.resource.update_config(
                 instance.management_address, config)
         except Exception:
             if attempt == attempts - 1:
                 # Only log the traceback once we have given up.
                 self.log.exception(_LE('failed to update config'))
             else:
                 self.log.debug(
                     'failed to update config, attempt %d', attempt)
             time.sleep(cfg.CONF.retry_delay)
         else:
             self.log.info('Instance config updated')
             return True
     # Every attempt failed.
     return False
Example #14
0
    def _check_del_instances(self, pools):
        """Scans the pool for deleted instances and checks deletion timers.

        :param pools: mapping of resource -> list of instances
        :returns: list of instances stuck in DELETING longer than
            ``self.delete_timeout``.
        """
        # XXX: What do we do with instances stuck in deleting?
        # For now, just return stuck instances to caller and we can figure
        # out what to do with them later.
        stuck_instances = []
        del_instances = []
        for resource, pool in pools.items():
            del_instances += [i for i in pool if i.status == DELETING]

        # clean out counters for old instances that have been deleted entirely
        if self._delete_counters:
            del_instance_ids = [i.id for i in del_instances]
            for inst_id in copy.copy(self._delete_counters):
                if inst_id not in del_instance_ids:
                    self._delete_counters.pop(inst_id)

        for del_inst in del_instances:
            if del_inst.id not in self._delete_counters:
                self._delete_counters[del_inst.id] = timeutils.utcnow()
            else:
                if timeutils.is_older_than(self._delete_counters[del_inst.id],
                                           self.delete_timeout):
                    # Bug fix: previously logged `i.id`, a variable leaked
                    # from the comprehensions above; log the stuck
                    # instance's own id.
                    LOG.error(
                        _LE('Instance %s is stuck in %s for more than %s '
                            'seconds.'), del_inst.id, DELETING,
                        self.delete_timeout)
                    stuck_instances.append(del_inst)
        return stuck_instances
Example #15
0
def get_instance_provider(provider):
    """Look up an instance provider by name, falling back to the default."""
    if provider in INSTANCE_PROVIDERS:
        return INSTANCE_PROVIDERS[provider]
    default = INSTANCE_PROVIDERS['default']
    LOG.error(_LE('Could not find %s instance provider, using default %s'),
              provider, default)
    return default
Example #16
0
 def run_vsctl(self, args):
     """Run an ovs-vsctl command and return its output (None on failure)."""
     full_args = ["ovs-vsctl", "--timeout=2"] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(_LE(
             "Unable to execute %(cmd)s. Exception: %(exception)s"),
             {'cmd': full_args, 'exception': e})
Example #17
0
 def run_ofctl(self, cmd, args):
     """Run an ovs-ofctl command on this bridge; return output, None on failure."""
     full_args = ["ovs-ofctl", cmd, self.br_name] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(_LE(
             "Unable to execute %(cmd)s. Exception: %(exception)s"),
             {'cmd': full_args, 'exception': e})
Example #18
0
 def shutdown(self):
     """Ask every state machine to shut down, logging any failure."""
     LOG.info('shutting down')
     for resource_id, machine in self.state_machines.items():
         try:
             machine.service_shutdown()
         except Exception:
             # Keep shutting the rest down even if one machine fails.
             LOG.exception(
                 _LE('Failed to shutdown state machine for %s'), resource_id)
Example #19
0
def get_instance_provider(provider):
    """Return the provider registered under *provider*, or the default."""
    _missing = object()
    found = INSTANCE_PROVIDERS.get(provider, _missing)
    if found is not _missing:
        return found
    default = INSTANCE_PROVIDERS['default']
    LOG.error(_LE('Could not find %s instance provider, using default %s'),
              provider, default)
    return default
Example #20
0
 def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
     """Unplug the interface."""
     dev = ip_lib.IPDevice(device_name, self.root_helper, namespace)
     try:
         dev.link.delete()
     except RuntimeError:
         LOG.exception(_LE(
             "Failed unplugging interface '%s'"), device_name)
     else:
         LOG.debug("Unplugged interface '%s'", device_name)
Example #21
0
 def shutdown(self):
     """Shut down all managed state machines; log and continue on errors."""
     LOG.info('shutting down')
     for rid, sm in self.state_machines.items():
         try:
             sm.service_shutdown()
         except Exception:
             # One failed machine must not block the rest of the shutdown.
             LOG.exception(_LE(
                 'Failed to shutdown state machine for %s'), rid
             )
Example #22
0
 def get_xapi_iface_id(self, xs_vif_uuid):
     """Return the nicira iface-id for a XenServer VIF; None on failure."""
     args = ["xe", "vif-param-get", "param-name=other-config",
             "param-key=nicira-iface-id", "uuid=%s" % xs_vif_uuid]
     try:
         return utils.execute(args, root_helper=self.root_helper).strip()
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(_LE(
             "Unable to execute %(cmd)s. Exception: %(exception)s"),
             {'cmd': args, 'exception': e})
Example #23
0
 def run_vsctl(self, args):
     """Run an ovs-vsctl command and return its output (None on failure)."""
     full_args = ["ovs-vsctl", "--timeout=2"] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': full_args,
                 'exception': e
             })
Example #24
0
 def run_ofctl(self, cmd, args):
     """Run an ovs-ofctl command on this bridge; return output, None on failure."""
     full_args = ["ovs-ofctl", cmd, self.br_name] + args
     try:
         return utils.execute(full_args, root_helper=self.root_helper)
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': full_args,
                 'exception': e
             })
Example #25
0
 def _check_err_instances(self, pools):
     """Scans the pool and deletes any instances in error state.

     Each errored instance is replaced in its pool slot by the
     instance object returned from delete_instance().
     """
     for resource, pool in copy.copy(pools).items():
         err_instances = [i for i in pool if i.status == ERROR]
         for err_inst in err_instances:
             # Bug fix: previously logged `i.id` before `i` was assigned
             # (only a leaked comprehension variable); log the errored
             # instance's own id.
             LOG.error(_LE('Instance %s is in %s state, deleting.'),
                       err_inst.id, ERROR)
             del_instance = self.delete_instance(err_inst.id)
             i = pool.index(err_inst)
             pools[resource][i] = del_instance
Example #26
0
    def update(self, worker_context):
        "Called when the router config should be changed"
        while self._queue:
            while True:
                if self.deleted:
                    self.resource.log.debug(
                        'skipping update because the router is being deleted'
                    )
                    return

                try:
                    self.resource.log.debug(
                        '%s.execute(%s) instance.state=%s',
                        self.state,
                        self.action,
                        self.instance.state)
                    self.action = self.state.execute(
                        self.action,
                        worker_context,
                    )
                    self.resource.log.debug(
                        '%s.execute -> %s instance.state=%s',
                        self.state,
                        self.action,
                        self.instance.state)
                # except Exception (not a bare except) so SystemExit and
                # KeyboardInterrupt still propagate out of the worker.
                except Exception:
                    self.resource.log.exception(
                        _LE('%s.execute() failed for action: %s'),
                        self.state,
                        self.action
                    )

                old_state = self.state
                self.state = self.state.transition(
                    self.action,
                    worker_context,
                )
                self.resource.log.debug(
                    '%s.transition(%s) -> %s instance.state=%s',
                    old_state,
                    self.action,
                    self.state,
                    self.instance.state
                )

                # Yield control each time we stop to figure out what
                # to do next.
                if isinstance(self.state, CalcAction):
                    return  # yield

                # We have reached the exit state, so the router has
                # been deleted somehow.
                if isinstance(self.state, Exit):
                    self._do_delete()
                    return
Example #27
0
 def _check_err_instances(self, pools):
     """Scans the pool and deletes any instances in error state.

     Each errored instance is replaced in its pool slot by the
     instance object returned from delete_instance().
     """
     for resource, pool in copy.copy(pools).items():
         err_instances = [i for i in pool if i.status == ERROR]
         for err_inst in err_instances:
             # Bug fix: previously logged `i.id` before `i` was assigned
             # (only a leaked comprehension variable); log the errored
             # instance's own id.
             LOG.error(_LE(
                 'Instance %s is in %s state, deleting.'),
                 err_inst.id, ERROR)
             del_instance = self.delete_instance(err_inst.id)
             i = pool.index(err_inst)
             pools[resource][i] = del_instance
Example #28
0
 def get_xapi_iface_id(self, xs_vif_uuid):
     """Return the nicira iface-id for a XenServer VIF; None on failure."""
     args = [
         "xe", "vif-param-get", "param-name=other-config",
         "param-key=nicira-iface-id",
         "uuid=%s" % xs_vif_uuid
     ]
     try:
         return utils.execute(args, root_helper=self.root_helper).strip()
     # `except Exception, e` is Python-2-only syntax; `as` works on 2 and 3.
     except Exception as e:
         LOG.error(
             _LE("Unable to execute %(cmd)s. Exception: %(exception)s"), {
                 'cmd': args,
                 'exception': e
             })
Example #29
0
    def __init__(self, conf):
        """Set up the nova client and select the instance provider."""
        self.conf = conf
        ks_session = keystone.KeystoneSession()
        self.client = client.Client(version='2',
                                    session=ks_session.session,
                                    region_name=conf.auth_region)

        try:
            provider_cls = get_instance_provider(conf.instance_provider)
            self.instance_provider = provider_cls(self.client)
        except AttributeError:
            # conf lacks an instance_provider option; use the default.
            default = INSTANCE_PROVIDERS['default']
            LOG.error(_LE('Could not find provider config, using default %s'),
                      default)
            self.instance_provider = default(self.client)
Example #30
0
    def __init__(self, conf):
        """Create the nova client and resolve the instance provider class."""
        self.conf = conf
        keystone_session = keystone.KeystoneSession()
        self.client = client.Client(
            version='2',
            session=keystone_session.session,
            region_name=conf.auth_region)

        try:
            provider = get_instance_provider(conf.instance_provider)
            self.instance_provider = provider(self.client)
        except AttributeError:
            # No instance_provider setting in the config; fall back.
            fallback = INSTANCE_PROVIDERS['default']
            LOG.error(_LE('Could not find provider config, using default %s'),
                      fallback)
            self.instance_provider = fallback(self.client)
Example #31
0
    def _load_resource_from_message(self, worker_context, message):
        """Load the driver for the message's resource.

        Prefers a tenant's bring-your-own-network-function (BYONF) driver
        when enabled and available; otherwise uses the configured driver.
        """
        if cfg.CONF.enable_byonf:
            byonf_res = worker_context.neutron.tenant_has_byo_for_function(
                tenant_id=self.tenant_id.replace('-', ''),
                function_type=message.resource.driver)

            if byonf_res:
                try:
                    return drivers.load_from_byonf(worker_context, byonf_res,
                                                   message.resource.id)
                except drivers.InvalidDriverException:
                    # Fall through to the configured driver below
                    # (removed a redundant `pass` after the log call).
                    LOG.exception(
                        _LE('Could not load BYONF driver, falling back to '
                            'configured image'))

        return drivers.get(message.resource.driver)(worker_context,
                                                    message.resource.id)
Example #32
0
 def _send(self, ready):
     """Deliver notification messages from the in-process queue
     to the appropriate topic via the AMQP service.
     """
     # setup notifier driver ahead a time
     self.get_notifier()
     # Tell the start() method that we have set up the AMQP
     # communication stuff and are ready to do some work.
     ready.set()
     # iter(callable, sentinel): loop until the queue yields None.
     for msg in iter(self._q.get, None):
         LOG.debug('sending notification %r', msg)
         try:
             self.send(event_type=msg['event_type'],
                       message=msg['payload'])
         except Exception:
             LOG.exception(_LE('could not publish notification'))
Example #33
0
 def _send(self, ready):
     """Pump queued notification messages out to the AMQP service.

     Blocks until a None sentinel arrives on the queue.
     """
     # Build the notifier driver before signalling readiness.
     self.get_notifier()
     # Let start() know the AMQP plumbing is in place.
     ready.set()
     while True:
         payload = self._q.get()
         if payload is None:
             # Sentinel: stop the delivery loop.
             break
         LOG.debug('sending notification %r', payload)
         try:
             self.send(event_type=payload['event_type'],
                       message=payload['payload'])
         except Exception:
             LOG.exception(_LE('could not publish notification'))
Example #34
0
    def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
        """Unplug the interface.

        Removes the tap port from the OVS bridge and, when veth pairs are
        in use, deletes the corresponding ip device as well.
        """
        if not bridge:
            bridge = self.conf.ovs_integration_bridge

        tap_name = self._get_tap_name(device_name, prefix)
        self.check_bridge_exists(bridge)
        ovs = ovs_lib.OVSBridge(bridge, self.root_helper)

        try:
            ovs.delete_port(tap_name)
            if self.conf.ovs_use_veth:
                device = ip_lib.IPDevice(device_name,
                                         self.root_helper,
                                         namespace)
                device.link.delete()
                # Debug messages are not translated; drop the _() wrapper
                # for consistency with the rest of the module.
                LOG.debug("Unplugged interface '%s'", device_name)
        except RuntimeError:
            LOG.exception(_LE("Failed unplugging interface '%s'"), device_name)
Example #35
0
def shuffle_notifications(notification_queue, sched):
    """Copy messages from the notification queue into the scheduler.

    Runs until a (None, message) sentinel arrives or Ctrl-C is pressed.
    """
    while True:
        try:
            target, message = notification_queue.get()
            if target is None:
                break
            sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrive during an IO operation
            # an IOError is raised. We catch the exceptions in
            # meantime waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            break
        # except Exception (not a bare except) so SystemExit can still
        # terminate the process.
        except Exception:
            LOG.exception(_LE('unhandled exception processing message'))
Example #36
0
def shuffle_notifications(notification_queue, sched):
    """Copy messages from the notification queue into the scheduler.

    Runs until a (None, message) sentinel arrives or Ctrl-C is pressed.
    """
    while True:
        try:
            target, message = notification_queue.get()
            if target is None:
                break
            sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrive during an IO operation
            # an IOError is raised. We catch the exceptions in
            # meantime waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            break
        # except Exception (not a bare except) so SystemExit can still
        # terminate the process.
        except Exception:
            LOG.exception(_LE('unhandled exception processing message'))
Example #37
0
    def boot(self, worker_context):
        """Boots the instance with driver pre/post boot hooks.

        :param worker_context: context exposing ``nova_client``
        :returns: None
        """
        self._ensure_cache(worker_context)

        # Lazy log args instead of eager % interpolation.
        self.log.info('Booting %s', self.driver.RESOURCE_NAME)
        self.state = states.DOWN
        self._boot_counter.start()

        # driver preboot hook
        self.driver.pre_boot(worker_context)

        # try to boot the instance
        try:
            instance_info = worker_context.nova_client.boot_instance(
                resource_type=self.driver.RESOURCE_NAME,
                prev_instance_info=self.instance_info,
                name=self.driver.name,
                image_uuid=self.driver.image_uuid,
                flavor=self.driver.flavor,
                make_ports_callback=self.driver.make_ports(worker_context)
            )
            if not instance_info:
                self.log.info(_LI('Previous instance is still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                self.instance_info = None
                return
        # except Exception (not a bare except) so SystemExit and
        # KeyboardInterrupt are not swallowed.
        except Exception:
            self.log.exception(_LE('Instance failed to start boot'))
            self.driver.delete_ports(worker_context)
        else:
            # We have successfully started a (re)boot attempt so
            # record the timestamp so we can report how long it takes.
            self.state = states.BOOTING
            self.instance_info = instance_info

        # driver post boot hook
        self.driver.post_boot(worker_context)
Example #38
0
    def _load_resource_from_message(self, worker_context, message):
        """Load the driver for the message's resource.

        Prefers a tenant's bring-your-own-network-function (BYONF) driver
        when enabled and available; otherwise uses the configured driver.
        """
        if cfg.CONF.enable_byonf:
            byonf_res = worker_context.neutron.tenant_has_byo_for_function(
                tenant_id=self.tenant_id.replace('-', ''),
                function_type=message.resource.driver)

            if byonf_res:
                try:
                    return drivers.load_from_byonf(
                        worker_context,
                        byonf_res,
                        message.resource.id)
                except drivers.InvalidDriverException:
                    # Fall through to the configured driver below
                    # (removed a redundant `pass` after the log call).
                    LOG.exception(_LE(
                        'Could not load BYONF driver, falling back to '
                        'configured image'))

        return drivers.get(message.resource.driver)(
            worker_context, message.resource.id)
Example #39
0
    def __call__(self, req):
        """Inital handler for an incoming `webob.Request`.

        :param req: The webob.Request to handle
        :returns: returns a valid HTTP Response or Error
        """
        try:
            LOG.debug("Request: %s", req)

            instance_id = self._get_instance_id(req)
            if instance_id:
                return self._proxy_request(instance_id, req)
            else:
                return webob.exc.HTTPNotFound()

        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = ('An unknown error has occurred. '
                   'Please try your request again.')
            # six.text_type instead of the Python-2-only `unicode` builtin,
            # matching the other handler in this module.
            return webob.exc.HTTPInternalServerError(
                explanation=six.text_type(msg))
Example #40
0
    def __call__(self, req):
        """Initial handler for an incoming `webob.Request`.

        :param req: The webob.Request to handle
        :returns: returns a valid HTTP Response or Error
        """
        try:
            LOG.debug("Request: %s", req)

            instance_id = self._get_instance_id(req)
            if not instance_id:
                return webob.exc.HTTPNotFound()
            return self._proxy_request(instance_id, req)

        except Exception:
            LOG.exception(_LE("Unexpected error."))
            msg = ('An unknown error has occurred. '
                   'Please try your request again.')
            return webob.exc.HTTPInternalServerError(
                explanation=six.text_type(msg))
 def _update_config(self, instance, config):
     """Push *config* to *instance*, retrying up to max_retries times.

     :returns: True once an update succeeds, False if all attempts fail.
     """
     self.log.debug('Updating config for instance %s on resource %s',
                    instance.id_, self.resource.id)
     self.log.debug('New config: %r', config)
     attempts = cfg.CONF.max_retries
     attempt = 0
     while attempt < attempts:
         try:
             self.resource.update_config(instance.management_address,
                                         config)
         except Exception:
             if attempt == attempts - 1:
                 # Only log the traceback once we have given up.
                 self.log.exception(_LE('failed to update config'))
             else:
                 self.log.debug('failed to update config, attempt %d',
                                attempt)
             time.sleep(cfg.CONF.retry_delay)
             attempt += 1
         else:
             self.log.info('Instance config updated')
             return True
     # Every attempt failed.
     return False
    def stop(self, worker_context):
        """Attempt to destroy the backing instance cluster.

        :param worker_context: worker context passed through to the driver
        :returns: the normalized state when instances were already gone,
            otherwise None
        """
        self.log.info(_LI('Destroying instance'))

        self.resource.delete_ports(worker_context)

        if not self.instances:
            # Nothing to tear down; just normalize the state.
            self.log.info(_LI('Instance(s) already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            self.instances.destroy(worker_context)
            if self.state != states.GONE:
                self.state = states.DOWN
        except Exception:
            self.log.exception(_LE('Failed to stop instance(s)'))
    def stop(self, worker_context):
        """Destroy the instance cluster, logging (not raising) on failure.

        :param worker_context: worker context passed through to the driver
        :returns: the new state when instances were already gone, else None
        """
        self.log.info(_LI('Destroying instance'))
        self.resource.delete_ports(worker_context)

        if not self.instances:
            self.log.info(_LI('Instance(s) already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            self.instances.destroy(worker_context)
            if self.state != states.GONE:
                self.state = states.DOWN
        except Exception:
            self.log.exception(_LE('Failed to stop instance(s)'))
Example #44
0
def _worker(inq, worker_factory, scheduler, proc_name):
    """Scheduler's worker process main function.

    Pulls (target, message) pairs off the input queue and hands them to
    the worker until a None sentinel arrives, which triggers shutdown.

    :param inq: queue delivering (target, message) tuples or None
    :param worker_factory: callable building the worker object
    :param scheduler: scheduler passed through to the factory
    :param proc_name: process name passed through to the factory
    """
    daemon.ignore_signals()
    LOG.debug('starting worker process')
    worker = worker_factory(scheduler=scheduler, proc_name=proc_name)
    while True:
        try:
            data = inq.get()
        except IOError:
            # NOTE(dhellmann): Likely caused by a signal arriving
            # during processing, especially SIGCHLD.
            data = None
        if data is None:
            target, message = None, None
        else:
            target, message = data
        try:
            worker.handle_message(target, message)
        except Exception:
            # six.text_type instead of the py2-only unicode() builtin,
            # consistent with the other _worker implementation.
            LOG.exception(_LE('Error processing data %s'),
                          six.text_type(data))
        if data is None:
            break
    LOG.debug('exiting')
Example #45
0
def _worker(inq, worker_factory, scheduler, proc_name):
    """Scheduler's worker process main function.

    Feeds queued (target, message) pairs to the worker; a None item is
    the shutdown sentinel (still delivered to the worker first).
    """
    daemon.ignore_signals()
    LOG.debug('starting worker process')
    worker = worker_factory(scheduler=scheduler, proc_name=proc_name)
    while True:
        try:
            data = inq.get()
        except IOError:
            # NOTE(dhellmann): Likely caused by a signal arriving
            # during processing, especially SIGCHLD.
            data = None
        target, message = data if data is not None else (None, None)
        try:
            worker.handle_message(target, message)
        except Exception:
            LOG.exception(_LE('Error processing data %s'), six.text_type(data))
        if data is None:
            break
    LOG.debug('exiting')
Example #46
0
    def get_state_machines(self, message, worker_context):
        """Return the state machines and the queue for sending it messages for
        the logical resource being addressed by the message.

        :param message: the event to route
        :param worker_context: worker context wrapping the API clients
        :returns: list of state machines that should receive the message
        :raises: InvalidIncomingMessage when the message carries no
                 resource id
        """
        # Simplified from `not r or (r and not r.id)`, which is equivalent.
        if not message.resource or not message.resource.id:
            LOG.error(_LE(
                'Cannot get state machine for message with '
                'no message.resource'))
            raise InvalidIncomingMessage()

        state_machines = []

        # Send to all of our resources.
        if message.resource.id == '*':
            LOG.debug('routing to all state machines')
            state_machines = self.state_machines.values()

        # Ignore messages to deleted resources.
        elif self.state_machines.has_been_deleted(message.resource.id):
            LOG.debug('dropping message for deleted resource')
            return []

        # Send to resources that have an ERROR status
        elif message.resource.id == 'error':
            state_machines = [
                sm for sm in self.state_machines.values()
                if sm.has_error()
            ]
            LOG.debug('routing to %d errored state machines',
                      len(state_machines))

        # Create a new state machine for this router.
        elif message.resource.id not in self.state_machines:
            LOG.debug('creating state machine for %s', message.resource.id)

            # load the driver
            if not message.resource.driver:
                # Fixed implicit string concatenation that produced
                # "specifyinga driver." in the log message.
                LOG.error(_LE('cannot create state machine without '
                              'specifying a driver.'))
                return []

            driver_obj = \
                drivers.get(message.resource.driver)(worker_context,
                                                     message.resource.id)

            if not driver_obj:
                # this means the driver didn't load for some reason..
                # this might not be needed at all.
                LOG.debug('for some reason loading the driver failed')
                return []

            def deleter():
                self._delete_resource(message.resource)

            new_state_machine = state.Automaton(
                driver=driver_obj,
                resource_id=message.resource.id,
                tenant_id=self.tenant_id,
                delete_callback=deleter,
                bandwidth_callback=self._report_bandwidth,
                worker_context=worker_context,
                queue_warning_threshold=self._queue_warning_threshold,
                reboot_error_threshold=self._reboot_error_threshold,
            )
            self.state_machines[message.resource.id] = new_state_machine
            state_machines = [new_state_machine]

        # Send directly to an existing router.
        elif message.resource.id:
            state_machines = [self.state_machines[message.resource.id]]

        # Filter out any deleted state machines.
        return [
            machine
            for machine in state_machines
            if (not machine.deleted and
                not self.state_machines.has_been_deleted(machine.resource_id))
        ]
Example #47
0
    def _dispatch_command(self, target, message):
        """Route an administrative command message to the matching handler.

        Dispatches on ``message.body['command']``: worker status reports,
        per-resource and per-tenant debug/manage toggles, forced events,
        global debug, and configuration reload. Unknown commands are
        logged and dropped.

        :param target: routing target (unused here; kept for the
                       message-handler signature)
        :param message: event whose body holds the command instructions
        """
        if not self._should_process_command(message):
            return

        instructions = message.body
        if instructions['command'] == commands.WORKERS_DEBUG:
            self.report_status()

        # Per-resource debug: stop managing a single resource.
        # NOTE(adam_g): Drop 'router-debug' compat in M.
        elif (instructions['command'] == commands.RESOURCE_DEBUG or
              instructions['command'] == commands.ROUTER_DEBUG):

            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to debug resource with no id'))
                return
            reason = instructions.get('reason')
            if resource_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all resources with %r'),
                    resource_id)
            else:
                LOG.info(_LI('Placing resource %s in debug mode (reason: %s)'),
                         resource_id, reason)
                self.db_api.enable_resource_debug(resource_id, reason)

        # Per-resource manage: resume management and release any lock.
        elif (instructions['command'] == commands.RESOURCE_MANAGE or
              instructions['command'] == commands.ROUTER_MANAGE):
            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to manage resource with no id'))
                return
            try:
                self.db_api.disable_resource_debug(resource_id)
                LOG.info(_LI('Resuming management of resource %s'),
                         resource_id)
            except KeyError:
                pass
            try:
                self._resource_locks[resource_id].release()
                LOG.info(_LI('Unlocked resource %s'), resource_id)
            except KeyError:
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass

        # Forced CRUD events against a managed state machine.
        elif instructions['command'] in EVENT_COMMANDS:
            resource_id = instructions.get('resource_id')
            sm = self._find_state_machine_by_resource_id(resource_id)
            if not sm:
                LOG.debug(
                    'Will not process command, no managed state machine '
                    'found for resource %s', resource_id)
                return
            new_res = event.Resource(
                id=resource_id,
                driver=sm.driver.RESOURCE_NAME,
                tenant_id=sm.tenant_id)
            new_msg = event.Event(
                resource=new_res,
                crud=EVENT_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_res)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_res)

        # NOTE(adam_g): This is here to support the deprecated old format of
        #               sending commands to specific routers and can be
        #               removed once the CLI component is dropped in M.
        elif instructions['command'] in DEPRECATED_ROUTER_COMMANDS:
            new_rsc = event.Resource(
                driver=drivers.router.Router.RESOURCE_NAME,
                id=message.body.get('router_id'),
                tenant_id=message.body.get('tenant_id'),
            )
            new_msg = event.Event(
                resource=new_rsc,
                crud=DEPRECATED_ROUTER_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_rsc)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_rsc)

        # Tenant-level debug/manage mirror the per-resource variants.
        elif instructions['command'] == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            else:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)

        elif instructions['command'] == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass

        # Global debug: 'enabled' is expected to be 1 or 0.
        elif instructions['command'] == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info('Enabling global debug mode (reason: %s)', reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info('Disabling global debug mode')
                self.db_api.disable_global_debug()
            else:
                LOG.warning('Unrecognized global debug command: %s',
                            instructions)
        elif instructions['command'] == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                cfg.CONF.log_opt_values(LOG, INFO)

        else:
            LOG.warning(_LW('Unrecognized command: %s'), instructions)
Example #48
0
    def _thread_target(self):
        """This method runs in each worker thread.

        Pulls state machines off the work queue and drives their
        update() until a None sentinel or self._keep_going goes False.

        :returns: the thread-local WorkerContext (so tests can look at it)
        """
        my_id = threading.current_thread().name
        LOG.debug('starting thread')
        # Use a separate context from the one we use when receiving
        # messages and talking to the tenant router manager because we
        # are in a different thread and the clients are not
        # thread-safe.
        context = WorkerContext(self.management_address)
        while self._keep_going:
            try:
                # Try to get a state machine from the work queue. If
                # there's nothing to do, we will block for a while.
                self._thread_status[my_id] = 'waiting for task'
                sm = self.work_queue.get(timeout=10)
            except Queue.Empty:
                continue
            if sm is None:
                LOG.info(_LI('received stop message'))
                break

            # Make sure we didn't already have some updates under way
            # for a router we've been told to ignore for debug mode.
            should_ignore, reason = \
                self.db_api.resource_in_debug(sm.resource_id)
            if should_ignore:
                LOG.debug('Skipping update of resource %s in debug mode. '
                          '(reason: %s)', sm.resource_id, reason)
                continue

            # In the event that a rebalance took place while processing an
            # event, it may have been put back into the work queue. Check
            # the hash table once more to find out if we still manage it
            # and do some cleanup if not.
            if cfg.CONF.coordination.enabled:
                target_hosts = self.hash_ring_mgr.ring.get_hosts(
                    sm.resource_id)
                if self.host not in target_hosts:
                    LOG.debug('Skipping update of router %s, it no longer '
                              'maps here.', sm.resource_id)
                    trm = self.tenant_managers[sm.tenant_id]
                    trm.unmanage_resource(sm.resource_id)
                    self.work_queue.task_done()
                    with self.lock:
                        self._release_resource_lock(sm)
                    continue

            # FIXME(dhellmann): Need to look at the router to see if
            # it belongs to a tenant which is in debug mode, but we
            # don't have that data in the sm, yet.
            LOG.debug('performing work on %s for tenant %s',
                      sm.resource_id, sm.tenant_id)
            try:
                self._thread_status[my_id] = 'updating %s' % sm.resource_id
                sm.update(context)
            except Exception:
                # Narrowed from a bare except so SystemExit/KeyboardInterrupt
                # and friends are not swallowed here.
                LOG.exception(_LE('could not complete update for %s'),
                              sm.resource_id)
            finally:
                self._thread_status[my_id] = (
                    'finalizing task for %s' % sm.resource_id
                )
                self.work_queue.task_done()
                with self.lock:
                    # Release the lock that prevents us from adding
                    # the state machine back into the queue. If we
                    # find more work, we will re-acquire it. If we do
                    # not find more work, we hold the primary work
                    # queue lock so the main thread cannot put the
                    # state machine back into the queue until we
                    # release that lock.
                    self._release_resource_lock(sm)
                    # The state machine has indicated that it is done
                    # by returning. If there is more work for it to
                    # do, reschedule it by placing it at the end of
                    # the queue.
                    if sm.has_more_work():
                        LOG.debug('%s has more work, returning to work queue',
                                  sm.resource_id)
                        self._add_resource_to_work_queue(sm)
                    else:
                        LOG.debug('%s has no more work', sm.resource_id)
        # Return the context object so tests can look at it
        self._thread_status[my_id] = 'exiting'
        return context
Example #49
0
    def configure(self, worker_context):
        """Pushes config to instance.

        Verifies the plugged interfaces, builds the driver config and
        retries pushing it up to ``max_retries`` times.

        :param worker_context: worker context wrapping the API clients
        :returns: the resulting state (GONE, REPLUG, CONFIGURED or
                  RESTART)
        """
        self.log.debug('Begin instance config')
        self.state = states.UP
        max_attempts = cfg.CONF.max_retries

        if self.driver.get_state(worker_context) == states.GONE:
            return states.GONE

        interfaces = self.driver.get_interfaces(
            self.instance_info.management_address)

        if not self._verify_interfaces(self.driver.ports, interfaces):
            self.log.debug("Interfaces aren't plugged as expected.")
            self.state = states.REPLUG
            return self.state

        # TODO(mark): We're in the first phase of VRRP, so we need
        # map the interface to the network ID.
        # Eventually we'll send VRRP data and real interface data
        port_mac_to_net = dict(
            (p.mac_address, p.network_id) for p in self.instance_info.ports)
        # Add in the management port
        mgt_port = self.instance_info.management_port
        port_mac_to_net[mgt_port.mac_address] = mgt_port.network_id
        # this is a network to logical interface id
        iface_map = dict(
            (port_mac_to_net[i['lladdr']], i['ifname'])
            for i in interfaces if i['lladdr'] in port_mac_to_net)

        # sending all the standard config over to the driver for final updates
        config = self.driver.build_config(worker_context, mgt_port, iface_map)

        self.log.debug('preparing to update config to %r', config)

        for attempt in six.moves.range(max_attempts):
            try:
                self.driver.update_config(
                    self.instance_info.management_address, config)
            except Exception:
                if attempt == max_attempts - 1:
                    # Only log the traceback if we encounter it many times.
                    self.log.exception(_LE('failed to update config'))
                else:
                    self.log.debug('failed to update config, attempt %d',
                                   attempt)
                time.sleep(cfg.CONF.retry_delay)
                continue
            self.state = states.CONFIGURED
            self.log.info('Instance config updated')
            return self.state

        # Every attempt failed; ask for a restart.
        self.state = states.RESTART
        return self.state
Example #50
0
    def _thread_target(self):
        """This method runs in each worker thread.

        Pulls state machines off the work queue and drives their
        update() until a None sentinel or self._keep_going goes False.

        :returns: the thread-local WorkerContext (so tests can look at it)
        """
        my_id = threading.current_thread().name
        LOG.debug('starting thread')
        # Use a separate context from the one we use when receiving
        # messages and talking to the tenant router manager because we
        # are in a different thread and the clients are not
        # thread-safe.
        context = WorkerContext(self.management_address)
        while self._keep_going:
            try:
                # Try to get a state machine from the work queue. If
                # there's nothing to do, we will block for a while.
                self._thread_status[my_id] = 'waiting for task'
                sm = self.work_queue.get(timeout=10)
            except Queue.Empty:
                continue
            if sm is None:
                LOG.info(_LI('received stop message'))
                break

            # Make sure we didn't already have some updates under way
            # for a router we've been told to ignore for debug mode.
            should_ignore, reason = \
                self.db_api.resource_in_debug(sm.resource_id)
            if should_ignore:
                LOG.debug(
                    'Skipping update of resource %s in debug mode. '
                    '(reason: %s)', sm.resource_id, reason)
                continue

            # In the event that a rebalance took place while processing an
            # event, it may have been put back into the work queue. Check
            # the hash table once more to find out if we still manage it
            # and do some cleanup if not.
            if cfg.CONF.coordination.enabled:
                target_hosts = self.hash_ring_mgr.ring.get_hosts(
                    sm.resource_id)
                if self.host not in target_hosts:
                    LOG.debug(
                        'Skipping update of router %s, it no longer '
                        'maps here.', sm.resource_id)
                    trm = self.tenant_managers[sm.tenant_id]
                    trm.unmanage_resource(sm.resource_id)
                    self.work_queue.task_done()
                    with self.lock:
                        self._release_resource_lock(sm)
                    continue

            # FIXME(dhellmann): Need to look at the router to see if
            # it belongs to a tenant which is in debug mode, but we
            # don't have that data in the sm, yet.
            LOG.debug('performing work on %s for tenant %s', sm.resource_id,
                      sm.tenant_id)
            try:
                self._thread_status[my_id] = 'updating %s' % sm.resource_id
                sm.update(context)
            except Exception:
                # Narrowed from a bare except so SystemExit/KeyboardInterrupt
                # and friends are not swallowed here.
                LOG.exception(_LE('could not complete update for %s'),
                              sm.resource_id)
            finally:
                self._thread_status[my_id] = ('finalizing task for %s' %
                                              sm.resource_id)
                self.work_queue.task_done()
                with self.lock:
                    # Release the lock that prevents us from adding
                    # the state machine back into the queue. If we
                    # find more work, we will re-acquire it. If we do
                    # not find more work, we hold the primary work
                    # queue lock so the main thread cannot put the
                    # state machine back into the queue until we
                    # release that lock.
                    self._release_resource_lock(sm)
                    # The state machine has indicated that it is done
                    # by returning. If there is more work for it to
                    # do, reschedule it by placing it at the end of
                    # the queue.
                    if sm.has_more_work():
                        LOG.debug('%s has more work, returning to work queue',
                                  sm.resource_id)
                        self._add_resource_to_work_queue(sm)
                    else:
                        LOG.debug('%s has no more work', sm.resource_id)
        # Return the context object so tests can look at it
        self._thread_status[my_id] = 'exiting'
        return context
Example #51
0
    def _dispatch_command(self, target, message):
        """Route an administrative command message to the matching handler.

        Dispatches on ``message.body['command']``: worker status reports,
        per-resource and per-tenant debug/manage toggles, forced events,
        global debug, and configuration reload. Unknown commands are
        logged and dropped.

        :param target: routing target (unused here; kept for the
                       message-handler signature)
        :param message: event whose body holds the command instructions
        """
        if not self._should_process_command(message):
            return

        instructions = message.body
        if instructions['command'] == commands.WORKERS_DEBUG:
            self.report_status()

        # Per-resource debug: stop managing a single resource.
        # NOTE(adam_g): Drop 'router-debug' compat in M.
        elif (instructions['command'] == commands.RESOURCE_DEBUG
              or instructions['command'] == commands.ROUTER_DEBUG):

            resource_id = (instructions.get('resource_id')
                           or instructions.get('router_id'))
            if not resource_id:
                LOG.warning(
                    _LW('Ignoring instruction to debug resource with no id'))
                return
            reason = instructions.get('reason')
            if resource_id in commands.WILDCARDS:
                LOG.warning(
                    _LW('Ignoring instruction to debug all resources with %r'),
                    resource_id)
            else:
                LOG.info(_LI('Placing resource %s in debug mode (reason: %s)'),
                         resource_id, reason)
                self.db_api.enable_resource_debug(resource_id, reason)

        # Per-resource manage: resume management and release any lock.
        elif (instructions['command'] == commands.RESOURCE_MANAGE
              or instructions['command'] == commands.ROUTER_MANAGE):
            resource_id = (instructions.get('resource_id')
                           or instructions.get('router_id'))
            if not resource_id:
                LOG.warning(
                    _LW('Ignoring instruction to manage resource with no id'))
                return
            try:
                self.db_api.disable_resource_debug(resource_id)
                LOG.info(_LI('Resuming management of resource %s'),
                         resource_id)
            except KeyError:
                pass
            try:
                self._resource_locks[resource_id].release()
                LOG.info(_LI('Unlocked resource %s'), resource_id)
            except KeyError:
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass

        # Forced CRUD events against a managed state machine.
        elif instructions['command'] in EVENT_COMMANDS:
            resource_id = instructions.get('resource_id')
            sm = self._find_state_machine_by_resource_id(resource_id)
            if not sm:
                LOG.debug(
                    'Will not process command, no managed state machine '
                    'found for resource %s', resource_id)
                return
            new_res = event.Resource(id=resource_id,
                                     driver=sm.resource.RESOURCE_NAME,
                                     tenant_id=sm.tenant_id)
            new_msg = event.Event(
                resource=new_res,
                crud=EVENT_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_res)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'), instructions['command'],
                     new_res)

        # NOTE(adam_g): This is here to support the deprecated old format of
        #               sending commands to specific routers and can be
        #               removed once the CLI component is dropped in M.
        elif instructions['command'] in DEPRECATED_ROUTER_COMMANDS:
            new_rsc = event.Resource(
                driver=drivers.router.Router.RESOURCE_NAME,
                id=message.body.get('router_id'),
                tenant_id=message.body.get('tenant_id'),
            )
            new_msg = event.Event(
                resource=new_rsc,
                crud=DEPRECATED_ROUTER_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_rsc)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'), instructions['command'],
                     new_rsc)

        # Tenant-level debug/manage mirror the per-resource variants.
        elif instructions['command'] == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id in commands.WILDCARDS:
                LOG.warning(
                    _LW('Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            else:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)

        elif instructions['command'] == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass

        # Global debug: 'enabled' is expected to be 1 or 0.
        elif instructions['command'] == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info('Enabling global debug mode (reason: %s)', reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info('Disabling global debug mode')
                self.db_api.disable_global_debug()
            else:
                LOG.warning('Unrecognized global debug command: %s',
                            instructions)
        elif instructions['command'] == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                cfg.CONF.log_opt_values(LOG, INFO)

        else:
            LOG.warning(_LW('Unrecognized command: %s'), instructions)
Example #52
0
    def get_state_machines(self, message, worker_context):
        """Return the state machines and the queue for sending it messages for
        the logical resource being addressed by the message.

        :param message: the event to route
        :param worker_context: worker context wrapping the API clients
        :returns: list of state machines that should receive the message
        :raises: InvalidIncomingMessage when the message carries no
                 resource id
        """
        # Simplified from `not r or (r and not r.id)`, which is equivalent.
        if not message.resource or not message.resource.id:
            LOG.error(
                _LE('Cannot get state machine for message with '
                    'no message.resource'))
            raise InvalidIncomingMessage()

        state_machines = []

        # Send to all of our resources.
        if message.resource.id == '*':
            LOG.debug('routing to all state machines')
            state_machines = self.state_machines.values()

        # Ignore messages to deleted resources.
        elif self.state_machines.has_been_deleted(message.resource.id):
            LOG.debug('dropping message for deleted resource')
            return []

        # Send to resources that have an ERROR status
        elif message.resource.id == 'error':
            state_machines = [
                sm for sm in self.state_machines.values() if sm.has_error()
            ]
            LOG.debug('routing to %d errored state machines',
                      len(state_machines))

        # Create a new state machine for this router.
        elif message.resource.id not in self.state_machines:
            LOG.debug('creating state machine for %s', message.resource.id)

            # load the driver
            if not message.resource.driver:
                # Fixed implicit string concatenation that produced
                # "specifyinga driver." in the log message.
                LOG.error(
                    _LE('cannot create state machine without '
                        'specifying a driver.'))
                return []

            resource_obj = self._load_resource_from_message(
                worker_context, message)

            if not resource_obj:
                # this means the driver didn't load for some reason..
                # this might not be needed at all.
                LOG.debug('for some reason loading the driver failed')
                return []

            def deleter():
                self._delete_resource(message.resource)

            new_state_machine = state.Automaton(
                resource=resource_obj,
                tenant_id=self.tenant_id,
                delete_callback=deleter,
                bandwidth_callback=self._report_bandwidth,
                worker_context=worker_context,
                queue_warning_threshold=self._queue_warning_threshold,
                reboot_error_threshold=self._reboot_error_threshold,
            )
            self.state_machines[message.resource.id] = new_state_machine
            state_machines = [new_state_machine]

        # Send directly to an existing router.
        elif message.resource.id:
            state_machines = [self.state_machines[message.resource.id]]

        # Filter out any deleted state machines.
        return [
            machine for machine in state_machines
            if (not machine.deleted and
                not self.state_machines.has_been_deleted(machine.resource.id))
        ]