Example #1
    def stop(self, worker_context):
        """Attempts to destroy the instance with configured timeout.

        :param worker_context:
        :returns:
        """
        self.log.info(_LI('Destroying instance'))

        self.driver.delete_ports(worker_context)

        if not self.instance_info:
            self.log.info(_LI('Instance already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            worker_context.nova_client.destroy_instance(self.instance_info)
        except Exception:
            self.log.exception(_LE('Error deleting router instance'))

        start = time.time()
        i = 0
        while time.time() - start < cfg.CONF.boot_timeout:
            i += 1
            if not worker_context.nova_client.\
                    get_instance_by_id(self.instance_info.id_):
                if self.state != states.GONE:
                    self.state = states.DOWN
                return self.state
            self.log.debug('Router has not finished stopping')
            time.sleep(cfg.CONF.retry_delay)
        self.log.error(_LE(
            'Router failed to stop within %d secs'),
            cfg.CONF.boot_timeout)
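
The stop() above polls Nova until the instance disappears or cfg.CONF.boot_timeout elapses. A minimal sketch of that poll-until-gone loop in isolation, assuming a caller-supplied is_gone() callable and illustrative timeout values:

import time

def wait_until_gone(is_gone, timeout=120, retry_delay=1):
    # is_gone, timeout and retry_delay stand in for the nova_client lookup,
    # cfg.CONF.boot_timeout and cfg.CONF.retry_delay used in the example.
    start = time.time()
    while time.time() - start < timeout:
        if is_gone():
            return True
        time.sleep(retry_delay)
    return False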
Example #2
File: state.py Project: gocloudxyz/astara
    def send_message(self, message):
        "Called when the worker put a message in the state machine queue"
        if self.deleted:
            # Ignore any more incoming messages
            self.driver.log.debug("deleted state machine, ignoring incoming message %s", message)
            return False

        # NOTE(dhellmann): This check is largely redundant with the
        # one in CalcAction.transition() but it may allow us to avoid
        # adding poll events to the queue at all, and therefore cut
        # down on the number of times a worker thread wakes up to
        # process something on a router that isn't going to actually
        # do any work.
        if message.crud == POLL and self.instance.state == states.ERROR:
            self.driver.log.info(_LI("Resource status is ERROR, ignoring POLL message: %s"), message)
            return False

        if message.crud == REBUILD:
            if message.body.get("image_uuid"):
                self.driver.log.info(_LI("Resource is being REBUILT with custom image %s"), message.body["image_uuid"])
                self.image_uuid = message.body["image_uuid"]
            else:
                self.image_uuid = self.driver.image_uuid

        self._queue.append(message.crud)
        queue_len = len(self._queue)
        if queue_len > self._queue_warning_threshold:
            logger = self.driver.log.warning
        else:
            logger = self.driver.log.debug
        logger(_LW("incoming message brings queue length to %s"), queue_len)
        return True
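
The tail of send_message() picks a log level based on how deep the queue has grown. A small stdlib-only sketch of that pattern, with an assumed threshold value:

import logging

log = logging.getLogger(__name__)
QUEUE_WARNING_THRESHOLD = 10  # assumed value for illustration

def note_queue_length(queue_len):
    # Warn once the backlog passes the threshold, otherwise stay at debug.
    logger = log.warning if queue_len > QUEUE_WARNING_THRESHOLD else log.debug
    logger('incoming message brings queue length to %s', queue_len)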
Example #3
 def __init__(self, id_, name, tenant_id, network_id, ip_version, cidr,
              gateway_ip, enable_dhcp, dns_nameservers, host_routes,
              ipv6_ra_mode):
     self.id = id_
     self.name = name
     self.tenant_id = tenant_id
     self.network_id = network_id
     self.ip_version = ip_version
     try:
         self.cidr = netaddr.IPNetwork(cidr)
     except (TypeError, netaddr.AddrFormatError) as e:
         raise ValueError(
             _('Invalid CIDR %r for subnet %s of network %s: %s') % (
                 cidr,
                 id_,
                 network_id,
                 e,
             ))
     try:
         self.gateway_ip = netaddr.IPAddress(gateway_ip)
     except (TypeError, netaddr.AddrFormatError) as e:
         self.gateway_ip = None
         LOG.info(_LI('Bad gateway_ip on subnet %s: %r (%s)'), id_,
                  gateway_ip, e)
     self.enable_dhcp = enable_dhcp
     self.dns_nameservers = dns_nameservers
     self.host_routes = host_routes
     self.ipv6_ra_mode = ipv6_ra_mode
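
The constructor above leans on netaddr for validation: a bad CIDR aborts construction, while a bad gateway only downgrades gateway_ip to None. A quick sketch of how the netaddr calls behave on good and bad input:

import netaddr

netaddr.IPNetwork('10.0.0.0/24')   # parses cleanly
netaddr.IPAddress('10.0.0.1')      # parses cleanly

try:
    netaddr.IPNetwork('not-a-cidr')
except (TypeError, netaddr.AddrFormatError) as e:
    # malformed input is rejected, mirroring the except clause above
    print('rejected: %s' % e)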
Example #4
    def launch_instances(self, count, driver):
        LOG.info(_LI(
            'Launching %s %s instances.'), count, driver.RESOURCE_NAME)
        for i in range(0, count):
            # NOTE: Use a fake UUID so astara-neutron's name matching still
            # catches this port as an astara port. This can be avoided if
            # we use a mgt security group in the future.
            mgt_port = self.ctxt.neutron_client.create_management_port(
                '00000000-0000-0000-0000-000000000000')
            nics = [{
                'net-id': mgt_port.network_id,
                'v4-fixed-ip': '',
                'port-id': mgt_port.id}]

            instance_name = INSTANCE_FREE % {
                'resource_name': driver.RESOURCE_NAME
            }
            image = self.images[driver.RESOURCE_NAME]
            flavor = self.flavors[driver.RESOURCE_NAME]

            self.ctxt.nova_client.client.servers.create(
                name=instance_name,
                image=image,
                flavor=flavor,
                nics=nics,
                config_drive=True,
                userdata=nova.format_userdata(mgt_port),
            )
Example #5
    def start(self):
        """The pool manager main loop.

        The bulk of the algorithm exists in the 'unused_instances' property.
        This main loop simply checks for a deficit in the pool and dispatches
        a 'launch_instances' call when a deficit needs to be filled.
        """
        while True:
            cur_pools = self.unused_instances
            report = []
            for driver in self.drivers:
                report.append(
                    '%s:%s/%s' %
                    (driver.RESOURCE_NAME,
                     len(cur_pools[driver.RESOURCE_NAME]),
                     self.pool_size))
            LOG.debug('Current pools: %s' % ' '.join(report))

            for driver in self.drivers:
                cur_pool = cur_pools[driver.RESOURCE_NAME]
                deficit = self.pool_size - len(cur_pool)
                if deficit:
                    LOG.info(_LI(
                        'Need to launch %s more %s instance(s).'),
                        deficit, driver.RESOURCE_NAME)
                    self.launch_instances(
                        driver=driver, count=deficit)
            time.sleep(self.poll_interval)
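
The main loop only launches instances when a pool is short of pool_size. A stripped-down sketch of the deficit calculation over a plain dict, with made-up pool contents:

pool_size = 3  # desired number of warm instances per resource type
cur_pools = {'router': ['srv-1', 'srv-2'], 'loadbalancer': []}  # assumed data

for resource_name, pool in cur_pools.items():
    deficit = pool_size - len(pool)
    if deficit:
        print('Need to launch %s more %s instance(s).'
              % (deficit, resource_name))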
Example #6
 def __init__(self, id_, name, tenant_id, network_id, ip_version, cidr,
              gateway_ip, enable_dhcp, dns_nameservers, host_routes,
              ipv6_ra_mode):
     self.id = id_
     self.name = name
     self.tenant_id = tenant_id
     self.network_id = network_id
     self.ip_version = ip_version
     try:
         self.cidr = netaddr.IPNetwork(cidr)
     except (TypeError, netaddr.AddrFormatError) as e:
         raise ValueError(
             _('Invalid CIDR %r for subnet %s of network %s: %s') % (
                 cidr, id_, network_id, e,
             )
         )
     try:
         self.gateway_ip = netaddr.IPAddress(gateway_ip)
     except (TypeError, netaddr.AddrFormatError) as e:
         self.gateway_ip = None
         LOG.info(_LI(
             'Bad gateway_ip on subnet %s: %r (%s)'),
             id_, gateway_ip, e)
     self.enable_dhcp = enable_dhcp
     self.dns_nameservers = dns_nameservers
     self.host_routes = host_routes
     self.ipv6_ra_mode = ipv6_ra_mode
Example #7
    def run(self, ip_address, port=cfg.CONF.astara_metadata_port):
        """Run the MetadataProxy.

        :param ip_address: the ip address to bind to for incoming requests
        :param port: the port to bind to for incoming requests
        :returns: None
        """
        app = MetadataProxyHandler()
        for i in six.moves.range(5):
            LOG.info(_LI('Starting the metadata proxy on %s:%s'), ip_address,
                     port)
            try:
                sock = eventlet.listen((ip_address, port),
                                       family=socket.AF_INET6,
                                       backlog=128)
            except socket.error as err:
                if err.errno != 99:
                    raise
                LOG.warning(_LW('Could not create metadata proxy socket: %s'),
                            err)
                LOG.warning(_LW('Sleeping %s before trying again'), i + 1)
                eventlet.sleep(i + 1)
            else:
                break
        else:
            raise RuntimeError(
                _('Could not establish metadata proxy socket on %s:%s') %
                (ip_address, port))
        eventlet.wsgi.server(sock, app, custom_pool=self.pool, log=LOG)
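
run() retries the bind up to five times, sleeping a little longer after each EADDRNOTAVAIL (errno 99) failure. A generic sketch of that linear-backoff retry, assuming a hypothetical bind() callable:

import errno
import socket
import time

def bind_with_retry(bind, attempts=5):
    # Call bind() until it succeeds or the attempts are exhausted.
    for i in range(attempts):
        try:
            return bind()
        except socket.error as err:
            if err.errno != errno.EADDRNOTAVAIL:
                raise
            time.sleep(i + 1)  # back off a little longer each time
    raise RuntimeError('could not establish socket')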
Example #8
    def start(self):
        """The pool manager main loop.

        The bulk of the algorithm exists in the 'unused_instances' property.
        This main loop simply checks for a deficit in the pool and dispatches
        a 'launch_instances' call when a deficit needs to be filled.
        """
        while True:
            cur_pools = self.unused_instances
            report = []
            for driver in self.drivers:
                report.append(
                    '%s:%s/%s' %
                    (driver.RESOURCE_NAME, len(
                        cur_pools[driver.RESOURCE_NAME]), self.pool_size))
            LOG.debug('Current pools: %s' % ' '.join(report))

            for driver in self.drivers:
                cur_pool = cur_pools[driver.RESOURCE_NAME]
                deficit = self.pool_size - len(cur_pool)
                if deficit:
                    LOG.info(_LI('Need to launch %s more %s instance(s).'),
                             deficit, driver.RESOURCE_NAME)
                    self.launch_instances(driver=driver, count=deficit)
            time.sleep(self.poll_interval)
Example #9
    def launch_instances(self, count, driver):
        LOG.info(_LI('Launching %s %s instances.'), count,
                 driver.RESOURCE_NAME)
        for i in range(0, count):
            # NOTE: Use a fake UUID so astara-neutron's name matching still
            # catches this port as an astara port. This can be avoided if
            # we use a mgt security group in the future.
            mgt_port = self.ctxt.neutron_client.create_management_port(
                '00000000-0000-0000-0000-000000000000')
            nics = [{
                'net-id': mgt_port.network_id,
                'v4-fixed-ip': '',
                'port-id': mgt_port.id
            }]

            instance_name = INSTANCE_FREE % {
                'resource_name': driver.RESOURCE_NAME
            }
            image = self.images[driver.RESOURCE_NAME]
            flavor = self.flavors[driver.RESOURCE_NAME]

            self.ctxt.nova_client.client.servers.create(
                name=instance_name,
                image=image,
                flavor=flavor,
                nics=nics,
                config_drive=True,
                userdata=nova.format_userdata(mgt_port),
            )
Example #10
    def boot(self, worker_context):
        """Boots the instances with driver pre/post boot hooks.

        :returns: None
        """
        self.log.info('Booting %s' % self.resource.RESOURCE_NAME)

        if self.state != states.DEGRADED:
            self.state = states.DOWN
            self._boot_counter.start()

        # driver preboot hook
        self.resource.pre_boot(worker_context)

        try:
            self.instances.create(worker_context)
            if not self.instances:
                self.log.info(_LI('Previous instances are still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                return
        except Exception:
            self.log.exception(_LE('Instances failed to start boot'))
        else:
            self.state = states.BOOTING

        # driver post boot hook
        self.resource.post_boot(worker_context)
Example #11
    def boot(self, worker_context):
        """Boots the instances with driver pre/post boot hooks.

        :returns: None
        """
        self.log.info('Booting %s' % self.resource.RESOURCE_NAME)

        if self.state != states.DEGRADED:
            self.state = states.DOWN
            self._boot_counter.start()

        # driver preboot hook
        self.resource.pre_boot(worker_context)

        try:
            self.instances.create(worker_context)
            if not self.instances:
                self.log.info(_LI('Previous instances are still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                return
        except Exception:
            self.log.exception(_LE('Instances failed to start boot'))
        else:
            self.state = states.BOOTING

        # driver post boot hook
        self.resource.post_boot(worker_context)
Example #12
    def get_instance(self,
                     resource_type,
                     name,
                     management_port=None,
                     instance_ports=None):
        """Get an instance from the pool.

        This involves popping it out of the pool, updating its name and
        attaching any ports.

        :param resource_type: The str driver name of the resource
        :param name: The requested name of the instance
        :param management_port: The management port dict that was created for
                                the instance by the RUG.
        :param instance_ports: A list of dicts of ports to be attached to the
                               instance upon reservation.

        :returns: A tuple containing (novaclient server object for the
                  reserved server, a port object for the management port,
                  a list of port objects that were attached to the server)
        """
        instance_ports = instance_ports or []

        try:
            server = self.unused_instances[resource_type][0]
        except IndexError:
            raise PezPoolExhausted()

        LOG.info(_LI('Renaming instance %s to %s'), server.name, name)
        server = self.ctxt.nova_client.client.servers.update(server, name=name)

        for port in instance_ports:
            LOG.info(_LI('Attaching instance port %s to %s (%s)'), port['id'],
                     server.name, server.id)
            self.ctxt.nova_client.client.servers.interface_attach(
                server=server, port_id=port['id'], net_id=None, fixed_ip=None)

        mgt_port, instance_ports = (
            self.ctxt.neutron_client.get_ports_for_instance(server.id))

        return (
            self.ctxt.nova_client.client.servers.get(server.id),
            mgt_port,
            instance_ports,
        )
Example #13
    def get_instance(self, resource_type, name, management_port=None,
                     instance_ports=None):
        """Get an instance from the pool.

        This involves popping it out of the pool, updating its name and
        attaching any ports.

        :param resource_type: The str driver name of the resource
        :param name: The requested name of the instance
        :param management_port: The management port dict that was created for
                                the instance by the RUG.
        :param instance_ports: A list of dicts of ports to be attached to the
                               instance upon reservation.

        :returns: A tuple containing (novaclient server object for the
                  reserved server, a port object for the management port,
                  a list of port objects that were attached to the server)
        """
        instance_ports = instance_ports or []

        try:
            server = self.unused_instances[resource_type][0]
        except IndexError:
            raise PezPoolExhausted()

        LOG.info(_LI('Renaming instance %s to %s'), server.name, name)
        server = self.ctxt.nova_client.client.servers.update(
            server, name=name)

        for port in instance_ports:
            LOG.info(_LI('Attaching instance port %s to %s (%s)'),
                     port['id'], server.name, server.id)
            self.ctxt.nova_client.client.servers.interface_attach(
                server=server, port_id=port['id'], net_id=None, fixed_ip=None)

        mgt_port, instance_ports = (
            self.ctxt.neutron_client.get_ports_for_instance(server.id)
        )

        return (
            self.ctxt.nova_client.client.servers.get(server.id),
            mgt_port,
            instance_ports,
        )
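
get_instance() reserves the first unused server for a resource type and signals an empty pool with PezPoolExhausted. A stripped-down sketch of that reserve-or-raise step, with a stand-in exception and assumed pool data:

class PoolExhausted(Exception):
    """Stand-in for PezPoolExhausted in the examples above."""

unused_instances = {'router': []}  # assumed (empty) pool

def take_from_pool(resource_type):
    try:
        return unused_instances[resource_type][0]
    except IndexError:
        raise PoolExhausted()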
Example #14
 def run(self):
     try:
         while True:
             self._coordinator.heartbeat()
             self._coordinator.run_watchers()
             time.sleep(self.heartbeat_interval)
     except CoordinatorDone:
         LOG.info(_LI('Stopping RUG coordinator.'))
         return
Example #15
 def run(self):
     try:
         while True:
             self._coordinator.heartbeat()
             self._coordinator.run_watchers()
             time.sleep(self.heartbeat_interval)
     except CoordinatorDone:
         LOG.info(_LI('Stopping RUG coordinator.'))
         return
Example #16
    def _check_outdated_instances(self, pools):
        outdated_instances = []
        for resource, pool in pools.items():
            for server in pool:
                if server.image['id'] != str(self.images[resource]):
                    LOG.info(
                        _LI('Deleting instance %s with outdated image, '
                            '%s != %s'), server.id, server.image['id'],
                        self.images[resource])
                    outdated_instances.append(server)
                elif server.flavor['id'] != str(self.flavors[resource]):
                    LOG.info(
                        _LI('Deleting instance %s with outdated flavor, '
                            '%s != %s'), server.id, server.flavor['id'],
                        self.flavors[resource])
                    outdated_instances.append(server)

        for i in outdated_instances:
            self.delete_instance(i.id)
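
_check_outdated_instances() flags any pooled server whose image or flavor no longer matches the configured one. The same filter over plain dicts, with made-up records:

expected = {'image': 'img-2', 'flavor': 'fl-1'}  # assumed current config
servers = [
    {'id': 'a', 'image': 'img-1', 'flavor': 'fl-1'},
    {'id': 'b', 'image': 'img-2', 'flavor': 'fl-1'},
]

outdated = [s for s in servers
            if s['image'] != expected['image']
            or s['flavor'] != expected['flavor']]
print([s['id'] for s in outdated])  # ['a']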
Example #17
def main(argv=sys.argv[1:]):
    ak_cfg.parse_config(argv)
    log.setup(CONF, 'astara-pez')
    CONF.log_opt_values(LOG, logging.INFO)

    LOG.info(_LI("Starting Astara Pez service."))

    mgr = PezService()
    launcher = service.launch(CONF, mgr)
    launcher.wait()
Example #18
    def _check_outdated_instances(self, pools):
        outdated_instances = []
        for resource, pool in pools.items():
            for server in pool:
                if server.image['id'] != str(self.images[resource]):
                    LOG.info(_LI(
                        'Deleting instance %s with outdated image, '
                        '%s != %s'),
                        server.id, server.image['id'], self.images[resource])
                    outdated_instances.append(server)
                elif server.flavor['id'] != str(self.flavors[resource]):
                    LOG.info(_LI(
                        'Deleting instance %s with outdated flavor, '
                        '%s != %s'),
                        server.id, server.flavor['id'], self.flavors[resource])
                    outdated_instances.append(server)

        for i in outdated_instances:
            self.delete_instance(i.id)
Example #19
def main(argv=sys.argv[1:]):
    ak_cfg.parse_config(argv)
    log.setup(CONF, 'astara-pez')
    CONF.log_opt_values(LOG, logging.INFO)

    LOG.info(_LI("Starting Astara Pez service."))

    mgr = PezService()
    launcher = service.launch(CONF, mgr)
    launcher.wait()
Example #20
 def update_loadbalancer_status(self, loadbalancer_id, status):
     try:
         self.api_client.update_loadbalancer_status(loadbalancer_id, status)
     except Exception as e:
         # We don't want to die just because we can't tell neutron
         # what the status of the load balancer should be. Log the error
         # but otherwise ignore it.
         LOG.info(_LI(
             'ignoring failure to update status for %s to %s: %s'),
             loadbalancer_id, status, e,
         )
Example #21
 def get_network_subnets(self, network_id):
     response = []
     subnet_response = self.api_client.list_subnets(network_id=network_id)
     subnets = subnet_response['subnets']
     for s in subnets:
         try:
             response.append(Subnet.from_dict(s))
         except Exception as e:
             LOG.info(_LI('ignoring subnet %s (%s) on network %s: %s'),
                      s.get('id'), s.get('cidr'), network_id, e)
     return response
Example #22
    def send_message(self, message):
        "Called when the worker put a message in the state machine queue"
        if self.deleted:
            # Ignore any more incoming messages
            self.resource.log.debug(
                'deleted state machine, ignoring incoming message %s',
                message)
            return False

        # NOTE(dhellmann): This check is largely redundant with the
        # one in CalcAction.transition() but it may allow us to avoid
        # adding poll events to the queue at all, and therefore cut
        # down on the number of times a worker thread wakes up to
        # process something on a router that isn't going to actually
        # do any work.
        if message.crud == POLL and \
                self.instance.state == states.ERROR:
            self.resource.log.info(_LI(
                'Resource status is ERROR, ignoring POLL message: %s'),
                message,
            )
            return False

        if message.crud == REBUILD:
            if message.body.get('image_uuid'):
                self.resource.log.info(_LI(
                    'Resource is being REBUILT with custom image %s'),
                    message.body['image_uuid']
                )
                self.image_uuid = message.body['image_uuid']
            else:
                self.image_uuid = self.resource.image_uuid

        self._queue.append(message.crud)
        queue_len = len(self._queue)
        if queue_len > self._queue_warning_threshold:
            logger = self.resource.log.warning
        else:
            logger = self.resource.log.debug
        logger(_LW('incoming message brings queue length to %s'), queue_len)
        return True
Example #23
 def get_network_subnets(self, network_id):
     response = []
     subnet_response = self.api_client.list_subnets(network_id=network_id)
     subnets = subnet_response['subnets']
     for s in subnets:
         try:
             response.append(Subnet.from_dict(s))
         except Exception as e:
             LOG.info(_LI('ignoring subnet %s (%s) on network %s: %s'),
                      s.get('id'), s.get('cidr'),
                      network_id, e)
     return response
Example #24
File: state.py Project: gocloudxyz/astara
 def execute(self, action, worker_context):
     # Check for a loop where the resource keeps failing to boot or
     # accept the configuration.
     if self.instance.attempts >= self.params.reboot_error_threshold:
         self.params.driver.log.info(_LI("Dropping out of boot loop after " " %s trials"), self.instance.attempts)
         self.instance.set_error(worker_context)
         return action
     self.instance.boot(worker_context)
     self.params.driver.log.debug(
         "CreateInstance attempt %s/%s", self.instance.attempts, self.params.reboot_error_threshold
     )
     return action
Example #25
    def _should_process_message(self, target, message):
        """Determines whether a message should be processed or not."""
        global_debug, reason = self.db_api.global_debug()
        if global_debug:
            LOG.info(
                'Skipping incoming event, cluster in global debug '
                'mode. (reason: %s)', reason)
            return False

        if message.resource.id not in commands.WILDCARDS:
            message = self._populate_resource_id(message)
            if not message.resource.id:
                LOG.info(_LI('Ignoring message with no resource found.'))
                return False

            should_ignore, reason = \
                self.db_api.tenant_in_debug(message.resource.tenant_id)
            if should_ignore:
                LOG.info(
                    'Ignoring message intended for tenant %s in debug mode '
                    '(reason: %s): %s',
                    message.resource.tenant_id,
                    reason,
                    message,
                )
                return False

            should_ignore, reason = self.db_api.resource_in_debug(
                message.resource.id)
            if should_ignore:
                LOG.info(
                    'Ignoring message intended for resource %s in '
                    'debug mode (reason: %s): %s',
                    message.resource.id,
                    reason,
                    message,
                )
                return False

        if target in commands.WILDCARDS:
            return message

        if cfg.CONF.coordination.enabled:
            target_hosts = self.hash_ring_mgr.ring.get_hosts(
                message.resource.id)
            if self.host not in target_hosts:
                LOG.debug(
                    'Ignoring message intended for resource %s as it '
                    'does not map to this Rug process.', message.resource.id)
                return False

        return message
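
When coordination is enabled, the worker only handles resources whose id maps to its own host on the hash ring. A deliberately simplified membership test using a plain modulo hash instead of the real tooz-backed ring:

import hashlib

hosts = ['rug-host-1', 'rug-host-2', 'rug-host-3']  # assumed cluster members
my_host = 'rug-host-2'

def maps_to_me(resource_id):
    # Simplified stand-in for hash_ring_mgr.ring.get_hosts(resource_id);
    # a real consistent-hash ring avoids remapping most keys on membership
    # changes and may return several replica hosts.
    digest = int(hashlib.md5(resource_id.encode()).hexdigest(), 16)
    return hosts[digest % len(hosts)] == my_host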
Example #26
 def update_loadbalancer_status(self, loadbalancer_id, status):
     try:
         self.api_client.update_loadbalancer_status(loadbalancer_id, status)
     except Exception as e:
         # We don't want to die just because we can't tell neutron
         # what the status of the load balancer should be. Log the error
         # but otherwise ignore it.
         LOG.info(
             _LI('ignoring failure to update status for %s to %s: %s'),
             loadbalancer_id,
             status,
             e,
         )
Example #27
    def stop(self, worker_context):
        """Attempts to destroy the instance cluster

        :param worker_context:
        :returns:
        """
        self.log.info(_LI('Destroying instance'))

        self.resource.delete_ports(worker_context)

        if not self.instances:
            self.log.info(_LI('Instance(s) already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            self.instances.destroy(worker_context)
            if self.state != states.GONE:
                self.state = states.DOWN
        except Exception:
            self.log.exception(_LE('Failed to stop instance(s)'))
Example #28
    def stop(self, worker_context):
        """Attempts to destroy the instance cluster

        :param worker_context:
        :returns:
        """
        self.log.info(_LI('Destroying instance'))

        self.resource.delete_ports(worker_context)

        if not self.instances:
            self.log.info(_LI('Instance(s) already destroyed.'))
            if self.state != states.GONE:
                self.state = states.DOWN
            return self.state

        try:
            self.instances.destroy(worker_context)
            if self.state != states.GONE:
                self.state = states.DOWN
        except Exception:
            self.log.exception(_LE('Failed to stop instance(s)'))
Example #29
 def stop(self):
     """Shutdown all workers cleanly.
     """
     LOG.info('shutting down scheduler')
     # Send a poison pill to all of the workers
     for w in self.workers:
         LOG.debug('sending stop message to %s', w['worker'].name)
         w['queue'].put(None)
     # Wait for the workers to finish and be ready to exit.
     for w in self.workers:
         LOG.debug('waiting for queue for %s', w['worker'].name)
         w['queue'].close()
         LOG.debug('waiting for worker %s', w['worker'].name)
         w['worker'].join()
     LOG.info(_LI('scheduler shutdown'))
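
stop() uses the classic poison-pill pattern: a None message tells each worker loop to exit, and join() waits for it to happen. A minimal threaded sketch of the same pattern:

import queue
import threading

def worker(q):
    while True:
        item = q.get()
        if item is None:        # poison pill: time to exit
            break
        print('processing %r' % item)

q = queue.Queue()
t = threading.Thread(target=worker, args=(q,))
t.start()
q.put('task-1')
q.put(None)                     # send the poison pill
t.join()                        # wait for the worker to finish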
Example #30
 def execute(self, action, worker_context):
     # Check for a loop where the resource keeps failing to boot or
     # accept the configuration.
     if (not self.instance.state == states.DEGRADED and
        self.instance.attempts >= self.params.reboot_error_threshold):
         self.params.resource.log.info(_LI(
              'Dropping out of boot loop after %s trials'),
             self.instance.attempts)
         self.instance.set_error(worker_context)
         return action
     self.instance.boot(worker_context)
     self.params.resource.log.debug('CreateInstance attempt %s/%s',
                                    self.instance.attempts,
                                    self.params.reboot_error_threshold)
     return action
Example #31
 def stop(self):
     """Shutdown all workers cleanly.
     """
     LOG.info('shutting down scheduler')
     # Send a poison pill to all of the workers
     for w in self.workers:
         LOG.debug('sending stop message to %s', w['worker'].name)
         w['queue'].put(None)
     # Wait for the workers to finish and be ready to exit.
     for w in self.workers:
         LOG.debug('waiting for queue for %s', w['worker'].name)
         w['queue'].close()
         LOG.debug('waiting for worker %s', w['worker'].name)
         w['worker'].join()
     LOG.info(_LI('scheduler shutdown'))
Example #32
File: worker.py Project: gocloudxyz/astara
    def report_status(self, show_config=True):
        if show_config:
            cfg.CONF.log_opt_values(LOG, INFO)
        LOG.info(_LI(
            'Number of state machines in work queue: %d'),
            self.work_queue.qsize()
        )
        LOG.info(_LI(
            'Number of tenant resource managers managed: %d'),
            len(self.tenant_managers)
        )
        for thread in self.threads:
            LOG.info(_LI(
                'Thread %s is %s. Last seen: %s'),
                thread.name,
                'alive' if thread.isAlive() else 'DEAD',
                self._thread_status.get(thread.name, 'UNKNOWN'),
            )
        debug_tenants = self.db_api.tenants_in_debug()
        if debug_tenants:
            for t_uuid, reason in debug_tenants:
                LOG.info(_LI('Debugging tenant: %s (reason: %s)'),
                         t_uuid, reason)
        else:
            LOG.info(_LI('No tenants in debug mode'))

        debug_resources = self.db_api.resources_in_debug()
        if debug_resources:
            for resource_id, reason in debug_resources:
                LOG.info(_LI('Debugging resource: %s (reason: %s)'),
                         resource_id, reason)
        else:
            LOG.info(_LI('No resources in debug mode'))

        if cfg.CONF.coordination.enabled:
            # NOTE(adam_g): This list could be big with a large cluster.
            LOG.info(_LI('Peer astara-orchestrator hosts: %s'),
                     self.hash_ring_mgr.hosts)
        else:
            LOG.info(_LI(
                'No peer astara-orchestrator hosts, coordination disabled.'))
Example #33
    def delete_vrrp_port(self, object_id, label='VRRP'):
        name = 'ASTARA:%s:%s' % (label, object_id)
        response = self.api_client.list_ports(name=name)
        port_data = response.get('ports')

        if not port_data and self.conf.legacy_fallback_mode:
            name = name.replace('ASTARA', 'AKANDA')
            LOG.info(_LI('Attempting legacy query for %s.'), name)
            response = self.api_client.list_ports(name=name)
            port_data = response.get('ports')

        if not port_data:
            LOG.warning(_LW(
                'Unable to find VRRP port to delete with name %s.'), name)
        for port in port_data:
            self.api_client.delete_port(port['id'])
Example #34
    def delete_vrrp_port(self, object_id, label='VRRP'):
        name = 'ASTARA:%s:%s' % (label, object_id)
        response = self.api_client.list_ports(name=name)
        port_data = response.get('ports')

        if not port_data and self.conf.legacy_fallback_mode:
            name = name.replace('ASTARA', 'AKANDA')
            LOG.info(_LI('Attempting legacy query for %s.'), name)
            response = self.api_client.list_ports(name=name)
            port_data = response.get('ports')

        if not port_data:
            LOG.warning(
                _LW('Unable to find VRRP port to delete with name %s.'), name)
        for port in port_data:
            self.api_client.delete_port(port['id'])
Example #35
File: worker.py Project: gocloudxyz/astara
    def _should_process_message(self, target, message):
        """Determines whether a message should be processed or not."""
        global_debug, reason = self.db_api.global_debug()
        if global_debug:
            LOG.info('Skipping incoming event, cluster in global debug '
                     'mode. (reason: %s)', reason)
            return False

        if message.resource.id not in commands.WILDCARDS:
            message = self._populate_resource_id(message)
            if not message.resource.id:
                LOG.info(_LI('Ignoring message with no resource found.'))
                return False

            should_ignore, reason = \
                self.db_api.tenant_in_debug(message.resource.tenant_id)
            if should_ignore:
                LOG.info(
                    'Ignoring message intended for tenant %s in debug mode '
                    '(reason: %s): %s',
                    message.resource.tenant_id, reason, message,
                )
                return False

            should_ignore, reason = self.db_api.resource_in_debug(
                message.resource.id)
            if should_ignore:
                LOG.info(
                    'Ignoring message intended for resource %s in '
                    'debug mode (reason: %s): %s',
                    message.resource.id, reason, message,
                )
                return False

        if target in commands.WILDCARDS:
            return message

        if cfg.CONF.coordination.enabled:
            target_hosts = self.hash_ring_mgr.ring.get_hosts(
                message.resource.id)
            if self.host not in target_hosts:
                LOG.debug('Ignoring message intended for resource %s as it '
                          'does not map to this Rug process.',
                          message.resource.id)
                return False

        return message
Example #36
def ignore_signals():
    """Ignore signals that might interrupt processing

    Since the RUG doesn't want to be asynchronously interrupted,
    the signals it receives need to be ignored. The registered
    signals, including SIGHUP, SIGALRM, and the default signals
    SIGUSR1 and SIGUSR2, are captured and ignored via the SIG_IGN
    action.

    :param: None

    :returns: None

    """
    for s in [signal.SIGHUP, signal.SIGUSR1, signal.SIGUSR2, signal.SIGALRM]:
        logging.getLogger(__name__).info(_LI('ignoring signal %s'), s)
        signal.signal(s, signal.SIG_IGN)
Example #37
def ignore_signals():
    """Ignore signals that might interrupt processing

    Since the RUG doesn't want to be asynchronously interrupted,
    the signals it receives need to be ignored. The registered
    signals, including SIGHUP, SIGALRM, and the default signals
    SIGUSR1 and SIGUSR2, are captured and ignored via the SIG_IGN
    action.

    :param: None

    :returns: None

    """
    for s in [signal.SIGHUP, signal.SIGUSR1, signal.SIGUSR2, signal.SIGALRM]:
        logging.getLogger(__name__).info(_LI('ignoring signal %s'), s)
        signal.signal(s, signal.SIG_IGN)
Example #38
    def report_status(self, show_config=True):
        if show_config:
            cfg.CONF.log_opt_values(LOG, INFO)
        LOG.info(_LI('Number of state machines in work queue: %d'),
                 self.work_queue.qsize())
        LOG.info(_LI('Number of tenant resource managers managed: %d'),
                 len(self.tenant_managers))
        for thread in self.threads:
            LOG.info(
                _LI('Thread %s is %s. Last seen: %s'),
                thread.name,
                'alive' if thread.isAlive() else 'DEAD',
                self._thread_status.get(thread.name, 'UNKNOWN'),
            )
        debug_tenants = self.db_api.tenants_in_debug()
        if debug_tenants:
            for t_uuid, reason in debug_tenants:
                LOG.info(_LI('Debugging tenant: %s (reason: %s)'), t_uuid,
                         reason)
        else:
            LOG.info(_LI('No tenants in debug mode'))

        debug_resources = self.db_api.resources_in_debug()
        if debug_resources:
            for resource_id, reason in debug_resources:
                LOG.info(_LI('Debugging resource: %s (reason: %s)'),
                         resource_id, reason)
        else:
            LOG.info(_LI('No resources in debug mode'))

        if cfg.CONF.coordination.enabled:
            # NOTE(adam_g): This list could be big with a large cluster.
            LOG.info(_LI('Peer astara-orchestrator hosts: %s'),
                     self.hash_ring_mgr.hosts)
        else:
            LOG.info(
                _LI('No peer astara-orchestrator hosts, coordination disabled.'
                    ))
Example #39
def shuffle_notifications(notification_queue, sched):
    """Copy messages from the notification queue into the scheduler.
    """
    while True:
        try:
            target, message = notification_queue.get()
            if target is None:
                break
            sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrives during an IO operation
            # an IOError is raised. We catch the exception in the
            # meantime while waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            break
        except Exception:
            LOG.exception(_LE('unhandled exception processing message'))
Example #40
def shuffle_notifications(notification_queue, sched):
    """Copy messages from the notification queue into the scheduler.
    """
    while True:
        try:
            target, message = notification_queue.get()
            if target is None:
                break
            sched.handle_message(target, message)
        except IOError:
            # FIXME(rods): if a signal arrives during an IO operation
            # an IOError is raised. We catch the exception in the
            # meantime while waiting for a better solution.
            pass
        except KeyboardInterrupt:
            LOG.info(_LI('got Ctrl-C'))
            break
        except Exception:
            LOG.exception(_LE('unhandled exception processing message'))
Example #41
    def boot(self, worker_context):
        """Boots the instance with driver pre/post boot hooks.

        :returns: None
        """
        self._ensure_cache(worker_context)

        self.log.info('Booting %s' % self.driver.RESOURCE_NAME)
        self.state = states.DOWN
        self._boot_counter.start()

        # driver preboot hook
        self.driver.pre_boot(worker_context)

        # try to boot the instance
        try:
            instance_info = worker_context.nova_client.boot_instance(
                resource_type=self.driver.RESOURCE_NAME,
                prev_instance_info=self.instance_info,
                name=self.driver.name,
                image_uuid=self.driver.image_uuid,
                flavor=self.driver.flavor,
                make_ports_callback=self.driver.make_ports(worker_context)
            )
            if not instance_info:
                self.log.info(_LI('Previous instance is still deleting'))
                # Reset the boot counter, causing the state machine to start
                # again with a new Instance.
                self.reset_boot_counter()
                self.instance_info = None
                return
        except Exception:
            self.log.exception(_LE('Instance failed to start boot'))
            self.driver.delete_ports(worker_context)
        else:
            # We have successfully started a (re)boot attempt so
            # record the timestamp so we can report how long it takes.
            self.state = states.BOOTING
            self.instance_info = instance_info

        # driver post boot hook
        self.driver.post_boot(worker_context)
Example #42
def get_default_v4_gateway(client, router, networks):
    """Find the IPv4 default gateway for the router.
    """
    LOG.debug('networks = %r', networks)
    if router.external_port:
        LOG.debug('external interface = %s', router.external_port.mac_address)

    # Now find the subnet that our external IP is on, and return its
    # gateway.
    for n in networks:
        if n['network_type'] == EXTERNAL_NET:
            v4_addresses = [
                addr
                for addr in (netaddr.IPAddress(ip.partition('/')[0])
                             for ip in n['interface']['addresses'])
                if addr.version == 4
            ]
            for s in n['subnets']:
                subnet = netaddr.IPNetwork(s['cidr'])
                if subnet.version != 4:
                    continue
                LOG.debug(
                    '%s: checking if subnet %s should have the default route',
                    router.id, s['cidr'])
                for addr in v4_addresses:
                    if addr in subnet:
                        LOG.debug(
                            '%s: found gateway %s for subnet %s on network %s',
                            router.id,
                            s['gateway_ip'],
                            s['cidr'],
                            n['network_id'],
                        )
                        return s['gateway_ip']

    # Sometimes we are asked to build a configuration for the server
    # when the external interface is still marked as "down". We can
    # report that case, but we don't treat it as an error here because
    # we'll be asked to do it again when the interface comes up.
    LOG.info(_LI('%s: no default gateway was found'), router.id)
    return ''
Example #43
def get_default_v4_gateway(client, router, networks):
    """Find the IPv4 default gateway for the router.
    """
    LOG.debug('networks = %r', networks)
    if router.external_port:
        LOG.debug('external interface = %s', router.external_port.mac_address)

    # Now find the subnet that our external IP is on, and return its
    # gateway.
    for n in networks:
        if n['network_type'] == EXTERNAL_NET:
            v4_addresses = [
                addr for addr in (netaddr.IPAddress(ip.partition('/')[0])
                                  for ip in n['interface']['addresses'])
                if addr.version == 4
            ]
            for s in n['subnets']:
                subnet = netaddr.IPNetwork(s['cidr'])
                if subnet.version != 4:
                    continue
                LOG.debug(
                    '%s: checking if subnet %s should have the default route',
                    router.id, s['cidr'])
                for addr in v4_addresses:
                    if addr in subnet:
                        LOG.debug(
                            '%s: found gateway %s for subnet %s on network %s',
                            router.id,
                            s['gateway_ip'],
                            s['cidr'],
                            n['network_id'],
                        )
                        return s['gateway_ip']

    # Sometimes we are asked to build a configuration for the server
    # when the external interface is still marked as "down". We can
    # report that case, but we don't treat it as an error here because
    # we'll be asked to do it again when the interface comes up.
    LOG.info(_LI('%s: no default gateway was found'), router.id)
    return ''
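
The gateway lookup relies on netaddr containment to decide whether one of the router's external addresses sits inside a candidate subnet. A quick sketch of that check:

import netaddr

subnet = netaddr.IPNetwork('203.0.113.0/24')
addr = netaddr.IPAddress('203.0.113.5')

if addr.version == 4 and addr in subnet:
    print('address is on the subnet; use its gateway_ip')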
Example #44
    def start(self):
        """Brings up coordination service online

        This connects the coordination service to its tooz backend. This
        involves:

            - connecting to the cluster
            - creating the coordination group (if required)
            - joining the coordination group
            - registering callbacks to respond to join/leave membership
              events

        After the local node has joined the cluster and knows its remote
        peers, it fires off an initial rebalance event to the workers
        so they can seed their hash ring with the current membership.
        """
        LOG.info(_LI('Starting RUG coordinator process for host %s on %s'),
                 self.host, self.url)
        self._coordinator = tz_coordination.get_coordinator(
            self.url, self.host)
        self._coordinator.start()

        try:
            self._coordinator.create_group(self.group).get()
        except tooz.coordination.GroupAlreadyExist:
            pass

        try:
            self._coordinator.join_group(self.group).get()
            self._coordinator.heartbeat()
        except tooz.coordination.MemberAlreadyExist:
            pass

        self._coordinator.watch_join_group(self.group, self.cluster_changed)
        self._coordinator.watch_leave_group(self.group, self.cluster_changed)
        self._coordinator.heartbeat()
        LOG.debug("Sending initial event changed for members: %s" %
                  self.members)
        self.cluster_changed(event=None, node_bootstrap=True)
Example #45
    def start(self):
        """Brings up coordination service online

        This connects the coordination service to its tooz backend. This
        involves:

            - connecting to the cluster
            - creating the coordination group (if required)
            - joining the coordination group
            - registering callbacks to respond to join/leave membership
              events

        After the local node has joined the cluster and knows its remote
        peers, it fires off an initial rebalance event to the workers
        so they can seed their hash ring with the current membership.
        """
        LOG.info(_LI('Starting RUG coordinator process for host %s on %s'),
                 self.host, self.url)
        self._coordinator = tz_coordination.get_coordinator(
            self.url, self.host)
        self._coordinator.start()

        try:
            self._coordinator.create_group(self.group).get()
        except tooz.coordination.GroupAlreadyExist:
            pass

        try:
            self._coordinator.join_group(self.group).get()
            self._coordinator.heartbeat()
        except tooz.coordination.MemberAlreadyExist:
            pass

        self._coordinator.watch_join_group(self.group, self.cluster_changed)
        self._coordinator.watch_leave_group(self.group, self.cluster_changed)
        self._coordinator.heartbeat()
        LOG.debug("Sending initial event changed for members: %s" %
                  self.members)
        self.cluster_changed(event=None, node_bootstrap=True)
Example #46
    def run(self, ip_address, port):
        app = RugAPI()

        try:
            socket.inet_pton(socket.AF_INET6, ip_address)
            family = socket.AF_INET6
        except Exception:
            family = socket.AF_INET

        for i in six.moves.range(5):
            LOG.info(_LI(
                'Starting the rug-api on %s:%s'),
                ip_address, port,
            )
            try:
                sock = eventlet.listen(
                    (ip_address, port),
                    family=family,
                    backlog=128
                )
            except socket.error as err:
                if err.errno != 99:  # EADDRNOTAVAIL
                    raise
                LOG.warning(_LW('Could not create rug-api socket: %s'), err)
                LOG.warning(_LW('Sleeping %s before trying again'), i + 1)
                eventlet.sleep(i + 1)
            else:
                break
        else:
            raise RuntimeError(_(
                'Could not establish rug-api socket on %s:%s') %
                (ip_address, port)
            )
        eventlet.wsgi.server(
            sock,
            app,
            custom_pool=self.pool,
            log=LOG)
Example #47
    def run(self, ip_address, port=cfg.CONF.astara_metadata_port):
        """Run the MetadataProxy.

        :param ip_address: the ip address to bind to for incoming requests
        :param port: the port to bind to for incoming requests
        :returns: None
        """
        app = MetadataProxyHandler()
        for i in six.moves.range(5):
            LOG.info(_LI(
                'Starting the metadata proxy on %s:%s'),
                ip_address, port
            )
            try:
                sock = eventlet.listen(
                    (ip_address, port),
                    family=socket.AF_INET6,
                    backlog=128
                )
            except socket.error as err:
                if err.errno != 99:
                    raise
                LOG.warning(
                    _LW('Could not create metadata proxy socket: %s'), err)
                LOG.warning(_LW('Sleeping %s before trying again'), i + 1)
                eventlet.sleep(i + 1)
            else:
                break
        else:
            raise RuntimeError(
                _('Could not establish metadata proxy socket on %s:%s') %
                (ip_address, port)
            )
        eventlet.wsgi.server(
            sock,
            app,
            custom_pool=self.pool,
            log=loggers.WritableLogger(LOG))
Example #48
File: worker.py Project: gocloudxyz/astara
    def _dispatch_command(self, target, message):
        if not self._should_process_command(message):
            return

        instructions = message.body
        if instructions['command'] == commands.WORKERS_DEBUG:
            self.report_status()

        # NOTE(adam_g): Drop 'router-debug' compat in M.
        elif (instructions['command'] == commands.RESOURCE_DEBUG or
              instructions['command'] == commands.ROUTER_DEBUG):

            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to debug resource with no id'))
                return
            reason = instructions.get('reason')
            if resource_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all resources with %r'),
                    resource_id)
            else:
                LOG.info(_LI('Placing resource %s in debug mode (reason: %s)'),
                         resource_id, reason)
                self.db_api.enable_resource_debug(resource_id, reason)

        elif (instructions['command'] == commands.RESOURCE_MANAGE or
              instructions['command'] == commands.ROUTER_MANAGE):
            resource_id = (instructions.get('resource_id') or
                           instructions.get('router_id'))
            if not resource_id:
                LOG.warning(_LW(
                    'Ignoring instruction to manage resource with no id'))
                return
            try:
                self.db_api.disable_resource_debug(resource_id)
                LOG.info(_LI('Resuming management of resource %s'),
                         resource_id)
            except KeyError:
                pass
            try:
                self._resource_locks[resource_id].release()
                LOG.info(_LI('Unlocked resource %s'), resource_id)
            except KeyError:
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass

        elif instructions['command'] in EVENT_COMMANDS:
            resource_id = instructions.get('resource_id')
            sm = self._find_state_machine_by_resource_id(resource_id)
            if not sm:
                LOG.debug(
                    'Will not process command, no managed state machine '
                    'found for resource %s', resource_id)
                return
            new_res = event.Resource(
                id=resource_id,
                driver=sm.driver.RESOURCE_NAME,
                tenant_id=sm.tenant_id)
            new_msg = event.Event(
                resource=new_res,
                crud=EVENT_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_res)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_res)

        # NOTE(adam_g): This is here to support the deprecated old format of
        #               sending commands to specific routers and can be
        #               removed once the CLI component is dropped in M.
        elif instructions['command'] in DEPRECATED_ROUTER_COMMANDS:
            new_rsc = event.Resource(
                driver=drivers.router.Router.RESOURCE_NAME,
                id=message.body.get('router_id'),
                tenant_id=message.body.get('tenant_id'),
            )
            new_msg = event.Event(
                resource=new_rsc,
                crud=DEPRECATED_ROUTER_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_rsc)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'),
                     instructions['command'], new_rsc)

        elif instructions['command'] == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id in commands.WILDCARDS:
                LOG.warning(_LW(
                    'Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            else:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)

        elif instructions['command'] == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass

        elif instructions['command'] == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info('Enabling global debug mode (reason: %s)', reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info('Disabling global debug mode')
                self.db_api.disable_global_debug()
            else:
                LOG.warning('Unrecognized global debug command: %s',
                            instructions)
        elif instructions['command'] == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                cfg.CONF.log_opt_values(LOG, INFO)

        else:
            LOG.warning(_LW('Unrecognized command: %s'), instructions)
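
_dispatch_command() routes each command string through a long elif chain. Where handlers share a signature, the same routing can be written as a dispatch table; a sketch under that assumption, with hypothetical command strings and handler bodies:

import logging

LOG = logging.getLogger(__name__)

def handle_workers_debug(instructions):
    print('reporting status')         # hypothetical handler body

def handle_config_reload(instructions):
    print('reloading configuration')  # hypothetical handler body

DISPATCH = {
    'workers-debug': handle_workers_debug,   # assumed command strings
    'config-reload': handle_config_reload,
}

def dispatch(instructions):
    handler = DISPATCH.get(instructions['command'])
    if handler is None:
        LOG.warning('Unrecognized command: %s', instructions)
        return
    handler(instructions)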
Example #49
    def _ensure_local_port(self, network_id, subnet_id, prefix, network_type):
        driver = importutils.import_object(self.conf.interface_driver,
                                           self.conf)

        host_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, socket.gethostname()))

        name = 'ASTARA:RUG:%s' % network_type.upper()

        query_dict = dict(device_owner=DEVICE_OWNER_RUG,
                          device_id=host_id,
                          name=name,
                          network_id=network_id)

        ports = self.api_client.list_ports(**query_dict)['ports']

        if not ports and self.conf.legacy_fallback_mode:
            LOG.info(_LI('Attempting legacy query for %s.'), name)
            query_dict.update({
                'name':
                name.replace('ASTARA', 'AKANDA'),
                'device_owner':
                DEVICE_OWNER_RUG.replace('astara', 'akanda')
            })
            ports = self.api_client.list_ports(**query_dict)['ports']

        if ports and 'AKANDA' in ports[0]['name']:
            port = Port.from_dict(ports[0])
            LOG.info(
                _LI('migrating port to ASTARA for port %r and using local %s'),
                port, network_type)
            self.api_client.update_port(
                port.id, {
                    'port': {
                        'name': port.name.replace('AKANDA', 'ASTARA'),
                        'device_owner': DEVICE_OWNER_RUG
                    }
                })
        elif ports:
            port = Port.from_dict(ports[0])
            LOG.info(_LI('already have local %s port, using %r'), network_type,
                     port)
        else:
            LOG.info(_LI('creating a new local %s port'), network_type)
            port_dict = {
                'admin_state_up': True,
                'network_id': network_id,
                'device_owner': DEVICE_OWNER_ROUTER_INT,  # lying here for IP
                'name': name,
                'device_id': host_id,
                'fixed_ips': [{
                    'subnet_id': subnet_id
                }],
                'binding:host_id': socket.gethostname()
            }
            port = Port.from_dict(
                self.api_client.create_port(dict(port=port_dict))['port'])

            # remove the lie that let us pick an IP on the SLAAC subnet
            self.api_client.update_port(
                port.id, {'port': {
                    'device_owner': DEVICE_OWNER_RUG
                }})
            port.device_owner = DEVICE_OWNER_RUG

            LOG.info(_LI('new local %s port: %r'), network_type, port)

        # create the tap interface if it doesn't already exist
        if not ip_lib.device_exists(driver.get_device_name(port)):
            driver.plug(port.network_id, port.id, driver.get_device_name(port),
                        port.mac_address)

            # brief sleep to ensure the port is set up before use
            time.sleep(1)

        try:
            fixed_ip = [
                fip for fip in port.fixed_ips if fip.subnet_id == subnet_id
            ][0]
        except IndexError:
            raise MissingIPAllocation(port.id)

        ip_cidr = '%s/%s' % (fixed_ip.ip_address, prefix.split('/')[1])
        driver.init_l3(driver.get_device_name(port), [ip_cidr])
        return ip_cidr
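
A minimal caller sketch for the helper above; only the argument order (network_id, subnet_id, prefix, network_type) comes from _ensure_local_port, while the management_* option names and the 'mgt' label are assumptions about how the ensure_local_service_port() seen in the main() examples below might be built on top of it.

    def ensure_local_service_port(self):
        # Hypothetical wrapper; the management_* option names and the 'mgt'
        # network_type label are assumptions, not confirmed by the code above.
        # Returns the CIDR assigned to the local tap device.
        return self._ensure_local_port(
            self.conf.management_network_id,
            self.conf.management_subnet_id,
            self.conf.management_prefix,
            'mgt',
        )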
Example #50
0
File: worker.py Project: gocloudxyz/astara
    def _thread_target(self):
        """This method runs in each worker thread.
        """
        my_id = threading.current_thread().name
        LOG.debug('starting thread')
        # Use a separate context from the one we use when receiving
        # messages and talking to the tenant router manager because we
        # are in a different thread and the clients are not
        # thread-safe.
        context = WorkerContext(self.management_address)
        while self._keep_going:
            try:
                # Try to get a state machine from the work queue. If
                # there's nothing to do, we will block for a while.
                self._thread_status[my_id] = 'waiting for task'
                sm = self.work_queue.get(timeout=10)
            except Queue.Empty:
                continue
            if sm is None:
                LOG.info(_LI('received stop message'))
                break

            # Make sure we didn't already have some updates under way
            # for a router we've been told to ignore for debug mode.
            should_ignore, reason = \
                self.db_api.resource_in_debug(sm.resource_id)
            if should_ignore:
                LOG.debug('Skipping update of resource %s in debug mode. '
                          '(reason: %s)', sm.resource_id, reason)
                continue

            # In the event that a rebalance took place while processing an
            # event, it may have been put back into the work queue. Check
            # the hash table once more to find out if we still manage it
            # and do some cleanup if not.
            if cfg.CONF.coordination.enabled:
                target_hosts = self.hash_ring_mgr.ring.get_hosts(
                    sm.resource_id)
                if self.host not in target_hosts:
                    LOG.debug('Skipping update of router %s, it no longer '
                              'maps here.', sm.resource_id)
                    trm = self.tenant_managers[sm.tenant_id]
                    trm.unmanage_resource(sm.resource_id)
                    self.work_queue.task_done()
                    with self.lock:
                        self._release_resource_lock(sm)
                    continue

            # FIXME(dhellmann): Need to look at the router to see if
            # it belongs to a tenant which is in debug mode, but we
            # don't have that data in the sm, yet.
            LOG.debug('performing work on %s for tenant %s',
                      sm.resource_id, sm.tenant_id)
            try:
                self._thread_status[my_id] = 'updating %s' % sm.resource_id
                sm.update(context)
            except Exception:
                LOG.exception(_LE('could not complete update for %s'),
                              sm.resource_id)
            finally:
                self._thread_status[my_id] = (
                    'finalizing task for %s' % sm.resource_id
                )
                self.work_queue.task_done()
                with self.lock:
                    # Release the lock that prevents us from adding
                    # the state machine back into the queue. If we
                    # find more work, we will re-acquire it. If we do
                    # not find more work, we hold the primary work
                    # queue lock so the main thread cannot put the
                    # state machine back into the queue until we
                    # release that lock.
                    self._release_resource_lock(sm)
                    # The state machine has indicated that it is done
                    # by returning. If there is more work for it to
                    # do, reschedule it by placing it at the end of
                    # the queue.
                    if sm.has_more_work():
                        LOG.debug('%s has more work, returning to work queue',
                                  sm.resource_id)
                        self._add_resource_to_work_queue(sm)
                    else:
                        LOG.debug('%s has no more work', sm.resource_id)
        # Return the context object so tests can look at it
        self._thread_status[my_id] = 'exiting'
        return context
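
The sm is None branch above is the shutdown path: the owning worker pushes a None sentinel onto the work queue so each thread logs 'received stop message' and exits. A minimal sketch of that stop side, assuming a threads list alongside the work_queue and _keep_going attributes used above:

    def _stop_threads(self):
        # Sketch only; the threads attribute and the join timeout are
        # assumptions. One None sentinel per _thread_target() loop.
        self._keep_going = False
        for _ in self.threads:
            self.work_queue.put(None)
        for t in self.threads:
            t.join(timeout=30)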
Example #51
0
def main(argv=sys.argv[1:]):
    """Main Entry point into the astara-orchestrator

    This is the main entry point into the astara-orchestrator. On invocation,
    logging and local network connectivity are set up using the 'ak-config'
    file passed as an argument to this method. Worker threads are then spawned
    to handle the various tasks involved in processing and responding to
    Neutron events before the notification dispatch loop starts.

    :param argv: list of Command line arguments

    :returns: None

    :raises: None

    """
    # TODO(rama) Error Handling to be added as part of the docstring
    # description

    # Change the process and thread name so the logs are cleaner.
    p = multiprocessing.current_process()
    p.name = 'pmain'
    t = threading.current_thread()
    t.name = 'tmain'
    ak_cfg.parse_config(argv)
    log.setup(cfg.CONF, 'astara-orchestrator')
    cfg.CONF.log_opt_values(LOG, logging.INFO)

    neutron = neutron_api.Neutron(cfg.CONF)

    # TODO(mark): develop better way restore after machine reboot
    # neutron.purge_management_interface()

    # bring the mgt tap interface up
    mgt_ip_address = neutron.ensure_local_service_port().split('/')[0]

    # Set up the queue to move messages between the eventlet-based
    # listening process and the scheduler.
    notification_queue = multiprocessing.Queue()

    # Ignore signals that might interrupt processing.
    daemon.ignore_signals()

    # If we see a SIGINT, stop processing.
    def _stop_processing(*args):
        notification_queue.put((None, None))
    signal.signal(signal.SIGINT, _stop_processing)

    # Listen for notifications.
    notification_proc = multiprocessing.Process(
        target=notifications.listen,
        kwargs={
            'notification_queue': notification_queue
        },
        name='notification-listener',
    )
    notification_proc.start()

    if CONF.coordination.enabled:
        coordinator_proc = multiprocessing.Process(
            target=coordination.start,
            kwargs={
                'notification_queue': notification_queue
            },
            name='coordinator',
        )
        coordinator_proc.start()
    else:
        coordinator_proc = None

    metadata_proc = multiprocessing.Process(
        target=metadata.serve,
        args=(mgt_ip_address,),
        name='metadata-proxy'
    )
    metadata_proc.start()

    from astara.api import rug as rug_api
    rug_api_proc = multiprocessing.Process(
        target=rug_api.serve,
        name='rug-api'
    )
    rug_api_proc.start()

    # Set up the notifications publisher
    Publisher = (notifications.Publisher if cfg.CONF.ceilometer.enabled
                 else notifications.NoopPublisher)
    publisher = Publisher(
        topic=cfg.CONF.ceilometer.topic,
    )

    # Set up a factory to make Workers that know how many threads to
    # run.
    worker_factory = functools.partial(
        worker.Worker,
        notifier=publisher,
        management_address=mgt_ip_address,
    )

    # Set up the scheduler that knows how to manage the routers and
    # dispatch messages.
    sched = scheduler.Scheduler(
        worker_factory=worker_factory,
    )

    # Prepopulate the workers with existing routers on startup
    populate.pre_populate_workers(sched)

    # Set up the periodic health check
    health.start_inspector(cfg.CONF.health_check_period, sched)

    # Block the main process, copying messages from the notification
    # listener to the scheduler
    try:
        shuffle_notifications(notification_queue, sched)
    finally:
        LOG.info(_LI('Stopping scheduler.'))
        sched.stop()
        LOG.info(_LI('Stopping notification publisher.'))
        publisher.stop()

        # Terminate the subprocesses
        for subproc in [notification_proc, coordinator_proc, metadata_proc,
                        rug_api_proc]:
            if not subproc:
                continue
            LOG.info(_LI('Stopping %s.'), subproc.name)
            subproc.terminate()
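
shuffle_notifications() is referenced but not shown in this example; a hedged sketch of what such a loop could look like, assuming the scheduler exposes a handle_message(target, message) method and treating (None, None) as the sentinel put on the queue by the SIGINT handler above:

def shuffle_notifications(notification_queue, sched):
    # Sketch only; the scheduler method name is an assumption. The sentinel
    # handling mirrors the _stop_processing() handler defined in main().
    while True:
        target, message = notification_queue.get()
        if target is None:
            break
        sched.handle_message(target, message)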
Example #52
0
    def update_state(self, worker_context, silent=False):
        """Updates state of the instance and, by extension, its logical resource

        :param worker_context:
        :param silent:
        :returns: state
        """
        if self.driver.get_state(worker_context) == states.GONE:
            self.log.debug('%s driver reported its state is %s',
                           self.driver.RESOURCE_NAME, states.GONE)
            self.state = states.GONE
            return self.state

        if self.instance_info is None:
            self.log.info(_LI('no backing instance, marking as %s'),
                          states.DOWN)
            self.state = states.DOWN
            return self.state

        addr = self.instance_info.management_address
        if not addr:
            self.log.debug('waiting for instance ports to be attached')
            self.state = states.BOOTING
            return self.state

        for i in six.moves.range(cfg.CONF.max_retries):
            if self.driver.is_alive(self.instance_info.management_address):
                if self.state != states.CONFIGURED:
                    self.state = states.UP
                break
            if not silent:
                self.log.debug('Alive check failed. Attempt %d of %d',
                               i + 1,
                               cfg.CONF.max_retries)
            time.sleep(cfg.CONF.retry_delay)
        else:
            old_state = self.state
            self._check_boot_timeout()

            # If the instance isn't responding, make sure Nova knows about it
            instance = worker_context.nova_client.get_instance_for_obj(self.id)
            if instance is None and self.state != states.ERROR:
                self.log.info('No instance was found; rebooting')
                self.state = states.DOWN
                self.instance_info = None

            # update_state() is called from Alive() to check the
            # status of the router. If we can't talk to the API at
            # that point, the router should be considered missing and
            # we should reboot it, so mark it states.DOWN if we think it was
            # configured before.
            if old_state == states.CONFIGURED and self.state != states.ERROR:
                self.log.debug('Instance not alive, marking it as %s',
                               states.DOWN)
                self.state = states.DOWN

        # After the instance is all the way up, record how long it took
        # to boot and accept a configuration.
        self.instance_info = (
            worker_context.nova_client.update_instance_info(
                self.instance_info))

        if not self.instance_info.booting and self.state == states.CONFIGURED:
            # If we didn't boot the server (because we were restarted
            # while it remained running, for example), we won't have a
            # duration to log.
            if not self._boot_logged:
                boot_time = self.instance_info.time_since_boot.total_seconds()
                self.log.info('%s booted in %s seconds after %s attempts',
                              self.driver.RESOURCE_NAME, boot_time,
                              self._boot_counter.count)
                self._boot_logged = True

            # Always reset the boot counter, even if we didn't boot
            # the server ourself, so we don't accidentally think we
            # have an erroring router.
            self._boot_counter.reset()
        return self.state
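
The alive check above relies on Python's for/else: the else branch runs only when the loop exhausts max_retries without hitting break, i.e. when every probe failed. A standalone sketch of the same idiom, with probe, max_retries and retry_delay standing in for driver.is_alive() and the config options:

import time

def wait_until_alive(probe, max_retries=3, retry_delay=1):
    # Mirrors the retry loop in update_state(): break on the first success,
    # fall through to the else clause only if every attempt failed.
    alive = False
    for attempt in range(max_retries):
        if probe():
            alive = True
            break
        time.sleep(retry_delay)
    else:
        # Reached only when the loop completed without break -- the path
        # that marks the resource DOWN above.
        alive = False
    return alive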
Example #53
0
def main(argv=sys.argv[1:]):
    """Main Entry point into the astara-orchestrator

    This is the main entry point into the astara-orchestrator. On invocation,
    logging and local network connectivity are set up using the 'ak-config'
    file passed as an argument to this method. Worker threads are then spawned
    to handle the various tasks involved in processing and responding to
    Neutron events before the notification dispatch loop starts.

    :param argv: list of Command line arguments

    :returns: None

    :raises: None

    """
    # TODO(rama) Error Handling to be added as part of the docstring
    # description

    # Change the process and thread name so the logs are cleaner.
    p = multiprocessing.current_process()
    p.name = 'pmain'
    t = threading.current_thread()
    t.name = 'tmain'
    ak_cfg.parse_config(argv)
    log.setup(cfg.CONF, 'astara-orchestrator')
    cfg.CONF.log_opt_values(LOG, logging.INFO)

    neutron = neutron_api.Neutron(cfg.CONF)

    # TODO(mark): develop better way restore after machine reboot
    # neutron.purge_management_interface()

    # bring the mgt tap interface up
    mgt_ip_address = neutron.ensure_local_service_port().split('/')[0]

    # Set up the queue to move messages between the eventlet-based
    # listening process and the scheduler.
    notification_queue = multiprocessing.Queue()

    # Ignore signals that might interrupt processing.
    daemon.ignore_signals()

    # If we see a SIGINT, stop processing.
    def _stop_processing(*args):
        notification_queue.put((None, None))

    signal.signal(signal.SIGINT, _stop_processing)

    # Listen for notifications.
    notification_proc = multiprocessing.Process(
        target=notifications.listen,
        kwargs={'notification_queue': notification_queue},
        name='notification-listener',
    )
    notification_proc.start()

    if CONF.coordination.enabled:
        coordinator_proc = multiprocessing.Process(
            target=coordination.start,
            kwargs={'notification_queue': notification_queue},
            name='coordinator',
        )
        coordinator_proc.start()
    else:
        coordinator_proc = None

    metadata_proc = multiprocessing.Process(target=metadata.serve,
                                            args=(mgt_ip_address, ),
                                            name='metadata-proxy')
    metadata_proc.start()

    from astara.api import rug as rug_api
    rug_api_proc = multiprocessing.Process(target=rug_api.serve,
                                           name='rug-api')
    rug_api_proc.start()

    # Set up the notifications publisher
    Publisher = (notifications.Publisher if cfg.CONF.ceilometer.enabled else
                 notifications.NoopPublisher)
    publisher = Publisher(topic=cfg.CONF.ceilometer.topic)

    # Set up a factory to make Workers that know how many threads to
    # run.
    worker_factory = functools.partial(
        worker.Worker,
        notifier=publisher,
        management_address=mgt_ip_address,
    )

    # Set up the scheduler that knows how to manage the routers and
    # dispatch messages.
    sched = scheduler.Scheduler(worker_factory=worker_factory)

    # Prepopulate the workers with existing routers on startup
    populate.pre_populate_workers(sched)

    # Set up the periodic health check
    health.start_inspector(cfg.CONF.health_check_period, sched)

    # Block the main process, copying messages from the notification
    # listener to the scheduler
    try:
        shuffle_notifications(notification_queue, sched)
    finally:
        LOG.info(_LI('Stopping scheduler.'))
        sched.stop()
        LOG.info(_LI('Stopping notification publisher.'))
        publisher.stop()

        # Terminate the subprocesses
        for subproc in [
                notification_proc, coordinator_proc, metadata_proc,
                rug_api_proc
        ]:
            if not subproc:
                continue
            LOG.info(_LI('Stopping %s.'), subproc.name)
            subproc.terminate()
Example #54
0
    def _thread_target(self):
        """This method runs in each worker thread.
        """
        my_id = threading.current_thread().name
        LOG.debug('starting thread')
        # Use a separate context from the one we use when receiving
        # messages and talking to the tenant router manager because we
        # are in a different thread and the clients are not
        # thread-safe.
        context = WorkerContext(self.management_address)
        while self._keep_going:
            try:
                # Try to get a state machine from the work queue. If
                # there's nothing to do, we will block for a while.
                self._thread_status[my_id] = 'waiting for task'
                sm = self.work_queue.get(timeout=10)
            except Queue.Empty:
                continue
            if sm is None:
                LOG.info(_LI('received stop message'))
                break

            # Make sure we didn't already have some updates under way
            # for a router we've been told to ignore for debug mode.
            should_ignore, reason = \
                self.db_api.resource_in_debug(sm.resource_id)
            if should_ignore:
                LOG.debug(
                    'Skipping update of resource %s in debug mode. '
                    '(reason: %s)', sm.resource_id, reason)
                continue

            # In the event that a rebalance took place while processing an
            # event, it may have been put back into the work queue. Check
            # the hash table once more to find out if we still manage it
            # and do some cleanup if not.
            if cfg.CONF.coordination.enabled:
                target_hosts = self.hash_ring_mgr.ring.get_hosts(
                    sm.resource_id)
                if self.host not in target_hosts:
                    LOG.debug(
                        'Skipping update of router %s, it no longer '
                        'maps here.', sm.resource_id)
                    trm = self.tenant_managers[sm.tenant_id]
                    trm.unmanage_resource(sm.resource_id)
                    self.work_queue.task_done()
                    with self.lock:
                        self._release_resource_lock(sm)
                    continue

            # FIXME(dhellmann): Need to look at the router to see if
            # it belongs to a tenant which is in debug mode, but we
            # don't have that data in the sm, yet.
            LOG.debug('performing work on %s for tenant %s', sm.resource_id,
                      sm.tenant_id)
            try:
                self._thread_status[my_id] = 'updating %s' % sm.resource_id
                sm.update(context)
            except Exception:
                LOG.exception(_LE('could not complete update for %s'),
                              sm.resource_id)
            finally:
                self._thread_status[my_id] = ('finalizing task for %s' %
                                              sm.resource_id)
                self.work_queue.task_done()
                with self.lock:
                    # Release the lock that prevents us from adding
                    # the state machine back into the queue. If we
                    # find more work, we will re-acquire it. If we do
                    # not find more work, we hold the primary work
                    # queue lock so the main thread cannot put the
                    # state machine back into the queue until we
                    # release that lock.
                    self._release_resource_lock(sm)
                    # The state machine has indicated that it is done
                    # by returning. If there is more work for it to
                    # do, reschedule it by placing it at the end of
                    # the queue.
                    if sm.has_more_work():
                        LOG.debug('%s has more work, returning to work queue',
                                  sm.resource_id)
                        self._add_resource_to_work_queue(sm)
                    else:
                        LOG.debug('%s has no more work', sm.resource_id)
        # Return the context object so tests can look at it
        self._thread_status[my_id] = 'exiting'
        return context
Example #55
0
    def update_state(self, worker_context, silent=False):
        """Updates state of the instance and, by extension, its logical resource

        :param worker_context:
        :param silent:
        :returns: state
        """
        if self.resource.get_state(worker_context) == states.GONE:
            self.log.debug('%s driver reported its state is %s',
                           self.resource.RESOURCE_NAME, states.GONE)
            self.state = states.GONE
            return self.state

        if not self.instances:
            self.log.info(_LI('no backing instance(s), marking as %s'),
                          states.DOWN)
            self.state = states.DOWN
            return self.state
        elif self.instances.cluster_degraded is True:
            self.log.info(_LI(
                'instance cluster for resource %s reports degraded'),
                self.resource.id)
            self.state = states.DEGRADED
            return self.state

        has_ports, no_ports = self.instances.validate_ports()

        # an empty has_ports result means no instance has ports attached yet
        if not has_ports:
            self.log.debug('waiting for instance ports to be attached')
            self.state = states.BOOTING
            return self.state

        # XXX TODO need to account for when only a subset of the cluster have
        # correct ports, kick back to Replug

        alive, dead = self.instances.are_alive()
        if not alive:
            # alive check failed on all instances for an already configured
            # resource, mark it down.
            # XXX need to track timeouts per instance
            # self._check_boot_timeout()

            if self.state == states.CONFIGURED:
                self.log.debug('No instance(s) alive, marking it as %s',
                               states.DOWN)
                self.state = states.DOWN
                return self.state
        elif dead:
            # some subset of instances reported not alive, mark it degraded.
            if self.state == states.CONFIGURED:
                for i in dead:
                    instance = worker_context.nova_client.get_instance_by_id(
                        i.id_)
                    if instance is None and self.state != states.ERROR:
                        self.log.info(
                            'Instance %s was not found; rebooting', i.id_)
                    self.instances.delete(i)
            self.state = states.DEGRADED
            return self.state

        self.instances.refresh(worker_context)
        if self.state == states.CONFIGURED:
            for i in alive:
                if not i.booting and i not in self._boot_logged:
                    self.log.info(
                        '%s booted in %s seconds after %s attempts',
                        self.resource.RESOURCE_NAME,
                        i.time_since_boot.total_seconds(),
                        self._boot_counter.count)
                    self._boot_logged.append(i)
            self.reset_boot_counter()
        else:
            if alive:
                self.state = states.UP

        return self.state
Example #56
0
File: nova.py Project: Cloudxtreme/astara
    def __init__(self, client):
        super(PezInstanceProvider, self).__init__(client)
        self.rpc_client = pez_api.AstaraPezAPI(rpc_topic='astara-pez')
        LOG.debug(_LI('Initialized %s with rpc client %s'),
                  self.__class__.__name__, self.rpc_client)
Example #57
0
    def update_state(self, worker_context, silent=False):
        """Updates state of the instance and, by extension, its logical resource

        :param worker_context:
        :param silent:
        :returns: state
        """
        if self.resource.get_state(worker_context) == states.GONE:
            self.log.debug('%s driver reported its state is %s',
                           self.resource.RESOURCE_NAME, states.GONE)
            self.state = states.GONE
            return self.state

        if not self.instances:
            self.log.info(_LI('no backing instance(s), marking as %s'),
                          states.DOWN)
            self.state = states.DOWN
            return self.state
        elif self.instances.cluster_degraded is True:
            self.log.info(
                _LI('instance cluster for resource %s reports degraded'),
                self.resource.id)
            self.state = states.DEGRADED
            return self.state

        has_ports, no_ports = self.instances.validate_ports()

        # an empty has_ports result means no instance has ports attached yet
        if not has_ports:
            self.log.debug('waiting for instance ports to be attached')
            self.state = states.BOOTING
            return self.state

        # XXX TODO need to account for when only a subset of the cluster have
        # correct ports, kick back to Replug

        alive, dead = self.instances.are_alive()
        if not alive:
            # alive check failed on all instances for an already configured
            # resource, mark it down.
            # XXX need to track timeouts per instance
            # self._check_boot_timeout()

            if self.state == states.CONFIGURED:
                self.log.debug('No instance(s) alive, marking it as %s',
                               states.DOWN)
                self.state = states.DOWN
                return self.state
        elif dead:
            # some subset of instances reported not alive, mark it degraded.
            if self.state == states.CONFIGURED:
                for i in dead:
                    instance = worker_context.nova_client.get_instance_by_id(
                        i.id_)
                    if instance is None and self.state != states.ERROR:
                        self.log.info('Instance %s was not found; rebooting',
                                      i.id_)
                    self.instances.delete(i)
            self.state = states.DEGRADED
            return self.state

        self.instances.refresh(worker_context)
        if self.state == states.CONFIGURED:
            for i in alive:
                if not i.booting and i not in self._boot_logged:
                    self.log.info('%s booted in %s seconds after %s attempts',
                                  self.resource.RESOURCE_NAME,
                                  i.time_since_boot.total_seconds(),
                                  self._boot_counter.count)
                    self._boot_logged.append(i)
            self.reset_boot_counter()
        else:
            if alive:
                self.state = states.UP

        return self.state
Example #58
0
    def delete_instance(self, instance_uuid):
        LOG.info(_LI('Deleting instance %s.'), instance_uuid)
        self.ctxt.nova_client.client.servers.delete(instance_uuid)
        self._delete_counters[instance_uuid] = timeutils.utcnow()
Example #59
0
    def _dispatch_command(self, target, message):
        if not self._should_process_command(message):
            return

        instructions = message.body
        if instructions['command'] == commands.WORKERS_DEBUG:
            self.report_status()

        # NOTE(adam_g): Drop 'router-debug' compat in M.
        elif (instructions['command'] == commands.RESOURCE_DEBUG
              or instructions['command'] == commands.ROUTER_DEBUG):

            resource_id = (instructions.get('resource_id')
                           or instructions.get('router_id'))
            if not resource_id:
                LOG.warning(
                    _LW('Ignoring instruction to debug resource with no id'))
                return
            reason = instructions.get('reason')
            if resource_id in commands.WILDCARDS:
                LOG.warning(
                    _LW('Ignoring instruction to debug all resources with %r'),
                    resource_id)
            else:
                LOG.info(_LI('Placing resource %s in debug mode (reason: %s)'),
                         resource_id, reason)
                self.db_api.enable_resource_debug(resource_id, reason)

        elif (instructions['command'] == commands.RESOURCE_MANAGE
              or instructions['command'] == commands.ROUTER_MANAGE):
            resource_id = (instructions.get('resource_id')
                           or instructions.get('router_id'))
            if not resource_id:
                LOG.warning(
                    _LW('Ignoring instruction to manage resource with no id'))
                return
            try:
                self.db_api.disable_resource_debug(resource_id)
                LOG.info(_LI('Resuming management of resource %s'),
                         resource_id)
            except KeyError:
                pass
            try:
                self._resource_locks[resource_id].release()
                LOG.info(_LI('Unlocked resource %s'), resource_id)
            except KeyError:
                pass
            except threading.ThreadError:
                # Already unlocked, that's OK.
                pass

        elif instructions['command'] in EVENT_COMMANDS:
            resource_id = instructions.get('resource_id')
            sm = self._find_state_machine_by_resource_id(resource_id)
            if not sm:
                LOG.debug(
                    'Will not process command, no managed state machine '
                    'found for resource %s', resource_id)
                return
            new_res = event.Resource(id=resource_id,
                                     driver=sm.resource.RESOURCE_NAME,
                                     tenant_id=sm.tenant_id)
            new_msg = event.Event(
                resource=new_res,
                crud=EVENT_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_res)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'), instructions['command'],
                     new_res)

        # NOTE(adam_g): This is here to support the deprecated old format of
        #               sending commands to specific routers and can be
        #               removed once the CLI component is dropped in M.
        elif instructions['command'] in DEPRECATED_ROUTER_COMMANDS:
            new_rsc = event.Resource(
                driver=drivers.router.Router.RESOURCE_NAME,
                id=message.body.get('router_id'),
                tenant_id=message.body.get('tenant_id'),
            )
            new_msg = event.Event(
                resource=new_rsc,
                crud=DEPRECATED_ROUTER_COMMANDS[instructions['command']],
                body=instructions,
            )
            # Use handle_message() to ensure we acquire the lock
            LOG.info(_LI('sending %s instruction to %s'),
                     instructions['command'], new_rsc)
            self.handle_message(new_msg.resource.tenant_id, new_msg)
            LOG.info(_LI('forced %s for %s complete'), instructions['command'],
                     new_rsc)

        elif instructions['command'] == commands.TENANT_DEBUG:
            tenant_id = instructions['tenant_id']
            reason = instructions.get('reason')
            if tenant_id in commands.WILDCARDS:
                LOG.warning(
                    _LW('Ignoring instruction to debug all tenants with %r'),
                    tenant_id)
            else:
                LOG.info(_LI('Placing tenant %s in debug mode (reason: %s)'),
                         tenant_id, reason)
                self.db_api.enable_tenant_debug(tenant_id, reason)

        elif instructions['command'] == commands.TENANT_MANAGE:
            tenant_id = instructions['tenant_id']
            try:
                self.db_api.disable_tenant_debug(tenant_id)
                LOG.info(_LI('Resuming management of tenant %s'), tenant_id)
            except KeyError:
                pass

        elif instructions['command'] == commands.GLOBAL_DEBUG:
            enable = instructions.get('enabled')
            reason = instructions.get('reason')
            if enable == 1:
                LOG.info('Enabling global debug mode (reason: %s)', reason)
                self.db_api.enable_global_debug(reason)
            elif enable == 0:
                LOG.info('Disabling global debug mode')
                self.db_api.disable_global_debug()
            else:
                LOG.warning('Unrecognized global debug command: %s',
                            instructions)
        elif instructions['command'] == commands.CONFIG_RELOAD:
            try:
                cfg.CONF()
            except Exception:
                LOG.exception(_LE('Could not reload configuration'))
            else:
                cfg.CONF.log_opt_values(LOG, INFO)

        else:
            LOG.warning(_LW('Unrecognized command: %s'), instructions)
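
For reference, a sketch of the kind of message body _dispatch_command expects; only the 'command', 'tenant_id' and 'reason' keys come from the dict lookups above, and the concrete values are placeholders.

# Hypothetical TENANT_DEBUG payload, as it would appear in message.body.
debug_instructions = {
    'command': commands.TENANT_DEBUG,
    'tenant_id': 'abc123',                      # placeholder tenant id
    'reason': 'investigating a stuck router',   # stored via enable_tenant_debug()
}
# Dispatch would log 'Placing tenant abc123 in debug mode' and call
# self.db_api.enable_tenant_debug('abc123', 'investigating a stuck router').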