Ejemplo n.º 1
0
    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Builds a WatcherService that consumes from the "watcher" queue.

        The config_file argument is forwarded unchanged to the parent
        constructor.  If it is omitted, the default location
        (/etc/commissaire/watcher.conf) is expected to be tried --
        presumably by the parent class; that lookup is not done here.

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        # Remember the most recently seen address (used for backoff).
        self.last_address = None

        # Single, non-exclusive queue bound to the watcher routing key.
        watcher_queue = dict(
            name='watcher',
            exclusive=False,
            routing_key='jobs.watcher')

        super().__init__(
            exchange_name, connection_url, [watcher_queue],
            config_file=config_file)

        self.storage = StorageClient(self)
Ejemplo n.º 2
0
    def test_get_many(self):
        """
        Verify StorageClient.get_many for empty, mixed-type and
        single-type input lists.
        """
        client = StorageClient(mock.MagicMock())
        bus = client.bus_mixin
        bus.logger = mock.MagicMock()

        # An empty list yields an empty list and no bus request.
        self.assertEqual(client.get_many([]), [])
        bus.request.assert_not_called()

        # Mixing model types raises TypeError before any bus request.
        with self.assertRaises(TypeError):
            client.get_many([MINI_HOST, MINI_CLUSTER])
        bus.request.assert_not_called()

        bus.request.return_value = {
            'jsonrpc': '2.0',
            'id': ID,
            'result': [FULL_HOST_DICT, FULL_HOST_DICT]
        }
        requested = [MINI_HOST, MINI_HOST]
        received = client.get_many(requested)

        expected_params = {
            'model_type_name': MINI_HOST.__class__.__name__,
            'model_json_data': [model.to_dict() for model in requested]
        }
        bus.request.assert_called_once_with(
            'storage.get', params=expected_params)
        self.assertEqual(
            [model.to_dict_safe() for model in received],
            [FULL_HOST_DICT, FULL_HOST_DICT])
Ejemplo n.º 3
0
 def test_delete(self):
     """
     Verify StorageClient.delete sends a 'storage.delete' request
     describing the given valid model.
     """
     client = StorageClient(mock.MagicMock())
     client.bus_mixin.logger = mock.MagicMock()

     client.delete(MINI_HOST)

     expected_params = {
         'model_type_name': MINI_HOST.__class__.__name__,
         'model_json_data': MINI_HOST.to_dict()
     }
     client.bus_mixin.request.assert_called_once_with(
         'storage.delete', params=expected_params)
Ejemplo n.º 4
0
    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Builds a ClusterExecService listening on 'jobs.clusterexec.*'.

        After the parent constructor runs and the storage client is
        attached, read_config_file() is invoked with config_file and the
        default location (/etc/commissaire/clusterexec.conf); its return
        value is not used here.

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        clusterexec_queue = {'routing_key': 'jobs.clusterexec.*'}
        super().__init__(exchange_name, connection_url, [clusterexec_queue])
        self.storage = StorageClient(self)

        # Apply any logging configuration for this service.
        default_path = '/etc/commissaire/clusterexec.conf'
        read_config_file(config_file, default_path)
Ejemplo n.º 5
0
 def test_list(self):
     """
     Verify StorageClient.list turns the bus response into a Hosts
     model containing valid Host models.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.return_value = {
         'jsonrpc': '2.0',
         'id': ID,
         'result': [FULL_HOST_DICT]
     }

     listing = client.list(Hosts)

     bus.request.assert_called_once_with(
         'storage.list', params={'model_type_name': 'Hosts'})
     self.assertIsInstance(listing, Hosts)
     self.assertEqual(len(listing.hosts), 1)
     only_host = listing.hosts[0]
     self.assertIsInstance(only_host, Host)
     self.assertEqual(only_host.to_dict_safe(), FULL_HOST_DICT)
Ejemplo n.º 6
0
 def test_get(self):
     """
     Verify StorageClient.get returns a Host built from the bus
     response when given a valid model.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.return_value = {
         'jsonrpc': '2.0',
         'id': ID,
         'result': FULL_HOST_DICT
     }

     fetched = client.get(MINI_HOST)

     expected_params = {
         'model_type_name': MINI_HOST.__class__.__name__,
         'model_json_data': MINI_HOST.to_dict()
     }
     bus.request.assert_called_once_with(
         'storage.get', params=expected_params)
     self.assertIsInstance(fetched, Host)
     self.assertEqual(fetched.to_dict_safe(), FULL_HOST_DICT)
Ejemplo n.º 7
0
 def test_list_rpc_error(self):
     """
     Verify a RemoteProcedureCallError raised by the bus request
     propagates out of StorageClient.list.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.side_effect = RemoteProcedureCallError('test')

     with self.assertRaises(RemoteProcedureCallError):
         client.list(Hosts)

     bus.request.assert_called_once_with(
         'storage.list', params={'model_type_name': 'Hosts'})
Ejemplo n.º 8
0
    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Builds an InvestigatorService listening on 'jobs.investigate'.

        The config_file argument is forwarded unchanged to the parent
        constructor.  If it is omitted, the default location
        (/etc/commissaire/investigator.conf) is expected to be tried --
        presumably by the parent class; that lookup is not done here.

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        investigate_queue = {'routing_key': 'jobs.investigate'}

        super().__init__(
            exchange_name, connection_url, [investigate_queue],
            config_file=config_file)

        self.storage = StorageClient(self)
Ejemplo n.º 9
0
 def test_save_many_invalid(self):
     """
     Verify StorageClient.save_many raises ValidationError for an
     invalid model and makes no bus request.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.side_effect = ValidationError('test')

     # A host whose address has been cleared is the invalid input.
     invalid_host = Host.new(**FULL_HOST_DICT)
     invalid_host.address = None

     with self.assertRaises(ValidationError):
         client.save_many([invalid_host])
     bus.request.assert_not_called()
Ejemplo n.º 10
0
    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Builds a ContainerManagerService bound to the "containermgr"
        queue with routing key 'container.*'.

        After the parent constructor runs, a storage client and an empty
        manager registry are attached, then read_config_file() is invoked
        with config_file and the default location
        (/etc/commissaire/containermgr.conf); its return value is not
        used here.

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        containermgr_queue = dict(
            name='containermgr',
            routing_key='container.*',
            exclusive=False)
        super().__init__(exchange_name, connection_url, [containermgr_queue])

        self.storage = StorageClient(self)
        # Starts empty; no managers are created in the constructor.
        self.managers = {}

        # Apply any logging configuration for this service.
        default_path = '/etc/commissaire/containermgr.conf'
        read_config_file(config_file, default_path)
Ejemplo n.º 11
0
 def test_get_rpc_error(self):
     """
     Verify a RemoteProcedureCallError raised by the bus request
     propagates out of StorageClient.get.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.side_effect = RemoteProcedureCallError('test')

     with self.assertRaises(RemoteProcedureCallError):
         client.get(MINI_HOST)

     expected_params = {
         'model_type_name': MINI_HOST.__class__.__name__,
         'model_json_data': MINI_HOST.to_dict()
     }
     bus.request.assert_called_once_with(
         'storage.get', params=expected_params)
Ejemplo n.º 12
0
 def test_list_invalid(self):
     """
     Verify StorageClient.list raises ValidationError when the response
     contains an invalid list element.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()

     # The only returned record carries a null address.
     invalid_record = {'address': None}
     bus.request.return_value = {
         'jsonrpc': '2.0',
         'id': ID,
         'result': [invalid_record]
     }

     with self.assertRaises(ValidationError):
         client.list(Hosts)
     bus.request.assert_called_once_with(
         'storage.list', params={'model_type_name': 'Hosts'})
Ejemplo n.º 13
0
 def test_save_many_rpc_error(self):
     """
     Verify a RemoteProcedureCallError raised by the bus request
     propagates out of StorageClient.save_many.
     """
     client = StorageClient(mock.MagicMock())
     bus = client.bus_mixin
     bus.logger = mock.MagicMock()
     bus.request.side_effect = RemoteProcedureCallError('test')

     with self.assertRaises(RemoteProcedureCallError):
         client.save_many([FULL_HOST])

     expected_params = {
         'model_type_name': FULL_HOST.__class__.__name__,
         'model_json_data': [FULL_HOST.to_dict()]
     }
     bus.request.assert_called_once_with(
         'storage.save', params=expected_params)
Ejemplo n.º 14
0
    def __init__(self, exchange_name, connection_url, qkwargs):
        """
        Records the bus settings and prepares an unconnected Bus instance.

        No connection is opened here: the connection, channel and
        exchange attributes are all initialised to None.

        :param exchange_name: Name of the topic exchange.
        :type exchange_name: str
        :param connection_url: Kombu connection url.
        :type connection_url: str
        :param qkwargs: One or more dicts keyword arguments for queue creation
        :type qkwargs: list
        """
        bus_logger = logging.getLogger('Bus')
        self.logger = bus_logger
        bus_logger.debug('Initializing bus connection')

        # Connection state is unset at construction time.
        self.connection = self._channel = self._exchange = None

        # Keep the caller-supplied settings on the instance.
        self.exchange_name, self.connection_url, self.qkwargs = (
            exchange_name, connection_url, qkwargs)

        self.storage = StorageClient(self)
Ejemplo n.º 15
0
    def test_delete_many(self):
        """
        Verify StorageClient.delete_many for empty, mixed-type and
        single-type input lists.
        """
        client = StorageClient(mock.MagicMock())
        bus = client.bus_mixin
        bus.logger = mock.MagicMock()

        # Nothing to delete means no bus request.
        client.delete_many([])
        bus.request.assert_not_called()

        # Mixing model types raises TypeError before any bus request.
        with self.assertRaises(TypeError):
            client.delete_many([MINI_HOST, MINI_CLUSTER])
        bus.request.assert_not_called()

        doomed = [MINI_HOST, MINI_HOST]
        client.delete_many(doomed)

        expected_params = {
            'model_type_name': MINI_HOST.__class__.__name__,
            'model_json_data': [model.to_dict() for model in doomed]
        }
        bus.request.assert_called_once_with(
            'storage.delete', params=expected_params)
Ejemplo n.º 16
0
    def test_register_callback(self):
        """
        Verify StorageClient.register_callback routing keys.

        Each distinct combination of model type and event must produce
        its own routing key in notify_callbacks, with '*' standing in for
        an omitted model type or event.  Callbacks registered under an
        already-known routing key must be collected under that key
        rather than creating a new entry.
        """
        storage = StorageClient(mock.MagicMock())

        # Verify entry per unique routing key.
        storage.register_callback(mock.MagicMock(), Host, NOTIFY_EVENT_CREATED)
        storage.register_callback(mock.MagicMock(), model_type=Host)
        storage.register_callback(mock.MagicMock(), event=NOTIFY_EVENT_CREATED)
        storage.register_callback(mock.MagicMock())
        self.assertIn('notify.storage.Host.created', storage.notify_callbacks)
        self.assertIn('notify.storage.Host.*', storage.notify_callbacks)
        self.assertIn('notify.storage.*.created', storage.notify_callbacks)
        self.assertIn('notify.storage.*.*', storage.notify_callbacks)
        # assertEqual is used because the assertEquals alias is
        # deprecated and no longer exists in Python 3.12+.
        self.assertEqual(len(storage.notify_callbacks), 4)

        # Verify callbacks with identical routing keys are queued.
        storage.register_callback(mock.MagicMock())
        self.assertEqual(len(storage.notify_callbacks), 4)
        callbacks = storage.notify_callbacks['notify.storage.*.*']
        self.assertEqual(len(callbacks), 2)
Ejemplo n.º 17
0
class ContainerManagerService(CommissaireService):
    """
    Provides access to Container Managers.

    The ``on_*`` methods forward node operations to a named container
    manager plugin instance.  The set of plugin instances is rebuilt
    from stored ContainerManagerConfig records before every operation.
    """

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new ContainerManagerService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/containermgr.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{
            'name': 'containermgr',
            'routing_key': 'container.*',
            'exclusive': False,
        }]
        super().__init__(exchange_name, connection_url, queue_kwargs)
        self.storage = StorageClient(self)
        # Maps container manager names to plugin instances; filled in by
        # refresh_managers().
        self.managers = {}

        # Apply any logging configuration for this service.
        read_config_file(config_file, '/etc/commissaire/containermgr.conf')

    def refresh_managers(self):
        """
        Fetches all ContainerManagerConfig records from the storage service,
        and instantiates the corresponding container manager plugins.

        This tries to reuse compatible container manager instances from
        previous calls to try and preserve any internal state.  Managers
        whose configuration record no longer exists are dropped, because
        self.managers is replaced wholesale at the end.

        :raises ConfigurationError: on an invalid ContainerManagerConfig
        """
        current_managers = {}
        container = self.storage.list(models.ContainerManagerConfigs)
        for config in container.container_managers:
            # This will raise ConfigurationError if the import fails.
            manager_type = import_plugin(
                config.type, 'commissaire.containermgr', ContainerManagerBase)
            manager = self.managers.pop(config.name, None)
            if isinstance(manager, manager_type):
                # If there's already a compatible manager, reuse it.
                # XXX Manager instances may not keep their option
                #     dictionary so we can't detect option changes.
                current_managers[config.name] = manager
            else:
                current_managers[config.name] = manager_type(config.options)
        self.managers = current_managers

    def on_node_registered(self, message, container_manager_name, address):
        """
        Checks if a node is registered to a specific container manager.
        Raises ContainerManagerError if the node is NOT registered.

        :param message: A message instance
        :type message: kombu.message.Message
        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :param address: Address of the node
        :type address: str
        :raises: commissaire.bus.ContainerManagerError
        """
        self._node_operation(
            container_manager_name, 'node_registered', address)

    def on_register_node(self, message, container_manager_name, address):
        """
        Registers a node to a container manager.

        :param message: A message instance
        :type message: kombu.message.Message
        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :param address: Address of the node
        :type address: str
        :raises: commissaire.bus.ContainerManagerError
        """
        self._node_operation(
            container_manager_name, 'register_node', address)

    def on_remove_node(self, message, container_manager_name, address):
        """
        Removes a node from a container manager.

        :param message: A message instance
        :type message: kombu.message.Message
        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :param address: Address of the node
        :type address: str
        :raises: commissaire.bus.ContainerManagerError
        """
        self._node_operation(
            container_manager_name, 'remove_node', address)

    def on_remove_all_nodes(self, message, container_manager_name):
        """
        Removes all nodes from a container manager.

        :param message: A message instance
        :type message: kombu.message.Message
        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :raises: commissaire.bus.ContainerManagerError
        """
        self._node_operation(container_manager_name, 'remove_all_nodes')

    def _log_operation_failure(self, container_manager_name, method, args,
                               error):
        """
        Logs an exception raised while preparing or running an operation.

        ContainerManagerError is logged at info level; every other
        exception is logged at error level as unexpected.

        :param container_manager_name: Name of the container manager in use.
        :type container_manager_name: str
        :param method: The containermgr method that was being attempted.
        :type method: str
        :param args: Arguments intended for the containermgr method.
        :type args: tuple
        :param error: The exception that was raised.
        :type error: Exception
        """
        if isinstance(error, ContainerManagerError):
            self.logger.info('{} raised ContainerManagerError: {}'.format(
                container_manager_name, error))
        else:
            self.logger.error(
                'Unexpected error while attempting {}{} with '
                'container manager "{}". {}: {}'.format(
                    method, args, container_manager_name,
                    error.__class__.__name__, error))

    def _node_operation(self, container_manager_name, method, *args):
        """
        Common code for running an operation on a named container manager.

        Refreshes the manager instances, looks up the requested manager
        and calls the named method on it.  Every failure is logged and
        then re-raised unchanged.

        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :param method: The containermgr method to call.
        :type method: str
        :param args: Additional arguments for the containermgr method.
        :type args: tuple
        :returns: Whatever the containermgr method returns (often None).
        :raises: commissaire.bus.ContainerManagerError
        :raises KeyError: if no container manager has the given name
        """
        try:
            self.refresh_managers()
        except Exception as error:
            self._log_operation_failure(
                container_manager_name, method, args, error)
            raise

        # Only a failed lookup means the manager is unknown.  Keeping this
        # handler narrow stops a KeyError raised inside the plugin from
        # being misreported as a missing container manager.
        try:
            container_manager = self.managers[container_manager_name]
        except KeyError:
            self.logger.error('Container manager "{}" does not exist.'.format(
                container_manager_name))
            raise

        try:
            result = getattr(container_manager, method)(*args)
        except Exception as error:
            self._log_operation_failure(
                container_manager_name, method, args, error)
            raise

        self.logger.info(
            '{}{} called via the container manager "{}"'.format(
                method, args, container_manager_name))

        # Most operations lack a return statement.
        if result is not None:
            self.logger.debug('Result: {}'.format(result))
        return result

    def on_get_node_status(self, message, container_manager_name, address):
        """
        Gets a nodes status from the container manager.

        :param message: A message instance
        :type message: kombu.message.Message
        :param container_manager_name: Name of the container manager to use.
        :type container_manager_name: str
        :param address: Address of the node
        :type address: str
        :returns: Status of the node according to the container manager.
        :rtype: dict
        :raises: commissaire.bus.ContainerManagerError
        """
        return self._node_operation(
            container_manager_name, 'get_node_status', address)
Ejemplo n.º 18
0
class InvestigatorService(CommissaireService):
    """
    Investigates new hosts to retrieve and store facts.
    """

    #: Default configuration file
    _default_config_file = '/etc/commissaire/investigator.conf'

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new InvestigatorService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/investigator.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{'routing_key': 'jobs.investigate'}]

        super().__init__(exchange_name,
                         connection_url,
                         queue_kwargs,
                         config_file=config_file)

        self.storage = StorageClient(self)

    def _get_etcd_config(self):
        """
        Extracts etcd configuration from a registered store handler.

        Asks the storage service for its store handlers and returns the
        configuration of the first one whose type is EtcdStoreHandler.

        :returns: A dictionary of configuration values
        :rtype: dict
        :raises ConfigurationError: if no EtcdStoreHandler is registered
        """
        response = self.request('storage.list_store_handlers')
        for handler in response.get('result', []):
            if handler['handler_type'] == 'EtcdStoreHandler':
                return handler['config']

        raise ConfigurationError(
            'Configuration is missing an EtcdStoreHandler')

    def _get_cluster_and_network_models(self, cluster_data):
        """
        Creates cluster and network models from the given cluster data.

        If building the cluster or fetching its network raises TypeError,
        no cluster is returned (None) and the network is built from
        C.DEFAULT_CLUSTER_NETWORK_JSON instead.

        :param cluster_data: Data for a cluster
        :type cluster_data: dict
        :returns: a Cluster (or None) and Network model
        :rtype: tuple
        """
        try:
            cluster = Cluster.new(**cluster_data)
            network = self.storage.get_network(cluster.network)
        except TypeError:
            cluster = None
            network = Network.new(**C.DEFAULT_CLUSTER_NETWORK_JSON)

        return cluster, network

    def on_investigate(self, message, address, cluster_data=None):
        """
        Initiates an investigation of the requested host.

        Creates a temporary SSH key from the host's stored credentials,
        runs the investigation and bootstrap phases, and always removes
        the key afterwards, whether those phases succeed or fail.

        :param message: A message instance
        :type message: kombu.message.Message
        :param address: Host address to investigate
        :type address: str
        :param cluster_data: Optional data for the associated cluster
        :type cluster_data: dict or None
        :returns: The final host model as produced by its to_json()
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        # A None default avoids sharing one dict object between calls.
        if cluster_data is None:
            cluster_data = {}

        self.logger.info('{} is now in investigating.'.format(address))
        self.logger.debug('Investigating: {}'.format(address))
        if cluster_data:
            self.logger.debug('Related cluster: {}'.format(cluster_data))

        host = self.storage.get_host(address)
        host_creds = self.storage.get(HostCreds.new(address=host.address))
        transport = ansibleapi.Transport(host.remote_user)

        key = TemporarySSHKey(host_creds, self.logger)
        try:
            key.create()
        except Exception as error:
            self.logger.warning('Unable to continue for {} due to '
                                '{}: {}. Returning...'.format(
                                    address, type(error), error))
            raise

        # A single cleanup point: the key is removed on success and on
        # every failure path, including errors raised outside the
        # individual phases (e.g. while saving the host).
        try:
            return self._investigate_with_key(
                address, host, transport, key, cluster_data)
        finally:
            key.remove()

    def _investigate_with_key(self, address, host, transport, key,
                              cluster_data):
        """
        Runs fact gathering, bootstrapping and container manager
        registration for a host using an already created SSH key.

        The host model is saved after each phase, whether the phase
        succeeded or failed.  Removing the key is left to the caller.

        :param address: Host address to investigate
        :type address: str
        :param host: The stored host model for the address
        :param transport: Transport used to reach the host
        :param key: A created temporary SSH key (its path is used)
        :param cluster_data: Data for the associated cluster (may be empty)
        :type cluster_data: dict
        :returns: The final host model as produced by its to_json()
        """
        try:
            facts = transport.get_info(address, key.path)
            # recreate the host instance with new data
            data = json.loads(host.to_json())
            data.update(facts)
            host = Host.new(**data)
            host.last_check = formatted_dt()
            host.status = C.HOST_STATUS_BOOTSTRAPPING
            self.logger.info('Facts for {} retrieved'.format(address))
            self.logger.debug('Data: {}'.format(host.to_json()))
        except Exception as error:
            self.logger.warning('Getting info failed for {}: {}'.format(
                address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info(
            'Finished and stored investigation data for {}'.format(address))
        self.logger.debug('Finished investigation update for {}: {}'.format(
            address, host.to_json()))

        self.logger.info('{} is now in bootstrapping'.format(address))
        oscmd = get_oscmd(host.os)
        try:
            etcd_config = self._get_etcd_config()
            cluster, network = self._get_cluster_and_network_models(
                cluster_data)

            container_manager = None
            if cluster:
                if cluster.container_manager:
                    container_manager = cluster.container_manager
                    self.logger.info(
                        'Using cluster "{}" managed by "{}"'.format(
                            cluster.name, container_manager))
                else:
                    self.logger.info('Using unmanaged cluster "{}"'.format(
                        cluster.name))

            self.logger.info('Using network "{}" of type "{}"'.format(
                network.name, network.type))
            transport.bootstrap(address, key.path, oscmd, etcd_config, network)
            host.status = C.HOST_STATUS_DISASSOCIATED
        except Exception as error:
            self.logger.warning(
                'Unable to start bootstraping for {}: {}'.format(
                    address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        # Register with container manager (if applicable).
        try:
            if container_manager:
                self.request('container.register_node', container_manager,
                             address)
                host.status = C.HOST_STATUS_ACTIVE
        except Exception as error:
            # Format the exception itself: indexing error.args would
            # raise IndexError for an exception created without arguments.
            self.logger.warning(
                'Unable to register {} to container manager "{}": {}'.format(
                    address, container_manager, error))
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info('Finished bootstrapping for {}'.format(address))
        self.logger.debug('Finished bootstrapping for {}: {}'.format(
            address, host.to_json()))

        # XXX TEMPORARILY DISABLED
        # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

        return host.to_json()
Ejemplo n.º 19
0
class ClusterExecService(CommissaireService):
    """
    Executes operations over a cluster by way of remote shell commands.
    """
    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new ClusterExecService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/clusterexec.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{'routing_key': 'jobs.clusterexec.*'}]
        super().__init__(exchange_name, connection_url, queue_kwargs)
        self.storage = StorageClient(self)

        # Apply any logging configuration for this service.
        read_config_file(config_file, '/etc/commissaire/clusterexec.conf')

    def _execute(self, message, model_instance, command_args,
                 finished_hosts_key):
        """
        Remotely executes OS-specific shell commands across a cluster.

        The initial status model is saved and, when the message carries a
        'reply_to' property, sent back to the caller.  Each host in the
        cluster is then processed in turn and the model is saved as hosts
        move from in_process to finished.  The first host failure stops
        the loop and sets the final status to C.HOST_STATUS_FAILED;
        otherwise the final status is 'finished'.

        :param message: A message instance
        :type message: kombu.message.Message
        :param model_instance: Initial model for the async operation
        :type model_instance: commissaire.models.Model
        :param command_args: Command name + arguments as a tuple
        :type command_args: tuple
        :param finished_hosts_key: Model attribute name for finished hosts
        :type finished_hosts_key: str
        """
        # Split out the command name.
        command_name = command_args[0]
        command_args = command_args[1:]

        end_status = 'finished'

        # XXX We assume the model instance names a cluster.
        #     Note, cluster_name is used in the except clause,
        #     so it must be reliably defined.
        cluster_name = getattr(model_instance, 'name', None)

        try:
            assert cluster_name is not None
            model_json_data = model_instance.to_dict()

            # Set the initial status in the store.
            self.logger.info('Setting initial status.')
            self.logger.debug('Status={}'.format(model_json_data))
            self.storage.save(model_instance)

            # Respond to the caller with the initial status.
            if message.properties.get('reply_to'):
                # XXX Have to dig up the message ID again.
                #     CommissaireService.on_message() already
                #     does this, but doesn't pass it to us.
                body = message.body
                # The raw body may be bytes or text; both need parsing
                # before the message ID can be looked up.
                if isinstance(body, bytes):
                    body = body.decode()
                if isinstance(body, str):
                    body = json.loads(body)
                self.respond(message.properties['reply_to'],
                             body.get('id', -1), model_json_data)
        except Exception as error:
            self.logger.error(
                'Unable to save initial state for "{}" clusterexec due to '
                '{}: {}'.format(cluster_name, type(error), error))
            raise

        # Collect all host addresses in the cluster.

        cluster = self.storage.get_cluster(cluster_name)

        n_hosts = len(cluster.hostset)
        if n_hosts:
            self.logger.debug('{} hosts in cluster "{}"'.format(
                n_hosts, cluster_name))
        else:
            self.logger.warning(
                'No hosts in cluster "{}"'.format(cluster_name))

        for address in cluster.hostset:
            host = self.storage.get_host(address)

            oscmd = get_oscmd(host.os)

            # os_command is only used for logging
            os_command = getattr(oscmd, command_name)(*command_args)
            self.logger.info('Executing {} on {}...'.format(
                os_command, host.address))

            model_instance.in_process.append(host.address)
            self.storage.save(model_instance)

            with TemporarySSHKey(host, self.logger) as key:
                try:
                    transport = ansibleapi.Transport(host.remote_user)
                    method = getattr(transport, command_name)
                    method(host.address, key.path, oscmd, command_args)
                except Exception as error:
                    # If there was a failure, set the end_status and break.
                    # The failed host is left in in_process and is not
                    # added to the finished hosts.
                    end_status = C.HOST_STATUS_FAILED
                    self.logger.error(
                        'Clusterexec {} for {} failed: {}: {}'.format(
                            command_name, host.address, type(error), error))
                    break

            # Set the finished hosts.
            finished_hosts = getattr(model_instance, finished_hosts_key)
            finished_hosts.append(host.address)
            try:
                model_instance.in_process.remove(host.address)
            except ValueError:
                self.logger.warning(
                    'Host {} was not in_process for {} {}'.format(
                        host.address, command_name, cluster_name))
            self.storage.save(model_instance)

            self.logger.info('Finished executing {} for {} in {}'.format(
                command_name, host.address, cluster_name))

        # Final set of command result.

        model_instance.finished_at = formatted_dt()
        model_instance.status = end_status

        self.logger.info('Cluster {} final {} status: {}'.format(
            cluster_name, command_name, model_instance.to_json()))

        self.storage.save(model_instance)

    def on_upgrade(self, message, cluster_name):
        """
        Executes an upgrade command on hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        """
        self.logger.info(
            'Received message: Upgrade cluster "{}"'.format(cluster_name))
        command_args = ('upgrade', )
        model_instance = ClusterUpgrade.new(name=cluster_name,
                                            status='in_process',
                                            started_at=formatted_dt(),
                                            upgraded=[],
                                            in_process=[])
        self._execute(message, model_instance, command_args, 'upgraded')

    def on_restart(self, message, cluster_name):
        """
        Executes a restart command on hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        """
        self.logger.info(
            'Received message: Restart cluster "{}"'.format(cluster_name))
        command_args = ('restart', )
        model_instance = ClusterRestart.new(name=cluster_name,
                                            status='in_process',
                                            started_at=formatted_dt(),
                                            restarted=[],
                                            in_process=[])
        self._execute(message, model_instance, command_args, 'restarted')

    def on_deploy(self, message, cluster_name, version):
        """
        Executes a deploy command on atomic hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        :param version: The tree image version to deploy
        :type version: str
        """
        self.logger.info(
            'Received message: Deploy version "{}" on cluster "{}"'.format(
                version, cluster_name))
        command_args = ('deploy', version)
        model_instance = ClusterDeploy.new(name=cluster_name,
                                           status='in_process',
                                           started_at=formatted_dt(),
                                           version=version,
                                           deployed=[],
                                           in_process=[])
        self._execute(message, model_instance, command_args, 'deployed')
Ejemplo n.º 20
0
class WatcherService(CommissaireService):
    """
    Periodically connects to hosts to check their status.

    Watcher records arrive on the ``jobs.watcher`` routing key.  Each record
    is checked if its last check is more than a minute old and is then
    published back to the same routing key so it is seen again later.
    """

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new WatcherService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/watcher.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{
            'name': 'watcher',
            'exclusive': False,
            'routing_key': 'jobs.watcher',
        }]
        # Store the last address seen for backoff
        self.last_address = None
        super().__init__(exchange_name, connection_url, queue_kwargs)
        self.storage = StorageClient(self)

        # Apply any logging configuration for this service.  The call is
        # made only for its side effects; the returned value is not used.
        read_config_file(config_file, '/etc/commissaire/watcher.conf')

    def on_message(self, body, message):
        """
        Called when a non-jsonrpc message arrives.

        The body is decoded into a WatcherRecord.  If the record was last
        checked more than a minute ago the host is checked and the record's
        ``last_check`` is refreshed; otherwise the service sleeps briefly
        (longer if the same address was seen on the previous message).  In
        every case the record is published back to ``jobs.watcher``.

        :param body: JSON-encoded body of the message.
        :type body: str
        :param message: The message instance.
        :type message: kombu.message.Message
        """
        record = WatcherRecord(**json.loads(body))
        # Ack the message so it does not requeue on its own
        message.ack()
        self.logger.debug(
            'Checking on WatcherQueue item: {}'.format(record.to_json()))

        # Both sides of the comparison are naive datetimes: strptime()
        # yields a naive value, so utcnow() is used to match it.
        last_checked = datetime.strptime(record.last_check, C.DATE_FORMAT)
        stale_before = datetime.utcnow() - timedelta(minutes=1)

        if last_checked < stale_before:
            try:
                self._check(record.address)
            except Exception as error:
                # Best effort: a failed check must not stop the record
                # from being requeued below.
                self.logger.debug('Error: {}: {}'.format(type(error), error))
            record.last_check = formatted_dt()
        elif self.last_address == record.address:
            # Since we got the same address we could process twice
            # back off a little extra
            self.logger.debug(
                'Got "{}" twice. Backing off...'.format(record.address))
            sleep(10)
        else:
            # Since the top item wasn't ready for processing sleep a bit
            sleep(2)
        self.last_address = record.address
        # Requeue the host
        self.producer.publish(record.to_json(), 'jobs.watcher')

    def _check(self, address):
        """
        Initiates a check on the requested host.

        The host is loaded from storage and its availability is checked
        over the transport.  On success ``last_check`` is updated; on
        failure the status is set to ``C.HOST_STATUS_FAILED`` and the
        exception is re-raised.  The host is saved in a ``finally`` clause,
        so it is stored after both outcomes.

        :param address: Host address to investigate
        :type address: str
        :raises Exception: Whatever the availability check raised
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        self.logger.info('Checking host "{}".'.format(address))

        host = self.storage.get_host(address)
        transport = ansibleapi.Transport(host.remote_user)

        with TemporarySSHKey(host, self.logger) as key:
            try:
                self.logger.debug(
                    'Starting watcher run for host "{}"'.format(address))
                result = transport.check_host_availability(host, key.path)
                host.last_check = formatted_dt()
                self.logger.debug(
                    'Watcher result for host {}: {}'.format(address, result))
            except Exception as error:
                self.logger.warning(
                    'Failed to connect to host node "{}"'.format(address))
                # Report the exception type alongside its message, the
                # same way on_message does.
                self.logger.debug(
                    'Watcher failed for host node "{}" with {}: {}'.format(
                        address, type(error), error))
                host.status = C.HOST_STATUS_FAILED
                raise
            finally:
                # Save the model
                self.storage.save(host)
            self.logger.info(
                'Finished watcher run for host "{}"'.format(address))