def test_get(self):
    """
    Verify StorageClient.get builds a model from a valid bus response.
    """
    client = StorageClient(mock.MagicMock())
    client.bus_mixin.logger = mock.MagicMock()
    client.bus_mixin.request.return_value = {
        'jsonrpc': '2.0',
        'id': ID,
        'result': FULL_HOST_DICT
    }

    result = client.get(MINI_HOST)

    # The request must carry the model's type name and serialized data.
    expected_params = {
        'model_type_name': MINI_HOST.__class__.__name__,
        'model_json_data': MINI_HOST.to_dict()
    }
    client.bus_mixin.request.assert_called_once_with(
        'storage.get', params=expected_params)
    self.assertIsInstance(result, Host)
    self.assertEqual(result.to_dict_safe(), FULL_HOST_DICT)
class InvestigatorService(CommissaireService):
    """
    Investigates new hosts to retrieve and store facts.
    """

    #: Default configuration file
    _default_config_file = '/etc/commissaire/investigator.conf'

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new InvestigatorService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/investigator.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{'routing_key': 'jobs.investigate'}]
        super().__init__(exchange_name, connection_url, queue_kwargs,
                         config_file=config_file)
        self.storage = StorageClient(self)

    def _get_etcd_config(self):
        """
        Extracts etcd configuration from a registered store handler.

        :returns: A dictionary of etcd configuration values
        :rtype: dict
        :raises ConfigurationError: if no EtcdStoreHandler is registered
        """
        response = self.request('storage.list_store_handlers')
        for handler in response.get('result', []):
            if handler['handler_type'] == 'EtcdStoreHandler':
                return handler['config']
        raise ConfigurationError(
            'Configuration is missing an EtcdStoreHandler')

    def _get_cluster_and_network_models(self, cluster_data):
        """
        Creates cluster and network models from the given cluster data.
        Falls back to no cluster and the default network when the data
        cannot be used to construct a Cluster model.

        :param cluster_data: Data for a cluster
        :type cluster_data: dict
        :returns: a Cluster and Network model
        :rtype: tuple
        """
        try:
            cluster = Cluster.new(**cluster_data)
            network = self.storage.get_network(cluster.network)
        except TypeError:
            cluster = None
            network = Network.new(**C.DEFAULT_CLUSTER_NETWORK_JSON)
        return cluster, network

    def on_investigate(self, message, address, cluster_data=None):
        """
        Initiates an investigation of the requested host.

        :param message: A message instance
        :type message: kombu.message.Message
        :param address: Host address to investigate
        :type address: str
        :param cluster_data: Optional data for the associated cluster
        :type cluster_data: dict or None
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses

        # Fixed mutable default argument: normalize None to a fresh dict so
        # calls can never share (and mutate) one module-level default.
        if cluster_data is None:
            cluster_data = {}
        self.logger.info('{} is now in investigating.'.format(address))
        self.logger.debug('Investigating: {}'.format(address))
        if cluster_data:
            self.logger.debug('Related cluster: {}'.format(cluster_data))

        host = self.storage.get_host(address)
        host_creds = self.storage.get(HostCreds.new(address=host.address))
        # NOTE(review): use the credentials model for the remote user, to be
        # consistent with WatcherService._check and
        # ClusterExecService._execute (was host.remote_user) — confirm the
        # Host model no longer carries remote_user.
        transport = ansibleapi.Transport(host_creds.remote_user)

        key = TemporarySSHKey(host_creds, self.logger)
        try:
            key.create()
        except Exception as error:
            self.logger.warn('Unable to continue for {} due to '
                             '{}: {}. Returning...'.format(
                                 address, type(error), error))
            # Bare raise preserves the original traceback.
            raise

        try:
            facts = transport.get_info(address, key.path)
            # recreate the host instance with new data
            data = json.loads(host.to_json())
            data.update(facts)
            host = Host.new(**data)
            host.last_check = formatted_dt()
            host.status = C.HOST_STATUS_BOOTSTRAPPING
            self.logger.info('Facts for {} retrieved'.format(address))
            self.logger.debug('Data: {}'.format(host.to_json()))
        except Exception as error:
            self.logger.warn('Getting info failed for {}: {}'.format(
                address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            key.remove()
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info(
            'Finished and stored investigation data for {}'.format(address))
        self.logger.debug('Finished investigation update for {}: {}'.format(
            address, host.to_json()))

        self.logger.info('{} is now in bootstrapping'.format(address))
        oscmd = get_oscmd(host.os)
        try:
            etcd_config = self._get_etcd_config()
            cluster, network = self._get_cluster_and_network_models(
                cluster_data)

            container_manager = None
            if cluster:
                if cluster.container_manager:
                    container_manager = cluster.container_manager
                    self.logger.info(
                        'Using cluster "{}" managed by "{}"'.format(
                            cluster.name, container_manager))
                else:
                    self.logger.info('Using unmanaged cluster "{}"'.format(
                        cluster.name))

            self.logger.info('Using network "{}" of type "{}"'.format(
                network.name, network.type))

            transport.bootstrap(
                address, key.path, oscmd, etcd_config, network)
            host.status = C.HOST_STATUS_DISASSOCIATED
        except Exception as error:
            self.logger.warn('Unable to start bootstraping for {}: {}'.format(
                address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            key.remove()
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        # Register with container manager (if applicable).
        try:
            if container_manager:
                self.request('container.register_node', container_manager,
                             address)
                host.status = C.HOST_STATUS_ACTIVE
        except Exception as error:
            self.logger.warn(
                'Unable to register {} to container manager "{}": {}'.format(
                    address, container_manager, error.args[0]))
            key.remove()
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info('Finished bootstrapping for {}'.format(address))
        self.logger.debug('Finished bootstrapping for {}: {}'.format(
            address, host.to_json()))

        # XXX TEMPORARILY DISABLED
        # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

        key.remove()
        return host.to_json()
class WatcherService(CommissaireService):
    """
    Periodically connects to hosts to check their status.
    """

    #: Default configuration file
    _default_config_file = '/etc/commissaire/watcher.conf'

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new WatcherService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/watcher.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{
            'name': 'watcher',
            'exclusive': False,
            'routing_key': 'jobs.watcher',
        }]
        # Store the last address seen for backoff
        self.last_address = None
        super().__init__(exchange_name, connection_url, queue_kwargs,
                         config_file=config_file)
        self.storage = StorageClient(self)

    def on_message(self, body, message):
        """
        Called when a non-jsonrpc message arrives.

        :param body: Body of the message.
        :type body: dict
        :param message: The message instance.
        :type message: kombu.message.Message
        """
        record = WatcherRecord(**json.loads(body))
        # Ack the message so it does not requeue on its own
        message.ack()
        self.logger.debug('Checking on WatcherQueue item: {}'.format(
            record.to_json()))
        # Only re-check a host once its last check is over a minute old.
        if datetime.strptime(
                record.last_check, C.DATE_FORMAT) < (
                    datetime.utcnow() - timedelta(minutes=1)):
            try:
                self._check(record.address)
            except Exception as error:
                self.logger.debug('Error: {}: {}'.format(type(error), error))
            record.last_check = formatted_dt()
        else:
            if self.last_address == record.address:
                # Since we got the same address we could process twice
                # back off a little extra
                self.logger.debug('Got "{}" twice. Backing off...'.format(
                    record.address))
                sleep(10)
            else:
                # Since the top item wasn't ready for processing sleep a bit
                sleep(2)
        self.last_address = record.address
        # Requeue the host
        self.producer.publish(record.to_json(), 'jobs.watcher')

    def _check(self, address):
        """
        Initiates a check on the requested host.

        :param address: Host address to investigate
        :type address: str
        """
        # Statuses follow:
        # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses
        self.logger.info('Checking host "{}".'.format(address))
        host = self.storage.get_host(address)
        host_creds = self.storage.get(HostCreds.new(address=host.address))
        transport = ansibleapi.Transport(host_creds.remote_user)

        with TemporarySSHKey(host_creds, self.logger) as key:
            try:
                self.logger.debug(
                    'Starting watcher run for host "{}"'.format(address))
                result = transport.check_host_availability(host, key.path)
                host.last_check = formatted_dt()
                self.logger.debug('Watcher result for host {}: {}'.format(
                    address, result))
            except Exception as error:
                self.logger.warn(
                    'Failed to connect to host node "{}"'.format(address))
                self.logger.debug(
                    'Watcher failed for host node "{}" with {}: {}'.format(
                        address, str(error), error))
                host.status = C.HOST_STATUS_FAILED
                # Bare raise preserves the original traceback.
                raise
            finally:
                # Save the model
                self.storage.save(host)
        self.logger.info(
            'Finished watcher run for host "{}"'.format(address))
class ClusterExecService(CommissaireService):
    """
    Executes operations over a cluster by way of remote shell commands.
    """

    #: Default configuration file
    _default_config_file = '/etc/commissaire/clusterexec.conf'

    def __init__(self, exchange_name, connection_url, config_file=None):
        """
        Creates a new ClusterExecService.  If config_file is omitted,
        it will try the default location (/etc/commissaire/clusterexec.conf).

        :param exchange_name: Name of the topic exchange
        :type exchange_name: str
        :param connection_url: Kombu connection URL
        :type connection_url: str
        :param config_file: Optional configuration file path
        :type config_file: str or None
        """
        queue_kwargs = [{'routing_key': 'jobs.clusterexec.*'}]
        super().__init__(exchange_name, connection_url, queue_kwargs,
                         config_file=config_file)
        self.storage = StorageClient(self)

    def _execute(self, message, model_instance, command_args,
                 finished_hosts_key):
        """
        Remotely executes OS-specific shell commands across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param model_instance: Initial model for the async operation
        :type model_instance: commissaire.models.Model
        :param command_args: Command name + arguments as a tuple
        :type command_args: tuple
        :param finished_hosts_key: Model attribute name for finished hosts
        :type finished_hosts_key: str
        """
        # The first element names the command; the rest are its arguments.
        command_name = command_args[0]
        command_args = command_args[1:]

        end_status = 'finished'

        # XXX We assume the model instance names a cluster.
        # Note, cluster_name is used in the except clause,
        # so it must be reliably defined.
        cluster_name = getattr(model_instance, 'name', None)

        try:
            assert cluster_name is not None
            model_json_data = model_instance.to_dict()

            # Record the operation's initial status in the store.
            self.logger.info('Setting initial status.')
            self.logger.debug('Status={}'.format(model_json_data))
            self.storage.save(model_instance)

            # Echo the initial status back to the caller, if one is waiting.
            reply_to = message.properties.get('reply_to')
            if reply_to:
                # XXX Have to dig up the message ID again.
                #     CommissaireService.on_message() already
                #     does this, but doesn't pass it to us.
                body = message.body
                if isinstance(body, bytes):
                    body = json.loads(body.decode())
                self.respond(reply_to, body.get('id', -1), model_json_data)
        except Exception as error:
            self.logger.error(
                'Unable to save initial state for "{}" clusterexec due to '
                '{}: {}'.format(cluster_name, type(error), error))
            raise error

        # Collect all host addresses in the cluster.
        cluster = self.storage.get_cluster(cluster_name)
        host_total = len(cluster.hostset)
        if host_total:
            self.logger.debug('{} hosts in cluster "{}"'.format(
                host_total, cluster_name))
        else:
            self.logger.warn('No hosts in cluster "{}"'.format(cluster_name))

        for address in cluster.hostset:
            # Pull host data and matching credentials.
            host = self.storage.get_host(address)
            host_creds = self.storage.get(
                HostCreds.new(address=host.address))
            oscmd = get_oscmd(host.os)

            # os_command is only used for logging
            os_command = getattr(oscmd, command_name)(*command_args)
            self.logger.info('Executing {} on {}...'.format(
                os_command, host.address))

            # Mark this host as in-process before running the command.
            model_instance.in_process.append(host.address)
            self.storage.save(model_instance)

            with TemporarySSHKey(host_creds, self.logger) as key:
                try:
                    transport = ansibleapi.Transport(host_creds.remote_user)
                    method = getattr(transport, command_name)
                    method(host.address, key.path, oscmd, command_args)
                except Exception as error:
                    # If there was a failure, set the end_status and break.
                    end_status = C.HOST_STATUS_FAILED
                    self.logger.error(
                        'Clusterexec {} for {} failed: {}: {}'.format(
                            command_name, host.address, type(error), error))
                    break

            # Move the host from in-process to the finished list.
            getattr(model_instance, finished_hosts_key).append(host.address)
            try:
                model_instance.in_process.remove(host.address)
            except ValueError:
                self.logger.warn('Host {} was not in_process for {} {}'.format(
                    host.address, command_name, cluster_name))
            self.storage.save(model_instance)
            self.logger.info('Finished executing {} for {} in {}'.format(
                command_name, host.address, cluster_name))

        # Persist the final result of the whole operation.
        model_instance.finished_at = formatted_dt()
        model_instance.status = end_status
        self.logger.info('Cluster {} final {} status: {}'.format(
            cluster_name, command_name, model_instance.to_json()))
        self.storage.save(model_instance)

    def on_upgrade(self, message, cluster_name):
        """
        Executes an upgrade command on hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        """
        self.logger.info(
            'Received message: Upgrade cluster "{}"'.format(cluster_name))
        model_instance = ClusterUpgrade.new(
            name=cluster_name,
            status='in_process',
            started_at=formatted_dt(),
            upgraded=[],
            in_process=[])
        self._execute(message, model_instance, ('upgrade', ), 'upgraded')

    def on_restart(self, message, cluster_name):
        """
        Executes a restart command on hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        """
        self.logger.info(
            'Received message: Restart cluster "{}"'.format(cluster_name))
        model_instance = ClusterRestart.new(
            name=cluster_name,
            status='in_process',
            started_at=formatted_dt(),
            restarted=[],
            in_process=[])
        self._execute(message, model_instance, ('restart', ), 'restarted')

    def on_deploy(self, message, cluster_name, version):
        """
        Executes a deploy command on atomic hosts across a cluster.

        :param message: A message instance
        :type message: kombu.message.Message
        :param cluster_name: The name of a cluster
        :type cluster_name: str
        :param version: The tree image version to deploy
        :type version: str
        """
        self.logger.info(
            'Received message: Deploy version "{}" on cluster "{}"'.format(
                version, cluster_name))
        model_instance = ClusterDeploy.new(
            name=cluster_name,
            status='in_process',
            started_at=formatted_dt(),
            version=version,
            deployed=[],
            in_process=[])
        self._execute(message, model_instance, ('deploy', version),
                      'deployed')