def _execute(self, message, model_instance, command_args,
             finished_hosts_key):
    """
    Remotely executes OS-specific shell commands across a cluster.

    The initial model is saved and, when the message carries a
    ``reply_to`` property, sent back to the caller. The command is then
    run on each host of the cluster in turn; the first failing host stops
    the run and its address is left in ``in_process``. The final status
    and finish time are always written to the model and saved.

    :param message: A message instance
    :type message: kombu.message.Message
    :param model_instance: Initial model for the async operation
    :type model_instance: commissaire.models.Model
    :param command_args: Command name + arguments as a tuple
    :type command_args: tuple
    :param finished_hosts_key: Model attribute name for finished hosts
    :type finished_hosts_key: str
    :raises AssertionError: If the model instance has no ``name``
    :raises Exception: Any error while saving or replying with the
                       initial status is logged and re-raised
    """
    # Split out the command name.
    command_name = command_args[0]
    command_args = command_args[1:]

    end_status = 'finished'

    # XXX We assume the model instance names a cluster.
    #     Note, cluster_name is used in the except clause,
    #     so it must be reliably defined.
    cluster_name = getattr(model_instance, 'name', None)

    try:
        # Raised explicitly (rather than via ``assert``) so the check is
        # still enforced when Python runs with -O.
        if cluster_name is None:
            raise AssertionError('model instance does not name a cluster')
        model_json_data = model_instance.to_dict()
        # Set the initial status in the store.
        self.logger.info('Setting initial status.')
        self.logger.debug('Status={}'.format(model_json_data))
        self.storage.save(model_instance)
        # Respond to the caller with the initial status.
        if message.properties.get('reply_to'):
            # XXX Have to dig up the message ID again.
            #     CommissaireService.on_message() already
            #     does this, but doesn't pass it to us.
            body = message.body
            # The raw body may arrive as bytes or as text; both carry
            # JSON and must be parsed before the ID lookup.
            if isinstance(body, bytes):
                body = body.decode()
            if isinstance(body, str):
                body = json.loads(body)
            self.respond(
                message.properties['reply_to'],
                body.get('id', -1),
                model_json_data)
    except Exception as error:
        self.logger.error(
            'Unable to save initial state for "{}" clusterexec due to '
            '{}: {}'.format(cluster_name, type(error), error))
        raise

    # Collect all host addresses in the cluster.
    cluster = self.storage.get_cluster(cluster_name)
    n_hosts = len(cluster.hostset)
    if n_hosts:
        self.logger.debug('{} hosts in cluster "{}"'.format(
            n_hosts, cluster_name))
    else:
        self.logger.warning(
            'No hosts in cluster "{}"'.format(cluster_name))

    for address in cluster.hostset:
        host = self.storage.get_host(address)
        oscmd = get_oscmd(host.os)

        # os_command is only used for logging
        os_command = getattr(oscmd, command_name)(*command_args)
        self.logger.info('Executing {} on {}...'.format(
            os_command, host.address))

        model_instance.in_process.append(host.address)
        self.storage.save(model_instance)

        with TemporarySSHKey(host, self.logger) as key:
            try:
                transport = ansibleapi.Transport(host.remote_user)
                method = getattr(transport, command_name)
                method(host.address, key.path, oscmd, command_args)
            except Exception as error:
                # If there was a failure, set the end_status and break.
                end_status = C.HOST_STATUS_FAILED
                self.logger.error(
                    'Clusterexec {} for {} failed: {}: {}'.format(
                        command_name, host.address, type(error), error))
                break

        # Set the finished hosts.
        finished_hosts = getattr(model_instance, finished_hosts_key)
        finished_hosts.append(host.address)
        try:
            model_instance.in_process.remove(host.address)
        except ValueError:
            self.logger.warning(
                'Host {} was not in_process for {} {}'.format(
                    host.address, command_name, cluster_name))
        self.storage.save(model_instance)

        self.logger.info('Finished executing {} for {} in {}'.format(
            command_name, host.address, cluster_name))

    # Final set of command result.
    model_instance.finished_at = formatted_dt()
    model_instance.status = end_status

    self.logger.info('Cluster {} final {} status: {}'.format(
        cluster_name, command_name, model_instance.to_json()))

    self.storage.save(model_instance)
def on_investigate(self, message, address, cluster_data=None):
    """
    Initiates an investigation of the requested host.

    Facts are gathered from the host, the host is bootstrapped and, when
    its cluster names a container manager, registered with that manager.
    The host model is saved after every stage, whether the stage
    succeeded or failed. The temporary SSH key is removed on every exit
    path once it has been created.

    :param message: A message instance
    :type message: kombu.message.Message
    :param address: Host address to investigate
    :type address: str
    :param cluster_data: Optional data for the associated cluster
    :type cluster_data: dict
    :returns: The final host model as produced by ``to_json()``
    :raises Exception: Any error from a stage is logged and re-raised
    """
    # None instead of a shared ``{}`` default, so one call can never leak
    # state into the next through the default object.
    if cluster_data is None:
        cluster_data = {}

    # Statuses follow:
    # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses
    self.logger.info('{} is now in investigating.'.format(address))
    self.logger.debug('Investigating: {}'.format(address))
    if cluster_data:
        self.logger.debug('Related cluster: {}'.format(cluster_data))

    host = self.storage.get_host(address)
    host_creds = self.storage.get(HostCreds.new(address=host.address))
    transport = ansibleapi.Transport(host.remote_user)

    key = TemporarySSHKey(host_creds, self.logger)
    try:
        key.create()
    except Exception as error:
        self.logger.warning(
            'Unable to continue for {} due to '
            '{}: {}. Returning...'.format(address, type(error), error))
        raise

    # The key now exists; the outer ``finally`` removes it exactly once no
    # matter which stage fails (including failures while saving the host).
    try:
        try:
            facts = transport.get_info(address, key.path)
            # recreate the host instance with new data
            data = json.loads(host.to_json())
            data.update(facts)
            host = Host.new(**data)
            host.last_check = formatted_dt()
            host.status = C.HOST_STATUS_BOOTSTRAPPING
            self.logger.info('Facts for {} retrieved'.format(address))
            self.logger.debug('Data: {}'.format(host.to_json()))
        except Exception as error:
            self.logger.warning('Getting info failed for {}: {}'.format(
                address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info(
            'Finished and stored investigation data for {}'.format(address))
        self.logger.debug(
            'Finished investigation update for {}: {}'.format(
                address, host.to_json()))

        self.logger.info('{} is now in bootstrapping'.format(address))
        oscmd = get_oscmd(host.os)
        container_manager = None
        try:
            etcd_config = self._get_etcd_config()
            cluster, network = self._get_cluster_and_network_models(
                cluster_data)

            if cluster:
                if cluster.container_manager:
                    container_manager = cluster.container_manager
                    self.logger.info(
                        'Using cluster "{}" managed by "{}"'.format(
                            cluster.name, container_manager))
                else:
                    self.logger.info('Using unmanaged cluster "{}"'.format(
                        cluster.name))

            self.logger.info('Using network "{}" of type "{}"'.format(
                network.name, network.type))
            transport.bootstrap(
                address, key.path, oscmd, etcd_config, network)
            host.status = C.HOST_STATUS_DISASSOCIATED
        except Exception as error:
            self.logger.warning(
                'Unable to start bootstraping for {}: {}'.format(
                    address, str(error)))
            host.status = C.HOST_STATUS_FAILED
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        # Register with container manager (if applicable).
        try:
            if container_manager:
                self.request(
                    'container.register_node', container_manager, address)
                host.status = C.HOST_STATUS_ACTIVE
        except Exception as error:
            # Exceptions raised without arguments have empty ``args``;
            # fall back to the exception itself instead of indexing.
            detail = error.args[0] if error.args else error
            self.logger.warning(
                'Unable to register {} to container manager "{}": {}'.format(
                    address, container_manager, detail))
            raise
        finally:
            # Save the updated host model.
            self.storage.save(host)

        self.logger.info('Finished bootstrapping for {}'.format(address))
        self.logger.debug('Finished bootstrapping for {}: {}'.format(
            address, host.to_json()))

        # XXX TEMPORARILY DISABLED
        # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

        return host.to_json()
    finally:
        key.remove()
def on_investigate(self, message, address, cluster_data=None):
    """
    Initiates an investigation of the requested host.

    The host record is fetched through a ``storage.get`` request, facts
    are gathered from the host, the host is bootstrapped and finally its
    registration is checked with ``storage.node_registered``. The host
    model is saved after the investigation and bootstrap stages, whether
    they succeeded or failed. The temporary SSH key is removed on every
    exit path once it has been created.

    :param message: A message instance
    :type message: kombu.message.Message
    :param address: Host address to investigate
    :type address: str
    :param cluster_data: Optional data for the associated cluster
    :type cluster_data: dict
    :returns: The final host model as produced by ``to_json()``
    :raises Exception: Any error from a stage is logged and re-raised
    """
    # None instead of a shared ``{}`` default, so one call can never leak
    # state into the next through the default object.
    if cluster_data is None:
        cluster_data = {}

    def save_host(model):
        # Persist the given host model through the storage service.
        self.request('storage.save', params={
            'model_type_name': model.__class__.__name__,
            'model_json_data': model.to_json()
        })

    # Statuses follow:
    # http://commissaire.readthedocs.org/en/latest/enums.html#host-statuses
    self.logger.info('{0} is now in investigating.'.format(address))
    self.logger.debug('Investigating: {0}'.format(address))
    if cluster_data:
        self.logger.debug('Related cluster: {0}'.format(cluster_data))

    try:
        params = {
            'model_type_name': 'Host',
            'model_json_data': Host.new(address=address).to_json(),
            'secure': True
        }
        response = self.request('storage.get', params=params)
        host = Host.new(**response['result'])
    except Exception as error:
        self.logger.warning(
            'Unable to continue for {0} due to '
            '{1}: {2}. Returning...'.format(address, type(error), error))
        raise

    transport = ansibleapi.Transport(host.remote_user)

    key = TemporarySSHKey(host, self.logger)
    try:
        key.create()
    except Exception as error:
        self.logger.warning(
            'Unable to continue for {0} due to '
            '{1}: {2}. Returning...'.format(address, type(error), error))
        raise

    # The key now exists; the outer ``finally`` removes it exactly once no
    # matter which stage fails (including failed storage requests).
    try:
        try:
            facts = transport.get_info(address, key.path)
            # recreate the host instance with new data
            data = json.loads(host.to_json(secure=True))
            data.update(facts)
            host = Host.new(**data)
            # Naive UTC timestamp in ISO 8601 form.
            host.last_check = datetime.datetime.utcnow().isoformat()
            host.status = 'bootstrapping'
            self.logger.info('Facts for {0} retrieved'.format(address))
            self.logger.debug('Data: {0}'.format(host.to_json()))
        except Exception as error:
            self.logger.warning('Getting info failed for {0}: {1}'.format(
                address, str(error)))
            host.status = 'failed'
            raise
        finally:
            # Save the updated host model.
            save_host(host)

        self.logger.info(
            'Finished and stored investigation data for {0}'.format(address))
        self.logger.debug(
            'Finished investigation update for {0}: {1}'.format(
                address, host.to_json()))

        self.logger.info('{0} is now in bootstrapping'.format(address))
        oscmd = get_oscmd(host.os)
        try:
            etcd_config = self._get_etcd_config()
            cluster, network = self._get_cluster_and_network_models(
                cluster_data)
            self.logger.info(
                'Using cluster "{0}" of type "{1}"'.format(
                    cluster.name, cluster.type))
            self.logger.info(
                'Using network "{0}" of type "{1}"'.format(
                    network.name, network.type))
            transport.bootstrap(
                address, key.path, oscmd, etcd_config, cluster, network)
            host.status = 'inactive'
        except Exception as error:
            self.logger.warning(
                'Unable to start bootstraping for {0}: {1}'.format(
                    address, str(error)))
            host.status = 'disassociated'
            raise
        finally:
            # Save the updated host model.
            save_host(host)

        # Verify association with relevant container managers
        params = {
            'cluster_type': cluster.type,
            'address': address
        }
        response = self.request('storage.node_registered', params=params)
        if response['result']:
            # NOTE(review): this status only reaches the caller through the
            # returned JSON; no save follows it. Confirm whether it should
            # also be persisted.
            host.status = 'active'

        self.logger.info(
            'Finished bootstrapping for {0}'.format(address))
        self.logger.debug('Finished bootstrapping for {0}: {1}'.format(
            address, host.to_json()))

        # XXX TEMPORARILY DISABLED
        # WATCHER_QUEUE.put_nowait((host, datetime.datetime.utcnow()))

        return host.to_json()
    finally:
        key.remove()