def on_get(self, req, resp):
    """
    Answers a GET request with every Host stored under the etcd hosts key.

    A missing hosts key yields a 404 with no model. An existing but empty
    hosts directory yields a 200 with no model.

    :param req: Request instance that will be passed through.
    :type req: falcon.Request
    :param resp: Response instance that will be passed through.
    :type resp: falcon.Response
    """
    try:
        hosts_dir = self.store.get('/commissaire/hosts/')
        self.logger.debug('Etcd Response: {0}'.format(hosts_dir))
    except etcd.EtcdKeyNotFound:
        self.logger.warn(
            'Etcd does not have any hosts. Returning [] and 404.')
        resp.status = falcon.HTTP_404
        req.context['model'] = None
        return

    # Don't let an empty host directory through
    if not len(hosts_dir._children):
        self.logger.debug(
            'Etcd has a hosts directory but no content.')
        resp.status = falcon.HTTP_200
        req.context['model'] = None
        return

    # Every leaf value is a JSON document holding one Host's fields.
    found = [Host(**json.loads(leaf.value)) for leaf in hosts_dir.leaves]
    resp.status = falcon.HTTP_200
    req.context['model'] = Hosts(hosts=found)
def test_watcher_failed_to_active(self):
    """
    Verify the watcher flips a reachable 'failed' host back to 'active'
    when the host belongs to a Kubernetes cluster.
    """
    with mock.patch('commissaire.transport.ansibleapi.Transport') as _tp:
        # A zero return code marks the host as reachable.
        _tp().check_host_availability.return_value = (0, {})

        q = Queue()

        test_host = make_new(HOST)
        # Push the last check far enough into the past that the watcher
        # considers the host due for a new check.
        test_host.last_check = (
            datetime.datetime.now() - datetime.timedelta(days=10)
        ).isoformat()
        test_host.status = 'failed'

        test_cluster = make_new(CLUSTER)
        test_cluster.type = C.CLUSTER_TYPE_KUBERNETES
        test_cluster.hostset = [test_host.address]

        store_manager = MagicMock(StoreHandlerManager)
        # First list() call yields the hosts, the second the clusters.
        store_manager.list.side_effect = (
            Hosts.new(hosts=[test_host]),
            Clusters.new(clusters=[test_cluster]))
        store_manager.get.return_value = test_host

        watcher(q, store_manager, run_once=True)

        # call_count is asserted directly: assert_called_once() does not
        # exist on older mock releases, where it can silently pass.
        self.assertEqual(2, store_manager.list.call_count)
        self.assertEqual(1, store_manager.save.call_count)
        self.assertEqual('active', test_host.status)
def _calculate_hosts(self, cluster):
    """
    Calculates the hosts metadata for the cluster.

    Counts the stored hosts whose address appears in ``cluster.hostset``
    and records the totals in ``cluster.hosts`` under the ``total``,
    ``available`` and ``unavailable`` keys. When the hosts can not be
    listed a warning is logged and the cluster is left untouched.

    :param cluster: The cluster model to update (not just its name).
    :type cluster: object exposing ``hostset`` and ``hosts``
    """
    # XXX: Not sure which will be more efficient: fetch all
    #      the host data in one store call and sort through
    #      them, or fetch the ones we need individually.
    #      For the MVP phase, fetch all is better.
    try:
        store_manager = cherrypy.engine.publish('get-store-manager')[0]
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed here.
        self.logger.warn(
            'Store does not have any hosts. '
            'Cannot determine cluster stats.')
        return

    total = 0
    available = 0
    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            continue
        total += 1
        if host.status == 'active':
            available += 1

    cluster.hosts['total'] = total
    cluster.hosts['available'] = available
    # Every member host that is not 'active' counts as unavailable.
    cluster.hosts['unavailable'] = total - available
def _calculate_hosts(self, cluster):
    """
    Calculates the hosts metadata for the cluster.

    Counts the retrieved hosts whose address appears in
    ``cluster.hostset`` and records the totals in ``cluster.hosts`` under
    the ``total``, ``available`` and ``unavailable`` keys. When the hosts
    can not be retrieved a warning is logged and the cluster is left
    untouched.

    :param cluster: The cluster model to update (not just its name).
    :type cluster: object exposing ``hostset`` and ``hosts``
    """
    # XXX: Not sure which will be more efficient: fetch all
    #      the host data in one etcd call and sort through
    #      them, or fetch the ones we need individually.
    #      For the MVP phase, fetch all is better.
    try:
        hosts = Hosts.retrieve()
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed here.
        self.logger.warn(
            'Etcd does not have any hosts. '
            'Cannot determine cluster stats.')
        return

    total = 0
    available = 0
    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            continue
        total += 1
        if host.status == 'active':
            available += 1

    cluster.hosts['total'] = total
    cluster.hosts['available'] = available
    # Every member host that is not 'active' counts as unavailable.
    cluster.hosts['unavailable'] = total - available
def on_get(self, req, resp):
    """
    Handles GET requests for Hosts.

    Responds with 200 and the Hosts model when the store holds at least
    one host. Any failure while listing, or an empty listing, results in
    a 404 with no model.

    :param req: Request instance that will be passed through.
    :type req: falcon.Request
    :param resp: Response instance that will be passed through.
    :type resp: falcon.Response
    """
    try:
        store_manager = cherrypy.engine.publish('get-store-manager')[0]
        hosts = store_manager.list(Hosts(hosts=[]))
        found = bool(hosts.hosts)
    except Exception:
        # Any store failure is reported the same way as "no hosts".
        found = False

    if not found:
        # This was originally a "no content" but I think a 404 makes
        # more sense if there are no hosts
        self.logger.warn(
            'Store does not have any hosts. Returning [] and 404.')
        resp.status = falcon.HTTP_404
        req.context['model'] = None
        return

    resp.status = falcon.HTTP_200
    req.context['model'] = hosts
def test_hosts_listing_with_no_hosts(self):
    """
    Verify the hosts listing answers 404 with an empty JSON body when
    no hosts exist.
    """
    with mock.patch('cherrypy.engine.publish') as publish_mock:
        publish_mock.return_value = Hosts(hosts=[])

        response_body = self.simulate_request('/api/v0/hosts')

        # No hosts means a 404 status and an empty JSON object.
        status = self.srmock.status
        self.assertEqual(status, falcon.HTTP_404)
        decoded = json.loads(response_body[0])
        self.assertEqual({}, decoded)
def _list_host(self, model_instance):
    """
    Lists data at a location in a store and returns back model instances.

    Items that can not be converted into a Host are skipped and reported
    through the logging module. A response without any ``items`` yields
    an empty Hosts model.

    :param model_instance: Model instance to search for and list
    :type model_instance: commissaire.model.Model
    :returns: A Hosts model wrapping every host that could be converted
    :rtype: Hosts
    """
    # Imported locally so this block does not rely on a module-level
    # import being present.
    import logging

    path = _model_mapper[model_instance.__class__.__name__]
    payload = self._store.get(self._endpoint + path).json()

    hosts = []
    # ``items`` may be missing or null; treat both as "no hosts".
    for item in payload.get('items') or []:
        try:
            hosts.append(self._format_model(item, Host.new(), True))
        except (TypeError, KeyError) as error:
            logging.getLogger(__name__).warning(
                'Skipping host entry that could not be converted: '
                '%s: %s', type(error).__name__, error)
    return Hosts.new(hosts=hosts)
def test_watcher_without_a_cluster(self):
    """
    Verify the watcher checks and saves a host that is not part of any
    cluster.
    """
    with mock.patch('commissaire.transport.ansibleapi.Transport') as _tp:
        # A zero return code marks the host as reachable.
        _tp().check_host_availability.return_value = (0, {})

        q = Queue()

        test_host = make_new(HOST)
        # Push the last check far enough into the past that the watcher
        # considers the host due for a new check.
        test_host.last_check = (
            datetime.datetime.now() - datetime.timedelta(days=10)
        ).isoformat()

        store_manager = MagicMock(StoreHandlerManager)
        store_manager.list.return_value = Hosts.new(hosts=[test_host])
        store_manager.get.return_value = test_host

        watcher(q, store_manager, run_once=True)

        # call_count is asserted directly: assert_called_once() does not
        # exist on older mock releases, where it can silently pass.
        self.assertEqual(1, store_manager.list.call_count)
        self.assertEqual(1, store_manager.save.call_count)
def on_get(self, req, resp):
    """
    Handles GET requests for Hosts.

    Responds with 200 and the Hosts model when at least one host could
    be retrieved. Any failure while retrieving, or an empty result,
    results in a 404 with no model.

    :param req: Request instance that will be passed through.
    :type req: falcon.Request
    :param resp: Response instance that will be passed through.
    :type resp: falcon.Response
    """
    try:
        hosts = Hosts.retrieve()
        found = bool(hosts.hosts)
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed. Any retrieval failure is
        # reported the same way as "no hosts".
        found = False

    if not found:
        # This was originally a "no content" but I think a 404 makes
        # more sense if there are no hosts
        self.logger.warn("Etcd does not have any hosts. Returning [] and 404.")
        resp.status = falcon.HTTP_404
        req.context["model"] = None
        return

    resp.status = falcon.HTTP_200
    req.context["model"] = hosts
def watcher(queue, store_manager, run_once=False):
    """
    Attempts to connect and check hosts for status.

    Work items on the queue are ``(host, last_check)`` tuples. A host
    whose last check is older than the check interval is probed over the
    transport, its status and ``last_check`` are updated, and it is saved
    and requeued. Hosts that are not yet due are requeued unchanged.

    :param queue: Queue to pull work from.
    :type queue: Queue.Queue
    :param store_manager: Proxy object for remote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param run_once: If only one run should occur.
    :type run_once: bool
    """
    logger = logging.getLogger('watcher')
    logger.info('Watcher started')
    # TODO: should be configurable
    delta = datetime.timedelta(seconds=20)
    # TODO: should be configurable
    throttle = 60  # 1 minute
    # Format used both to parse and to write ``last_check``. Writing with
    # strftime always includes the microseconds, while isoformat() drops
    # them when they are 0 and so produces a value this format rejects.
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%f"

    # If the queue is empty attempt to populate it with known hosts
    if queue.qsize() == 0:
        logger.info('The WATCHER_QUEUE is empty. '
                    'Attempting to populate it from the store.')
        try:
            hosts = store_manager.list(Hosts(hosts=[]))
            for host in hosts.hosts:
                last_check = datetime.datetime.strptime(
                    host.last_check, timestamp_format)
                queue.put_nowait((host, last_check))
                logger.debug('Inserted {0} into WATCHER_QUEUE'.format(
                    host.address))
        except Exception as error:
            # Exception rather than a bare except, so SystemExit and
            # KeyboardInterrupt still stop the watcher.
            logger.info('No hosts found in the store.')
            logger.debug('Populating the queue stopped due to {0}: {1}'.format(
                type(error), error))

    while True:
        try:
            host, last_run = queue.get_nowait()
        except Empty:
            if run_once:
                # Nothing to check; a single run must not wait forever.
                logger.info('Exiting watcher due to run_once request.')
                break
            time.sleep(throttle)
            continue

        logger.debug('Retrieved {0} from queue. Last check was {1}'.format(
            host.address, last_run))
        now = datetime.datetime.utcnow()
        if last_run > now - delta:
            logger.debug('{0} not ready to check. {1}'.format(
                host.address, last_run))
            # Requeue the host with the same last_run
            queue.put_nowait((host, last_run))
        else:
            logger.info('Checking {0} for availability'.format(
                host.address))
            transport = ansibleapi.Transport(host.remote_user)
            with TemporarySSHKey(host, logger) as key:
                results = transport.check_host_availability(host, key.path)

            if results[0] == 0:  # This means the host is available
                # Only flip the status when it was previously failed
                if host.status == 'failed':
                    try:
                        cluster_type = util.cluster_for_host(
                            host.address, store_manager).type
                    except Exception:
                        logger.debug(
                            '{0} has no cluster type. Assuming {1}'.format(
                                host.address, C.CLUSTER_TYPE_HOST))
                        cluster_type = C.CLUSTER_TYPE_HOST

                    # If the type is CLUSTER_TYPE_HOST then the host is
                    # marked 'disassociated', otherwise 'active'.
                    if cluster_type == C.CLUSTER_TYPE_HOST:
                        host.status = 'disassociated'
                    else:
                        host.status = 'active'
            else:
                # If we can not access the host at all throw it to failed
                host.status = 'failed'

            host.last_check = now.strftime(timestamp_format)
            host = store_manager.save(host)
            # Requeue the host
            queue.put_nowait((host, now))
            logger.debug('{0} has been requeued for next check run'.format(
                host.address))

        if run_once:
            logger.info('Exiting watcher due to run_once request.')
            break

        logger.debug('Sleeping for {0} seconds.'.format(throttle))
        time.sleep(throttle)

    logger.info('Watcher stopping')
def test_hosts_defaults_values(self):
    """
    Verify Hosts model fills default values when missing.
    """
    hosts = Hosts.new()
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(Hosts._attribute_defaults['hosts'], hosts.hosts)
' "cpus": 0, "memory": 0, "space": 0,' ' "last_check": ""}') #: Credential JSON for tests HOST_CREDS_JSON = '{"remote_user": "******", "ssh_priv_key": "dGVzdAo="}' #: HostStatus JSON for tests HOST_STATUS_JSON = ( '{"type": "host_only", "container_manager": {}, "commissaire": ' '{"status": "available", "last_check": "2016-07-29T20:39:50.529454"}}') #: Host model for most tests HOST = Host.new(ssh_priv_key='dGVzdAo=', remote_user='******', **json.loads(HOST_JSON)) #: HostStatus model for most tests HOST_STATUS = HostStatus.new(**json.loads(HOST_STATUS_JSON)) #: Hosts model for most tests HOSTS = Hosts.new(hosts=[HOST]) #: Cluster model for most tests CLUSTER = Cluster.new( name='cluster', status='ok', hostset=[], ) #: Cluster model with HOST for most tests CLUSTER_WITH_HOST = Cluster.new( name='cluster', status='ok', hostset=[HOST], ) #: Cluster model with flattened HOST for tests CLUSTER_WITH_FLAT_HOST = Cluster.new( name='cluster',
def clusterexec(store_manager, cluster_name, command, kwargs=None):
    """
    Remotely executes a command across every host of a cluster.

    A status model for the command is saved before any work starts,
    updated as each host is processed, and saved one final time with a
    ``finished`` or ``failed`` status. Processing stops at the first host
    on which the command fails. Unknown commands are logged and ignored.

    :param store_manager: Proxy object for remote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param cluster_name: Name of the cluster to act on
    :type cluster_name: str
    :param command: Top-level command to execute
                    ('upgrade', 'restart' or 'deploy')
    :type command: str
    :param kwargs: Keyword arguments for the command
    :type kwargs: dict or None
    """
    logger = logging.getLogger('clusterexec')
    # Fresh dict per call; a ``{}`` default would be shared across calls.
    kwargs = {} if kwargs is None else kwargs

    # TODO: This is a hack and should really be done elsewhere
    command_args = ()
    if command == 'upgrade':
        finished_hosts_key = 'upgraded'
        model_instance = ClusterUpgrade.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            upgraded=[],
            in_process=[],
        )
    elif command == 'restart':
        finished_hosts_key = 'restarted'
        model_instance = ClusterRestart.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            restarted=[],
            in_process=[],
        )
    elif command == 'deploy':
        finished_hosts_key = 'deployed'
        version = kwargs.get('version', '')
        command_args = (version,)
        model_instance = ClusterDeploy.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            version=version,
            deployed=[],
            in_process=[],
        )
    else:
        # Without a status model there is nothing to track or execute.
        logger.error(
            'Unknown clusterexec command "{0}" for cluster "{1}". '
            'Returning...'.format(command, cluster_name))
        return

    end_status = 'finished'

    try:
        # Set the initial status in the store
        logger.info('Setting initial status.')
        logger.debug('Status={0}'.format(model_instance.to_json()))
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save initial state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))
        return

    # Collect all host addresses in the cluster
    try:
        cluster = store_manager.get(Cluster.new(
            name=cluster_name, status='', hostset=[]))
    except Exception as error:
        logger.warn(
            'Unable to continue for cluster "{0}" due to '
            '{1}: {2}. Returning...'.format(
                cluster_name, type(error), error))
        return

    if cluster.hostset:
        logger.debug(
            '{0} hosts in cluster "{1}"'.format(
                len(cluster.hostset), cluster_name))
    else:
        logger.warn('No hosts in cluster "{0}"'.format(cluster_name))

    # TODO: Find better way to do this
    try:
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception as error:
        logger.warn(
            'No hosts in the cluster. Error: {0}. Exiting clusterexec'.format(
                error))
        return

    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            logger.debug(
                'Skipping {0} as it is not in this cluster.'.format(
                    host.address))
            continue  # Move on to the next one

        oscmd = get_oscmd(host.os)

        # command_list is only used for logging
        command_list = getattr(oscmd, command)(*command_args)
        logger.info('Executing {0} on {1}...'.format(
            command_list, host.address))

        model_instance.in_process.append(host.address)
        try:
            store_manager.save(model_instance)
        except Exception as error:
            logger.error(
                'Unable to save in_process state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

        key = TemporarySSHKey(host, logger)
        key.create()

        try:
            transport = ansibleapi.Transport(host.remote_user)
            exe = getattr(transport, command)
            # Unpacking is kept so an unexpected return shape still
            # counts as a failed execution.
            result, facts = exe(
                host.address, key.path, oscmd, kwargs)
        # XXX: ansibleapi explicitly raises Exception()
        except Exception as ex:
            # If there was a failure set the end_status and break out
            end_status = 'failed'
            logger.error('Clusterexec {0} for {1} failed: {2}: {3}'.format(
                command, host.address, type(ex), ex))
            break
        finally:
            # Best effort: a key that can not be removed must not abort
            # the run, but interpreter-exit signals are not swallowed.
            try:
                key.remove()
                logger.debug('Removed temporary key file {0}'.format(key.path))
            except Exception:
                logger.warn(
                    'Unable to remove the temporary key file: {0}'.format(
                        key.path))

        # Set the finished hosts
        new_finished_hosts = getattr(
            model_instance, finished_hosts_key) + [host.address]
        setattr(
            model_instance,
            finished_hosts_key,
            new_finished_hosts)
        try:
            model_instance.in_process.remove(host.address)
        except ValueError:
            # host is a model object, so use attribute access here as
            # everywhere else in this function.
            logger.warn('Host {0} was not in_process for {1} {2}'.format(
                host.address, command, cluster_name))
        try:
            store_manager.save(model_instance)
            logger.info('Finished executing {0} for {1} in {2}'.format(
                command, host.address, cluster_name))
        except Exception as error:
            logger.error(
                'Unable to save cluster state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

    # Final set of command result
    model_instance.finished_at = datetime.datetime.utcnow().isoformat()
    model_instance.status = end_status

    logger.info('Cluster {0} final {1} status: {2}'.format(
        cluster_name, command, model_instance.to_json()))

    try:
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save final state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))

    logger.info('Clusterexec stopping')
#: Response JSON for a single host HOST_JSON = ( '{"address": "10.2.0.2",' ' "status": "available", "os": "atomic",' ' "cpus": 2, "memory": 11989228, "space": 487652,' ' "last_check": "2015-12-17T15:48:18.710454"}') #: Credential JSON for tests HOST_CREDS_JSON = '{"remote_user": "******", "ssh_priv_key": "dGVzdAo="}' #: Host model for most tests HOST = Host.new( ssh_priv_key='dGVzdAo=', remote_user='******', **json.loads(HOST_JSON)) #: Hosts model for most tests HOSTS = Hosts.new( hosts=[HOST] ) #: Cluster model for most tests CLUSTER = Cluster.new( name='cluster', status='ok', hostset=[], ) #: Cluster model with HOST for most tests CLUSTER_WITH_HOST = Cluster.new( name='cluster', status='ok', hostset=[HOST], ) #: Cluster model with flattened HOST for tests CLUSTER_WITH_FLAT_HOST = Cluster.new(