Ejemplo n.º 1
0
    def on_get(self, req, resp):
        """
        Serve the collection of hosts stored under the hosts key in etcd.

        A missing key answers with a 404 and no model. An existing
        directory without children answers with a 200 and no model.
        Otherwise every leaf is decoded into a Host and the resulting
        Hosts model is attached to the request context with a 200.

        :param req: Request instance that will be passed through.
        :type req: falcon.Request
        :param resp: Response instance that will be passed through.
        :type resp: falcon.Response
        """
        try:
            listing = self.store.get('/commissaire/hosts/')
            self.logger.debug('Etcd Response: {0}'.format(listing))
        except etcd.EtcdKeyNotFound:
            self.logger.warn(
                'Etcd does not have any hosts. Returning [] and 404.')
            resp.status = falcon.HTTP_404
            req.context['model'] = None
            return

        # Guard clause: an empty host directory never reaches the decoder
        if not len(listing._children):
            self.logger.debug(
                'Etcd has a hosts directory but no content.')
            resp.status = falcon.HTTP_200
            req.context['model'] = None
            return

        decoded = [Host(**json.loads(leaf.value)) for leaf in listing.leaves]
        resp.status = falcon.HTTP_200
        req.context['model'] = Hosts(hosts=decoded)
    def test_watcher_failed_to_active(self):
        """
        Verify the watcher moves a reachable 'failed' host that belongs to
        a Kubernetes cluster back to 'active'.
        """
        with mock.patch('commissaire.transport.ansibleapi.Transport') as _tp:

            # A zero return code is what the watcher treats as "available"
            _tp().check_host_availability.return_value = (0, {})

            q = Queue()

            test_host = make_new(HOST)
            # Last check far in the past so the host is due for a check
            test_host.last_check = (datetime.datetime.now() -
                                    datetime.timedelta(days=10)).isoformat()
            test_host.status = 'failed'

            test_cluster = make_new(CLUSTER)
            test_cluster.type = C.CLUSTER_TYPE_KUBERNETES
            test_cluster.hostset = [test_host.address]

            store_manager = MagicMock(StoreHandlerManager)
            # First list() call returns the hosts, the second the clusters
            store_manager.list.side_effect = (Hosts.new(
                hosts=[test_host]), Clusters.new(clusters=[test_cluster]))
            store_manager.get.return_value = test_host

            watcher(q, store_manager, run_once=True)

            # Explicit call_count checks work on every mock release, unlike
            # assert_called_once() which older versions do not provide.
            self.assertEqual(2, store_manager.list.call_count)
            self.assertEqual(1, store_manager.save.call_count)
            self.assertEqual('active', test_host.status)
Ejemplo n.º 3
0
    def _calculate_hosts(self, cluster):
        """
        Calculates the hosts metadata for the cluster.

        Every stored host whose address appears in ``cluster.hostset`` is
        counted, and the ``total``, ``available`` and ``unavailable`` keys
        of ``cluster.hosts`` are updated in place. If the hosts can not be
        listed a warning is logged and the cluster is left untouched.

        :param cluster: The cluster model to update (used through its
                        ``hostset`` and ``hosts`` attributes).
        :type cluster: Cluster model instance
        """
        # XXX: Not sure which will be more efficient: fetch all
        #      the host data in one etcd call and sort through
        #      them, or fetch the ones we need individually.
        #      For the MVP phase, fetch all is better.
        try:
            store_manager = cherrypy.engine.publish('get-store-manager')[0]
            hosts = store_manager.list(Hosts(hosts=[]))
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are never swallowed here.
            self.logger.warn(
                'Store does not have any hosts. '
                'Cannot determine cluster stats.')
            return

        members = [
            host for host in hosts.hosts if host.address in cluster.hostset]
        total = len(members)
        # Only 'active' counts as available; every other status does not
        available = sum(1 for host in members if host.status == 'active')

        cluster.hosts['total'] = total
        cluster.hosts['available'] = available
        cluster.hosts['unavailable'] = total - available
Ejemplo n.º 4
0
    def _calculate_hosts(self, cluster):
        """
        Calculates the hosts metadata for the cluster.

        Every retrieved host whose address appears in ``cluster.hostset``
        is counted, and the ``total``, ``available`` and ``unavailable``
        keys of ``cluster.hosts`` are updated in place. If the hosts can
        not be retrieved a warning is logged and the cluster is left
        untouched.

        :param cluster: The cluster model to update (used through its
                        ``hostset`` and ``hosts`` attributes).
        :type cluster: Cluster model instance
        """
        # XXX: Not sure which will be more efficient: fetch all
        #      the host data in one etcd call and sort through
        #      them, or fetch the ones we need individually.
        #      For the MVP phase, fetch all is better.
        try:
            hosts = Hosts.retrieve()
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are never swallowed here.
            self.logger.warn(
                'Etcd does not have any hosts. '
                'Cannot determine cluster stats.')
            return

        members = [
            host for host in hosts.hosts if host.address in cluster.hostset]
        total = len(members)
        # Only 'active' counts as available; every other status does not
        available = sum(1 for host in members if host.status == 'active')

        cluster.hosts['total'] = total
        cluster.hosts['available'] = available
        cluster.hosts['unavailable'] = total - available
Ejemplo n.º 5
0
    def on_get(self, req, resp):
        """
        Respond to a GET on the hosts collection.

        When the store manager returns at least one host the Hosts model
        is attached to the request context with a 200. An empty listing,
        or any exception raised along the way, produces a 404 with no
        model.

        :param req: Request instance that will be passed through.
        :type req: falcon.Request
        :param resp: Response instance that will be passed through.
        :type resp: falcon.Response
        """
        try:
            manager = cherrypy.engine.publish('get-store-manager')[0]
            listing = manager.list(Hosts(hosts=[]))
            # An empty listing is routed through the same handler as a
            # store failure.
            if not len(listing.hosts):
                raise Exception()
            resp.status = falcon.HTTP_200
            req.context['model'] = listing
        except Exception:
            # Historically this answered "no content"; a 404 was judged
            # to fit better when there are no hosts.
            self.logger.warn(
                'Store does not have any hosts. Returning [] and 404.')
            resp.status = falcon.HTTP_404
            req.context['model'] = None
Ejemplo n.º 6
0
 def test_hosts_listing_with_no_hosts(self):
     """
     Listing hosts while none exist must answer 404 with an empty JSON
     object as the body.
     """
     with mock.patch('cherrypy.engine.publish') as publish_mock:
         publish_mock.return_value = Hosts(hosts=[])
         response = self.simulate_request('/api/v0/hosts')
         # Status first, then the decoded body of the first response chunk
         self.assertEqual(self.srmock.status, falcon.HTTP_404)
         decoded_body = json.loads(response[0])
         self.assertEqual({}, decoded_body)
    def _list_host(self, model_instance):
        """
        Lists data at a location in a store and returns back model instances.

        The endpoint path is looked up from the class name of
        ``model_instance``. Every entry under the ``items`` key of the JSON
        response is converted into a Host; entries that can not be
        converted are logged and skipped. A response without ``items``
        (or with a null value) is treated as an empty listing.

        :param model_instance: Model instance to search for and list
        :type model_instance: commissaire.model.Model
        :returns: A Hosts model holding every host that could be converted
        :rtype: Hosts
        """
        # Function-scope import keeps this block self-contained
        import logging

        path = _model_mapper[model_instance.__class__.__name__]
        payload = self._store.get(self._endpoint + path).json()

        hosts = []
        for item in payload.get('items') or ():
            try:
                hosts.append(self._format_model(item, Host.new(), True))
            except (TypeError, KeyError) as error:
                # Only the error is logged: the item itself may carry
                # credentials and must not end up in log files.
                logging.getLogger(__name__).warning(
                    'Skipping host item that could not be parsed: %s: %s',
                    type(error).__name__, error)

        return Hosts.new(hosts=hosts)
Ejemplo n.º 8
0
    def _list_host(self, model_instance):
        """
        Lists data at a location in a store and returns back model instances.

        The endpoint path is looked up from the class name of
        ``model_instance``. Every entry under the ``items`` key of the JSON
        response is converted into a Host; entries that can not be
        converted are logged and skipped. A response without ``items``
        (or with a null value) is treated as an empty listing.

        :param model_instance: Model instance to search for and list
        :type model_instance: commissaire.model.Model
        :returns: A Hosts model holding every host that could be converted
        :rtype: Hosts
        """
        # Function-scope import keeps this block self-contained
        import logging

        path = _model_mapper[model_instance.__class__.__name__]
        payload = self._store.get(self._endpoint + path).json()

        hosts = []
        for item in payload.get('items') or ():
            try:
                hosts.append(self._format_model(item, Host.new(), True))
            except (TypeError, KeyError) as error:
                # Only the error is logged: the item itself may carry
                # credentials and must not end up in log files.
                logging.getLogger(__name__).warning(
                    'Skipping host item that could not be parsed: %s: %s',
                    type(error).__name__, error)

        return Hosts.new(hosts=hosts)
    def test_watcher_without_a_cluster(self):
        """
        Verify the watcher lists the hosts once and saves the checked host
        when no cluster data is provided by the store.
        """
        with mock.patch('commissaire.transport.ansibleapi.Transport') as _tp:

            # A zero return code is what the watcher treats as "available"
            _tp().check_host_availability.return_value = (0, {})

            q = Queue()

            test_host = make_new(HOST)
            # Last check far in the past so the host is due for a check
            test_host.last_check = (datetime.datetime.now() -
                                    datetime.timedelta(days=10)).isoformat()

            store_manager = MagicMock(StoreHandlerManager)
            store_manager.list.return_value = Hosts.new(hosts=[test_host])
            store_manager.get.return_value = test_host

            watcher(q, store_manager, run_once=True)

            # Explicit call_count checks work on every mock release, unlike
            # assert_called_once() which older versions do not provide.
            self.assertEqual(1, store_manager.list.call_count)
            self.assertEqual(1, store_manager.save.call_count)
Ejemplo n.º 10
0
    def on_get(self, req, resp):
        """
        Handles GET requests for Hosts.

        Attaches the retrieved Hosts model to the request context with a
        200 when at least one host exists. An empty result, or any
        exception raised while retrieving, yields a 404 with no model.

        :param req: Request instance that will be passed through.
        :type req: falcon.Request
        :param resp: Response instance that will be passed through.
        :type resp: falcon.Response
        """

        try:
            hosts = Hosts.retrieve()
            if len(hosts.hosts) == 0:
                # Route the empty case through the same 404 handler
                raise Exception()
            resp.status = falcon.HTTP_200
            req.context["model"] = hosts
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are never turned into a 404.
            # This was originally a "no content" but I think a 404 makes
            # more sense if there are no hosts
            self.logger.warn(
                "Etcd does not have any hosts. Returning [] and 404.")
            resp.status = falcon.HTTP_404
            req.context["model"] = None
            return
Ejemplo n.º 11
0
def watcher(queue, store_manager, run_once=False):
    """
    Attempts to connect and check hosts for status.

    When the queue starts out empty it is seeded with every host the store
    returns. Each loop iteration then takes one ``(host, last_run)`` pair
    off the queue. A host checked less than ``delta`` ago is requeued
    unchanged; otherwise its availability is checked, its status and
    ``last_check`` are updated, and it is saved and requeued.

    :param queue: Queue to pull work from.
    :type queue: Queue.Queue
    :param store_manager: Proxy object for remote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param run_once: If only one run should occur. With an empty queue the
                     watcher returns instead of waiting for work.
    :type run_once: bool
    """
    logger = logging.getLogger('watcher')
    logger.info('Watcher started')
    # TODO: should be configurable
    delta = datetime.timedelta(seconds=20)
    # TODO: should be configurable
    throttle = 60  # 1 minute

    # If the queue is empty attempt to populate it with known hosts
    if queue.qsize() == 0:
        logger.info('The WATCHER_QUEUE is empty. '
                    'Attempting to populate it from the store.')
        try:
            hosts = store_manager.list(Hosts(hosts=[]))
            for host in hosts.hosts:
                last_check = datetime.datetime.strptime(
                    host.last_check, "%Y-%m-%dT%H:%M:%S.%f")
                queue.put_nowait((host, last_check))
                logger.debug('Inserted {0} into WATCHER_QUEUE'.format(
                    host.address))
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt still stop the process.
            logger.info('No hosts found in the store.')

    while True:
        try:
            host, last_run = queue.get_nowait()
        except Empty:
            if run_once:
                # Without this a run_once call on an empty queue would
                # sleep and retry forever.
                logger.info('Exiting watcher due to run_once request.')
                break
            time.sleep(throttle)
            continue

        logger.debug('Retrieved {0} from queue. Last check was {1}'.format(
            host.address, last_run))
        now = datetime.datetime.utcnow()
        if last_run > now - delta:
            logger.debug('{0} not ready to check. {1}'.format(
                host.address, last_run))
            # Requeue the host with the same last_run
            queue.put_nowait((host, last_run))
        else:
            logger.info('Checking {0} for availability'.format(
                host.address))
            transport = ansibleapi.Transport(host.remote_user)
            with TemporarySSHKey(host, logger) as key:
                results = transport.check_host_availability(host, key.path)
                if results[0] == 0:  # This means the host is available
                    # Only flip the bit on failed only
                    if host.status == 'failed':
                        try:
                            cluster_type = util.cluster_for_host(
                                host.address, store_manager).type
                        except Exception:
                            logger.debug(
                                '{0} has no cluster type. Assuming {1}'.format(
                                    host.address, C.CLUSTER_TYPE_HOST))
                            cluster_type = C.CLUSTER_TYPE_HOST
                        # If the type is CLUSTER_TYPE_HOST then the host is
                        # marked 'disassociated'; any other type is 'active'
                        if cluster_type == C.CLUSTER_TYPE_HOST:
                            host.status = 'disassociated'
                        else:
                            host.status = 'active'
                else:
                    # If we can not access the host at all throw it to failed
                    host.status = 'failed'
                host.last_check = now.isoformat()
                host = store_manager.save(host)
                # Requeue the host
                queue.put_nowait((host, now))
                logger.debug('{0} has been requeued for next check run'.format(
                    host.address))

        if run_once:
            logger.info('Exiting watcher due to run_once request.')
            break

        logger.debug('Sleeping for {0} seconds.'.format(throttle))
        time.sleep(throttle)

    logger.info('Watcher stopping')
Ejemplo n.º 12
0
 def test_hosts_defaults_values(self):
     """
     Verify Hosts model fills default values when missing.
     """
     hosts = Hosts.new()
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(Hosts._attribute_defaults['hosts'], hosts.hosts)
Ejemplo n.º 13
0
                              ' "cpus": 0, "memory": 0, "space": 0,'
                              ' "last_check": ""}')
#: Credential JSON for tests; carries the same remote_user and
#: ssh_priv_key values that HOST below is built with
HOST_CREDS_JSON = '{"remote_user": "******", "ssh_priv_key": "dGVzdAo="}'
#: HostStatus JSON for tests
HOST_STATUS_JSON = (
    '{"type": "host_only", "container_manager": {}, "commissaire": '
    '{"status": "available", "last_check": "2016-07-29T20:39:50.529454"}}')
#: Host model for most tests, built from HOST_JSON plus the credential
#: fields
HOST = Host.new(ssh_priv_key='dGVzdAo=',
                remote_user='******',
                **json.loads(HOST_JSON))
#: HostStatus model for most tests, built from HOST_STATUS_JSON
HOST_STATUS = HostStatus.new(**json.loads(HOST_STATUS_JSON))
#: Hosts model for most tests; contains only HOST
HOSTS = Hosts.new(hosts=[HOST])
#: Cluster model for most tests; its hostset is empty
CLUSTER = Cluster.new(
    name='cluster',
    status='ok',
    hostset=[],
)
#: Cluster model with HOST for most tests; the hostset holds the HOST
#: model object itself
CLUSTER_WITH_HOST = Cluster.new(
    name='cluster',
    status='ok',
    hostset=[HOST],
)
#: Cluster model with flattened HOST for tests
CLUSTER_WITH_FLAT_HOST = Cluster.new(
    name='cluster',
 def test_hosts_defaults_values(self):
     """
     Verify Hosts model fills default values when missing.
     """
     hosts = Hosts.new()
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(Hosts._attribute_defaults['hosts'], hosts.hosts)
Ejemplo n.º 15
0
def clusterexec(store_manager, cluster_name, command, kwargs=None):
    """
    Remote executes a shell commands across a cluster.

    Progress is tracked in a ClusterUpgrade, ClusterRestart or
    ClusterDeploy model that is saved to the store before the run, around
    every host and once more with the final status. The first host whose
    command raises stops the run and the final status becomes ``failed``.
    Store errors are logged and end the run early; nothing is raised to
    the caller for them.

    :param store_manager: Proxy object for remote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param cluster_name: Name of the cluster to act on
    :type cluster_name: str
    :param command: Top-level command to execute; one of ``upgrade``,
                    ``restart`` or ``deploy``
    :type command: str
    :param kwargs: Keyword arguments for the command (``version`` is read
                   for ``deploy``). ``None`` means no arguments.
    :type kwargs: dict or None
    """
    logger = logging.getLogger('clusterexec')

    # A fresh dict per call instead of a mutable default shared by all calls
    if kwargs is None:
        kwargs = {}

    # TODO: This is a hack and should really be done elsewhere
    command_args = ()
    if command == 'upgrade':
        finished_hosts_key = 'upgraded'
        model_instance = ClusterUpgrade.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            upgraded=[],
            in_process=[],
        )
    elif command == 'restart':
        finished_hosts_key = 'restarted'
        model_instance = ClusterRestart.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            restarted=[],
            in_process=[],
        )
    elif command == 'deploy':
        finished_hosts_key = 'deployed'
        version = kwargs.get('version', '')
        command_args = (version,)
        model_instance = ClusterDeploy.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            version=version,
            deployed=[],
            in_process=[],
        )
    else:
        # Without this branch an unknown command only failed later, through
        # an unbound model_instance reported as a store save error.
        logger.error(
            'Unknown clusterexec command "{0}" for cluster "{1}". '
            'Returning...'.format(command, cluster_name))
        return

    end_status = 'finished'

    try:
        # Set the initial status in the store
        logger.info('Setting initial status.')
        logger.debug('Status={0}'.format(model_instance.to_json()))
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save initial state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))
        return

    # Collect all host addresses in the cluster
    try:
        cluster = store_manager.get(Cluster.new(
            name=cluster_name, status='', hostset=[]))
    except Exception as error:
        logger.warn(
            'Unable to continue for cluster "{0}" due to '
            '{1}: {2}. Returning...'.format(cluster_name, type(error), error))
        return

    if cluster.hostset:
        logger.debug(
            '{0} hosts in cluster "{1}"'.format(
                len(cluster.hostset), cluster_name))
    else:
        logger.warn('No hosts in cluster "{0}"'.format(cluster_name))

    # TODO: Find better way to do this
    try:
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception as error:
        logger.warn(
            'No hosts in the cluster. Error: {0}. Exiting clusterexec'.format(
                error))
        return

    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            logger.debug(
                'Skipping {0} as it is not in this cluster.'.format(
                    host.address))
            continue  # Move on to the next one
        oscmd = get_oscmd(host.os)

        # command_list is only used for logging
        command_list = getattr(oscmd, command)(*command_args)
        logger.info('Executing {0} on {1}...'.format(
            command_list, host.address))

        model_instance.in_process.append(host.address)
        try:
            store_manager.save(model_instance)
        except Exception as error:
            logger.error(
                'Unable to save in_process state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

        key = TemporarySSHKey(host, logger)
        key.create()

        try:
            transport = ansibleapi.Transport(host.remote_user)
            exe = getattr(transport, command)
            result, facts = exe(
                host.address, key.path, oscmd, kwargs)
        # XXX: ansibleapi explicitly raises Exception()
        except Exception as ex:
            # If there was a failure set the end_status and break out
            end_status = 'failed'
            logger.error('Clusterexec {0} for {1} failed: {2}: {3}'.format(
                command, host.address, type(ex), ex))
            break
        finally:
            # The temporary key is removed on success and on failure alike
            try:
                key.remove()
                logger.debug('Removed temporary key file {0}'.format(key.path))
            except Exception:
                logger.warn(
                    'Unable to remove the temporary key file: {0}'.format(
                        key.path))

        # Set the finished hosts
        new_finished_hosts = getattr(
            model_instance, finished_hosts_key) + [host.address]
        setattr(
            model_instance,
            finished_hosts_key,
            new_finished_hosts)
        try:
            model_instance.in_process.remove(host.address)
        except ValueError:
            # host is accessed by attribute everywhere else in this
            # function, so the address is read the same way here.
            logger.warn('Host {0} was not in_process for {1} {2}'.format(
                host.address, command, cluster_name))
        try:
            store_manager.save(model_instance)
            logger.info('Finished executing {0} for {1} in {2}'.format(
                command, host.address, cluster_name))
        except Exception as error:
            logger.error(
                'Unable to save cluster state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

    # Final set of command result
    model_instance.finished_at = datetime.datetime.utcnow().isoformat()
    model_instance.status = end_status

    logger.info('Cluster {0} final {1} status: {2}'.format(
        cluster_name, command, model_instance.to_json()))

    try:
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save final state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))

    logger.info('Clusterexec stopping')
Ejemplo n.º 16
0
#: Response JSON for a single host
HOST_JSON = (
    '{"address": "10.2.0.2",'
    ' "status": "available", "os": "atomic",'
    ' "cpus": 2, "memory": 11989228, "space": 487652,'
    ' "last_check": "2015-12-17T15:48:18.710454"}')
#: Credential JSON for tests; carries the same remote_user and
#: ssh_priv_key values that HOST below is built with
HOST_CREDS_JSON = '{"remote_user": "******", "ssh_priv_key": "dGVzdAo="}'
#: Host model for most tests, built from HOST_JSON plus the credential
#: fields
HOST = Host.new(
    ssh_priv_key='dGVzdAo=',
    remote_user='******',
    **json.loads(HOST_JSON))
#: Hosts model for most tests; contains only HOST
HOSTS = Hosts.new(
    hosts=[HOST]
)
#: Cluster model for most tests; its hostset is empty
CLUSTER = Cluster.new(
    name='cluster',
    status='ok',
    hostset=[],
)
#: Cluster model with HOST for most tests; the hostset holds the HOST
#: model object itself
CLUSTER_WITH_HOST = Cluster.new(
    name='cluster',
    status='ok',
    hostset=[HOST],
)
#: Cluster model with flattened HOST for tests
CLUSTER_WITH_FLAT_HOST = Cluster.new(