예제 #1
0
def get_service_records():
    """Summarise every service currently held by the service registry.

    Returns:
        dict: ``{'services': [...]}`` with one entry per record returned by
        ``ServiceRegistry.all()``. Each entry carries the record's name,
        type, address, management port, service port, protocol and the
        lower-cased name of its ``ServiceRecord.Status`` value.
    """

    def _describe(record):
        # The stored status is coerced to int and mapped back onto the
        # Status enum so that the symbolic name can be reported.
        status_name = ServiceRecord.Status(int(record._status)).name
        return {
            'name': record._name,
            'type': record._type,
            'address': record._address,
            'management_port': record._management_port,
            'service_port': record._port,
            'protocol': record._protocol,
            'status': status_name.lower(),
        }

    return {'services': [_describe(rec) for rec in ServiceRegistry.all()]}
예제 #2
0
    async def get_service(cls, request):
        """ Return all registered services, optionally filtered by name and/or type

        The optional query parameters ``name`` and ``type`` narrow the result.
        A missing or empty parameter is treated as "no filter".

        :param request: incoming request; only ``request.query`` is read
        :return: JSON response of the form ``{"services": [...]}``
        :raises web.HTTPBadRequest: when the registry lookup raises ``DoesNotExist``

        :Example: curl -X GET  http://localhost:8082/foglamp/service
        :Example: curl -X GET  http://localhost:8082/foglamp/service?name=X&type=Storage
        """
        service_name = request.query.get('name')
        service_type = request.query.get('type')

        try:
            if service_name and service_type:
                matches = ServiceRegistry.filter_by_name_and_type(
                    name=service_name, s_type=service_type)
            elif service_name:
                matches = ServiceRegistry.get(name=service_name)
            elif service_type:
                matches = ServiceRegistry.get(s_type=service_type)
            else:
                matches = ServiceRegistry.all()
        except service_registry_exceptions.DoesNotExist as ex:
            raise web.HTTPBadRequest(
                reason="Invalid service name and/or type provided" + str(ex))

        def _as_dict(record):
            entry = {
                "id": record._id,
                "name": record._name,
                "type": record._type,
                "address": record._address,
                "management_port": record._management_port,
                "protocol": record._protocol,
                "status": record._status,
            }
            # The service port is only reported when the record has a truthy one.
            if record._port:
                entry["service_port"] = record._port
            return entry

        return web.json_response(
            {"services": [_as_dict(record) for record in matches]})
예제 #3
0
    async def _monitor_loop(self):
        """Run the endless health-check cycle over all registered services.

        Every round iterates ``ServiceRegistry.all()`` and, per service:

        * skips it unless its status is Running, Unresponsive or Failed;
        * for a Failed service, schedules ``self.restart_service`` (once per
          service id, and only when ``self._restart_failed`` is ``"auto"``)
          and does not ping it;
        * otherwise pings ``<protocol>://<address>:<management_port>/foglamp/service/ping``.
          A reply whose ``"uptime"`` is not None sets the status to Running
          and resets the attempt counter to 1; any exception sets the status
          to Unresponsive and increments the counter;
        * once the counter exceeds ``self._max_attempts`` the service is
          marked as failed, the counter is reset to 0 and an ``SRVFL`` audit
          entry is attempted (audit errors are only logged).

        The loop then sleeps for ``self._sleep_interval`` and starts over.
        """
        # Log format per ping-failure category. Entries are checked in order
        # and the first match wins; anything unmatched uses the generic format.
        failure_formats = (
            ((asyncio.TimeoutError,
              aiohttp.client_exceptions.ServerTimeoutError),
             "ServerTimeoutError: %s, %s"),
            (aiohttp.client_exceptions.ClientConnectorError,
             "ClientConnectorError: %s, %s"),
            (ValueError, "Invalid response: %s, %s"),
        )
        monitored_states = (
            ServiceRecord.Status.Running,
            ServiceRecord.Status.Unresponsive,
            ServiceRecord.Status.Failed,
        )
        # Per-service counter keyed by service id: stays at 1 while the
        # service answers, grows by one for each consecutive failed ping.
        attempts = {}
        iteration = 0

        while True:
            iteration += 1
            self._logger.debug(
                "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
                .format(iteration, self._sleep_interval, self._ping_timeout,
                        self._max_attempts))

            for record in ServiceRegistry.all():
                sid = record._id
                attempts.setdefault(sid, 1)

                if record._status not in monitored_states:
                    continue

                self._logger.debug("Service: {} Status: {}".format(
                    record._name, record._status))

                if record._status == ServiceRecord.Status.Failed:
                    needs_restart = (self._restart_failed == "auto"
                                     and sid not in self.restarted_services)
                    if needs_restart:
                        self.restarted_services.append(sid)
                        asyncio.ensure_future(self.restart_service(record))
                    continue

                try:
                    ping_url = "{}://{}:{}/foglamp/service/ping".format(
                        record._protocol, record._address,
                        record._management_port)
                    async with aiohttp.ClientSession() as session:
                        async with session.get(
                                ping_url, timeout=self._ping_timeout) as resp:
                            reply = json.loads(await resp.text())
                            if reply["uptime"] is None:
                                raise ValueError('res.uptime is None')
                except Exception as ex:
                    log_format = "Exception occurred: %s, %s"
                    for exc_types, candidate in failure_formats:
                        if isinstance(ex, exc_types):
                            log_format = candidate
                            break
                    record._status = ServiceRecord.Status.Unresponsive
                    attempts[sid] += 1
                    self._logger.info(log_format, str(ex), repr(record))
                else:
                    record._status = ServiceRecord.Status.Running
                    attempts[sid] = 1

                if attempts[sid] > self._max_attempts:
                    ServiceRegistry.mark_as_failed(sid)
                    attempts[sid] = 0
                    try:
                        audit = AuditLogger(connect.get_storage_async())
                        await audit.failure('SRVFL', {'name': record._name})
                    except Exception as ex:
                        # Auditing is best effort; a failure here must not
                        # stop the monitoring loop.
                        self._logger.info("Failed to audit service failure %s",
                                          str(ex))

            await self._sleep(self._sleep_interval)
예제 #4
0
    async def _monitor_loop(self):
        """Async monitor loop that health-checks the registered services.

        Runs forever. In each round every service returned by
        ``ServiceRegistry.all()`` whose status is Running or Unresponsive is
        pinged at ``<protocol>://<address>:<management_port>/foglamp/service/ping``:

        * a reply whose ``"uptime"`` is not None sets the status to Running
          and resets the service's counter to 1;
        * any failure (invalid reply or any other exception) sets the status
          to Unresponsive and increments the counter;
        * once the counter exceeds ``self._max_attempts`` the service is
          marked as failed, its counter is reset to 0 and an ``SRVFL`` audit
          entry is attempted (audit errors are only logged).

        After each round the loop sleeps for ``self._sleep_interval``.
        """
        # check health of all micro-services every N seconds
        round_cnt = 0
        check_count = {}  # dict to hold current count of current status.
        # In case of ok and running status, count will always be 1.
        # In case of non running statuses, count shows since when this status is set.
        while True:
            round_cnt += 1
            self._logger.debug(
                "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
                .format(round_cnt, self._sleep_interval, self._ping_timeout,
                        self._max_attempts))
            for service_record in ServiceRegistry.all():
                if service_record._id not in check_count:
                    check_count.update({service_record._id: 1})
                # Try ping if service status is either running or doubtful (i.e. give service a chance to recover)
                if service_record._status not in [
                        ServiceRecord.Status.Running,
                        ServiceRecord.Status.Unresponsive
                ]:
                    continue
                try:
                    url = "{}://{}:{}/foglamp/service/ping".format(
                        service_record._protocol, service_record._address,
                        service_record._management_port)
                    async with aiohttp.ClientSession() as session:
                        async with session.get(
                                url, timeout=self._ping_timeout) as resp:
                            text = await resp.text()
                            res = json.loads(text)
                            if res["uptime"] is None:
                                raise ValueError('Improper Response')
                except ValueError:
                    service_record._status = ServiceRecord.Status.Unresponsive
                    check_count[service_record._id] += 1
                    self._logger.info("Marked as doubtful micro-service %s",
                                      service_record.__repr__())
                except Exception as ex:  # TODO: Fix too broad exception clause
                    # An exception raised without arguments (a ping timeout is
                    # the likely case - TODO confirm) stringifies to "". Such
                    # failures used to be skipped entirely, so a service that
                    # failed this way was never counted as unresponsive. Every
                    # failed ping is now counted, and the exception class name
                    # is logged when there is no message to show.
                    message = str(ex)
                    if not message.strip():
                        message = type(ex).__name__
                    self._logger.info(
                        "Exception occurred during monitoring: %s", message)
                    service_record._status = ServiceRecord.Status.Unresponsive
                    check_count[service_record._id] += 1
                    self._logger.info(
                        "Marked as unresponsive micro-service %s",
                        service_record.__repr__())
                else:
                    service_record._status = ServiceRecord.Status.Running
                    check_count[service_record._id] = 1

                if check_count[service_record._id] > self._max_attempts:
                    ServiceRegistry.mark_as_failed(service_record._id)
                    check_count[service_record._id] = 0
                    try:
                        audit = AuditLogger(connect.get_storage())
                        await audit.failure('SRVFL',
                                            {'name': service_record._name})
                    except Exception as ex:
                        # Auditing is best effort; never let it break the loop.
                        self._logger.info("Failed to audit service failure %s",
                                          str(ex))
            await self._sleep(self._sleep_interval)