def get_service_records():
    """Collect a summary of every service known to the registry.

    Returns:
        dict: ``{'services': [...]}`` holding one dict per record returned by
        ``ServiceRegistry.all()``. Each entry carries the record's name, type,
        address, management port, service port, protocol and the lower-cased
        name of its ``ServiceRecord.Status`` value.
    """
    def _describe(record):
        # Keys are emitted in a fixed order; the status is converted from its
        # stored value to the lower-cased enum member name.
        return {
            'name': record._name,
            'type': record._type,
            'address': record._address,
            'management_port': record._management_port,
            'service_port': record._port,
            'protocol': record._protocol,
            'status': ServiceRecord.Status(int(record._status)).name.lower()
        }

    return {'services': [_describe(entry) for entry in ServiceRegistry.all()]}
async def get_service(cls, request):
    """
    Returns a list of all services or of the selected service

    The optional ``name`` and ``type`` query parameters narrow the result:
    neither given -> all services; only one given -> lookup by that key;
    both given -> lookup by name and type together.

    Args:
        request: the incoming web request; only ``request.query`` is read.

    Returns:
        A JSON response of the form ``{"services": [...]}``. ``service_port``
        is included for a service only when its port value is truthy.

    Raises:
        web.HTTPBadRequest: when the registry raises ``DoesNotExist`` for the
            requested name and/or type.

    :Example: curl -X GET  http://localhost:8082/foglamp/service
    :Example: curl -X GET  http://localhost:8082/foglamp/service?name=X&type=Storage
    """
    service_name = request.query.get('name')
    service_type = request.query.get('type')

    try:
        if not service_name and not service_type:
            services_list = ServiceRegistry.all()
        elif service_name and not service_type:
            services_list = ServiceRegistry.get(name=service_name)
        elif not service_name and service_type:
            services_list = ServiceRegistry.get(s_type=service_type)
        else:
            services_list = ServiceRegistry.filter_by_name_and_type(
                name=service_name, s_type=service_type)
    except service_registry_exceptions.DoesNotExist as ex:
        reason = "Invalid service name and/or type provided"
        detail = str(ex)
        # Only append the exception text when there is some, and separate it
        # from the fixed message so the two do not run together.
        if detail:
            reason = "{}: {}".format(reason, detail)
        raise web.HTTPBadRequest(reason=reason)

    services = []
    for service in services_list:
        svc = {
            "id": service._id,
            "name": service._name,
            "type": service._type,
            "address": service._address,
            "management_port": service._management_port,
            "protocol": service._protocol,
            "status": service._status,
        }
        if service._port:
            svc["service_port"] = service._port
        services.append(svc)

    return web.json_response({"services": services})
async def _monitor_loop(self):
    """async Monitor loop to monitor registered services

    Runs forever. In every round each record from ``ServiceRegistry.all()``
    is inspected:

    * records whose status is not Running, Unresponsive or Failed are skipped;
    * Failed records are never pinged; when ``self._restart_failed`` is
      ``"auto"`` a restart is scheduled once per service id (tracked in
      ``self.restarted_services``);
    * Running/Unresponsive records are pinged on their management port. A
      successful ping sets the status to Running and resets the counter; any
      failure sets the status to Unresponsive and increments the counter;
    * once the counter exceeds ``self._max_attempts`` the service is marked
      as failed in the registry and an 'SRVFL' audit entry is attempted.

    After each round the loop sleeps for ``self._sleep_interval``.

    Task cancellation (``asyncio.CancelledError``) is always propagated and is
    never treated as a ping or audit failure.
    """
    # check health of all micro-services every N seconds
    round_cnt = 0
    # dict to hold current count of current status, keyed by service id.
    # In case of ok and running status, count will always be 1.
    # In case of non running statuses, count shows since when this status is set.
    check_count = {}
    while True:
        round_cnt += 1
        self._logger.debug(
            "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
            .format(round_cnt, self._sleep_interval, self._ping_timeout, self._max_attempts))
        for service_record in ServiceRegistry.all():
            if service_record._id not in check_count:
                check_count.update({service_record._id: 1})

            # Only Running, Unresponsive and Failed services are considered;
            # every other status is skipped for this round.
            if service_record._status not in [
                    ServiceRecord.Status.Running,
                    ServiceRecord.Status.Unresponsive,
                    ServiceRecord.Status.Failed]:
                continue

            self._logger.debug("Service: {} Status: {}".format(
                service_record._name, service_record._status))

            # Failed services are not pinged; optionally schedule one restart
            # per service id and move on to the next record.
            if service_record._status == ServiceRecord.Status.Failed:
                if self._restart_failed == "auto":
                    if service_record._id not in self.restarted_services:
                        self.restarted_services.append(service_record._id)
                        asyncio.ensure_future(
                            self.restart_service(service_record))
                continue

            # Try ping if service status is either running or doubtful
            # (i.e. give service a chance to recover)
            try:
                url = "{}://{}:{}/foglamp/service/ping".format(
                    service_record._protocol, service_record._address,
                    service_record._management_port)
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                            url, timeout=self._ping_timeout) as resp:
                        text = await resp.text()
                        res = json.loads(text)
                        if res["uptime"] is None:
                            raise ValueError('res.uptime is None')
            except asyncio.CancelledError:
                # On Python < 3.8 CancelledError derives from Exception; without
                # this clause the broad handler below would swallow cancellation.
                raise
            except (asyncio.TimeoutError,
                    aiohttp.client_exceptions.ServerTimeoutError) as ex:
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info("ServerTimeoutError: %s, %s", str(ex),
                                  service_record.__repr__())
            except aiohttp.client_exceptions.ClientConnectorError as ex:
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info("ClientConnectorError: %s, %s", str(ex),
                                  service_record.__repr__())
            except ValueError as ex:
                # Covers both undecodable JSON and a None uptime.
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info("Invalid response: %s, %s", str(ex),
                                  service_record.__repr__())
            except Exception as ex:
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info("Exception occurred: %s, %s", str(ex),
                                  service_record.__repr__())
            else:
                service_record._status = ServiceRecord.Status.Running
                check_count[service_record._id] = 1

            if check_count[service_record._id] > self._max_attempts:
                ServiceRegistry.mark_as_failed(service_record._id)
                check_count[service_record._id] = 0
                # Auditing is best-effort: a failure here is logged, not raised.
                try:
                    audit = AuditLogger(connect.get_storage_async())
                    await audit.failure('SRVFL', {'name': service_record._name})
                except asyncio.CancelledError:
                    raise
                except Exception as ex:
                    self._logger.info("Failed to audit service failure %s", str(ex))
        await self._sleep(self._sleep_interval)
async def _monitor_loop(self):
    """async Monitor loop to monitor registered services

    Runs forever. In every round each record from ``ServiceRegistry.all()``
    whose status is Running or Unresponsive is pinged on its management port:

    * a successful ping sets the status to Running and resets the counter;
    * a ``ValueError`` (undecodable JSON or a None uptime) or any other
      exception sets the status to Unresponsive and increments the counter;
    * once the counter exceeds ``self._max_attempts`` the service is marked
      as failed in the registry and an 'SRVFL' audit entry is attempted.

    After each round the loop sleeps for ``self._sleep_interval``.

    Exceptions with an empty message (for example a bare
    ``asyncio.TimeoutError()``) are counted as failures like any other, so a
    service that only ever times out can still reach the failed state. Task
    cancellation is always propagated.
    """
    # Local import: this block does not otherwise reference asyncio, so it
    # does not rely on a module-level import being present.
    import asyncio

    # check health of all micro-services every N seconds
    round_cnt = 0
    # dict to hold current count of current status, keyed by service id.
    # In case of ok and running status, count will always be 1.
    # In case of non running statuses, count shows since when this status is set.
    check_count = {}
    while True:
        round_cnt += 1
        self._logger.debug(
            "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
            .format(round_cnt, self._sleep_interval, self._ping_timeout, self._max_attempts))
        for service_record in ServiceRegistry.all():
            if service_record._id not in check_count:
                check_count.update({service_record._id: 1})

            # Try ping if service status is either running or doubtful
            # (i.e. give service a chance to recover)
            if service_record._status not in [
                    ServiceRecord.Status.Running,
                    ServiceRecord.Status.Unresponsive]:
                continue

            try:
                url = "{}://{}:{}/foglamp/service/ping".format(
                    service_record._protocol, service_record._address,
                    service_record._management_port)
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                            url, timeout=self._ping_timeout) as resp:
                        text = await resp.text()
                        res = json.loads(text)
                        if res["uptime"] is None:
                            raise ValueError('Improper Response')
            except asyncio.CancelledError:
                # On Python < 3.8 CancelledError derives from Exception; without
                # this clause the broad handler below would swallow cancellation.
                raise
            except ValueError:
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info("Marked as doubtful micro-service %s",
                                  service_record.__repr__())
            except Exception as ex:
                # TODO: Fix too broad exception clause
                # An exception raised without arguments stringifies to "", which
                # used to produce an empty log line; log the class name instead
                # so the failure stays identifiable.
                message = str(ex)
                if not message.strip():
                    message = type(ex).__name__
                self._logger.info(
                    "Exception occurred during monitoring: %s", message)
                service_record._status = ServiceRecord.Status.Unresponsive
                check_count[service_record._id] += 1
                self._logger.info(
                    "Marked as unresponsive micro-service %s",
                    service_record.__repr__())
            else:
                service_record._status = ServiceRecord.Status.Running
                check_count[service_record._id] = 1

            if check_count[service_record._id] > self._max_attempts:
                ServiceRegistry.mark_as_failed(service_record._id)
                check_count[service_record._id] = 0
                # Auditing is best-effort: a failure here is logged, not raised.
                try:
                    audit = AuditLogger(connect.get_storage())
                    await audit.failure('SRVFL', {'name': service_record._name})
                except asyncio.CancelledError:
                    raise
                except Exception as ex:
                    self._logger.info("Failed to audit service failure %s", str(ex))
        await self._sleep(self._sleep_interval)