コード例 #1
0
    async def _monitor_loop(self):
        """Ping every registered service in an endless loop and track its health.

        Each round iterates ``ServiceRegistry.all()`` and, per service record:

        * skips records whose status is not Running, Unresponsive or Failed;
        * for a Failed record, schedules ``self.restart_service(record)`` at
          most once per service id (ids are remembered in
          ``self.restarted_services``) when ``self._restart_failed`` equals
          ``"auto"``, and never pings it;
        * otherwise sends an HTTP GET to the record's
          ``/foglamp/service/ping`` management URL with ``self._ping_timeout``
          and expects a JSON body whose ``"uptime"`` entry is not ``None``.

        A successful ping sets the record to Running and resets its counter
        to 1. Any failed ping sets the record to Unresponsive and increments
        the counter. When the counter exceeds ``self._max_attempts`` the
        record is marked failed through ``ServiceRegistry.mark_as_failed``,
        the counter is reset to 0 and an ``SRVFL`` audit entry is attempted
        (audit errors are only logged). After each round the loop awaits
        ``self._sleep(self._sleep_interval)``.

        Raises:
            asyncio.CancelledError: re-raised as soon as it is seen, so that
                cancelling the monitor task is never recorded as a service
                failure.
        """
        round_cnt = 0
        # Per-service-id counter for the current status.
        # For a healthy (running) service the count is always 1.
        # For a non-running status the count shows for how many pings the
        # service has been failing.
        check_count = {}
        monitored_statuses = (
            ServiceRecord.Status.Running,
            ServiceRecord.Status.Unresponsive,
            ServiceRecord.Status.Failed,
        )
        while True:
            round_cnt += 1
            self._logger.debug(
                "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
                .format(round_cnt, self._sleep_interval, self._ping_timeout,
                        self._max_attempts))
            for service_record in ServiceRegistry.all():
                check_count.setdefault(service_record._id, 1)

                # Only running, doubtful (unresponsive) or failed services are
                # handled; a doubtful service gets a chance to recover.
                if service_record._status not in monitored_statuses:
                    continue

                self._logger.debug("Service: {} Status: {}".format(
                    service_record._name, service_record._status))

                if service_record._status == ServiceRecord.Status.Failed:
                    # A failed service is never pinged; it is restarted at
                    # most once, and only in "auto" restart mode.
                    if (self._restart_failed == "auto" and
                            service_record._id not in self.restarted_services):
                        self.restarted_services.append(service_record._id)
                        asyncio.ensure_future(
                            self.restart_service(service_record))
                    continue

                # (log format, exception text) of a failed ping, or None.
                failure = None
                try:
                    url = "{}://{}:{}/foglamp/service/ping".format(
                        service_record._protocol, service_record._address,
                        service_record._management_port)
                    async with aiohttp.ClientSession() as session:
                        async with session.get(
                                url, timeout=self._ping_timeout) as resp:
                            text = await resp.text()
                            res = json.loads(text)
                            if res["uptime"] is None:
                                raise ValueError('res.uptime is None')
                except asyncio.CancelledError:
                    # Before Python 3.8 CancelledError derives from Exception;
                    # without this clause the catch-all below would swallow a
                    # cancellation and blame the service instead.
                    raise
                except (asyncio.TimeoutError,
                        aiohttp.client_exceptions.ServerTimeoutError) as ex:
                    failure = ("ServerTimeoutError: %s, %s", str(ex))
                except aiohttp.client_exceptions.ClientConnectorError as ex:
                    failure = ("ClientConnectorError: %s, %s", str(ex))
                except ValueError as ex:
                    # Covers both undecodable JSON and a None uptime.
                    failure = ("Invalid response: %s, %s", str(ex))
                except Exception as ex:
                    failure = ("Exception occurred: %s, %s", str(ex))

                if failure is None:
                    service_record._status = ServiceRecord.Status.Running
                    check_count[service_record._id] = 1
                else:
                    log_format, detail = failure
                    # Update the status before logging so the logged repr
                    # reflects the new state.
                    service_record._status = ServiceRecord.Status.Unresponsive
                    check_count[service_record._id] += 1
                    self._logger.info(log_format, detail,
                                      repr(service_record))

                if check_count[service_record._id] > self._max_attempts:
                    ServiceRegistry.mark_as_failed(service_record._id)
                    check_count[service_record._id] = 0
                    try:
                        audit = AuditLogger(connect.get_storage_async())
                        await audit.failure('SRVFL',
                                            {'name': service_record._name})
                    except asyncio.CancelledError:
                        raise
                    except Exception as ex:
                        # Auditing is best effort; never let it stop monitoring.
                        self._logger.info("Failed to audit service failure %s",
                                          str(ex))
            await self._sleep(self._sleep_interval)
コード例 #2
0
ファイル: test_service.py プロジェクト: took13/FogLAMP
    async def test_get_health(self, mocker, client):
        """GET /foglamp/service reports every registered service and its status.

        First checks the response for an empty registry, then registers four
        services, unregisters the third and marks the fourth as failed, and
        checks the reported statuses ('running', 'running', 'shutdown',
        'failed'). Also expects exactly six calls to the registry logger's
        ``info`` while the registry is being populated and queried.
        """
        endpoint = '/foglamp/service'

        # Nothing registered yet: the endpoint answers with an empty list.
        empty_resp = await client.get(endpoint)
        assert 200 == empty_resp.status
        empty_body = await empty_resp.text()
        assert {'services': []} == json.loads(empty_body)

        mocker.patch.object(InterestRegistry, "__init__", return_value=None)
        mocker.patch.object(InterestRegistry, "get", return_value=list())

        # Positional arguments for ServiceRegistry.register, in call order.
        registrations = (
            ('name1', 'Storage', 'address1', 1, 1, 'protocol1'),
            ('name2', 'Southbound', 'address2', 2, 2, 'protocol2'),
            ('name3', 'Southbound', 'address3', 3, 3, 'protocol3'),
            ('name4', 'Southbound', 'address4', 4, 4, 'protocol4'),
        )
        expected_services = [
            {
                'name': 'name1',
                'type': 'Storage',
                'address': 'address1',
                'service_port': 1,
                'management_port': 1,
                'protocol': 'protocol1',
                'status': 'running',
            },
            {
                'name': 'name2',
                'type': 'Southbound',
                'address': 'address2',
                'service_port': 2,
                'management_port': 2,
                'protocol': 'protocol2',
                'status': 'running',
            },
            {
                'name': 'name3',
                'type': 'Southbound',
                'address': 'address3',
                'service_port': 3,
                'management_port': 3,
                'protocol': 'protocol3',
                'status': 'shutdown',
            },
            {
                'name': 'name4',
                'type': 'Southbound',
                'address': 'address4',
                'service_port': 4,
                'management_port': 4,
                'protocol': 'protocol4',
                'status': 'failed',
            },
        ]

        with patch.object(ServiceRegistry._logger, 'info') as log_patch_info:
            # Populate the registry, keeping the ids in registration order.
            service_ids = [
                ServiceRegistry.register(*args) for args in registrations
            ]

            ServiceRegistry.unregister(service_ids[2])
            ServiceRegistry.mark_as_failed(service_ids[3])

            populated_resp = await client.get(endpoint)
            assert 200 == populated_resp.status
            populated_body = await populated_resp.text()
            assert json.loads(populated_body) == {
                'services': expected_services
            }
        assert 6 == log_patch_info.call_count
コード例 #3
0
    async def _monitor_loop(self):
        """Ping every registered service in an endless loop and track its health.

        Each round iterates ``ServiceRegistry.all()``. Records whose status is
        neither Running nor Unresponsive are skipped. Every other record is
        pinged with an HTTP GET to its ``/foglamp/service/ping`` management
        URL using ``self._ping_timeout``; the JSON reply must carry an
        ``"uptime"`` entry that is not ``None``.

        A successful ping sets the record to Running and resets its counter
        to 1. Every failed ping, including one whose exception has an empty
        message, sets the record to Unresponsive and increments the counter.
        When the counter exceeds ``self._max_attempts`` the record is marked
        failed through ``ServiceRegistry.mark_as_failed``, the counter is
        reset to 0 and an ``SRVFL`` audit entry is attempted (audit errors
        are only logged). After each round the loop awaits
        ``self._sleep(self._sleep_interval)``.

        Raises:
            asyncio.CancelledError: re-raised as soon as it is seen, so that
                cancelling the monitor task is never treated as a ping result.
        """
        # Imported locally because this method references asyncio directly and
        # the module-level imports are not guaranteed to provide it.
        import asyncio

        round_cnt = 0
        # Per-service-id counter for the current status.
        # For a healthy (running) service the count is always 1.
        # For a non-running status the count shows for how many pings the
        # service has been failing.
        check_count = {}
        while True:
            round_cnt += 1
            self._logger.debug(
                "Starting next round#{} of service monitoring, sleep/i:{} ping/t:{} max/a:{}"
                .format(round_cnt, self._sleep_interval, self._ping_timeout,
                        self._max_attempts))
            for service_record in ServiceRegistry.all():
                check_count.setdefault(service_record._id, 1)
                # Ping only running or doubtful services (a doubtful service
                # gets a chance to recover).
                if service_record._status not in [
                        ServiceRecord.Status.Running,
                        ServiceRecord.Status.Unresponsive
                ]:
                    continue
                try:
                    url = "{}://{}:{}/foglamp/service/ping".format(
                        service_record._protocol, service_record._address,
                        service_record._management_port)
                    async with aiohttp.ClientSession() as session:
                        async with session.get(
                                url, timeout=self._ping_timeout) as resp:
                            text = await resp.text()
                            res = json.loads(text)
                            if res["uptime"] is None:
                                raise ValueError('Improper Response')
                except asyncio.CancelledError:
                    # Before Python 3.8 CancelledError derives from Exception;
                    # let a cancellation stop the loop instead of handling it
                    # as a ping outcome.
                    raise
                except ValueError:
                    # Covers both undecodable JSON and a None uptime.
                    service_record._status = ServiceRecord.Status.Unresponsive
                    check_count[service_record._id] += 1
                    self._logger.info("Marked as doubtful micro-service %s",
                                      repr(service_record))
                except Exception as ex:  # TODO: Fix too broad exception clause
                    # Some exceptions carry no message at all (for example a
                    # bare asyncio.TimeoutError raised when the ping times
                    # out). They are real failures too, so fall back to the
                    # exception type name rather than ignoring them.
                    detail = str(ex)
                    if not detail.strip():
                        detail = type(ex).__name__
                    self._logger.info(
                        "Exception occurred during monitoring: %s", detail)
                    service_record._status = ServiceRecord.Status.Unresponsive
                    check_count[service_record._id] += 1
                    self._logger.info(
                        "Marked as unresponsive micro-service %s",
                        repr(service_record))
                else:
                    service_record._status = ServiceRecord.Status.Running
                    check_count[service_record._id] = 1

                if check_count[service_record._id] > self._max_attempts:
                    ServiceRegistry.mark_as_failed(service_record._id)
                    check_count[service_record._id] = 0
                    try:
                        audit = AuditLogger(connect.get_storage())
                        await audit.failure('SRVFL',
                                            {'name': service_record._name})
                    except asyncio.CancelledError:
                        raise
                    except Exception as ex:
                        # Auditing is best effort; never let it stop monitoring.
                        self._logger.info("Failed to audit service failure %s",
                                          str(ex))
            await self._sleep(self._sleep_interval)