Ejemplo n.º 1
0
    def test_non_zero_exit(self) -> None:
        """
        Verify that repeated non-zero exits of a service are recorded.

        After the non-zero-exit fixture the recorded return code is expected
        to be 2; the EXIT_STATUS environment variable is then set to "3" and
        a second update is expected to record return code 3. Each update is
        expected to bump the failure counter while the clean-exit counter
        stays at 0.
        """
        self._non_zero_exit_fixture()
        update_stats(self.serviceName)
        state_store = ServiceStateWrapper()

        def verify_failed_exit(expected_rc: int, expected_failures: int) -> None:
            # Re-read the stored status on every call so that the effect of
            # the most recent update_stats() is what gets checked.
            snapshot = state_store.get_service_status(self.serviceName)
            self.assertEqual(
                snapshot.latest_rc, expected_rc, "service exit status check",
            )
            self.assertEqual(
                snapshot.latest_service_result,
                ServiceExitStatus.ServiceResult.Value("EXIT_CODE"),
            )
            self.assertEqual(
                snapshot.latest_exit_code,
                ServiceExitStatus.ExitCode.Value("EXITED"),
            )
            self.assertEqual(snapshot.num_fail_exits, expected_failures)
            self.assertEqual(snapshot.num_clean_exits, 0)

        # First failure: values produced by the fixture.
        verify_failed_exit(2, 1)

        # Second failure: same service, different exit status.
        os.environ["EXIT_STATUS"] = "3"
        update_stats(self.serviceName)
        verify_failed_exit(3, 2)
Ejemplo n.º 2
0
def update_stats(service: str) -> None:
    """
    Record the current exit status for ``service`` in the service state store.

    The status comes from ``get_status()`` and is written through a
    ``ServiceStateWrapper``. The write is best-effort: any exception is
    logged (together with the status that could not be stored) and is not
    re-raised.

    Args:
        service: name of the service whose status is being recorded.
    """
    exit_status = get_status()
    state_store = ServiceStateWrapper()
    try:
        state_store.update_service_status(service, exit_status)
    except Exception as err:
        # Keep the exit information in the logs when it cannot be persisted.
        logging.error('Failed to write to redis, status %s', err)
        logging.error('Logging exit info instead \n%s', exit_status)
Ejemplo n.º 3
0
def _collect_service_restart_stats():
    """
    Collect the success and failure restarts for services

    For every service returned by
    ``ServiceStateWrapper().get_all_services_status()`` the
    ``SERVICE_RESTART_STATUS`` metric is set twice: once with
    ``status="Failure"`` to ``num_fail_exits`` and once with
    ``status="Success"`` to ``num_clean_exits``.

    If the service statuses cannot be fetched, the error is logged and the
    function returns without touching the metric, so a failing state store
    does not propagate an exception into the caller.
    """
    # Local import so this block does not depend on a module-level import.
    import logging

    try:
        service_dict = ServiceStateWrapper().get_all_services_status()
    except Exception as e:
        logging.error("Could not fetch service status: %s", e)
        return

    for service_name, status in service_dict.items():
        restart_counts = (
            ("Failure", status.num_fail_exits),
            ("Success", status.num_clean_exits),
        )
        for label, count in restart_counts:
            SERVICE_RESTART_STATUS.labels(
                service_name=service_name,
                status=label,
            ).set(count)
Ejemplo n.º 4
0
def _collect_service_restart_stats():
    """
    Publish per-service restart counters to SERVICE_RESTART_STATUS.

    Each service reported by the service state wrapper gets its "Failure"
    series set to ``num_fail_exits`` and its "Success" series set to
    ``num_clean_exits``. If the statuses cannot be fetched, the error is
    logged and nothing is published.
    """
    try:
        statuses_by_service = ServiceStateWrapper().get_all_services_status()
    except Exception as e:
        logging.error("Could not fetch service status: %s", e)
    else:
        for name, exit_info in statuses_by_service.items():
            failure_series = SERVICE_RESTART_STATUS.labels(
                service_name=name, status="Failure",
            )
            failure_series.set(exit_info.num_fail_exits)

            success_series = SERVICE_RESTART_STATUS.labels(
                service_name=name, status="Success",
            )
            success_series.set(exit_info.num_clean_exits)
Ejemplo n.º 5
0
def main():
    """
    Entry point for the health service.

    Creates the 'health' MagmaService, initializes Sentry, builds a
    StateRecoveryJob from the "state_recovery" section of the service
    config, starts the job, then runs the service loop and closes the
    service once the loop returns.
    """
    health_service = MagmaService('health', None)

    # Error reporting to Sentry is optional
    sentry_init()

    # Wrapper used by the recovery job to access service state
    state_wrapper = ServiceStateWrapper()

    # Settings from the "state_recovery" section of the service YML config
    recovery_cfg = health_service.config["state_recovery"]
    monitored_services = recovery_cfg["services_check"]
    # The interval is configured in minutes; the job is given seconds
    interval_secs = 60 * int(recovery_cfg["interval_check_mins"])
    max_restarts = recovery_cfg["restart_threshold"]
    snapshot_location = recovery_cfg["snapshots_dir"]

    # Redis "dir" setting, with a default when it is not configured
    redis_cfg = load_service_config("redis")
    redis_dump_location = redis_cfg.get("dir", "/var/opt/magma")

    recovery_job = StateRecoveryJob(
        service_state=state_wrapper,
        polling_interval=interval_secs,
        services_check=monitored_services,
        restart_threshold=max_restarts,
        redis_dump_src=redis_dump_location,
        snapshots_dir=snapshot_location,
        service_loop=health_service.loop,
    )
    recovery_job.start()

    # Run the service loop, then clean up once it returns
    health_service.run()
    health_service.close()
Ejemplo n.º 6
0
    def test_coredump_exit(self) -> None:
        """
        Verify core-dump exits are counted as failures and that a later
        successful exit is still recorded correctly.
        """
        service = self.serviceName

        # First core dump
        self._core_dump_fixture()
        update_stats(service)
        state_store = ServiceStateWrapper()
        snapshot = state_store.get_service_status(service)
        self.assertEqual(snapshot.latest_rc, 0, "service exit status check")
        core_dump_result = ServiceExitStatus.ServiceResult.Value("CORE_DUMP")
        self.assertEqual(snapshot.latest_service_result, core_dump_result)
        dumped_code = ServiceExitStatus.ExitCode.Value("DUMPED")
        self.assertEqual(snapshot.latest_exit_code, dumped_code)
        self.assertEqual(snapshot.num_fail_exits, 1)
        self.assertEqual(snapshot.num_clean_exits, 0)

        # Second core dump: only the failure counter should move
        update_stats(service)
        snapshot = state_store.get_service_status(service)
        self.assertEqual(snapshot.num_fail_exits, 2)
        self.assertEqual(snapshot.num_clean_exits, 0)

        # A clean exit after the failures must still be recorded, and the
        # failure counter must keep its previous value
        self._success_fixture()
        update_stats(service)
        state_store = ServiceStateWrapper()
        snapshot = state_store.get_service_status(service)
        self.assertEqual(snapshot.latest_rc, 0, "service exit status check")
        success_result = ServiceExitStatus.ServiceResult.Value("SUCCESS")
        self.assertEqual(
            snapshot.latest_service_result,
            success_result,
            "Service result check",
        )
        exited_code = ServiceExitStatus.ExitCode.Value("EXITED")
        self.assertEqual(
            snapshot.latest_exit_code,
            exited_code,
            "Service exit check",
        )
        self.assertEqual(snapshot.num_clean_exits, 1, "Clean exit check")
        self.assertEqual(snapshot.num_fail_exits, 2)
Ejemplo n.º 7
0
    def test_success_exit(self) -> None:
        """
        Verify that clean exits are counted per service: twice for the first
        service, then once for a second service, which must also report no
        failed exits.
        """
        self._success_fixture()
        update_stats(self.serviceName)
        state_store = ServiceStateWrapper()

        def verify_clean_exit(
            service_name,
            expected_clean_exits,
            exit_code_msg="Service exit check",
        ):
            # Fetch the stored status and run the checks shared by every
            # round; the status is returned for any extra assertions.
            snapshot = state_store.get_service_status(service_name)
            self.assertEqual(
                snapshot.latest_rc, 0, "service exit status check",
            )
            self.assertEqual(
                snapshot.latest_service_result,
                ServiceExitStatus.ServiceResult.Value("SUCCESS"),
                "Service result check",
            )
            self.assertEqual(
                snapshot.latest_exit_code,
                ServiceExitStatus.ExitCode.Value("EXITED"),
                exit_code_msg,
            )
            self.assertEqual(
                snapshot.num_clean_exits,
                expected_clean_exits,
                "Clean exit check",
            )
            return snapshot

        verify_clean_exit(self.serviceName, 1)

        # Multiple restarts
        update_stats(self.serviceName)
        verify_clean_exit(self.serviceName, 2)

        # Multiple service restarts
        update_stats(self.serviceName2)
        second_service_status = verify_clean_exit(
            self.serviceName2, 1, "Service exit code check",
        )
        self.assertEqual(
            second_service_status.num_fail_exits, 0, "Failure exit status",
        )
Ejemplo n.º 8
0
 def tearDown(self):
     """Call cleanup_service_status() on a fresh ServiceStateWrapper."""
     ServiceStateWrapper().cleanup_service_status()