def run(cls) -> Dict:
    """Round-trip a health task through a worker and report the outcome.

    Sends ``cls.WORKER_HEALTH_TASK`` with two identical random integers
    (1..10), waits for the result up to the configured timeout, and compares
    the worker's answer with a local call to ``health_task``.

    Returns:
        A one-entry dict mapping ``cls.WORKER_NAME`` to a ``Result``:
        WARNING when the worker's answer differs from the local one, ERROR
        when sending/waiting raised, and a default ``Result()`` otherwise.
    """
    timeout = conf.get(HEALTH_CHECK_WORKER_TIMEOUT)
    operand = random.randint(1, 10)
    outcome = None

    try:
        pending = celery_app.send_task(
            cls.WORKER_HEALTH_TASK,
            args=[operand, operand],
            expires=timeout,
        )
        # Block until the worker answers (or the timeout elapses).
        pending.get(timeout=timeout)
        remote_answer = pending.result
        local_answer = health_task(operand, operand)
        if remote_answer != local_answer:
            outcome = Result(
                message='Service returned wrong health result.',
                severity=Result.WARNING,
            )
    except IOError:
        outcome = Result(
            message='Service has an "IOError".',
            severity=Result.ERROR,
        )
    except Exception as err:
        outcome = Result(
            message='Service has an "{}" error.'.format(err),
            severity=Result.ERROR,
        )

    # Fall back to a default Result when nothing (truthy) was recorded above.
    outcome = outcome or Result()
    return {cls.WORKER_NAME: outcome}
def run(cls):
    """Check root filesystem usage against the class thresholds.

    Reads ``psutil.disk_usage('/')`` and compares its ``percent`` with
    ``cls.ERROR_THRESHOLD`` first, then ``cls.WARNING_THRESHOLD``.

    Returns:
        ``{'DISK': Result}`` where the result is ERROR when usage is at or
        above the error threshold, WARNING when it is at or above the warning
        threshold, a default ``Result()`` otherwise, and ERROR if a
        ``ValueError`` was raised while checking.
    """
    try:
        disk = psutil.disk_usage('/')
        if disk.percent >= cls.ERROR_THRESHOLD:
            result = Result(
                message='{host} {percent}% disk usage exceeds error {disk_usage}%'.format(
                    host=cls.HOST,
                    percent=disk.percent,
                    disk_usage=cls.ERROR_THRESHOLD,
                ),
                severity=Result.ERROR,
            )
        elif disk.percent >= cls.WARNING_THRESHOLD:
            # Report the threshold that was actually crossed, with WARNING
            # severity so it can be told apart from the error branch above.
            result = Result(
                message='{host} {percent}% disk usage exceeds warning {disk_usage}%'.format(
                    host=cls.HOST,
                    percent=disk.percent,
                    disk_usage=cls.WARNING_THRESHOLD,
                ),
                severity=Result.WARNING,
            )
        else:
            result = Result()
    except ValueError:
        result = Result(
            message='An error was raised, try again later.',
            severity=Result.ERROR,
        )
    return {'DISK': result}
def check():
    """Open and close a connection to the configured broker.

    Connects to the URL stored under ``CELERY_BROKER_URL`` inside a context
    manager so the connection is released afterwards.

    Returns:
        A default ``Result()`` when the connection succeeds, otherwise an
        ERROR ``Result`` whose message describes the failure.
    """
    try:
        # The context manager releases the connection on exit.
        with Connection(conf.get('CELERY_BROKER_URL')) as broker:
            broker.connect()
    except ConnectionRefusedError:
        outcome = Result(
            message='Service unable to connect, "Connection was refused".',
            severity=Result.ERROR,
        )
    except AccessRefused:
        outcome = Result(
            message='Service unable to connect, "Authentication error".',
            severity=Result.ERROR,
        )
    except IOError:
        outcome = Result(
            message='Service has an "IOError".',
            severity=Result.ERROR,
        )
    except Exception as err:
        outcome = Result(
            message='Service has an "{}" error.'.format(err),
            severity=Result.ERROR,
        )
    else:
        outcome = Result()
    return outcome
def redis_health(connection):
    """Probe a Redis connection by requesting its ``info()``.

    Args:
        connection: Object exposing ``info()`` that returns a mapping with a
            ``'used_memory'`` entry.

    Returns:
        A ``Result`` carrying the ``used_memory`` value on success, or an
        ERROR ``Result`` describing the connection failure or other error.
    """
    try:
        memory_used = connection.info()['used_memory']
        outcome = Result(
            message='Service is healthy, db size {}'.format(memory_used))
    except redis.exceptions.ConnectionError:
        outcome = Result(
            message='Service unable to connect, "Connection error".',
            severity=Result.ERROR,
        )
    except Exception as err:
        outcome = Result(
            message='Service unable to connect, encountered error "{}".'.format(err),
            severity=Result.ERROR,
        )
    return outcome
def run(cls) -> Dict:
    """Query the local stream service's ``/healthz`` endpoint.

    Returns:
        ``{'STREAMS': Result}``: a default ``Result()`` when the response
        status code is 200, otherwise an ERROR ``Result`` that includes the
        status code.
    """
    endpoint = '{}/healthz'.format(get_local_stream())
    code = safe_request(endpoint, 'GET').status_code

    if code == 200:
        return {'STREAMS': Result()}

    unhealthy = Result(
        message='Service is not healthy, response {}'.format(code),
        severity=Result.ERROR,
    )
    return {'STREAMS': unhealthy}
def run(cls):
    """Issue a GET through ``safe_request`` and map the status to a result.

    Returns:
        ``{'STREAMS': Result}``: a default ``Result()`` when the response
        status code is 200, otherwise an ERROR ``Result`` that includes the
        status code.
    """
    # NOTE(review): the request URL is an empty string; this looks like a
    # placeholder for a real endpoint -- confirm the intended target.
    code = safe_request('', 'GET').status_code

    if code == 200:
        return {'STREAMS': Result()}

    unhealthy = Result(
        message='Service is not healthy, response {}'.format(code),
        severity=Result.ERROR,
    )
    return {'STREAMS': unhealthy}
def run(cls) -> Dict:
    """Query the ``/_health`` endpoint under the settings WS API URL.

    Returns:
        ``{'STREAMS': Result}``: a default ``Result()`` when the response
        status code is 200, otherwise an ERROR ``Result`` that includes the
        status code.
    """
    # NOTE(review): the result is reported under the 'STREAMS' key even though
    # the URL comes from get_settings_ws_api_url() -- confirm the key is
    # intentional.
    endpoint = '{}/_health'.format(get_settings_ws_api_url())
    code = safe_request(endpoint, 'GET').status_code

    if code == 200:
        return {'STREAMS': Result()}

    unhealthy = Result(
        message='Service is not healthy, response {}'.format(code),
        severity=Result.ERROR,
    )
    return {'STREAMS': unhealthy}
def run(cls):
    """Run ``cls.redis_health`` against every Redis store connection.

    Stores are probed one after another in a fixed order; only unhealthy
    results are collected.

    Returns:
        A dict of the unhealthy results keyed by store label, or
        ``{'REDIS': Result()}`` when every store reported healthy.
    """
    stores = (
        ('REDIS_EPH_TOKENS', RedisEphemeralTokens),
        ('REDIS_SESSIONS', RedisSessions),
        ('REDIS_TTL', RedisTTL),
        ('REDIS_TO_STREAM', RedisToStream),
        ('REDIS_CONTAINERS', RedisJobContainers),
    )

    failures = {}
    for label, store in stores:
        status = cls.redis_health(store.connection())
        if not status.is_healthy:
            failures[label] = status

    if failures:
        return failures
    return {'REDIS': Result()}
def pg_health():
    """Check that the database answers queries and report its size.

    Runs ``SELECT 1`` and, when that returns 1, queries
    ``pg_database_size('postgres')``.

    Returns:
        A ``Result`` whose message includes the database size when the probe
        succeeds, a WARNING ``Result`` when ``SELECT 1`` does not return 1,
        or an ERROR ``Result`` when any exception is raised.
    """
    try:
        with connection.cursor() as cursor:
            cursor.execute('SELECT 1; -- Healthcheck')
            health = cursor.fetchone()[0] == 1
            if health:
                cursor.execute(
                    "select pg_database_size('postgres') as size")
                # fetchone() yields a row; take the first column so the
                # message shows the size itself rather than the row's repr.
                size = cursor.fetchone()[0]
                return Result(
                    message='Service is healthy, db size {}'.format(size))
        return Result(message='Service is not working.',
                      severity=Result.WARNING)
    except Exception as e:
        return Result(
            message='Service unable to connect, encountered "{}" error.'.format(e),
            severity=Result.ERROR)
def run(cls) -> Dict:
    """Write a probe key to the cache and read it back.

    Stores ``'test'`` under ``'health_check'`` (third ``cache.set`` argument
    is 1) and verifies that the same value is returned by ``cache.get``.

    Returns:
        ``{'CACHE': Result}``: a default ``Result()`` when the value round
        trips, otherwise an ERROR ``Result`` describing the mismatch or the
        ``CacheKeyWarning`` / ``ValueError`` / ``ConnectionError`` raised.
    """
    try:
        cache.set('health_check', 'test', 1)
        round_trip = cache.get('health_check')
        mismatch = round_trip != 'test'
        if mismatch:
            outcome = Result(message='Cache key does not match.',
                             severity=Result.ERROR)
        else:
            outcome = Result()
    except CacheKeyWarning:
        outcome = Result(message='Cache key warning.',
                         severity=Result.ERROR)
    except ValueError:
        outcome = Result(message='Cache raised a ValueError.',
                         severity=Result.ERROR)
    except ConnectionError:
        outcome = Result(message='Cache raised a ConnectionError.',
                         severity=Result.ERROR)
    return {'CACHE': outcome}
def run(cls):
    """Check available system memory against the class thresholds.

    Reads ``psutil.virtual_memory().available`` and compares it with
    ``cls.ERROR_THRESHOLD`` and then ``cls.WARNING_THRESHOLD``, each scaled
    by 1024 * 1024 before the comparison.

    Returns:
        ``{'MEMORY': Result}`` where the result is ERROR when available
        memory is below the error threshold, WARNING when it is below the
        warning threshold, a default ``Result()`` otherwise, and ERROR if a
        ``ValueError`` was raised while checking.
    """
    try:
        memory = psutil.virtual_memory()
        # Human-readable amount used in the messages (available / 1024 / 1024).
        available = '{:n}'.format(int(memory.available / 1024 / 1024))
        if memory.available < (cls.ERROR_THRESHOLD * 1024 * 1024):
            result = Result(
                message='Memory available ({}) is below error threshold ({})'.format(
                    available, cls.ERROR_THRESHOLD),
                severity=Result.ERROR,
            )
        elif memory.available < (cls.WARNING_THRESHOLD * 1024 * 1024):
            # Compare against the warning threshold here; repeating the error
            # threshold would make this branch unreachable.
            result = Result(
                message='Memory available ({}) is below warning threshold ({})'.format(
                    available, cls.WARNING_THRESHOLD),
                severity=Result.WARNING,
            )
        else:
            result = Result()
    except ValueError:
        result = Result(
            message='An error was raised, try again later.',
            severity=Result.ERROR,
        )
    return {'MEMORY': result}