def test_https_health_check_healthy(protocol):
    """Check that the SSL-enabled nginx app turns healthy for ``protocol``.

    Meant for the HTTPS and MESOS_HTTPS protocols. The app definition comes
    from a prepared nginx image that enables SSL (self-signed certificate)
    and listens on 443; the health check is created with ``port_index=1``.
    """
    marathon_client = marathon.create_client()
    nginx_app = nginx_with_ssl_support()

    # Name the check so the final assertion reads as a single step.
    https_check = health_check(protocol=protocol, port_index=1)
    assert_app_healthy(marathon_client, nginx_app, https_check)
def test_https_health_check_healthy(protocol):
    """Check HTTPS / MESOS_HTTPS health checks against an SSL-enabled nginx.

    The app definition comes from a prepared nginx image that enables SSL
    (self-signed certificate) and listens on 443; the health check is created
    with ``port_index=1``.
    """
    # The version requirement below is stated to hold for both root Marathon and MoM.
    requires_marathon_version('1.4.2')

    marathon_client = marathon.create_client()
    nginx_app = apps.docker_nginx_ssl()

    # Name the check so the final assertion reads as a single step.
    https_check = common.health_check(protocol=protocol, port_index=1)
    assert_app_healthy(marathon_client, nginx_app, https_check)
def test_failing_health_check_results_in_unhealthy_app():
    """Deploy an app whose health check is meant to never pass.

    The check is configured with '/bad-url'. The app is then expected to
    report one running task that is unhealthy and no healthy tasks.
    """
    app_def = apps.http_server()
    failing_check = common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)
    app_def['healthChecks'] = [failing_check]

    marathon_client = marathon.create_client()
    marathon_client.add_app(app_def)

    # Expected end state, evaluated with at most 30 attempts.
    unhealthy_state = eventually(
        has_values(tasksRunning=1, tasksHealthy=0, tasksUnhealthy=1),
        max_attempts=30)

    # The app id is looked up on every call, exactly as the app is re-fetched each time.
    assert_that(lambda: marathon_client.get_app(app_def["id"]), unhealthy_state)
def test_task_gets_restarted_due_to_network_split():
    """Verify that a task is restarted when a network partition fails its health check.

    Steps:
      1. Deploy an HTTP server app with a health check, pinned to the host
         returned by ``common.ip_other_than_mom()``.
      2. Confirm one task is running and healthy, and remember its id.
      3. Block the task's port on its host for 10 seconds.
      4. Expect a task with a different id to be running and healthy.
    """
    app_def = apps.http_server()
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    # Baseline: exactly one running, healthy task before the partition.
    app = client.get_app(app_def["id"])
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_def["id"])
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    # NOTE(review): presumably the iptable_rules context manager restores the
    # host's rules on exit -- confirm against shakedown.
    with shakedown.iptable_rules(host):
        common.block_port(host, port)
        time.sleep(10)

    shakedown.deployment_wait()

    app = client.get_app(app_def["id"])
    tasks = client.get_tasks(app_def["id"])
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    # The message previously claimed "0 was expected" although the assertion
    # requires exactly one healthy task.
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_def["id"])
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_def["id"])
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
def test_https_health_check_healthy(protocol='MESOS_HTTPS'):
    """Check that the SSL-enabled nginx app turns healthy for ``protocol``.

    Meant for the HTTPS and MESOS_HTTPS protocols. The app definition comes
    from a prepared nginx image that enables SSL (self-signed certificate)
    and listens on 443; the health check is created with ``port_index=1``.
    The test is skipped when the Marathon version is below 1.4.2.
    """
    # The version captured by this gate is stated to work for both root Marathon and MoM.
    version_too_old = marthon_version_less_than('1.4.2')
    if version_too_old:
        pytest.skip()

    marathon_client = marathon.create_client()
    nginx_app = nginx_with_ssl_support()

    https_check = health_check(protocol=protocol, port_index=1)
    assert_app_healthy(marathon_client, nginx_app, https_check)
def test_http_health_check_healthy(protocol):
    """Check HTTP, MESOS_HTTP, TCP and MESOS_TCP health checks on the Python server.

    The app is first deployed without any health check to confirm that it
    runs but reports no healthy tasks. It is then removed and handed to
    ``assert_app_healthy`` together with a health check for ``protocol``.
    """
    deployed_id = '/no-health'

    marathon_client = marathon.create_client()
    server_app = python_http_app()
    server_app['id'] = 'no-health'

    # Phase 1: no health check configured -> running, but nothing counted as healthy.
    marathon_client.add_app(server_app)
    shakedown.deployment_wait()

    snapshot = marathon_client.get_app(deployed_id)
    assert snapshot['tasksRunning'] == 1
    assert snapshot['tasksHealthy'] == 0

    marathon_client.remove_app(deployed_id)

    # Phase 2: same definition, now verified with a health check for the protocol.
    protocol_check = health_check(protocol=protocol)
    assert_app_healthy(marathon_client, server_app, protocol_check)
def test_health_failed_check():
    """Test that a healthy task is replaced after its health check starts failing.

    The health check first succeeds, then fails because the task's port is
    blocked on its host (a simulated network partition). After the iptables
    rules are restored, a task with a new id is expected to be running and
    healthy.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    health_list = [health_check()]
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = health_list
    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    # NOTE(review): mom_ip is never used below. The call is kept because
    # ip_of_mom() is defined elsewhere and may have side effects -- confirm
    # and remove if it is a pure lookup.
    mom_ip = ip_of_mom()

    shakedown.save_iptables(host)
    try:
        block_port(host, port)
        time.sleep(7)
    finally:
        # Always put the saved rules back, even if blocking or sleeping fails,
        # so a failing test does not leave the agent partitioned.
        restore_iptables(host)
    shakedown.deployment_wait()

    # after network failure is restored.  The task returns and is a new task ID
    # retrying takes milliseconds: wait 1s between attempts, give up after 3s.
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000, retry_on_exception=ignore_on_exception)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()
def test_health_failed_check():
    """Test that a healthy task is replaced after its health check starts failing.

    The health check first succeeds, then fails because the task's port is
    blocked on its host (a simulated network partition). After the iptables
    rules are restored, a task with a new id is expected to be running and
    healthy.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    health_list = [health_check()]
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = health_list
    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    # NOTE(review): mom_ip is never used below. The call is kept because
    # ip_of_mom() is defined elsewhere and may have side effects -- confirm
    # and remove if it is a pure lookup.
    mom_ip = ip_of_mom()

    shakedown.save_iptables(host)
    try:
        block_port(host, port)
        time.sleep(7)
    finally:
        # Always put the saved rules back, even if blocking or sleeping fails,
        # so a failing test does not leave the agent partitioned.
        restore_iptables(host)
    shakedown.deployment_wait()

    # after network failure is restored.  The task returns and is a new task ID
    # retrying takes milliseconds: wait 1s between attempts, give up after 3s.
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()
def test_health_check_unhealthy():
    """Test an app launched by Marathon whose health check never passes.

    The check is configured with '/bad-url'. The app is expected to report
    one running task that is unhealthy and no healthy tasks.
    """
    marathon_client = marathon.create_client()
    server_app = python_http_app()

    failing_checks = [health_check('/bad-url', failures=0, timeout=0)]
    server_app['id'] = 'unhealthy'
    server_app['healthChecks'] = failing_checks

    marathon_client.add_app(server_app)

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_failure_message():
        snapshot = marathon_client.get_app('/unhealthy')
        # Checked in the same order as a short-circuiting `and` chain would.
        assert snapshot['tasksRunning'] == 1
        assert snapshot['tasksHealthy'] == 0
        assert snapshot['tasksUnhealthy'] == 1

    check_failure_message()
def test_task_gets_restarted_due_to_network_split():
    """Verify that a health check fails in the presence of a network partition.

    An HTTP server app with a health check is pinned to the host returned by
    ``common.ip_other_than_mom()``. After traffic to the task's port is
    blocked for a while, a task with a different id is expected to be
    running and healthy.
    """
    def expect_one_running_and_healthy(app_state):
        # Shared by the pre-partition baseline and the post-partition check.
        assert app_state['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app_state['tasksRunning'])
        assert app_state['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app_state['tasksHealthy'])

    app_def = apps.http_server("app-network-split")
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    marathon_client = marathon.create_client()
    marathon_client.add_app(app_def)
    deployment_wait(service_id=app_id)

    expect_one_running_and_healthy(marathon_client.get_app(app_id))

    first_task = marathon_client.get_tasks(app_id)[0]
    task_id = first_task['id']
    host = first_task['host']
    port = first_task['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(
        host, port, sleep_seconds=10, block_input=True, block_output=False)

    # Network partition should cause the task to restart N times until the partition is resolved (since we
    # pinned the task to the split agent). A new task with a new taskId should eventually be running and healthy.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def verify_replacement_task():
        current_tasks = marathon_client.get_tasks(app_id)
        new_task_id = current_tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        expect_one_running_and_healthy(marathon_client.get_app(app_id))

    verify_replacement_task()
def test_failing_health_check_results_in_unhealthy_app():
    """Deploy an app whose health check is meant to never pass.

    The check is configured with '/bad-url'. The app is expected to report
    one running task that is unhealthy and no healthy tasks.
    """
    app_def = apps.http_server()
    failing_check = common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)
    app_def['healthChecks'] = [failing_check]

    marathon_client = marathon.create_client()
    marathon_client.add_app(app_def)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_failure_message():
        snapshot = marathon_client.get_app(app_def["id"])

        # Read the three counters once; they feed both the trace line and the assertions.
        running = snapshot['tasksRunning']
        healthy = snapshot['tasksHealthy']
        unhealthy = snapshot['tasksUnhealthy']
        print("{}, {}, {}".format(running, healthy, unhealthy))

        assert running == 1, \
            "The number of running tasks is {}, but 1 was expected".format(running)
        assert healthy == 0, \
            "The number of healthy tasks is {}, but 0 was expected".format(healthy)
        assert unhealthy == 1, \
            "The number of unhealthy tasks is {}, but 1 was expected".format(unhealthy)

    check_failure_message()
def test_http_health_check_healthy(protocol):
    """Check HTTP, MESOS_HTTP, TCP and MESOS_TCP health checks on a Python web server.

    The app definition and a health check built for ``protocol`` are handed
    to ``assert_app_healthy``.
    """
    server_app = apps.http_server()
    marathon_client = marathon.create_client()

    protocol_check = common.health_check(protocol=protocol)
    assert_app_healthy(marathon_client, server_app, protocol_check)