def test_marathon_with_master_process_failure(marathon_service_name):
    """Verify that a running task survives a restart of the master process.

    A sleep app is pinned to the host returned by
    ``common.ip_other_than_mom()`` and deployed. The master is then restarted
    through ``common.systemctl_master('restart')``. Once the ``ping`` path of
    the Marathon service endpoint is reachable again, the app must still have
    exactly one task and that task must carry the ID seen before the restart.

    Args:
        marathon_service_name: Name of the Marathon service whose endpoint is
            awaited after the master restart.
    """
    sleep_app = apps.sleep_app()
    app_id = sleep_app["id"]
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    marathon_client = marathon.create_client()
    marathon_client.add_app(sleep_app)
    deployment_wait(service_id=app_id)

    # Remember the task ID before disturbing the master.
    task_id_before_restart = marathon_client.get_tasks(app_id)[0]['id']

    common.systemctl_master('restart')
    shakedown.dcos.service.wait_for_service_endpoint(marathon_service_name, path="ping")

    # The task list may not be accurate right after the restart, so the check
    # is retried (30 attempts with a fixed wait of 1000 between them).
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def assert_task_survived():
        current_tasks = marathon_client.get_tasks(app_id)
        task_count = len(current_tasks)
        assert task_count == 1, \
            "The number of tasks is {} after master restart, but 1 was expected".format(task_count)

        current_task_id = current_tasks[0]['id']
        assert current_task_id == task_id_before_restart, \
            "Task {} has not recovered, it got replaced with another one: {}".format(
                task_id_before_restart, current_task_id)

    assert_task_survived()
def test_marathon_with_master_process_failure(marathon_service_name):
    """Launch an app from Marathon, restart the master, and check the task survives.

    The app built by ``app('master-failure')`` is pinned to the host returned
    by ``ip_other_than_mom()`` and deployed. After ``common.systemctl_master()``
    is invoked and the Marathon service endpoint is reachable again, the app
    is expected to still run exactly one task with the original task ID.

    Args:
        marathon_service_name: Name of the Marathon service whose endpoint is
            awaited after the master has been restarted.
    """
    app_def = app('master-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    tasks = client.get_tasks('/master-failure')
    original_task_id = tasks[0]['id']

    common.systemctl_master()
    shakedown.wait_for_service_endpoint(marathon_service_name)

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000, retry_on_exception=retry_on_exception)
    def check_task_recovery():
        tasks = client.get_tasks('/master-failure')
        # A clear count check beats an IndexError on an empty task list and
        # catches the case where a replacement task runs next to the old one.
        assert len(tasks) == 1, \
            "The number of tasks is {} after master restart, but 1 was expected".format(len(tasks))
        # The original compared the IDs without asserting, so the result was
        # thrown away and the test could never fail on a replaced task.
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(
                original_task_id, tasks[0]['id'])

    check_task_recovery()
def test_marathon_with_master_process_failure(marathon_service_name):
    """Check that an app's task keeps its ID across a master restart.

    Steps:
        1. Deploy a sleep app pinned to the host returned by
           ``common.ip_other_than_mom()``.
        2. Record the ID of its first task.
        3. Restart the master and wait for the ``ping`` path of the Marathon
           service endpoint.
        4. Repeatedly verify that exactly one task exists and that its ID is
           the recorded one.

    Args:
        marathon_service_name: Name of the Marathon service to wait for once
            the master has been restarted.
    """
    sleep_app = apps.sleep_app()
    target_host = common.ip_other_than_mom()
    common.pin_to_host(sleep_app, target_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(sleep_app)
    shakedown.deployment_wait()

    app_id = sleep_app["id"]
    first_task = marathon_client.get_tasks(app_id)[0]
    expected_task_id = first_task['id']

    common.systemctl_master('restart')
    common.wait_for_service_endpoint(marathon_service_name, path="ping")

    # Retried (30 attempts, fixed wait of 1000 between them) because the task
    # list may lag behind right after the master comes back.
    retry_policy = retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=30,
        retry_on_exception=common.ignore_exception)

    @retry_policy
    def verify_same_task():
        found = marathon_client.get_tasks(app_id)
        if len(found) != 1:
            raise AssertionError(
                "The number of tasks is {} after master restart, but 1 was expected".format(len(found)))
        if found[0]['id'] != expected_task_id:
            raise AssertionError(
                "Task {} has not recovered, it got replaced with another one: {}".format(
                    expected_task_id, found[0]['id']))

    verify_same_task()
def test_marathon_with_master_process_failure(marathon_service_name):
    """Launch an app from Marathon, restart the master, and check the task survives.

    The app built by ``app('master-failure')`` is pinned to the host returned
    by ``ip_other_than_mom()`` and deployed. After ``common.systemctl_master()``
    is invoked and the Marathon service endpoint is reachable again, the
    app's first task is expected to still have the original task ID.

    Args:
        marathon_service_name: Name of the Marathon service whose endpoint is
            awaited after the master has been restarted.
    """
    app_def = app('master-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    tasks = client.get_tasks('/master-failure')
    original_task_id = tasks[0]['id']

    common.systemctl_master()
    shakedown.wait_for_service_endpoint(marathon_service_name)

    # No retry_on_exception predicate is given, so any exception raised by the
    # check (including AssertionError) triggers another attempt until the
    # stop_max_delay limit is reached.
    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_task_recovery():
        tasks = client.get_tasks('/master-failure')
        # The original compared the IDs without asserting, so the result was
        # thrown away and a replaced task went unnoticed.
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(
                original_task_id, tasks[0]['id'])

    # The helper was defined but never invoked in the visible code, so the
    # recovery check did not run at all.
    check_task_recovery()