def test_agent_restart_with_checkpointing_enabled():
    """Restart the agents of a job submitted with ``spark.mesos.checkpoint=true``.

    The job is submitted with checkpointing enabled, then
    ``utils.restart_task_agent_and_verify_state`` is invoked for the executor
    and afterwards for the driver, both with the expected state
    ``TASK_RUNNING``.

    The driver is killed in a ``finally`` clause so that a failing step does
    not leave the submitted driver behind.
    """
    (driver_task_id, driver_task, executor_task) = _submit_job_and_get_tasks(
        extra_args=["--conf spark.mesos.checkpoint=true"])

    try:
        # Resolve both hosts up front, before any agent is restarted.
        driver_ip = sdk_networks.get_task_host(driver_task)
        executor_ip = sdk_networks.get_task_host(executor_task)

        utils.restart_task_agent_and_verify_state(executor_ip, executor_task, "TASK_RUNNING")
        utils.restart_task_agent_and_verify_state(driver_ip, driver_task, "TASK_RUNNING")
    finally:
        # Clean up even when a step above fails.
        _kill_driver_task(driver_task_id)
def test_agent_restart_with_checkpointing_disabled():
    """Restart the agents of a job submitted without extra checkpoint settings.

    The job is submitted with default arguments, then
    ``utils.restart_task_agent_and_verify_state`` is invoked for the driver
    (expected state ``TASK_RUNNING``) and afterwards for the executor
    (expected state ``TASK_LOST``).

    The driver is killed in a ``finally`` clause so that a failing step does
    not leave the submitted driver behind.
    """
    (driver_task_id, driver_task, executor_task) = _submit_job_and_get_tasks()

    try:
        # Resolve both hosts up front, before any agent is restarted.
        driver_ip = sdk_networks.get_task_host(driver_task)
        executor_ip = sdk_networks.get_task_host(executor_task)

        # Dispatcher starts Driver Tasks with checkpointing enabled so Driver
        # is expected to be in RUNNING state
        utils.restart_task_agent_and_verify_state(driver_ip, driver_task, "TASK_RUNNING")
        utils.restart_task_agent_and_verify_state(executor_ip, executor_task, "TASK_LOST")
    finally:
        # Clean up even when a step above fails.
        _kill_driver_task(driver_task_id)
def test_offers_suppressed_with_lost_task():
    """Check logged offers after the "MockTaskRunner" executor's agent restarts.

    Launches the test task, takes the first service task reported for the app,
    calls ``utils.restart_task_agent_and_verify_state`` for it with the
    expected state ``TASK_LOST``, waits for the driver task to complete, and
    finally calls ``_check_logged_offers`` with an expected count of 1.
    """
    app_name = "MockTaskRunner"
    driver_id = _launch_test_task(app_name)

    # Only the first task reported for the service is exercised.
    service_tasks = shakedown.get_service_tasks(app_name)
    first_task = service_tasks[0]
    agent_host = sdk_networks.get_task_host(first_task)

    utils.restart_task_agent_and_verify_state(agent_host, first_task, "TASK_LOST")

    shakedown.wait_for_task_completion(
        driver_id,
        timeout_sec=utils.JOB_WAIT_TIMEOUT_SECONDS,
    )
    _check_logged_offers(driver_id, 1)
def docker_inspect(task, format_options=None):
    """Run ``sudo docker inspect`` for the task's container over agent SSH.

    Args:
        task: Task object passed to ``sdk_networks.get_task_host`` and
            ``dcos_utils.get_task_container_id``.
        format_options: Optional string inserted, followed by a space,
            between ``docker inspect`` and the container name. Omitted when
            ``None``.

    Returns:
        Whatever ``dcos_utils.agent_ssh`` returns for the command.
    """
    agent_host = sdk_networks.get_task_host(task)
    container_id = dcos_utils.get_task_container_id(task)

    # The container is addressed by the task container id prefixed with "mesos-".
    options = "" if format_options is None else format_options + " "
    command = "sudo docker inspect " + options + "mesos-" + container_id

    return dcos_utils.agent_ssh(agent_host, command)
def _check_task_network(task, is_ucr=True):
    """Run the network verification helpers for ``task``.

    ``_verify_task_ip`` and ``_verify_task_network_name`` are always called.
    After that, ``_verify_ucr_task_inet_address`` is called when ``is_ucr``
    is truthy, otherwise ``_check_docker_network``.

    Args:
        task: Task object handed to the ``sdk_networks`` lookups and the
            verification helpers.
        is_ucr: Selects which final check runs; defaults to ``True``.
    """
    agent_host = sdk_networks.get_task_host(task)
    container_ip = sdk_networks.get_task_ip(task)
    overlay_subnet = sdk_networks.get_overlay_subnet()

    _verify_task_ip(container_ip, agent_host, overlay_subnet)
    _verify_task_network_name(task)

    if not is_ucr:
        _check_docker_network(task, agent_host, overlay_subnet)
        return

    _verify_ucr_task_inet_address(task, overlay_subnet)
def docker_exec(task, cmd):
    """Run ``cmd`` via ``sudo docker exec`` in the task's container over agent SSH.

    Args:
        task: Task object passed to ``sdk_networks.get_task_host`` and
            ``dcos_utils.get_task_container_id``.
        cmd: Command text appended after the container name.

    Returns:
        Whatever ``dcos_utils.agent_ssh`` returns for the command.
    """
    agent_host = sdk_networks.get_task_host(task)
    container_id = dcos_utils.get_task_container_id(task)

    # The container is addressed by the task container id prefixed with "mesos-".
    return dcos_utils.agent_ssh(
        agent_host,
        "sudo docker exec mesos-{} {}".format(container_id, cmd),
    )