def make_executor(proxy_driver, checkpoint_root, task, ports={}, fast_status=False,
                  runner_class=ThermosTaskRunner, status_providers=()):
    """Launch ``task`` on a new FastThermosExecutor and block until it is up.

    The executor is built with a runner provider derived from
    ``checkpoint_root`` and ``runner_class``; ``fast_status`` selects
    FastStatusManager instead of StatusManager. After launching, the helper
    checks the sampled metrics, waits for two status updates on
    ``proxy_driver`` (TASK_STARTING then TASK_RUNNING) and polls until
    ``TaskRunner.get`` yields a runner for the task.

    Returns:
        A ``(runner, executor)`` tuple.
    """
    # NOTE(review): `ports` defaults to a shared dict; it is only forwarded to
    # make_task here and never mutated in this function.
    if fast_status:
        manager_cls = FastStatusManager
    else:
        manager_cls = StatusManager

    te = FastThermosExecutor(
        runner_provider=make_provider(checkpoint_root, runner_class),
        status_manager_class=manager_cls,
        sandbox_provider=DefaultTestSandboxProvider,
        status_providers=status_providers,
    )

    watchdog = ExecutorTimeout(te.launched, proxy_driver, timeout=Amount(100, Time.MILLISECONDS))
    watchdog.start()

    task_description = make_task(task, assigned_ports=ports, instanceId=0)
    te.launchTask(proxy_driver, task_description)
    te.status_manager_started.wait()

    # Every status checker (and the kill manager) must expose an
    # "<name>.enabled" metric. Reaching into the private checker list is hacky.
    metrics_snapshot = te.metrics.sample()
    assert 'kill_manager.enabled' in metrics_snapshot
    for status_checker in te._chained_checker._status_checkers:
        assert ('%s.enabled' % status_checker.name()) in metrics_snapshot

    # Poll until both expected status updates have been sent to the driver.
    while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
        time.sleep(0.1)

    # Startup sanity check: exactly STARTING followed by RUNNING.
    recorded_calls = proxy_driver.method_calls['sendStatusUpdate']
    assert len(recorded_calls) == 2
    sent_statuses = [call[0][0] for call in recorded_calls]
    assert sent_statuses[0].state == mesos_pb2.TASK_STARTING
    assert sent_statuses[1].state == mesos_pb2.TASK_RUNNING

    # Poll until a runner has bound to the task.
    runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
    while not runner:
        time.sleep(0.1)
        runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)

    assert te.launched.is_set()
    return runner, te
def make_executor(
        proxy_driver,
        checkpoint_root,
        task,
        ports={},
        fast_status=False,
        runner_class=ThermosTaskRunner,
        status_providers=[HealthCheckerProvider()],
        assert_task_is_running=True,
        stop_timeout_in_secs=120):
    """Launch ``task`` on a new FastThermosExecutor and wait for startup.

    The executor is built from a runner provider for ``checkpoint_root`` /
    ``runner_class``, the status manager class chosen by ``fast_status``, a
    DefaultTestSandboxProvider instance, ``status_providers`` and
    ``stop_timeout_in_secs``. The helper then waits for two status updates on
    ``proxy_driver`` and asserts the first is TASK_STARTING.

    When ``assert_task_is_running`` is true, the second update must be
    TASK_RUNNING and the helper polls until ``TaskRunner.get`` returns a
    runner; otherwise the returned runner is ``None``.

    Returns:
        A ``(runner, executor)`` tuple.
    """
    # NOTE(review): the `ports` and `status_providers` defaults are created
    # once at definition time and shared across calls; this function only
    # forwards them.
    if fast_status:
        manager_cls = FastStatusManager
    else:
        manager_cls = StatusManager

    te = FastThermosExecutor(
        runner_provider=make_provider(checkpoint_root, runner_class),
        status_manager_class=manager_cls,
        sandbox_provider=DefaultTestSandboxProvider(),
        status_providers=status_providers,
        stop_timeout_in_secs=stop_timeout_in_secs,
    )

    watchdog = ExecutorTimeout(te.launched, proxy_driver, timeout=Amount(100, Time.MILLISECONDS))
    watchdog.start()

    task_description = make_task(task, assigned_ports=ports, instanceId=0)
    te.launchTask(proxy_driver, task_description)
    te.status_manager_started.wait()

    # Poll until two status updates have been sent to the driver.
    while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
        time.sleep(0.1)

    # Startup sanity check: exactly two updates, the first being STARTING.
    recorded_calls = proxy_driver.method_calls['sendStatusUpdate']
    assert len(recorded_calls) == 2
    sent_statuses = [call[0][0] for call in recorded_calls]
    assert sent_statuses[0].state == mesos_pb2.TASK_STARTING

    runner = None
    if assert_task_is_running:
        assert sent_statuses[1].state == mesos_pb2.TASK_RUNNING

        # Poll until a runner has bound to the task.
        runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
        while not runner:
            time.sleep(0.1)
            runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)

    assert te.launched.is_set()
    return runner, te
def make_executor(
        proxy_driver,
        checkpoint_root,
        task,
        ports={},
        fast_status=False,
        runner_class=ThermosTaskRunner,
        status_providers=()):
    """Start ``task`` on a fresh FastThermosExecutor and wait until it runs.

    Steps performed:
      1. Build the executor (FastStatusManager when ``fast_status`` is set,
         StatusManager otherwise) and arm an ExecutorTimeout on it.
      2. Launch the task description produced by ``make_task``.
      3. Check that the kill manager and every chained status checker
         publish an ``<name>.enabled`` metric.
      4. Wait for exactly two status updates, TASK_STARTING then
         TASK_RUNNING, and for ``TaskRunner.get`` to return a runner.

    Returns:
        A ``(runner, executor)`` tuple.
    """
    # NOTE(review): `ports` defaults to a shared dict; it is only forwarded to
    # make_task here and never mutated in this function.
    status_manager_class = StatusManager
    if fast_status:
        status_manager_class = FastStatusManager

    provider = make_provider(checkpoint_root, runner_class)
    executor_kwargs = dict(
        runner_provider=provider,
        status_manager_class=status_manager_class,
        sandbox_provider=DefaultTestSandboxProvider,
        status_providers=status_providers,
    )
    te = FastThermosExecutor(**executor_kwargs)

    ExecutorTimeout(
        te.launched,
        proxy_driver,
        timeout=Amount(100, Time.MILLISECONDS),
    ).start()

    task_description = make_task(task, assigned_ports=ports, instanceId=0)
    te.launchTask(proxy_driver, task_description)
    te.status_manager_started.wait()

    # Accessing the private checker list is hacky, but it is how the set of
    # expected "<name>.enabled" metrics is discovered.
    sample = te.metrics.sample()
    assert 'kill_manager.enabled' in sample
    for checker in te._chained_checker._status_checkers:
        metric_name = '%s.enabled' % checker.name()
        assert metric_name in sample

    # Block until the driver has seen both status updates.
    while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
        time.sleep(0.1)

    # Make sure startup went as expected.
    update_calls = proxy_driver.method_calls['sendStatusUpdate']
    assert len(update_calls) == 2
    statuses = [positional_and_kw[0][0] for positional_and_kw in update_calls]
    assert statuses[0].state == mesos_pb2.TASK_STARTING
    assert statuses[1].state == mesos_pb2.TASK_RUNNING

    # Wait for the runner to bind to the task.
    runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
    while not runner:
        time.sleep(0.1)
        runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)

    assert te.launched.is_set()
    return runner, te
def test_coordinator_dead_kill(self):
    """A force-kill after runner, coordinator and process are SIGKILLed marks the process LOST."""
    runner = self.start_runner()
    monitor = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(monitor)

    active = monitor.get_active_processes()
    process_state, run_number = active[0]
    assert process_state.process == 'ignorant_process'
    assert run_number == 0

    # SIGKILL the runner, then the coordinator, then the process itself.
    doomed_pids = (runner.po.pid, process_state.coordinator_pid, process_state.pid)
    for pid in doomed_pids:
        os.kill(pid, signal.SIGKILL)

    # A fresh runner obtained from the checkpoint performs the forced kill.
    killer = TaskRunner.get(runner.task_id, runner.root)
    assert killer is not None
    killer.kill(force=True)

    runs = monitor.get_state().processes['ignorant_process']
    assert len(runs) == 1
    assert runs[0].state == ProcessState.LOST
def test_coordinator_dead_kill(self):
    """Force-killing a task whose runner, coordinator and process were SIGKILLed yields LOST."""
    runner = self.start_runner()
    monitor = TaskMonitor(runner.tempdir, runner.task_id)
    self.wait_until_running(monitor)

    first_active = monitor.get_active_processes()[0]
    process_state, run_number = first_active
    assert process_state.process == 'ignorant_process'
    assert run_number == 0

    # Take down the runner, its coordinator and the process, in that order.
    os.kill(runner.po.pid, signal.SIGKILL)
    os.kill(process_state.coordinator_pid, signal.SIGKILL)
    os.kill(process_state.pid, signal.SIGKILL)

    # A second runner obtained via TaskRunner.get issues the forced kill.
    killer = TaskRunner.get(runner.task_id, runner.root)
    assert killer is not None
    killer.kill(force=True)

    final_state = monitor.get_state()
    process_runs = final_state.processes['ignorant_process']
    assert len(process_runs) == 1
    assert process_runs[0].state == ProcessState.LOST
def test_preemption_wait(self):
    """A forced kill with a 1s preemption wait takes about 1s and ends in KILLED."""
    runner = self.start_runner()
    monitor = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(monitor)

    active = monitor.get_active_processes()
    process_state, run_number = active[0]
    assert process_state.process == 'ignorant_process'
    assert run_number == 0

    preempter = TaskRunner.get(runner.task_id, runner.root)
    assert preempter is not None

    started_at = time.time()
    preempter.kill(force=True, preemption_wait=Amount(1, Time.SECONDS))
    elapsed = time.time() - started_at

    # The tolerance is arbitrary: the kill must finish within half a second
    # of the requested preemption wait.
    assert abs(elapsed - 1.0) < 0.5

    final_task_status = preempter.state.statuses[-1]
    assert final_task_status.state == TaskState.KILLED
    final_process_run = preempter.state.processes['ignorant_process'][-1]
    assert final_process_run.state == ProcessState.KILLED
def test_preemption_wait(self):
    """Killing with ``preemption_wait`` of one second blocks roughly that long, then reports KILLED."""
    runner = self.start_runner()
    monitor = TaskMonitor(runner.tempdir, runner.task_id)
    self.wait_until_running(monitor)

    first_active = monitor.get_active_processes()[0]
    process_state, run_number = first_active
    assert process_state.process == 'ignorant_process'
    assert run_number == 0

    preempter = TaskRunner.get(runner.task_id, runner.root)
    assert preempter is not None

    wait = Amount(1, Time.SECONDS)
    kill_started = time.time()
    preempter.kill(force=True, preemption_wait=wait)
    kill_duration = time.time() - kill_started

    # Arbitrary tolerance: the kill must complete within half a second of the
    # requested preemption wait.
    assert abs(kill_duration - 1.0) < 0.5

    assert preempter.state.statuses[-1].state == TaskState.KILLED
    process_history = preempter.state.processes['ignorant_process']
    assert process_history[-1].state == ProcessState.KILLED