def launchTask(self, driver, task):
    """
    Mesos callback fired when a task is handed to this executor (initiated via
    Scheduler::launchTasks).

    The task may be realized with a thread, a process, or some simple computation; no other
    callbacks are invoked on this executor until this one returns, so the actual work is
    deferred.

    If a runner already exists the new task is reported as TASK_LOST. Otherwise the slave and
    task ids are recorded and the task is validated: a falsy validation result is reported as
    TASK_FAILED and a driver stop is scheduled, while a valid task is handed to ``_run``
    together with the mount paths extracted from the task.
    """
    self.launched.set()
    self.log('TaskInfo: %s' % task)

    incoming_task_id = task.task_id.value
    self.log('launchTask got task: %s:%s' % (task.name, incoming_task_id))

    # TODO(wickman) Update the tests to call registered(), then remove this line and issue
    # an assert if self._driver is not populated.
    self._driver = driver

    if self._runner:
        # Only one task per executor: reject the newcomer, leave the running task untouched.
        log.error('Already running a task! %s' % self._task_id)
        rejection = "Task already running on this executor: %s" % self._task_id
        self.send_update(driver, incoming_task_id, mesos_pb2.TASK_LOST, rejection)
        return

    self._slave_id = task.slave_id.value
    self._task_id = incoming_task_id

    assigned_task = self.validate_task(task)
    self.log("Assigned task: %s" % assigned_task)

    if assigned_task:
        defer(lambda: self._run(
            driver, assigned_task, self.extract_mount_paths_from_task(task)))
        return

    self.send_update(
        driver, self._task_id, mesos_pb2.TASK_FAILED, 'Could not deserialize task.')
    defer(driver.stop, delay=self.STOP_WAIT)
def start(self):
    """
    Install the package, bootstrap state, launch the task subprocess and begin monitoring.

    The whole sequence runs while ``self._lock`` is held. ``self._wait`` is deferred once the
    subprocess has been started, and the listener is started last.

    Raises TaskError when the runner was already started, or when installation, state
    bootstrap or the subprocess launch fails with one of the handled error types.
    """
    with self._lock:
        if self._started:
            raise TaskError("Runner already started")
        self._started = True

        # Can potentially hold the lock for a long time but it's OK since the runner is not
        # accessed by multiple threads until after it's started; can be a noop as well,
        # depending on the installer implementation.
        try:
            # Step 1: install the application.
            env = self._installer.install()
            self._env = env
            log.info(
                "Package installation completed. Resulting environment variables: %s" % env)

            # Step 2: restore/initialize the application state.
            self._state_manager.bootstrap(self._task_control, env)
            log.info("Executor state fully bootstrapped")

            # Step 3: start the task subprocess, keeping the handle so it can be killed if
            # necessary.
            process = self._task_control.start(env=env)
            self._popen = process
            log.info("Task started in subprocess %s" % process.pid)
            defer(self._wait)

            # Step 4: start monitoring. Only start listening to ZK events after the task
            # subprocess has been successfully started.
            self._listener.start()
        except (PackageInstaller.Error, StateManager.Error, CalledProcessError) as err:
            raise TaskError("Failed to start MySQL task: %s" % err)
def _shutdown(self, status_result):
    """
    Stop the health checkers and the runner, send the final status update, and schedule
    the driver to stop.

    status_result supplies the reason for the update, and also the status unless the runner
    already had a (truthy) status of its own when this method was entered.
    """
    # Snapshot the runner status before anything is stopped.
    runner_status = self._runner.status

    def stop_component(component, timeout_message, failure_message):
        # Best-effort stop: failures are logged, never raised.
        try:
            propagate_deadline(component.stop, timeout=self._stop_timeout)
        except Timeout:
            log.error(timeout_message)
        except Exception:
            log.error(failure_message)
            log.error(traceback.format_exc())

    stop_component(
        self._chained_checker,
        'Failed to stop all checkers within deadline.',
        'Failed to stop health checkers:')
    stop_component(
        self._runner,
        'Failed to stop runner within deadline.',
        'Failed to stop runner:')

    # If the runner was alive when _shutdown was called, defer to the status_result,
    # otherwise the runner's terminal state is the preferred state.
    if runner_status:
        exit_status = runner_status
    else:
        exit_status = status_result

    self.send_update(self._driver, self._task_id, exit_status.status, status_result.reason)
    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
def _shutdown(self, status_result):
    """
    Tear down the checkers and the runner (in that order), publish the terminal status
    update, mark the executor terminated and schedule the driver stop.

    The update's status comes from the runner if it already had a truthy status on entry,
    otherwise from status_result; the reason always comes from status_result.
    """
    runner_status = self._runner.status

    teardown_plan = (
        (self._chained_checker,
         'Failed to stop all checkers within deadline.',
         'Failed to stop health checkers:'),
        (self._runner,
         'Failed to stop runner within deadline.',
         'Failed to stop runner:'),
    )
    for component, timeout_message, failure_message in teardown_plan:
        # Each stop is attempted independently; errors are only logged.
        try:
            propagate_deadline(component.stop, timeout=self.STOP_TIMEOUT)
        except Timeout:
            log.error(timeout_message)
        except Exception:
            log.error(failure_message)
            log.error(traceback.format_exc())

    # If the runner was alive when _shutdown was called, defer to the status_result,
    # otherwise the runner's terminal state is the preferred state.
    final_status = runner_status if runner_status else status_result

    self.send_update(
        self._driver, self._task_id, final_status.status, status_result.reason)
    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
def launchTask(self, driver, task):
    """
    Mesos callback fired when a task is handed to this executor (initiated via
    Scheduler::launchTasks).

    The task may be realized with a thread, a process, or some simple computation; no other
    callbacks are invoked on this executor until this one returns, so the run itself is
    deferred.

    A second task is rejected with TASK_LOST. A task whose validation yields a falsy result
    is reported as TASK_FAILED and the driver is scheduled to stop after STOP_WAIT.
    """
    self.launched.set()
    self.log('launchTask got task: %s:%s' % (task.name, task.task_id.value))

    # TODO(wickman) Update the tests to call registered(), then remove this line and issue
    # an assert if self._driver is not populated.
    self._driver = driver

    if self._runner:
        log.error('Already running a task! %s' % self._task_id)
        already_running = "Task already running on this executor: %s" % self._task_id
        self.send_update(driver, task.task_id.value, mesos_pb2.TASK_LOST, already_running)
        return

    self._slave_id = task.slave_id.value
    self._task_id = task.task_id.value

    assigned_task = self.validate_task(task)
    if assigned_task:
        defer(lambda: self._run(driver, assigned_task))
    else:
        self.send_update(
            driver, self._task_id, mesos_pb2.TASK_FAILED, 'Could not deserialize task.')
        defer(driver.stop, delay=self.STOP_WAIT)
def _shutdown(self, status_result):
    """
    Stop the runner and then the checkers, send the final (Mesos-translated) status update,
    mark the executor terminated and schedule the driver stop.

    Only Timeout is handled while stopping; any other exception propagates to the caller.
    """
    runner_status = self._runner.status

    try:
        deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
        log.error('Failed to stop runner within deadline.')

    try:
        deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
        log.error('Failed to stop all checkers within deadline.')

    # If the runner was alive when _shutdown was called, defer to the status_result,
    # otherwise the runner's terminal state is the preferred state.
    if runner_status:
        final_status = runner_status
    else:
        final_status = status_result

    mesos_state = self.translate_exit_state_to_mesos(final_status.status)
    self.send_update(self._driver, self._task_id, mesos_state, status_result.reason)

    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
def frameworkMessage(self, driver, message):
    """
    Hand a framework message to ``_framework_message`` on a deferred thread.

    Messages that arrive before a runner exists are logged and dropped.
    """
    if self._runner:
        defer(lambda: self._framework_message(message))
    else:
        log.info(
            'Ignoring framework message because no task is running yet')
def launchTask(self, driver, task):
    """
    Mesos callback fired when a task is handed to this executor (initiated via
    Scheduler::launchTasks).

    The task may be realized with a thread, a process, or some simple computation; no other
    callbacks are invoked on this executor until this one returns, so the run itself is
    deferred.

    A second task is rejected with TASK_LOST. If the task cannot be deserialized, TASK_FAILED
    is reported with the error text and the driver is scheduled to stop after STOP_WAIT.
    """
    self.launched.set()
    self.log('launchTask got task: %s:%s' % (task.name, task.task_id.value))

    # TODO(wickman) Update the tests to call registered(), then remove this line and issue
    # an assert if self._driver is not populated.
    self._driver = driver

    if self._runner:
        log.error('Already running a task! %s' % self._task_id)
        already_running = "Task already running on this executor: %s" % self._task_id
        self.send_update(driver, task.task_id.value, mesos_pb.TASK_LOST, already_running)
        return

    self._slave_id = task.slave_id.value
    self._task_id = task.task_id.value

    try:
        assigned_task = assigned_task_from_mesos_task(task)
        mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
    except Exception as e:
        log.fatal('Could not deserialize AssignedTask')
        log.fatal(traceback.format_exc())
        failure_reason = "Could not deserialize task: %s" % e
        self.send_update(driver, self._task_id, mesos_pb.TASK_FAILED, failure_reason)
        defer(driver.stop, delay=self.STOP_WAIT)
    else:
        defer(lambda: self._run(driver, assigned_task, mesos_task))
def start(self):
    """
    Bring the task up: install, bootstrap state, fork the task subprocess, start monitoring.

    Runs entirely under ``self._lock``. After the subprocess is started ``self._wait`` is
    deferred, and the listener is started as the final step.

    Raises TaskError if called a second time, or if one of the handled installer,
    state-manager or subprocess errors occurs during startup.
    """
    with self._lock:
        if self._started:
            raise TaskError("Runner already started")
        self._started = True

        # Can potentially hold the lock for a long time but it's OK since the runner is not
        # accessed by multiple threads until after it's started; can be a noop as well,
        # depending on the installer implementation.
        try:
            # (1) Install the application.
            self._env = self._installer.install()
            install_report = (
                "Package installation completed. Resulting environment variables: %s"
                % self._env)
            log.info(install_report)

            # (2) Restore/initialize the application state.
            self._state_manager.bootstrap(self._task_control, self._env)
            log.info("Executor state fully bootstrapped")

            # (3) Start the task subprocess; the handle is stored so it can be killed if
            # necessary.
            self._popen = self._task_control.start(env=self._env)
            log.info("Task started in subprocess %s" % self._popen.pid)
            defer(self._wait)

            # (4) Start monitoring. Only start listening to ZK events after the task
            # subprocess has been successfully started.
            self._listener.start()
        except (PackageInstaller.Error, StateManager.Error, CalledProcessError) as failure:
            startup_error = "Failed to start MySQL task: %s" % failure
            raise TaskError(startup_error)
def ping(self, message, ttl=60):
    """
    Handle a ping: count it, log it, and relay it with a decremented ttl.

    :param message: ping payload, forwarded unchanged with the next 'ping' request.
    :param ttl: remaining hops; converted with ``int()`` and decremented by one. The ping is
        re-sent (after ``PING_DELAY``, on ``self._clock``) only while the result is positive.
    """
    self._pings.increment()
    # The arguments must follow the placeholder order: ttl first, then the message.
    log.info('Got ping (ttl=%s): %s' % (ttl, message))
    ttl = int(ttl) - 1
    if ttl > 0:
        defer(partial(self.send_request, 'ping', message, ttl),
              delay=self.PING_DELAY,
              clock=self._clock)
def test_defer():
    """
    Check that ``defer`` runs the callable, and not before ``DELAY`` has elapsed.

    The timer is started before the call is scheduled so the measured interval always covers
    the whole delay, and the queue read is bounded so a callable that never fires fails the
    test (with ``Empty``) instead of hanging it.
    """
    DELAY = 0.5
    results = Queue(maxsize=1)

    def func():
        results.put_nowait('success')

    with Timer() as timer:
        defer(func, delay=DELAY)
        assert results.get(timeout=20 * DELAY) == 'success'

    assert timer.elapsed >= DELAY
def _kill(self):
    """
    Stop the runner (if there is one), wait for termination, then schedule the driver stop.

    When no runner exists only the deferred driver stop is scheduled.
    """
    runner = self._runner
    if runner:
        self._killed = True
        # It could be already stopped. If so, self._runner.stop() is a no-op.
        runner.stop()
        self._terminated.wait(sys.maxint)

    assert self._driver

    # TODO(jyx): Fix https://issues.apache.org/jira/browse/MESOS-243.
    defer(lambda: self._driver.stop(), delay=self.STOP_WAIT)
def test_defer():
    """
    Drive ``defer`` with a ThreadedClock: after ticking past the delay the callable must
    have published its result, and the timer must report at least ``DELAY``.
    """
    DELAY = 3
    clock = ThreadedClock()
    results = Queue(maxsize=1)

    def publish_success():
        results.put_nowait('success')

    defer(publish_success, delay=DELAY, clock=clock)

    with Timer(clock=clock) as timer:
        clock.tick(4)
        outcome = results.get()
        assert outcome == 'success'

    assert timer.elapsed >= DELAY
def _kill(self):
    """
    Shut the task down and schedule the driver to stop after STOP_WAIT.

    If a runner exists it is flagged as killed, stopped, and waited on until the terminated
    event is set; the driver stop is scheduled in every case.
    """
    if self._runner:
        self._killed = True
        # It could be already stopped. If so, self._runner.stop() is a no-op.
        self._runner.stop()
        self._terminated.wait(sys.maxint)

    assert self._driver

    def stop_driver():
        # Look the driver up when the deferred call fires, not when it is scheduled.
        return self._driver.stop()

    # TODO(jyx): Fix https://issues.apache.org/jira/browse/MESOS-243.
    defer(stop_driver, delay=self.STOP_WAIT)
def _on_demote(self):
    """
    Demotion callback: flag the demotion and shut the runner down unless it already exited.
    """
    self.demoted.set()

    if self._exited.is_set():
        return

    # Stop the runner asynchronously.
    log.info("Shutting down runner because it is demoted.")

    # Call stop() asynchronously because this callback is invoked from the Kazoo thread which
    # we don't want to block.
    defer(self.stop)
def test_defer():
    """
    Drive ``defer`` with a ThreadedClock: nothing is published before the clock advances,
    the result arrives after ticking past the delay, and the timer reports exactly the
    ticked amount.
    """
    clock = ThreadedClock()
    DELAY = 3
    ticked = DELAY + 1
    results = Queue(maxsize=1)

    def publish_success():
        results.put_nowait('success')

    defer(publish_success, delay=DELAY, clock=clock)

    with Timer(clock=clock) as timer:
        # The clock has not moved yet, so the queue must still be empty.
        with pytest.raises(Empty):
            results.get_nowait()

        clock.tick(ticked)
        outcome = results.get()
        assert outcome == 'success'

    assert timer.elapsed == ticked
def launchTask(self, driver, task):
    """
    Launch the single task this executor supports.

    If a runner already exists the new task is rejected with a TASK_FAILED status update
    that identifies the rejected task. Otherwise a runner is created from the task; if that
    fails with TaskError or ValueError, TASK_FAILED is reported with the error message and
    the executor is killed. On success the task is run on a deferred thread.

    :param driver: executor driver used to send the rejection status update.
    :param task: the task to launch; its ``task_id.value`` identifies it in status updates.
    """
    if self._runner:
        log.error("Executor allows only one task")
        update = mesos_pb2.TaskStatus()
        # task_id is a required TaskStatus field: without it the update cannot be attributed
        # to the task being rejected.
        update.task_id.value = task.task_id.value
        update.state = mesos_pb2.TASK_FAILED
        driver.sendStatusUpdate(update)
        return

    # Create the runner here in the driver thread so subsequent task launches are rejected.
    try:
        self._runner = self._runner_provider.from_task(task, self._sandbox)
    except (TaskError, ValueError) as e:
        # TODO(jyx): These should really all be 'ValueError's from all providers because they
        # are simply factory methods.
        log.error("Failed to create TaskRunner: %s" % e.message)
        self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED, e.message)
        self._kill()
        return

    # Run the task in a separate daemon thread.
    defer(lambda: self._run_task(task))
def _die(self, driver, status, msg):
    """
    Log msg as fatal, report it as the final status of the current task, and schedule the
    driver to stop after STOP_WAIT.
    """
    log.fatal(msg)
    current_task_id = self._task_id
    self.send_update(driver, current_task_id, status, msg)
    defer(driver.stop, delay=self.STOP_WAIT)
def ping(self, message, ttl=60):
    """
    Handle a ping: count it, log it, and relay it with a decremented ttl.

    :param message: ping payload, forwarded unchanged with the next "ping" request.
    :param ttl: remaining hops; converted with ``int()`` and decremented by one. The ping is
        re-sent (after ``PING_DELAY``, on ``self._clock``) only while the result is positive.
    """
    self._pings.increment()
    # The arguments must follow the placeholder order: ttl first, then the message.
    log.info("Got ping (ttl=%s): %s" % (ttl, message))
    ttl = int(ttl) - 1
    if ttl > 0:
        defer(partial(self.send_request, "ping", message, ttl),
              delay=self.PING_DELAY,
              clock=self._clock)
def frameworkMessage(self, driver, message):
    """
    Forward a framework message to ``_framework_message`` on a deferred thread.

    If no runner exists yet the message is logged and ignored.
    """
    has_runner = bool(self._runner)
    if not has_runner:
        log.info('Ignoring framework message because no task is running yet')
        return

    def deliver():
        return self._framework_message(message)

    defer(deliver)
def _on_promote(self):
    """
    Promotion callback: flag the promotion and, unless the runner has already exited,
    run ``_promote`` on a deferred thread.
    """
    self.promoted.set()

    if self._exited.is_set():
        return

    defer(self._promote)
def _on_master_change(self, master):
    """
    Master-change callback: record the new master and, unless the runner has already
    exited, reparent to it on a deferred thread.
    """
    self.master.put(master)

    if self._exited.is_set():
        return

    def reparent():
        return self._reparent(master)

    defer(reparent)