Exemple #1
0
  def launchTask(self, driver, task):
    """
      Mesos executor callback: accept `task` and schedule it to run asynchronously.

      A TASK_LOST update is sent for the incoming task if a runner already exists. If
      validate_task() returns a falsy value, TASK_FAILED is sent and the driver is stopped after
      STOP_WAIT. Otherwise self._run() is deferred. No other callbacks are invoked on this
      executor until this callback has returned.
    """
    self.launched.set()
    self.log('TaskInfo: %s' % task)
    incoming_task_id = task.task_id.value
    self.log('launchTask got task: %s:%s' % (task.name, incoming_task_id))

    # TODO(wickman)  Update the tests to call registered(), then remove this line and issue
    # an assert if self._driver is not populated.
    self._driver = driver

    if self._runner:
      # Only one task per executor: reject the newcomer, leave the current task untouched.
      log.error('Already running a task! %s' % self._task_id)
      rejection = "Task already running on this executor: %s" % self._task_id
      self.send_update(driver, incoming_task_id, mesos_pb2.TASK_LOST, rejection)
      return

    self._slave_id = task.slave_id.value
    self._task_id = incoming_task_id

    assigned_task = self.validate_task(task)
    self.log("Assigned task: %s" % assigned_task)
    if assigned_task:
      defer(lambda: self._run(driver, assigned_task, self.extract_mount_paths_from_task(task)))
      return

    self.send_update(driver, self._task_id, mesos_pb2.TASK_FAILED, 'Could not deserialize task.')
    defer(driver.stop, delay=self.STOP_WAIT)
    def start(self):
        """
        Bring the task up while holding self._lock: install the package, bootstrap the state,
        launch the task subprocess, defer self._wait and start the listener.

        Raises TaskError if the runner was already started, or if one of the steps raises
        PackageInstaller.Error, StateManager.Error or CalledProcessError.
        """
        with self._lock:
            if self._started:
                raise TaskError("Runner already started")
            self._started = True

            # The lock may be held for a long time here, which is OK since the runner is not
            # accessed by multiple threads until after it's started; installation can also be a
            # noop, depending on the installer implementation.
            try:
                # Step 1: install the application.
                self._env = self._installer.install()
                log.info("Package installation completed. "
                         "Resulting environment variables: %s" % self._env)

                # Step 2: restore/initialize the application state.
                self._state_manager.bootstrap(self._task_control, self._env)
                log.info("Executor state fully bootstrapped")

                # Step 3: start the task subprocess; the handle is stored so that the process
                # can be killed if necessary.
                self._popen = self._task_control.start(env=self._env)
                log.info("Task started in subprocess %s" % self._popen.pid)
                defer(self._wait)

                # Step 4: start monitoring. ZK events are only listened to after the task
                # subprocess has been successfully started.
                self._listener.start()
            except (PackageInstaller.Error, StateManager.Error, CalledProcessError) as err:
                raise TaskError("Failed to start MySQL task: %s" % err)
Exemple #3
0
    def _shutdown(self, status_result):
        """
        Stop the health checkers and the runner, send the final status update, then schedule
        the driver to stop after PERSISTENCE_WAIT.

        The runner status is read before anything is stopped. If it is truthy its status is
        reported, otherwise the status of `status_result`; the reason always comes from
        `status_result`.
        """
        status_before_stop = self._runner.status

        # Stopping is best effort: timeouts and unexpected errors are logged and the shutdown
        # carries on.
        try:
            propagate_deadline(self._chained_checker.stop, timeout=self._stop_timeout)
        except Timeout:
            log.error('Failed to stop all checkers within deadline.')
        except Exception:
            log.error('Failed to stop health checkers:')
            log.error(traceback.format_exc())

        try:
            propagate_deadline(self._runner.stop, timeout=self._stop_timeout)
        except Timeout:
            log.error('Failed to stop runner within deadline.')
        except Exception:
            log.error('Failed to stop runner:')
            log.error(traceback.format_exc())

        # A runner that was still alive when _shutdown was called has no status of its own, so
        # status_result decides; otherwise the runner's terminal state is preferred.
        final_status = status_before_stop if status_before_stop else status_result

        self.send_update(
            self._driver,
            self._task_id,
            final_status.status,
            status_result.reason)

        self.terminated.set()
        defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
  def _shutdown(self, status_result):
    """
      Stop the health checkers and the runner, send the final status update, then schedule the
      driver to stop after PERSISTENCE_WAIT.

      The runner status is read before anything is stopped. If it is truthy its status is
      reported, otherwise the status of `status_result`; the reason always comes from
      `status_result`.
    """
    status_before_stop = self._runner.status

    # Stopping is best effort: timeouts and unexpected errors are logged and the shutdown
    # carries on.
    try:
      propagate_deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop all checkers within deadline.')
    except Exception:
      log.error('Failed to stop health checkers:')
      log.error(traceback.format_exc())

    try:
      propagate_deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop runner within deadline.')
    except Exception:
      log.error('Failed to stop runner:')
      log.error(traceback.format_exc())

    # A runner that was still alive when _shutdown was called has no status of its own, so
    # status_result decides; otherwise the runner's terminal state is preferred.
    final_status = status_before_stop if status_before_stop else status_result

    self.send_update(self._driver, self._task_id, final_status.status, status_result.reason)

    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
Exemple #5
0
    def launchTask(self, driver, task):
        """
        Mesos executor callback: accept `task` and schedule it to run asynchronously.

        A TASK_LOST update is sent for the incoming task if a runner already exists. If
        validate_task() returns a falsy value, TASK_FAILED is sent and the driver is stopped
        after STOP_WAIT. Otherwise self._run() is deferred. No other callbacks are invoked on
        this executor until this callback has returned.
        """
        self.launched.set()
        incoming_task_id = task.task_id.value
        self.log('launchTask got task: %s:%s' % (task.name, incoming_task_id))

        # TODO(wickman)  Update the tests to call registered(), then remove this line and issue
        # an assert if self._driver is not populated.
        self._driver = driver

        if self._runner:
            # Only one task per executor: reject the newcomer, leave the current task untouched.
            log.error('Already running a task! %s' % self._task_id)
            rejection = "Task already running on this executor: %s" % self._task_id
            self.send_update(driver, incoming_task_id, mesos_pb2.TASK_LOST, rejection)
            return

        self._slave_id = task.slave_id.value
        self._task_id = incoming_task_id

        assigned_task = self.validate_task(task)
        if assigned_task:
            defer(lambda: self._run(driver, assigned_task))
            return

        self.send_update(
            driver, self._task_id, mesos_pb2.TASK_FAILED, 'Could not deserialize task.')
        defer(driver.stop, delay=self.STOP_WAIT)
  def _shutdown(self, status_result):
    """
      Stop the runner and the health checkers, send the final status update, then schedule the
      driver to stop after PERSISTENCE_WAIT.

      The runner status is read before anything is stopped. If it is truthy its status is
      reported, otherwise the status of `status_result`; either way the status goes through
      translate_exit_state_to_mesos() and the reason is taken from `status_result`.
    """
    status_before_stop = self._runner.status

    # Each stop runs under its own deadline; a timeout is logged and the shutdown carries on.
    try:
      deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop runner within deadline.')

    try:
      deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop all checkers within deadline.')

    # A runner that was still alive when _shutdown was called has no status of its own, so
    # status_result decides; otherwise the runner's terminal state is preferred.
    final_status = status_before_stop if status_before_stop else status_result

    self.send_update(self._driver, self._task_id,
                     self.translate_exit_state_to_mesos(final_status.status),
                     status_result.reason)

    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
Exemple #7
0
    def frameworkMessage(self, driver, message):
        """
        Pass `message` to self._framework_message asynchronously. A message that arrives while
        there is no runner is logged and ignored. `driver` is not used.
        """
        if self._runner:
            defer(lambda: self._framework_message(message))
            return

        log.info('Ignoring framework message because no task is running yet')
  def launchTask(self, driver, task):
    """
      Mesos executor callback: deserialize `task` and schedule it to run asynchronously.

      A TASK_LOST update is sent for the incoming task if a runner already exists. If either
      deserialization helper raises, the traceback is logged, TASK_FAILED is sent and the driver
      is stopped after STOP_WAIT. Otherwise self._run() is deferred. No other callbacks are
      invoked on this executor until this callback has returned.
    """
    self.launched.set()
    self.log('launchTask got task: %s:%s' % (task.name, task.task_id.value))

    # TODO(wickman)  Update the tests to call registered(), then remove this line and issue
    # an assert if self._driver is not populated.
    self._driver = driver

    if self._runner:
      # Only one task per executor: reject the newcomer, leave the current task untouched.
      log.error('Already running a task! %s' % self._task_id)
      self.send_update(
          driver,
          task.task_id.value,
          mesos_pb.TASK_LOST,
          "Task already running on this executor: %s" % self._task_id)
      return

    self._slave_id = task.slave_id.value
    self._task_id = task.task_id.value

    try:
      assigned_task = assigned_task_from_mesos_task(task)
      mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
    except Exception as err:
      log.fatal('Could not deserialize AssignedTask')
      log.fatal(traceback.format_exc())
      failure_reason = "Could not deserialize task: %s" % err
      self.send_update(driver, self._task_id, mesos_pb.TASK_FAILED, failure_reason)
      defer(driver.stop, delay=self.STOP_WAIT)
    else:
      defer(lambda: self._run(driver, assigned_task, mesos_task))
  def _shutdown(self, status_result):
    """
      Stop the runner and the health checkers, send the final status update, then schedule the
      driver to stop after PERSISTENCE_WAIT.

      The runner status is read before anything is stopped. If it is truthy its status is
      reported, otherwise the status of `status_result`; either way the status goes through
      translate_exit_state_to_mesos() and the reason is taken from `status_result`.
    """
    status_before_stop = self._runner.status

    # Each stop runs under its own deadline; a timeout is logged and the shutdown carries on.
    try:
      deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop runner within deadline.')

    try:
      deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
    except Timeout:
      log.error('Failed to stop all checkers within deadline.')

    # A runner that was still alive when _shutdown was called has no status of its own, so
    # status_result decides; otherwise the runner's terminal state is preferred.
    final_status = status_before_stop if status_before_stop else status_result

    self.send_update(self._driver, self._task_id,
                     self.translate_exit_state_to_mesos(final_status.status),
                     status_result.reason)

    self.terminated.set()
    defer(self._driver.stop, delay=self.PERSISTENCE_WAIT)
  def start(self):
    """
      Bring the task up while holding self._lock: install the package, bootstrap the state,
      launch the task subprocess, defer self._wait and start the listener.

      Raises TaskError if the runner was already started, or if one of the steps raises
      PackageInstaller.Error, StateManager.Error or CalledProcessError.
    """
    with self._lock:
      if self._started:
        raise TaskError("Runner already started")
      self._started = True

      # The lock may be held for a long time here, which is OK since the runner is not accessed
      # by multiple threads until after it's started; installation can also be a noop, depending
      # on the installer implementation.
      try:
        # Step 1: install the application.
        self._env = self._installer.install()
        log.info("Package installation completed. "
                 "Resulting environment variables: %s" % self._env)

        # Step 2: restore/initialize the application state.
        self._state_manager.bootstrap(self._task_control, self._env)
        log.info("Executor state fully bootstrapped")

        # Step 3: start the task subprocess; the handle is stored so that the process can be
        # killed if necessary.
        self._popen = self._task_control.start(env=self._env)
        log.info("Task started in subprocess %s" % self._popen.pid)
        defer(self._wait)

        # Step 4: start monitoring. ZK events are only listened to after the task subprocess
        # has been successfully started.
        self._listener.start()
      except (PackageInstaller.Error, StateManager.Error, CalledProcessError) as err:
        raise TaskError("Failed to start MySQL task: %s" % err)
Exemple #11
0
 def ping(self, message, ttl=60):
     """
     Count the ping, log it, and schedule a follow-up 'ping' request while the ttl lasts.

     message -- payload that is logged and forwarded unchanged in the follow-up request.
     ttl -- remaining number of pings; it is converted with int() and decremented, and a
            follow-up request is deferred by PING_DELAY only while the result is positive.
     """
     self._pings.increment()
     # The arguments follow the order of the placeholders: ttl first, then the message.
     log.info('Got ping (ttl=%s): %s' % (ttl, message))
     ttl = int(ttl) - 1
     if ttl > 0:
         defer(partial(self.send_request, 'ping', message, ttl),
               delay=self.PING_DELAY,
               clock=self._clock)
def test_defer():
  """Check that defer() runs the callable, and not before `delay` has elapsed."""
  DELAY = 0.5
  results = Queue(maxsize=1)

  def func():
    results.put_nowait('success')

  # Start timing before the call is scheduled: if the timer were started afterwards, the
  # measured interval could come out marginally shorter than DELAY and fail the last assert.
  with Timer() as timer:
    defer(func, delay=DELAY)
    assert results.get() == 'success'
  assert timer.elapsed >= DELAY
  def _kill(self):
    """
      Stop the runner if there is one and wait on self._terminated, then schedule the driver to
      stop after STOP_WAIT.
    """
    if self._runner:
      self._killed = True
      # The runner could be already stopped, in which case stop() is a no-op.
      self._runner.stop()
      self._terminated.wait(sys.maxint)

    assert self._driver

    def stop_driver():
      return self._driver.stop()

    # TODO(jyx): Fix https://issues.apache.org/jira/browse/MESOS-243.
    defer(stop_driver, delay=self.STOP_WAIT)
Exemple #14
0
def test_defer():
  """Check that a callable deferred on a ThreadedClock has run once the clock is ticked."""
  fake_clock = ThreadedClock()
  wait = 3
  outcomes = Queue(maxsize=1)

  defer(lambda: outcomes.put_nowait('success'), delay=wait, clock=fake_clock)

  with Timer(clock=fake_clock) as stopwatch:
    # Advance the fake clock past the delay so that the deferred call can fire.
    fake_clock.tick(4)
    assert outcomes.get() == 'success'
  assert stopwatch.elapsed >= wait
Exemple #15
0
    def _kill(self):
        """
        Stop the runner if there is one and wait on self._terminated, then schedule the driver
        to stop after STOP_WAIT.
        """
        if self._runner:
            self._killed = True
            # The runner could be already stopped, in which case stop() is a no-op.
            self._runner.stop()
            self._terminated.wait(sys.maxint)

        assert self._driver

        def stop_driver():
            return self._driver.stop()

        # TODO(jyx): Fix https://issues.apache.org/jira/browse/MESOS-243.
        defer(stop_driver, delay=self.STOP_WAIT)
    def _on_demote(self):
        """
        Record the demotion and, unless self._exited is already set, stop the runner
        asynchronously.
        """
        self.demoted.set()

        if self._exited.is_set():
            return

        log.info("Shutting down runner because it is demoted.")
        # This callback is invoked from the Kazoo thread, which we don't want to block, so
        # stop() is deferred rather than called directly.
        defer(self.stop)
  def _on_demote(self):
    """
      Record the demotion and, unless self._exited is already set, stop the runner
      asynchronously.
    """
    self.demoted.set()

    if self._exited.is_set():
      return

    log.info("Shutting down runner because it is demoted.")
    # This callback is invoked from the Kazoo thread, which we don't want to block, so stop()
    # is deferred rather than called directly.
    defer(self.stop)
Exemple #18
0
def test_defer():
  """
    Check that a callable deferred on a ThreadedClock has not run before the clock is ticked,
    and has run once the clock is ticked past the delay.
  """
  wait = 3
  ticked = wait + 1

  fake_clock = ThreadedClock()
  outcomes = Queue(maxsize=1)

  defer(lambda: outcomes.put_nowait('success'), delay=wait, clock=fake_clock)

  with Timer(clock=fake_clock) as stopwatch:
    # Nothing may be queued while the fake clock has not moved.
    with pytest.raises(Empty):
      outcomes.get_nowait()
    fake_clock.tick(ticked)
    assert outcomes.get() == 'success'

  assert stopwatch.elapsed == ticked
Exemple #19
0
def test_defer():
    """
    Check that a callable deferred on a ThreadedClock has not run before the clock is ticked,
    and has run once the clock is ticked past the delay.
    """
    wait = 3
    ticked = wait + 1

    fake_clock = ThreadedClock()
    outcomes = Queue(maxsize=1)

    defer(lambda: outcomes.put_nowait('success'), delay=wait, clock=fake_clock)

    with Timer(clock=fake_clock) as stopwatch:
        # Nothing may be queued while the fake clock has not moved.
        with pytest.raises(Empty):
            outcomes.get_nowait()
        fake_clock.tick(ticked)
        assert outcomes.get() == 'success'

    assert stopwatch.elapsed == ticked
Exemple #20
0
  def launchTask(self, driver, task):
    """
      Mesos executor callback: create a runner for `task` and run the task asynchronously.

      Only one task is allowed per executor: if a runner already exists, a TASK_FAILED update is
      sent for the incoming task and nothing else is done. If the runner provider raises
      TaskError or ValueError, TASK_FAILED is reported with the error message and self._kill()
      is called.
    """
    if self._runner:
      log.error("Executor allows only one task")
      update = mesos_pb2.TaskStatus()
      # task_id is a required TaskStatus field: name the rejected task so that the update is
      # complete and can be attributed to it.
      update.task_id.value = task.task_id.value
      update.state = mesos_pb2.TASK_FAILED
      driver.sendStatusUpdate(update)
      return

    # Create the runner here in the driver thread so subsequent task launches are rejected.
    try:
      self._runner = self._runner_provider.from_task(task, self._sandbox)
    except (TaskError, ValueError) as e:
      # TODO(jyx): These should really all be 'ValueError's from all providers because they are
      # simply factory methods.
      log.error("Failed to create TaskRunner: %s" % e.message)
      self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED, e.message)
      self._kill()
      return

    # Run the task in a separate daemon thread.
    defer(lambda: self._run_task(task))
Exemple #21
0
    def launchTask(self, driver, task):
        """
        Mesos executor callback: create a runner for `task` and run the task asynchronously.

        Only one task is allowed per executor: if a runner already exists, a TASK_FAILED update
        is sent for the incoming task and nothing else is done. If the runner provider raises
        TaskError or ValueError, TASK_FAILED is reported with the error message and self._kill()
        is called.
        """
        if self._runner:
            log.error("Executor allows only one task")
            update = mesos_pb2.TaskStatus()
            # task_id is a required TaskStatus field: name the rejected task so that the update
            # is complete and can be attributed to it.
            update.task_id.value = task.task_id.value
            update.state = mesos_pb2.TASK_FAILED
            driver.sendStatusUpdate(update)
            return

        # Create the runner here in the driver thread so subsequent task launches are rejected.
        try:
            self._runner = self._runner_provider.from_task(task, self._sandbox)
        except (TaskError, ValueError) as e:
            # TODO(jyx): These should really all be 'ValueError's from all providers because they
            # are simply factory methods.
            log.error("Failed to create TaskRunner: %s" % e.message)
            self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED,
                              e.message)
            self._kill()
            return

        # Run the task in a separate daemon thread.
        defer(lambda: self._run_task(task))
 def _die(self, driver, status, msg):
   """
   Log `msg` as fatal, send `status` with `msg` for the current task, then schedule the
   driver to stop after STOP_WAIT.
   """
   log.fatal(msg)
   self.send_update(driver, self._task_id, status, msg)
   stop_delay = self.STOP_WAIT
   defer(driver.stop, delay=stop_delay)
Exemple #23
0
 def ping(self, message, ttl=60):
     """
     Count the ping, log it, and schedule a follow-up "ping" request while the ttl lasts.

     message -- payload that is logged and forwarded unchanged in the follow-up request.
     ttl -- remaining number of pings; it is converted with int() and decremented, and a
            follow-up request is deferred by PING_DELAY only while the result is positive.
     """
     self._pings.increment()
     # The arguments follow the order of the placeholders: ttl first, then the message.
     log.info("Got ping (ttl=%s): %s" % (ttl, message))
     ttl = int(ttl) - 1
     if ttl > 0:
         defer(partial(self.send_request, "ping", message, ttl),
               delay=self.PING_DELAY,
               clock=self._clock)
Exemple #24
0
  def frameworkMessage(self, driver, message):
    """
      Pass `message` to self._framework_message asynchronously. A message that arrives while
      there is no runner is logged and ignored. `driver` is not used.
    """
    if self._runner:
      defer(lambda: self._framework_message(message))
      return

    log.info('Ignoring framework message because no task is running yet')
Exemple #25
0
 def _die(self, driver, status, msg):
     """
     Log `msg` as fatal, send `status` with `msg` for the current task, then schedule the
     driver to stop after STOP_WAIT.
     """
     log.fatal(msg)
     self.send_update(driver, self._task_id, status, msg)
     stop_delay = self.STOP_WAIT
     defer(driver.stop, delay=stop_delay)
 def _on_promote(self):
     """Record the promotion and, unless self._exited is set, defer self._promote."""
     self.promoted.set()
     if self._exited.is_set():
         return
     defer(self._promote)
 def _on_master_change(self, master):
   """Put `master` on self.master and, unless self._exited is set, defer a reparent to it."""
   self.master.put(master)
   if self._exited.is_set():
     return
   defer(lambda: self._reparent(master))
 def _on_promote(self):
   """Record the promotion and, unless self._exited is set, defer self._promote."""
   self.promoted.set()
   if self._exited.is_set():
     return
   defer(self._promote)
 def _on_master_change(self, master):
     """Put `master` on self.master and, unless self._exited is set, defer a reparent to it."""
     self.master.put(master)
     if self._exited.is_set():
         return
     defer(lambda: self._reparent(master))