Example #1
    def __init__(
        self,
        batch_size,
        restart_threshold,
        watch_secs,
        max_per_shard_failures,
        max_total_failures,
        rollback_on_failure=True,
        wait_for_batch_completion=False,
        pulse_interval_secs=None,
    ):

        if batch_size <= 0:
            raise ValueError("Batch size should be greater than 0")
        if watch_secs <= 0:
            raise ValueError("Watch seconds should be greater than 0")
        if pulse_interval_secs is not None and pulse_interval_secs < self.MIN_PULSE_INTERVAL_SECONDS:
            raise ValueError("Pulse interval seconds must be at least %s seconds." % self.MIN_PULSE_INTERVAL_SECONDS)
        if restart_threshold:
            log.warn("restart_threshold has been deprecated and will be removed in a future release")

        self.batch_size = batch_size
        self.watch_secs = watch_secs
        self.max_total_failures = max_total_failures
        self.max_per_instance_failures = max_per_shard_failures
        self.rollback_on_failure = rollback_on_failure
        self.wait_for_batch_completion = wait_for_batch_completion
        self.pulse_interval_secs = pulse_interval_secs
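The checks above can be exercised on their own. Below is a minimal, self-contained sketch of the same validation pattern; the function name and the MIN_PULSE_INTERVAL_SECONDS value of 60 are assumptions for illustration, not taken from the example:

import logging

log = logging.getLogger(__name__)

MIN_PULSE_INTERVAL_SECONDS = 60  # assumed value, for illustration only

def validate_update_config(batch_size, watch_secs, pulse_interval_secs=None, restart_threshold=None):
    # Mirrors the constructor checks above as a standalone function.
    if batch_size <= 0:
        raise ValueError("Batch size should be greater than 0")
    if watch_secs <= 0:
        raise ValueError("Watch seconds should be greater than 0")
    if pulse_interval_secs is not None and pulse_interval_secs < MIN_PULSE_INTERVAL_SECONDS:
        raise ValueError("Pulse interval seconds must be at least %s seconds." % MIN_PULSE_INTERVAL_SECONDS)
    if restart_threshold:
        # Deprecated knob: still accepted, but only triggers a warning.
        log.warning("restart_threshold has been deprecated and will be removed in a future release")

Calling validate_update_config(batch_size=0, watch_secs=45) raises ValueError, while passing restart_threshold=30 only logs the deprecation warning.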
Example #2
 def _maybe_scrubbed_env(cls):
   for env_var in cls._SCRUBBED_ENV:
     value = os.getenv(env_var)
     if value:
       log.warn('Scrubbing {env_var}={value}'.format(env_var=env_var, value=value))
   with environment_as(**cls._SCRUBBED_ENV):
     yield
Example #3
def initialize(options):
  cwd_path = os.path.abspath(CWD)
  checkpoint_root = os.path.join(cwd_path, MesosPathDetector.DEFAULT_SANDBOX_PATH)

  # status providers:
  status_providers = [
      HealthCheckerProvider(),
      ResourceManagerProvider(checkpoint_root=checkpoint_root)
  ]

  if options.announcer_enable:
    log.warn('Please remove the deprecated and no-op --announcer-enable flag in scheduler config!')

  if options.announcer_ensemble is not None:
    status_providers.append(DefaultAnnouncerCheckerProvider(
      options.announcer_ensemble,
      options.announcer_serverset_path,
      options.announcer_allow_custom_serverset_path,
      options.announcer_hostname,
      make_zk_auth(options.announcer_zookeeper_auth_config)
    ))

  # Create executor stub
  if options.execute_as_user or options.nosetuid:
    # If nosetuid is set, execute_as_user is also None
    thermos_runner_provider = UserOverrideThermosTaskRunnerProvider(
      dump_runner_pex(),
      checkpoint_root,
      artifact_dir=cwd_path,
      process_logger_destination=options.runner_logger_destination,
      process_logger_mode=options.runner_logger_mode,
      rotate_log_size_mb=options.runner_rotate_log_size_mb,
      rotate_log_backups=options.runner_rotate_log_backups,
      preserve_env=options.preserve_env
    )
    thermos_runner_provider.set_role(None)

    thermos_executor = AuroraExecutor(
      runner_provider=thermos_runner_provider,
      status_providers=status_providers,
      sandbox_provider=UserOverrideDirectorySandboxProvider(options.execute_as_user)
    )
  else:
    thermos_runner_provider = DefaultThermosTaskRunnerProvider(
      dump_runner_pex(),
      checkpoint_root,
      artifact_dir=cwd_path,
      process_logger_destination=options.runner_logger_destination,
      process_logger_mode=options.runner_logger_mode,
      rotate_log_size_mb=options.runner_rotate_log_size_mb,
      rotate_log_backups=options.runner_rotate_log_backups,
      preserve_env=options.preserve_env
    )

    thermos_executor = AuroraExecutor(
      runner_provider=thermos_runner_provider,
      status_providers=status_providers
    )

  return thermos_executor
Example #4
    def run(self):
        # Re-run the election in a loop periodically until a master can be elected or the elector is
        # aborted.
        while not self._aborted.is_set() and not self._completed.wait(
                self._query_interval):
            if datetime.utcnow() < self._election_deadline:
                self._elect(timedout=False)
            else:
                log.info(
                    "Timed out waiting for all slaves to respond. Now elect from existing responses"
                )
                self._elect(timedout=True)
                if not self._completed.is_set():
                    log.warn("No slave is electable after timeout")

        # If asked to stop, return directly without triggering the callback.
        if self._aborted.is_set():
            log.info(
                "Asked to stop the elector thread for cluster %s. Stopping..."
                % self._cluster_name)
            return

        self._master_callback(
            self._master)  # Invoke the callback from the elector thread.
        log.info(
            "Stopping the elector thread for cluster %s because the election has completed"
            % self._cluster_name)
Example #5
 def _maybe_scrubbed_env(cls):
   for env_var in cls._SCRUBBED_ENV:
     value = os.getenv(env_var)
     if value:
       log.warn('Scrubbing {env_var}={value}'.format(env_var=env_var, value=value))
   with environment_as(**cls._SCRUBBED_ENV):
     yield
Example #6
  def update(self, instances=None):
    """Performs the job update, blocking until it completes.
    A rollback will be performed if the update was considered a failure based on the
    update configuration.

    Arguments:
    instances -- (optional) instances to update. If not specified, all instances will be updated.

    Returns a response object with update result status.
    """
    resp = self._start()
    if resp.responseCode != ResponseCode.OK:
      return resp

    try:
      # Handle cron jobs separately from other jobs.
      if self._replace_template_if_cron():
        log.info('Cron template updated, next run will reflect changes')
        return self._finish()
      else:
        if not self._update(instances):
          log.warn('Update failures threshold reached')
          self._finish()
          return self._failed_response('Update reverted')
        else:
          log.info('Update successful')
          return self._finish()
    except self.Error as e:
      return self._failed_response('Aborting update without rollback! Fatal error: %s' % e)
Example #7
    def update(self, instances=None):
        """Performs the job update, blocking until it completes.
    A rollback will be performed if the update was considered a failure based on the
    update configuration.

    Arguments:
    instances -- (optional) instances to update. If not specified, all instances will be updated.

    Returns a response object with update result status.
    """
        resp = self._start()
        if resp.responseCode != ResponseCode.OK:
            return resp

        try:
            # Handle cron jobs separately from other jobs.
            if self._replace_template_if_cron():
                log.info(
                    'Cron template updated, next run will reflect changes')
                return self._finish()
            else:
                if not self._update(instances):
                    log.warn('Update failures threshold reached')
                    self._finish()
                    return self._failed_response('Update reverted')
                else:
                    log.info('Update successful')
                    return self._finish()
        except self.Error as e:
            return self._failed_response(
                'Aborting update without rollback! Fatal error: %s' % e)
Example #8
  def wait_for_accept(cls, port, tunnel_popen, timeout):
    total_time = Amount(0, Time.SECONDS)
    sleep = cls.MIN_RETRY
    warned = False  # Did we log a warning that shows we're waiting for the tunnel?

    while total_time < timeout and tunnel_popen.returncode is None:
      try:
        accepted_socket = socket.create_connection(('localhost', port), timeout=5.0)
        accepted_socket.close()
        return True
      except socket.error:
        total_time += sleep
        time.sleep(sleep.as_(Time.SECONDS))

        # Increase sleep exponentially until MAX_INTERVAL is reached
        sleep = min(sleep * 2, cls.MAX_INTERVAL)

        if total_time > cls.WARN_THRESHOLD and not warned:
          log.warn('Still waiting for tunnel to be established after %s (timeout is %s)' % (
              total_time, cls.DEFAULT_TIMEOUT))
          warned = True

        tunnel_popen.poll()  # needed to update tunnel_popen.returncode
    if tunnel_popen.returncode is not None:
      cls.log('SSH returned prematurely with code %s' % str(tunnel_popen.returncode))
    else:
      cls.log('timed out initializing tunnel')
    return False
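The same exponential-backoff polling can be written without the Amount/Time helpers. A self-contained sketch follows; the names and defaults are illustrative, not the class constants used above:

import socket
import time

def wait_for_port(port, timeout_secs=30.0, min_sleep=0.1, max_sleep=2.0):
    # Poll localhost:port, doubling the sleep after each failure up to max_sleep.
    waited, sleep = 0.0, min_sleep
    while waited < timeout_secs:
        try:
            socket.create_connection(('localhost', port), timeout=5.0).close()
            return True
        except socket.error:
            time.sleep(sleep)
            waited += sleep
            sleep = min(sleep * 2, max_sleep)
    return False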
Example #9
  def wait_for_accept(cls, port, tunnel_popen, timeout):
    total_time = Amount(0, Time.SECONDS)
    sleep = cls.MIN_RETRY
    warned = False  # Did we log a warning that shows we're waiting for the tunnel?

    while total_time < timeout and tunnel_popen.returncode is None:
      try:
        accepted_socket = socket.create_connection(('localhost', port), timeout=5.0)
        accepted_socket.close()
        return True
      except socket.error:
        total_time += sleep
        time.sleep(sleep.as_(Time.SECONDS))

        # Increase sleep exponentially until MAX_INTERVAL is reached
        sleep = min(sleep * 2, cls.MAX_INTERVAL)

        if total_time > cls.WARN_THRESHOLD and not warned:
          log.warn('Still waiting for tunnel to be established after %s (timeout is %s)' % (
              total_time, cls.DEFAULT_TIMEOUT))
          warned = True

        tunnel_popen.poll()  # needed to update tunnel_popen.returncode
    if tunnel_popen.returncode is not None:
      cls.log('SSH returned prematurely with code %s' % str(tunnel_popen.returncode))
    else:
      cls.log('timed out initializing tunnel')
    return False
Example #10
  def _run_task(self, task):
    assert self._runner, "_runner should be created before this method is called"

    try:
      self._runner.start()
      log.info("Task runner for task %s started" % task.task_id)

      self._send_update(task.task_id.value, mesos_pb2.TASK_RUNNING)
    except TaskError as e:
      log.error("Task runner for task %s failed to start: %s" % (task.task_id, str(e)))
      # Send TASK_FAILED if the task failed to start.
      self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED)
    except Exception as e:
      log.error("Error occurred while executing the task: %s" % e)
      log.error(traceback.format_exc())
      # Send TASK_LOST for unknown errors.
      self._send_update(task.task_id.value, mesos_pb2.TASK_LOST)
    else:
      # Wait for the task's return code (when it terminates).
      try:
        returncode = self._runner.join()
        # If '_runner' terminates, it has either failed or been killed.
        log.warn("Task process terminated with return code %s" % returncode)
      except TaskError as e:
        log.error("Task terminated: %s" % e)
      finally:
        if self._killed:
          self._send_update(task.task_id.value, mesos_pb2.TASK_KILLED)
        else:
          self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED)
        self._terminated.set()
    finally:
      # No matter what happens above, when we reach here the executor has no task to run so it
      # should just commit seppuku.
      self._kill()
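The interplay of try/except/else with the nested finally above determines which status update is sent. A minimal sketch of the same control flow, with prints standing in for the Mesos status updates (the flags are illustrative):

def run_task_flow(start_ok=True, join_raises=False, killed=False):
    try:
        if not start_ok:
            raise RuntimeError("failed to start")
        print("TASK_RUNNING")                     # runner started
    except RuntimeError as e:
        print("TASK_FAILED (start error: %s)" % e)
    else:
        try:
            if join_raises:
                raise RuntimeError("join error")
            print("runner joined")                # normal termination path
        except RuntimeError as e:
            print("task terminated: %s" % e)
        finally:
            print("TASK_KILLED" if killed else "TASK_FAILED")
    finally:
        print("executor shuts itself down")       # always runs last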
Example #11
    def __init__(self,
                 batch_size,
                 restart_threshold,
                 watch_secs,
                 max_per_shard_failures,
                 max_total_failures,
                 rollback_on_failure=True,
                 wait_for_batch_completion=False,
                 pulse_interval_secs=None):

        if batch_size <= 0:
            raise ValueError('Batch size should be greater than 0')
        if watch_secs <= 0:
            raise ValueError('Watch seconds should be greater than 0')
        if pulse_interval_secs is not None and pulse_interval_secs < self.MIN_PULSE_INTERVAL_SECONDS:
            raise ValueError(
                'Pulse interval seconds must be at least %s seconds.' %
                self.MIN_PULSE_INTERVAL_SECONDS)
        if restart_threshold:
            log.warn(
                'restart_threshold has been deprecated and will be removed in a future release'
            )

        self.batch_size = batch_size
        self.watch_secs = watch_secs
        self.max_total_failures = max_total_failures
        self.max_per_instance_failures = max_per_shard_failures
        self.rollback_on_failure = rollback_on_failure
        self.wait_for_batch_completion = wait_for_batch_completion
        self.pulse_interval_secs = pulse_interval_secs
Example #12
    def start(self, env=None):
        if self._process:
            log.warn(
                "start() called when a running task subprocess already exists")
            return

        command = (
            "%(cmd)s %(framework_user)s %(host)s %(port)s %(server_id)s %(data_dir)s %(log_dir)s "
            "%(tmp_dir)s %(conf_file)s %(buffer_pool_size)s" %
            dict(cmd=os.path.join(self._scripts_dir, "mysos_launch_mysqld.sh"),
                 framework_user=self._framework_user,
                 host=self._host,
                 port=self._port,
                 server_id=self._server_id,
                 data_dir=self._sandbox.mysql_data_dir,
                 log_dir=self._sandbox.mysql_log_dir,
                 tmp_dir=self._sandbox.mysql_tmp_dir,
                 conf_file=self._conf_file,
                 buffer_pool_size=self._buffer_pool_size))
        log.info("Executing command: %s" % command)
        self._process = subprocess.Popen(command,
                                         shell=True,
                                         env=env,
                                         preexec_fn=os.setpgrp)

        # There is a delay before mysqld becomes available to accept requests. Wait for it.
        command = "%(cmd)s %(pid_file)s %(port)s %(timeout)s" % dict(
            cmd=os.path.join(self._scripts_dir, "mysos_wait_for_mysqld.sh"),
            pid_file=os.path.join(self._sandbox.mysql_log_dir, "mysqld.pid"),
            port=self._port,
            timeout=60)
        log.info("Executing command: %s" % command)
        subprocess.check_call(command, shell=True, env=env)

        return self._process
Example #13
  def start(self, env=None):
    if self._process:
      log.warn("start() called when a running task subprocess already exists")
      return

    command = (
        "%(cmd)s %(framework_user)s %(host)s %(port)s %(server_id)s %(data_dir)s %(log_dir)s "
        "%(tmp_dir)s %(conf_file)s %(buffer_pool_size)s" % dict(
            cmd=os.path.join(self._scripts_dir, "mysos_launch_mysqld.sh"),
            framework_user=self._framework_user,
            host=self._host,
            port=self._port,
            server_id=self._server_id,
            data_dir=self._sandbox.mysql_data_dir,
            log_dir=self._sandbox.mysql_log_dir,
            tmp_dir=self._sandbox.mysql_tmp_dir,
            conf_file=self._conf_file,
            buffer_pool_size=self._buffer_pool_size))
    log.info("Executing command: %s" % command)
    self._process = subprocess.Popen(command, shell=True, env=env, preexec_fn=os.setpgrp)

    # There is a delay before mysqld becomes available to accept requests. Wait for it.
    command = "%(cmd)s %(pid_file)s %(port)s %(timeout)s" % dict(
        cmd=os.path.join(self._scripts_dir, "mysos_wait_for_mysqld.sh"),
        pid_file=os.path.join(self._sandbox.mysql_log_dir, "mysqld.pid"),
        port=self._port,
        timeout=60)
    log.info("Executing command: %s" % command)
    subprocess.check_call(command, shell=True, env=env)

    return self._process
Example #14
 def __check_int(item):
     if item is not None:
         try:
             item = int(item)
         except ValueError:
             log.warn('Failed to deserialize value %r' % item)
             item = None
     return item
Example #15
 def __check_int(item):
   if item is not None:
     try:
       item = int(item)
     except ValueError:
       log.warn('Failed to deserialize value %r' % item)
       item = None
   return item
Example #16
 def _request_agent_containers(self):
   try:
     resp = requests.get(self._url, timeout=self._request_timeout)
     resp.raise_for_status()
     return resp.json()
   except requests.exceptions.RequestException as ex:
     log.warn("MesosDiskCollector: Unexpected error talking to agent api: %s", ex)
     return []
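The same defensive pattern as a standalone function, using the standard logging module and requests; the function name and empty-list fallback mirror the example, and the URL is whatever the caller supplies:

import logging
import requests

log = logging.getLogger(__name__)

def fetch_json_or_empty(url, timeout=5.0):
    # Any connection, timeout, or HTTP error is logged as a warning and swallowed.
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as ex:
        log.warning("Unexpected error talking to %s: %s", url, ex)
        return []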
Example #17
 def iterate(self):
   with self._lock:
     try:
       with open(self._filename, 'r') as fp:
         self._sample = json.load(fp)
     except (IOError, OSError, ValueError) as e:
       if log:
         log.warn('Failed to collect sample: %s' % e)
Example #18
 def iterate(self):
     with self._lock:
         try:
             with open(self._filename, 'r') as fp:
                 self._sample = json.load(fp)
         except (IOError, OSError, ValueError) as e:
             if log:
                 log.warn('Failed to collect sample: %s' % e)
Example #19
    def add_to_queue(self, queue, item, label):
        """ queue items send to us by the sniffer """
        count = len(queue)
        if count > queue.maxlength():  # pragma: no cover
            log.warn("Too many %s queued (%d)", label, count)
            return

        queue.appendleft(item)
Example #20
    def add_to_queue(self, queue, item, label):
        """ queue items send to us by the sniffer """
        count = len(queue)
        if count > queue.maxlength():  # pragma: no cover
            log.warn("Too many %s queued (%d)", label, count)
            return

        queue.appendleft(item)
Example #21
    def run(self, lock):
        if self.options.dry_run:
            print "****** Dry Run ******"

        logger = None
        if self.options.log or self.options.log_level:
            from twitter.common.log import init
            from twitter.common.log.options import LogOptions

            LogOptions.set_stderr_log_level((self.options.log_level or "info").upper())
            logdir = self.options.logdir or self.config.get("goals", "logdir", default=None)
            if logdir:
                safe_mkdir(logdir)
                LogOptions.set_log_dir(logdir)
                init("goals")
            else:
                init()
            logger = log

        if self.options.recursive_directory:
            log.warn("--all-recursive is deprecated, use a target spec with the form [dir]:: instead")
            for dir in self.options.recursive_directory:
                self.add_target_recursive(dir)

        if self.options.target_directory:
            log.warn("--all is deprecated, use a target spec with the form [dir]: instead")
            for dir in self.options.target_directory:
                self.add_target_directory(dir)

        context = Context(
            self.config,
            self.options,
            self.targets,
            requested_goals=self.requested_goals,
            lock=lock,
            log=logger,
            timer=self.timer if self.options.time else None,
        )

        unknown = []
        for phase in self.phases:
            if not phase.goals():
                unknown.append(phase)

        if unknown:
            print ("Unknown goal(s): %s" % " ".join(phase.name for phase in unknown))
            print ("")
            return Phase.execute(context, "goals")

        if logger:
            logger.debug("Operating on targets: %s", self.targets)

        ret = Phase.attempt(context, self.phases)
        if self.options.time:
            print ("Timing report")
            print ("=============")
            self.timer.print_timings()
        return ret
Example #22
def initialize(options):
    cwd_path = os.path.abspath(CWD)
    checkpoint_root = os.path.join(cwd_path,
                                   MesosPathDetector.DEFAULT_SANDBOX_PATH)

    # status providers:
    status_providers = [
        HealthCheckerProvider(),
        ResourceManagerProvider(checkpoint_root=checkpoint_root)
    ]

    if options.announcer_enable:
        log.warn(
            'Please remove the deprecated and no-op --announcer-enable flag in scheduler config!'
        )

    if options.announcer_ensemble is not None:
        status_providers.append(
            DefaultAnnouncerCheckerProvider(
                options.announcer_ensemble, options.announcer_serverset_path,
                options.announcer_allow_custom_serverset_path,
                options.announcer_hostname))

    # Create executor stub
    if options.execute_as_user or options.nosetuid:
        # If nosetuid is set, execute_as_user is also None
        thermos_runner_provider = UserOverrideThermosTaskRunnerProvider(
            dump_runner_pex(),
            checkpoint_root,
            artifact_dir=cwd_path,
            process_logger_destination=options.runner_logger_destination,
            process_logger_mode=options.runner_logger_mode,
            rotate_log_size_mb=options.runner_rotate_log_size_mb,
            rotate_log_backups=options.runner_rotate_log_backups,
            preserve_env=options.preserve_env)
        thermos_runner_provider.set_role(None)

        thermos_executor = AuroraExecutor(
            runner_provider=thermos_runner_provider,
            status_providers=status_providers,
            sandbox_provider=UserOverrideDirectorySandboxProvider(
                options.execute_as_user))
    else:
        thermos_runner_provider = DefaultThermosTaskRunnerProvider(
            dump_runner_pex(),
            checkpoint_root,
            artifact_dir=cwd_path,
            process_logger_destination=options.runner_logger_destination,
            process_logger_mode=options.runner_logger_mode,
            rotate_log_size_mb=options.runner_rotate_log_size_mb,
            rotate_log_backups=options.runner_rotate_log_backups,
            preserve_env=options.preserve_env)

        thermos_executor = AuroraExecutor(
            runner_provider=thermos_runner_provider,
            status_providers=status_providers)

    return thermos_executor
Example #23
 def _maybe_scrubbed_classpath(self):
     if self._scrub_classpath:
         classpath = os.getenv('CLASSPATH')
         if classpath:
             log.warn('Scrubbing CLASSPATH=%s' % classpath)
         with environment_as(CLASSPATH=None):
             yield
     else:
         yield
Example #24
  def run(self, lock):
    with self.check_errors("Target contains a dependency cycle") as error:
      for target in self.targets:
        try:
          InternalTarget.check_cycles(target)
        except InternalTarget.CycleException as e:
          error(target.id)

    timer = None
    if self.options.time:
      class Timer(object):
        def now(self):
          return time.time()
        def log(self, message):
          print(message)
      timer = Timer()

    logger = None
    if self.options.log or self.options.log_level:
      from twitter.common.log import init
      from twitter.common.log.options import LogOptions
      LogOptions.set_stderr_log_level((self.options.log_level or 'info').upper())
      logdir = self.options.logdir or self.config.get('goals', 'logdir', default=None)
      if logdir:
        safe_mkdir(logdir)
        LogOptions.set_log_dir(logdir)
        init('goals')
      else:
        init()
      logger = log

    if self.options.recursive_directory:
      log.warn('--all-recursive is deprecated, use a target spec with the form [dir]:: instead')
      for dir in self.options.recursive_directory:
        self.add_target_recursive(dir)

    if self.options.target_directory:
      log.warn('--all is deprecated, use a target spec with the form [dir]: instead')
      for dir in self.options.target_directory:
        self.add_target_directory(dir)

    context = Context(self.config, self.options, self.targets, lock=lock, log=logger)

    unknown = []
    for phase in self.phases:
      if not phase.goals():
        unknown.append(phase)

    if unknown:
        print('Unknown goal(s): %s' % ' '.join(phase.name for phase in unknown))
        print('')
        return Phase.execute(context, 'goals')

    if logger:
      logger.debug('Operating on targets: %s', self.targets)

    return Phase.attempt(context, self.phases, timer=timer)
Example #25
 def cpu_affinity(self):
     """
     Get CPU affinity of this process
     :return: a list() of CPU cores this process is pinned to
     """
     try:
       return self.process.cpu_affinity()
     except AttributeError:
       log.warn('cpu affinity is not available on your platform')
Example #26
 def get_cpu_affinity(self):
     """
     Get CPU affinity of this process
     :return: a list() of CPU cores this process is pinned to
     """
     try:
         return self.process.cpu_affinity()
     except AttributeError:
         log.warn('cpu affinity is not available on your platform')
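psutil only exposes cpu_affinity() on platforms that support it (e.g. Linux and Windows), which is why both variants guard against AttributeError. A standalone sketch of the same guard (the function name is illustrative):

import logging
import psutil

log = logging.getLogger(__name__)

def current_cpu_affinity(pid=None):
    # psutil.Process() with no pid refers to the current process.
    proc = psutil.Process(pid)
    try:
        return proc.cpu_affinity()
    except AttributeError:
        # e.g. on macOS, where Process has no cpu_affinity() method.
        log.warning('cpu affinity is not available on your platform')
        return None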
Example #27
 def _maybe_scrubbed_classpath(self):
   if self._scrub_classpath:
     classpath = os.getenv('CLASSPATH')
     if classpath:
       log.warn('Scrubbing CLASSPATH=%s' % classpath)
     with environment_as(CLASSPATH=None):
       yield
   else:
     yield
Example #28
  def getpage(self, wiki_space, page_title):
    """ Fetches a page object.

    Returns None if the page does not exist or otherwise could not be fetched.
    """
    try:
      return self._server.confluence1.getPage(self._session_token, wiki_space, page_title)
    except Fault as e:
      log.warn('Failed to fetch page %s: %s' % (page_title, e))
      return None
Example #29
  def getpage(self, wiki_space, page_title):
    """ Fetches a page object.

    Returns None if the page does not exist or otherwise could not be fetched.
    """
    try:
      return self._api_entrypoint.getPage(self._session_token, wiki_space, page_title)
    except XMLRPCError as e:
      log.warn('Failed to fetch page %s: %s' % (page_title, e))
      return None
Example #30
    def add_to_queue(self, queue, item, label):
        """ queue items send to us by the sniffer """
        with self._cv:
            count = len(queue)
            if count > queue.maxlength():
                log.warn("Too many %s queued (%d)", label, count)
                return

            queue.appendleft(item)
            self._cv.notify()
Example #31
  def add_to_queue(self, queue, item, label):
    """ queue items send to us by the sniffer """
    with self._cv:
      count = len(queue)
      if count > queue.maxlength():
        log.warn("Too many %s queued (%d)", label, count)
        return

      queue.appendleft(item)
      self._cv.notify()
Example #32
    def run(self, lock):
        with self.check_errors("Target contains a dependency cycle") as error:
            with self.timer.timing("parse:check_cycles"):
                for target in self.targets:
                    try:
                        InternalTarget.check_cycles(target)
                    except InternalTarget.CycleException as e:
                        error(target.id)

        logger = None
        if self.options.log or self.options.log_level:
            from twitter.common.log import init
            from twitter.common.log.options import LogOptions

            LogOptions.set_stderr_log_level((self.options.log_level or "info").upper())
            logdir = self.options.logdir or self.config.get("goals", "logdir", default=None)
            if logdir:
                safe_mkdir(logdir)
                LogOptions.set_log_dir(logdir)
                init("goals")
            else:
                init()
            logger = log

        if self.options.recursive_directory:
            log.warn("--all-recursive is deprecated, use a target spec with the form [dir]:: instead")
            for dir in self.options.recursive_directory:
                self.add_target_recursive(dir)

        if self.options.target_directory:
            log.warn("--all is deprecated, use a target spec with the form [dir]: instead")
            for dir in self.options.target_directory:
                self.add_target_directory(dir)

        context = Context(self.config, self.options, self.targets, lock=lock, log=logger)

        unknown = []
        for phase in self.phases:
            if not phase.goals():
                unknown.append(phase)

        if unknown:
            print("Unknown goal(s): %s" % " ".join(phase.name for phase in unknown))
            print("")
            return Phase.execute(context, "goals")

        if logger:
            logger.debug("Operating on targets: %s", self.targets)

        ret = Phase.attempt(context, self.phases, timer=self.timer if self.options.time else None)
        if self.options.time:
            print("Timing report")
            print("=============")
            self.timer.print_timings()
        return ret
Example #33
  def run(self, lock):
    if self.options.dry_run:
      print '****** Dry Run ******'

    logger = None
    if self.options.log or self.options.log_level:
      from twitter.common.log import init
      from twitter.common.log.options import LogOptions
      LogOptions.set_stderr_log_level((self.options.log_level or 'info').upper())
      logdir = self.options.logdir or self.config.get('goals', 'logdir', default=None)
      if logdir:
        safe_mkdir(logdir)
        LogOptions.set_log_dir(logdir)
        init('goals')
      else:
        init()
      logger = log

    if self.options.recursive_directory:
      log.warn('--all-recursive is deprecated, use a target spec with the form [dir]:: instead')
      for dir in self.options.recursive_directory:
        self.add_target_recursive(dir)

    if self.options.target_directory:
      log.warn('--all is deprecated, use a target spec with the form [dir]: instead')
      for dir in self.options.target_directory:
        self.add_target_directory(dir)

    context = Context(
      self.config,
      self.options,
      self.targets,
      lock=lock,
      log=logger,
      timer=self.timer if self.options.time else None)

    unknown = []
    for phase in self.phases:
      if not phase.goals():
        unknown.append(phase)

    if unknown:
        print('Unknown goal(s): %s' % ' '.join(phase.name for phase in unknown))
        print('')
        return Phase.execute(context, 'goals')

    if logger:
      logger.debug('Operating on targets: %s', self.targets)

    ret = Phase.attempt(context, self.phases)
    if self.options.time:
      print('Timing report')
      print('=============')
      self.timer.print_timings()
    return ret
Example #34
    def getpage(self, wiki_space, page_title):
        """ Fetches a page object.

    Returns None if the page does not exist or otherwise could not be fetched.
    """
        try:
            return self._server.confluence1.getPage(self._session_token,
                                                    wiki_space, page_title)
        except Fault as e:
            log.warn('Failed to fetch page %s: %s' % (page_title, e))
            return None
Example #35
    def getpage(self, wiki_space, page_title):
        """ Fetches a page object.

    Returns None if the page does not exist or otherwise could not be fetched.
    """
        try:
            return self._api_entrypoint.getPage(self._session_token,
                                                wiki_space, page_title)
        except XMLRPCError as e:
            log.warn('Failed to fetch page %s: %s' % (page_title, e))
            return None
Example #36
  def run(self, lock):
    with self.check_errors("Target contains a dependency cycle") as error:
      with self.timer.timing('parse:check_cycles'):
        for target in self.targets:
          try:
            InternalTarget.check_cycles(target)
          except InternalTarget.CycleException as e:
            error(target.id)

    logger = None
    if self.options.log or self.options.log_level:
      from twitter.common.log import init
      from twitter.common.log.options import LogOptions
      LogOptions.set_stderr_log_level((self.options.log_level or 'info').upper())
      logdir = self.options.logdir or self.config.get('goals', 'logdir', default=None)
      if logdir:
        safe_mkdir(logdir)
        LogOptions.set_log_dir(logdir)
        init('goals')
      else:
        init()
      logger = log

    if self.options.recursive_directory:
      log.warn('--all-recursive is deprecated, use a target spec with the form [dir]:: instead')
      for dir in self.options.recursive_directory:
        self.add_target_recursive(dir)

    if self.options.target_directory:
      log.warn('--all is deprecated, use a target spec with the form [dir]: instead')
      for dir in self.options.target_directory:
        self.add_target_directory(dir)

    context = Context(self.config, self.options, self.targets, lock=lock, log=logger)

    unknown = []
    for phase in self.phases:
      if not phase.goals():
        unknown.append(phase)

    if unknown:
        print('Unknown goal(s): %s' % ' '.join(phase.name for phase in unknown))
        print('')
        return Phase.execute(context, 'goals')

    if logger:
      logger.debug('Operating on targets: %s', self.targets)

    ret = Phase.attempt(context, self.phases, timer=self.timer if self.options.time else None)
    if self.options.time:
      print('Timing report')
      print('=============')
      self.timer.print_timings()
    return ret
Example #37
 def set_niceness(self, nice_level):
     """
     Set the nice level of this process
     :param nice_level: the nice level to set
     """
     try:
         # TODO (phobos182): double check that psutil does not allow negative nice values
         if not 0 <= nice_level <= 20:
             raise ValueError('nice level must be between 0 and 20')
         self.process.nice(nice_level)
     except(EnvironmentError, ValueError, AccessDenied, NoSuchProcess) as e:
         log.warn('unable to set nice level on process: {}'.format(e))
Example #38
 def set_cpu_affinity(self, cpu_affinity_csv):
     """
     Set CPU affinity for this process
     :param cpu_affinity_csv: A comma-separated string representing CPU cores
     """
     try:
         cpu_list = self.parse_cpu_affinity(cpu_affinity_csv)
         self.process.cpu_affinity(cpu_list)
     except (OSError, ValueError) as e:
         log.warn('unable to set cpu affinity: {}, on process: {}'.format(cpu_affinity_csv, e))
     except AttributeError:
         log.warn('cpu affinity is not available on your platform')
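The helper parse_cpu_affinity is referenced but not shown in these examples; a plausible minimal implementation (an assumption, not the project's actual code) would be:

def parse_cpu_affinity(cpu_affinity_csv):
    # Turn '0,2,3' into [0, 2, 3]; malformed input raises ValueError,
    # which the caller above catches and logs.
    return [int(core) for core in cpu_affinity_csv.split(',') if core.strip()]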
Example #39
 def callback():
     if hook_method is None:
         return True
     log.debug("Running %s in %s" % (hook_method.__name__, hook.__class__.__name__))
     hook_result = False
     try:
         hook_result = hook_method()
         if not hook_result:
             log.debug("%s in %s returned False" % (hook_method.__name__, hook.__class__.__name__))
     except Exception:
         log.warn("Error in %s in %s" % (hook_method.__name__, hook.__class__.__name__))
         log.warn(traceback.format_exc())
     return hook_result
Example #40
 def set_cpu_affinity(self, cpu_affinity_csv):
     """
     Set CPU affinity for this process
     :param cpu_affinity_csv: A comma-separated string representing CPU cores
     """
     try:
         cpu_list = self.parse_cpu_affinity(cpu_affinity_csv)
         self.process.cpu_affinity(cpu_list)
     except (OSError, ValueError) as e:
         log.warn('unable to set cpu affinity: {}, on process: {}'.format(
             cpu_affinity_csv, e))
     except AttributeError:
         log.warn('cpu affinity is not available on your platform')
Example #41
 def set_niceness(self, nice_level):
     """
     Set the nice level of this process
     :param nice_level: the nice level to set
     """
     try:
         # TODO (phobos182): double check that psutil does not allow negative nice values
         if not 0 <= nice_level <= 20:
             raise ValueError('nice level must be between 0 and 20')
         self.process.nice(nice_level)
     except (EnvironmentError, ValueError, AccessDenied,
             NoSuchProcess) as e:
         log.warn('unable to set nice level on process: {}'.format(e))
Example #42
    def _stop(self, timeout):
        """
      Stop the runner and wait for its thread (and the sub-processes) to exit.

       :param timeout: Seconds the process has to exit before a hard SIGKILL is issued
                       (SIGTERM is sent first).
      :return: True if an active runner is stopped, False if the runner is not started or already
               stopping/stopped.
    """
        with self._lock:
            if not self._started:
                log.warn("Cannot stop the runner because it's not started")
                return False

            if not self._popen:
                log.info(
                    "The runner task did not start successfully so no need to kill it"
                )
                return False

            try:
                log.info("Terminating process group: %s" % self._popen.pid)
                os.killpg(self._popen.pid, signal.SIGTERM)
            except OSError as e:
                log.info("The sub-processes are already terminated: %s" % e)
                return False

        log.info("Waiting for process to terminate due to SIGTERM")

        # Escalate to SIGKILL if SIGTERM is not sufficient.
        if not self._exited.wait(timeout=timeout):
            with self._lock:
                try:
                    log.warn(
                        "Killing process group %s which failed to terminate cleanly within %s secs"
                        % (self._popen.pid, timeout))
                    os.killpg(self._popen.pid, signal.SIGKILL)
                except OSError as e:
                    log.info("The sub-processes are already terminated: %s" %
                             e)
                    return False
        else:
            return True

        log.info("Waiting for process to terminate due to SIGKILL")
        if not self._exited.wait(timeout=timeout):
            raise TaskError("Failed to kill process group %s" %
                            self._popen.pid)

        return True
Example #43
  def stop(self, timeout=10):
    with self._lock:
      # stop() could be called by multiple threads. Locking so we only stop the runner once.
      if self._stopping:
        log.warn("The runner is already stopping/stopped")
        return False
      else:
        log.info("Stopping runner")
        self._stopping = True

    try:
      return self._stop(timeout)
    finally:
      self._kazoo.stop()
      log.info("Runner cleaned up")
Example #44
    def stop(self, timeout=10):
        with self._lock:
            # stop() could be called by multiple threads. Locking so we only stop the runner once.
            if self._stopping:
                log.warn("The runner is already stopping/stopped")
                return False
            else:
                log.info("Stopping runner")
                self._stopping = True

        try:
            return self._stop(timeout)
        finally:
            self._kazoo.stop()
            log.info("Runner cleaned up")
Example #45
 def callback():
   if hook_method is None:
     return True
   log.debug('Running %s in %s' % (hook_method.__name__, hook.__class__.__name__))
   hook_result = False
   try:
     hook_result = hook_method()
     if not hook_result:
       log.debug('%s in %s returned False' % (hook_method.__name__,
           hook.__class__.__name__))
   except Exception:
     log.warn('Error in %s in %s' %
         (hook_method.__name__, hook.__class__.__name__))
     log.warn(traceback.format_exc())
   return hook_result
Example #46
def _validate_health_check_config(config):
  health_check_config = config.health_check_config().get()
  health_checker = health_check_config.get('health_checker', {})
  # If the old style of configuration is being used.
  # TODO (AURORA-1563): Remove this code after we drop support for defining these directly in
  # HealthCheckConfig.
  for deprecated in {'endpoint', 'expected_response', 'expected_response_code'}:
    if deprecated in health_check_config:
      log.warn(HTTP_DEPRECATION_WARNING)
      break
  if SHELL_HEALTH_CHECK in health_checker:
    # Make sure we specified a shell_command if we chose a shell config.
    shell_health_checker = health_checker.get(SHELL_HEALTH_CHECK, {})
    shell_command = shell_health_checker.get('shell_command')
    if not shell_command:
      # Must define a command.
      die(MUST_PROVIDE_SHELL_COMMAND_ERROR)
Example #47
    def update(self, instances=None):
        """Performs the job update, blocking until it completes.

    A rollback will be performed if the update was considered a failure based on the
    update configuration.

    Arguments:
    instances -- (optional) instances to update. If not specified, all instances will be updated.

    Returns a response object with update result status.
    """
        try:
            resp = self._start()
            if resp.responseCode != ResponseCode.OK:
                return resp

            try:
                # Handle cron jobs separately from other jobs.
                if self._replace_template_if_cron():
                    log.info(
                        'Cron template updated, next run will reflect changes')
                    return self._finish()
                else:
                    try:
                        instance_configs = self._get_update_instructions(
                            instances)
                        self._check_and_log_response(
                            self._validate_quota(instance_configs))
                    except self.Error as e:
                        # Safe to release the lock acquired above as no job mutation has happened yet.
                        self._finish()
                        return self._failed_response(
                            'Unable to start job update: %s' % e)

                    if not self._update(instance_configs):
                        log.warn('Update failures threshold reached')
                        self._finish()
                        return self._failed_response('Update reverted')
                    else:
                        log.info('Update successful')
                        return self._finish()
            except (self.Error, ExecutionError, Exception) as e:
                return self._failed_response(
                    'Aborting update without rollback! Fatal error: %s' % e)
        finally:
            self._scheduler_mux.terminate()
Example #48
    def _await_nailgun_server(self, stdout, stderr):
        nailgun_timeout_seconds = 5
        max_socket_connect_attempts = 10
        nailgun = None
        port_parse_start = time.time()
        with safe_open(self._ng_out, 'r') as ng_out:
            while not nailgun:
                started = ng_out.readline()
                if started.find(
                        'Listening for transport dt_socket at address:') >= 0:
                    nailgun_timeout_seconds = 60
                    log.warn(
                        'Timeout extended to {timeout} seconds for debugger to attach to ng server.'
                        .format(timeout=nailgun_timeout_seconds))
                    started = ng_out.readline()
                if started:
                    port = self._parse_nailgun_port(started)
                    nailgun = self._create_ngclient(port, stdout, stderr)
                    log.debug('Detected ng server up on port %d' % port)
                elif time.time() - port_parse_start > nailgun_timeout_seconds:
                    raise NailgunClient.NailgunError(
                        'Failed to read ng output after'
                        ' %s seconds' % nailgun_timeout_seconds)

        attempt = 0
        while nailgun:
            sock = nailgun.try_connect()
            if sock:
                sock.close()
                endpoint = self._get_nailgun_endpoint()
                if endpoint:
                    log.debug(
                        'Connected to ng server launched with %s fingerprint %s pid: %d @ port: %d'
                        % endpoint)
                else:
                    raise NailgunClient.NailgunError(
                        'Failed to connect to ng server.')
                return nailgun
            elif attempt > max_socket_connect_attempts:
                raise nailgun.NailgunError(
                    'Failed to connect to ng output after %d connect attempts'
                    % max_socket_connect_attempts)
            attempt += 1
            log.debug('Failed to connect on attempt %d' % attempt)
            time.sleep(0.1)
Example #49
  def _stop(self, timeout):
    """
      Stop the runner and wait for its thread (and the sub-processes) to exit.

      :param timeout: Seconds the process has to exit before a hard SIGKILL is issued
                      (SIGTERM is sent first).
      :return: True if an active runner is stopped, False if the runner is not started or already
               stopping/stopped.
    """
    with self._lock:
      if not self._started:
        log.warn("Cannot stop the runner because it's not started")
        return False

      if not self._popen:
        log.info("The runner task did not start successfully so no need to kill it")
        return False

      try:
        log.info("Terminating process group: %s" % self._popen.pid)
        os.killpg(self._popen.pid, signal.SIGTERM)
      except OSError as e:
        log.info("The sub-processes are already terminated: %s" % e)
        return False

    log.info("Waiting for process to terminate due to SIGTERM")

    # Escalate to SIGKILL if SIGTERM is not sufficient.
    if not self._exited.wait(timeout=timeout):
      with self._lock:
        try:
          log.warn("Killing process group %s which failed to terminate cleanly within %s secs" %
                   (self._popen.pid, timeout))
          os.killpg(self._popen.pid, signal.SIGKILL)
        except OSError as e:
          log.info("The sub-processes are already terminated: %s" % e)
          return False
    else:
      return True

    log.info("Waiting for process to terminate due to SIGKILL")
    if not self._exited.wait(timeout=timeout):
      raise TaskError("Failed to kill process group %s" % self._popen.pid)

    return True
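A condensed sketch of the same terminate-then-kill escalation, using Python 3's Popen.wait(timeout=...) instead of a threading.Event; it assumes the child was started with preexec_fn=os.setpgrp, as in the launch examples above:

import os
import signal
import subprocess

def stop_process_group(popen, grace_secs=10):
    # Ask the whole process group to exit, then force-kill it if it lingers.
    try:
        os.killpg(popen.pid, signal.SIGTERM)
    except OSError:
        return False  # the group is already gone
    try:
        popen.wait(timeout=grace_secs)
    except subprocess.TimeoutExpired:
        os.killpg(popen.pid, signal.SIGKILL)
        popen.wait()
    return True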
Example #50
    def bootstrap(self, task_control, env):
        """
      Bootstraps the executor state.

      :param task_control: Task control for carrying out state bootstrapping commands.
      :param env: The environment variables for task_control methods.
    """

        # 1. Directly return if the data folder is not empty.
        if os.listdir(self._sandbox.mysql_data_dir):
            # TODO(jyx): This will be expected when we use persistent volumes. Validate the state in that
            # case.
            log.warn(
                "MySQL state already exists unexpectedly. Finishing bootstrap without restoration "
                "or initialization")
            return

        # 2. If the data folder is clean, restore state from the backup store. This can be a noop if the
        # user doesn't want to restore any state.
        try:
            backup_info = self._backup_store.restore()
        except BackupStore.Error as e:
            raise self.Error("Failed to restore MySQL state: %s" % e)

        if backup_info:
            # If some backup is restored, persist the backup info.
            log.info("Finished restoring the backup")
            backup_info_file = os.path.join(self._sandbox.var,
                                            BACKUP_INFO_FILENAME)
            # Useful for the user to check the result of backup restore.
            with open(backup_info_file, 'w') as f:
                json.dump(backup_info.__dict__, f)
            log.info("Persisted backup info '%s' to file %s" %
                     (backup_info.__dict__, backup_info_file))
        else:
            # If no recovery necessary, initialize the data dirs.
            log.info(
                "No MySQL backup is restored. Initializing a new MySQL instance"
            )
            try:
                task_control.initialize(env)
            except subprocess.CalledProcessError as e:
                raise self.Error("Unable to initialize MySQL state: %s" % e)
Example #51
 def unpack_json(cls, blob):
   blob = json.loads(blob)
   for key in ('status', 'serviceEndpoint', 'additionalEndpoints'):
     if key not in blob:
       raise ValueError('Expected to find %s in ServiceInstance JSON!' % key)
   additional_endpoints = dict((name, Endpoint(value['host'], value['port']))
     for name, value in blob['additionalEndpoints'].items())
   shard = blob.get('shard')
   if shard is not None:
     try:
       shard = int(shard)
     except ValueError:
       log.warn('Failed to deserialize shard from value %r' % shard)
       shard = None
   return cls(
     service_endpoint=Endpoint(blob['serviceEndpoint']['host'], blob['serviceEndpoint']['port']),
     additional_endpoints=additional_endpoints,
     status=Status.from_string(blob['status']),
     shard=shard)
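For reference, a blob that this parser accepts looks roughly like the following; all field values are illustrative:

example_blob = '''
{
  "status": "ALIVE",
  "serviceEndpoint": {"host": "10.0.0.1", "port": 31337},
  "additionalEndpoints": {"http": {"host": "10.0.0.1", "port": 8080}},
  "shard": "3"
}
'''
# unpack_json(example_blob) would yield one additional endpoint named "http" and
# shard == 3; a non-numeric shard value would be logged and dropped as None.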
Example #52
  def _await_nailgun_server(self, stdout, stderr, debug_desc):
    # TODO(Eric Ayers) Make these cmdline/config parameters once we have a global way to fetch
    # the global options scope.
    nailgun_timeout_seconds = 10
    max_socket_connect_attempts = 5
    nailgun = None
    port_parse_start = time.time()
    with safe_open(self._ng_out, 'r') as ng_out:
      while not nailgun:
        started = ng_out.readline()
        if started.find('Listening for transport dt_socket at address:') >= 0:
          nailgun_timeout_seconds = 60
          log.warn('Timeout extended to {timeout} seconds for debugger to attach to ng server.'
                   .format(timeout=nailgun_timeout_seconds))
          started = ng_out.readline()
        if started:
          port = self._parse_nailgun_port(started)
          nailgun = self._create_ngclient(port, stdout, stderr)
          log.debug('Detected ng server up on port {port}'.format(port=port))
        elif time.time() - port_parse_start > nailgun_timeout_seconds:
          raise NailgunClient.NailgunError(
            'Failed to read ng output after {sec} seconds.\n {desc}'
            .format(sec=nailgun_timeout_seconds, desc=debug_desc))

    attempt = 0
    while nailgun:
      sock = nailgun.try_connect()
      if sock:
        sock.close()
        endpoint = self._get_nailgun_endpoint()
        if endpoint:
          log.debug('Connected to ng server launched with {endpoint}'
                    .format(endpoint=repr(endpoint)))
        else:
          raise NailgunClient.NailgunError('Failed to connect to ng server.')
        return nailgun
      elif attempt > max_socket_connect_attempts:
        raise nailgun.NailgunError('Failed to connect to ng output after {count} connect attempts'
                                   .format(count=max_socket_connect_attempts))
      attempt += 1
      log.debug('Failed to connect on attempt {count}'.format(count=attempt))
      time.sleep(0.1)
Example #53
  def _is_restart_needed(self, failed_instances):
    """Checks if there are any failed instances recoverable via restart.

    Arguments:
    failed_instances -- Failed instance IDs.

    Returns True if restart is allowed, False otherwise (i.e. update failed).
    """
    if not failed_instances:
      return False

    log.info('Failed instances: %s' % failed_instances)

    with self._thread_lock:
      unretryable_instances = self.failure_threshold.update_failure_counts(failed_instances)
      if unretryable_instances:
        log.warn('Not restarting failed instances %s, which exceeded '
                 'maximum allowed instance failure limit of %s' %
                 (unretryable_instances, self._update_config.max_per_instance_failures))
      return False if unretryable_instances else True
Example #54
  def _is_restart_needed(self, failed_instances):
    """Checks if there are any failed instances recoverable via restart.

    Arguments:
    failed_instances -- Failed instance IDs.

    Returns True if restart is allowed, False otherwise (i.e. update failed).
    """
    if not failed_instances:
      return False

    log.info('Failed instances: %s' % failed_instances)

    with self._thread_lock:
      unretryable_instances = self.failure_threshold.update_failure_counts(failed_instances)
      if unretryable_instances:
        log.warn('Not restarting failed instances %s, which exceeded '
                 'maximum allowed instance failure limit of %s' %
                 (unretryable_instances, self._update_config.max_per_instance_failures))
      return False if unretryable_instances else True
Example #55
  def run(self):
    # Re-run the election in a loop periodically until a master can be elected or the elector is
    # aborted.
    while not self._aborted.is_set() and not self._completed.wait(self._query_interval):
      if datetime.utcnow() < self._election_deadline:
        self._elect(timedout=False)
      else:
        log.info("Timed out waiting for all slaves to respond. Now elect from existing responses")
        self._elect(timedout=True)
        if not self._completed.is_set():
          log.warn("No slave is electable after timeout")

    if self._aborted.is_set():  # If asked to stop, directly return without triggering the callback.
      log.info("Asked to stop the elector thread for cluster %s. Stopping..." % self._cluster_name)
      return

    self._master_callback(self._master)  # Invoke the callback from the elector thread.
    log.info(
        "Stopping the elector thread for cluster %s (epoch %s) because the election has completed" %
        (self._cluster_name, self._epoch))
Example #56
def check_duplicate_conflicting_protos(sources_by_base, sources, log):
  """Checks if proto files are duplicate or conflicting.

  There are sometimes two files with the same name on the .proto path.  This causes the protobuf
  compiler to stop with an error.  Some repos have legitimate cases for this, and so this task
  decides to just choose one to keep the entire build from failing.  Sometimes, they are identical
  copies.  That is harmless, but if there are two files with the same name with different contents,
  that is ambiguous and we want to complain loudly.

  :param dict sources_by_base: mapping of base to path
  :param list sources: list of sources
  :param Context.Log log: writes error messages to the console for conflicts
  """
  sources_by_genfile = {}
  for base in sources_by_base.keys(): # Need to iterate over /original/ bases.
    for path in sources_by_base[base]:
      if not path in sources:
        continue # Check to make sure we haven't already removed it.
      source = path[len(base):]

      genfiles = calculate_genfiles(path, source)
      for key in genfiles.keys():
        for genfile in genfiles[key]:
          if genfile in sources_by_genfile:
            # Possible conflict!
            prev = sources_by_genfile[genfile]
            if not prev in sources:
              # Must have been culled by an earlier pass.
              continue
            if not _same_contents(path, prev):
              log.error('Proto conflict detected (.proto files are different):\n'
                        '1: {prev}\n2: {curr}'.format(prev=prev, curr=path))
            else:
              log.warn('Proto duplication detected (.proto files are identical):\n'
                       '1: {prev}\n2: {curr}'.format(prev=prev, curr=path))
            log.warn('  Arbitrarily favoring proto 1.')
            if path in sources:
              sources.remove(path) # Favor the first version.
            continue
          sources_by_genfile[genfile] = path
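The helper _same_contents is not shown here; a minimal byte-for-byte comparison (an assumption about its behavior, not the project's actual code) would be:

def _same_contents(a, b):
    # True when the two .proto files are identical copies.
    with open(a, 'rb') as fa, open(b, 'rb') as fb:
        return fa.read() == fb.read()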
Example #57
    def _run_task(self, task):
        assert self._runner, "_runner should be created before this method is called"

        try:
            self._runner.start()
            log.info("Task runner for task %s started" % task.task_id)

            self._send_update(task.task_id.value, mesos_pb2.TASK_RUNNING)
        except TaskError as e:
            log.error("Task runner for task %s failed to start: %s" %
                      (task.task_id, str(e)))
            # Send TASK_FAILED if the task failed to start.
            self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED)
        except Exception as e:
            log.error("Error occurred while executing the task: %s" % e)
            log.error(traceback.format_exc())
            # Send TASK_LOST for unknown errors.
            self._send_update(task.task_id.value, mesos_pb2.TASK_LOST)
        else:
            # Wait for the task's return code (when it terminates).
            try:
                returncode = self._runner.join()
                # If '_runner' terminates, it has either failed or been killed.
                log.warn("Task process terminated with return code %s" %
                         returncode)
            except TaskError as e:
                log.error("Task terminated: %s" % e)
            finally:
                if self._killed:
                    self._send_update(task.task_id.value,
                                      mesos_pb2.TASK_KILLED)
                else:
                    self._send_update(task.task_id.value,
                                      mesos_pb2.TASK_FAILED)
                self._terminated.set()
        finally:
            # No matter what happens above, when we reach here the executor has no task to run so it
            # should just commit seppuku.
            self._kill()