def statusUpdate(self, driver, status):
    '''
    Invoked when the status of a task has changed (e.g., a slave is lost and so the task is lost, a task finishes
    and an executor sends a status update saying so, etc.) Note that returning from this callback acknowledges
    receipt of this status update. If for whatever reason the scheduler aborts during this callback (or the process
    exits) another status update will be delivered. Note, however, that this is currently not true if the slave
    sending the status update is lost or fails during that time.

    The update is handled in three steps: the task is dropped from the reconciliation set, the update is routed to
    the matching job execution (or the job execution is failed through the queue if the scheduler does not know
    it), and, if any of that handling raises, the task is put back in the reconciliation set.

    See documentation for :meth:`mesos_api.mesos.Scheduler.statusUpdate`.

    :param driver: The scheduler driver supplied with the callback (not used by this method)
    :param status: The task status update; only its ``state`` and ``task_id.value`` fields are read here
    '''

    if self.debug:
        connect_remote_debug()

    status_str = utils.status_to_string(status.state)
    task_id = status.task_id.value
    job_exe_id = ScaleJobExecution.get_job_exe_id(task_id)
    logger.info('Status update for task %s: %s', task_id, status_str)

    # Got a status update, so remove task from reconciliation set. The lock is taken with a context manager so
    # that it is only ever released after a successful acquire.
    with self.recon_lock:
        self.recon_set.discard(task_id)

    try:
        scale_job_exe = self._get_job_exe(job_exe_id)
        if not scale_job_exe:
            # Scheduler doesn't have any knowledge of this job execution
            error = get_scheduler_error()
            Queue.objects.handle_job_failure(job_exe_id, now(), error)
            return

        if status.state == mesos_pb2.TASK_RUNNING:
            scale_job_exe.task_running(task_id, status)
        elif status.state == mesos_pb2.TASK_FINISHED:
            scale_job_exe.task_completed(task_id, status)
        elif status.state in (mesos_pb2.TASK_LOST, mesos_pb2.TASK_ERROR, mesos_pb2.TASK_FAILED,
                              mesos_pb2.TASK_KILLED):
            # The task had an error so job execution is failed
            scale_job_exe.task_failed(task_id, status)

        if scale_job_exe.is_finished():
            # No more tasks so job execution is completed
            self._delete_job_exe(scale_job_exe)
    except Exception:
        # Only Exception subclasses are handled here; SystemExit and KeyboardInterrupt are allowed to propagate
        # instead of being logged and swallowed.
        logger.exception('Error handling status update for job execution: %s', job_exe_id)
        # Error handling status update, add task so it can be reconciled
        with self.recon_lock:
            self.recon_set.add(task_id)