def _sync_running_job_executions(self):
    """Syncs the running job executions against their database models, handling any execution that has been
    canceled or has timed out and retiring any execution that is finished
    """

    # Index the in-memory running executions by ID so each database row can be matched back to its counterpart
    exes_by_id = {exe.id: exe for exe in running_job_mgr.get_all_job_exes()}

    # A single timestamp is used for every time-out check in this pass
    when = now()

    exe_models = JobExecution.objects.filter(id__in=exes_by_id.keys())
    for exe_model in exe_models.iterator():
        exe = exes_by_id[exe_model.id]
        doomed_task = None

        # Cancellation is checked first; the time-out check only runs for executions that are not canceled
        if exe_model.status == 'CANCELED':
            try:
                doomed_task = exe.execution_canceled()
            except DatabaseError:
                logger.exception('Error canceling job execution %i', exe.id)
        elif exe_model.is_timed_out(when):
            try:
                doomed_task = exe.execution_timed_out(when)
            except DatabaseError:
                logger.exception('Error failing timed out job execution %i', exe.id)

        # If handling the execution produced a task, ask the driver to kill it
        if doomed_task:
            doomed_task_id = doomed_task.id
            pb_task_id = mesos_pb2.TaskID()
            pb_task_id.value = doomed_task_id
            logger.info('Killing task %s', doomed_task_id)
            self._driver.killTask(pb_task_id)

        # Finished executions leave the running manager and are handed to the cleanup manager
        if exe.is_finished():
            running_job_mgr.remove_job_exe(exe.id)
            cleanup_mgr.add_job_execution(exe)
def _perform_scheduling(self):
    """Performs a scheduling pass: refreshes the node and job type models, computes how many more job executions
    of each limited job type may be scheduled, sends tasks for reconciliation, considers cleanup tasks, running
    job executions and new job executions, and finally schedules the accepted tasks

    :returns: The number of Mesos tasks that were scheduled
    :rtype: int
    """

    # Pull the latest node models and share them with the cleanup and offer managers
    current_nodes = node_mgr.get_nodes()
    cleanup_mgr.update_nodes(current_nodes)
    offer_mgr.update_nodes(current_nodes)
    offer_mgr.ready_new_offers()

    # Pull the latest job type models
    self._job_types = job_type_mgr.get_job_types()

    # Start each limited job type at its max_scheduled value; job types whose max_scheduled is falsy get no entry
    # and are therefore not tracked here
    remaining = self._job_type_limit_available = {}
    remaining.update((job_type.id, job_type.max_scheduled)
                     for job_type in self._job_types.values()
                     if job_type.max_scheduled)

    # Every running job execution of a limited job type uses up one slot
    for exe in running_job_mgr.get_all_job_exes():
        type_id = exe.job_type_id
        if type_id in remaining:
            remaining[type_id] -= 1

    self._send_tasks_for_reconciliation()

    # Consider work in this order: cleanup tasks, then running job executions, then new job executions
    self._consider_cleanup_tasks()
    self._consider_running_job_exes()
    self._consider_new_job_exes()

    num_scheduled = self._schedule_accepted_tasks()
    return num_scheduled