Ejemplo n.º 1
0
    def _sync_running_job_executions(self):
        """Syncs the in-memory running job executions against their database models

        A running job execution whose model has the status 'CANCELED' is canceled, and one whose model reports being
        timed out is failed as timed out. Any task handed back by that handling is killed through the scheduler driver.
        Job executions that report being finished afterwards are removed from the running job manager and handed to
        the cleanup manager.
        """

        # Index the running job executions by ID so each database model can be matched to its in-memory counterpart
        exes_by_id = {exe.id: exe for exe in running_job_mgr.get_all_job_exes()}

        # One timestamp is shared by every time out check made in this pass
        when = now()

        exe_models = JobExecution.objects.filter(id__in=exes_by_id.keys())
        for exe_model in exe_models.iterator():
            exe = exes_by_id[exe_model.id]
            doomed_task = None

            # Cancellation takes precedence; the time out check only runs for executions that are not canceled.
            # A database error is logged and leaves no task to kill for this execution.
            if exe_model.status == 'CANCELED':
                try:
                    doomed_task = exe.execution_canceled()
                except DatabaseError:
                    logger.exception('Error canceling job execution %i', exe.id)
            elif exe_model.is_timed_out(when):
                try:
                    doomed_task = exe.execution_timed_out(when)
                except DatabaseError:
                    logger.exception('Error failing timed out job execution %i', exe.id)

            if doomed_task:
                # The driver takes a protobuf TaskID rather than the raw ID value
                task_id_msg = mesos_pb2.TaskID()
                task_id_msg.value = doomed_task.id
                logger.info('Killing task %s', doomed_task.id)
                self._driver.killTask(task_id_msg)

            # Checked for every execution, including those that needed no handling above
            if exe.is_finished():
                running_job_mgr.remove_job_exe(exe.id)
                cleanup_mgr.add_job_execution(exe)
Ejemplo n.º 2
0
    def _perform_scheduling(self):
        """Performs a single scheduling pass

        Refreshes the node and job type information from the managers, works out how many more job executions may be
        scheduled for each limited job type, sends tasks for reconciliation, considers cleanup tasks, running job
        executions and new job executions, and finally schedules the accepted tasks.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        # Push the latest node models to the managers that track them, then make newly received offers ready
        current_nodes = node_mgr.get_nodes()
        cleanup_mgr.update_nodes(current_nodes)
        offer_mgr.update_nodes(current_nodes)
        offer_mgr.ready_new_offers()
        self._job_types = job_type_mgr.get_job_types()

        # Only job types with a truthy max_scheduled get an entry; each starts at its maximum and is reduced by one
        # for every job execution of that type that is currently running
        remaining = self._job_type_limit_available = {}
        remaining.update((jt.id, jt.max_scheduled) for jt in self._job_types.values() if jt.max_scheduled)
        for exe in running_job_mgr.get_all_job_exes():
            if exe.job_type_id not in remaining:
                continue
            remaining[exe.job_type_id] -= 1

        self._send_tasks_for_reconciliation()
        self._consider_cleanup_tasks()
        self._consider_running_job_exes()
        self._consider_new_job_exes()

        scheduled_count = self._schedule_accepted_tasks()
        return scheduled_count