Exemple #1
0
    def _perform_scheduling(self):
        """Runs one scheduling pass: refreshes node and job type information, works out how many
        executions each limited job type may still schedule, considers the candidate tasks, and
        schedules whatever was accepted

        :returns: The number of Mesos tasks that were scheduled (the result of
            :meth:`_schedule_accepted_tasks`)
        :rtype: int
        """

        when = now()

        # Push the current nodes into the cleanup and offer managers, then refresh the job types
        current_nodes = node_mgr.get_nodes()
        cleanup_mgr.update_nodes(current_nodes)
        offer_mgr.update_nodes(current_nodes)
        offer_mgr.ready_new_offers()
        self._job_types = job_type_mgr.get_job_types()

        # Each job type with a truthy max_scheduled starts at that maximum...
        available = self._job_type_limit_available = {}
        for limited_type in self._job_types.values():
            if not limited_type.max_scheduled:
                # No (or zero) max_scheduled means no entry is tracked for this job type
                continue
            available[limited_type.id] = limited_type.max_scheduled

        # ...and every running execution of a tracked job type uses up one slot
        for job_exe in job_exe_mgr.get_running_job_exes():
            type_id = job_exe.job_type_id
            if type_id in available:
                available[type_id] -= 1

        # Candidates are considered in a fixed order: node tasks, running job executions, new ones
        self._consider_node_tasks(when)
        self._consider_running_job_exes()
        self._consider_new_job_exes()

        num_scheduled = self._schedule_accepted_tasks()
        return num_scheduled
Exemple #2
0
    def _perform_scheduling(self):
        """Runs one scheduling pass: refreshes node and job type information, computes the remaining
        per-job-type scheduling capacity, sends tasks for reconciliation, considers cleanup tasks and
        job executions, and schedules whatever was accepted

        :returns: The number of Mesos tasks that were scheduled (the result of
            :meth:`_schedule_accepted_tasks`)
        :rtype: int
        """

        # Hand the latest nodes to the cleanup and offer managers and reload the job types
        latest_nodes = node_mgr.get_nodes()
        cleanup_mgr.update_nodes(latest_nodes)
        offer_mgr.update_nodes(latest_nodes)
        offer_mgr.ready_new_offers()
        self._job_types = job_type_mgr.get_job_types()

        # Remaining capacity = max_scheduled minus the executions already known to be running.
        # Job types whose max_scheduled is falsy get no entry and are therefore not limited here.
        remaining = self._job_type_limit_available = {}
        for jt in self._job_types.values():
            if jt.max_scheduled:
                remaining[jt.id] = jt.max_scheduled
        for exe in running_job_mgr.get_all_job_exes():
            exe_type_id = exe.job_type_id
            if exe_type_id not in remaining:
                continue
            remaining[exe_type_id] -= 1

        # Reconciliation first, then cleanup tasks, running executions, and finally new executions
        self._send_tasks_for_reconciliation()
        self._consider_cleanup_tasks()
        self._consider_running_job_exes()
        self._consider_new_job_exes()

        scheduled_count = self._schedule_accepted_tasks()
        return scheduled_count
Exemple #3
0
    def setUp(self):
        """Resets the scheduler managers and builds the fixtures used by the tests: a framework ID,
        a mocked client, two registered agents (a third agent object is created but not registered),
        and three queue entries with different resource requirements
        """

        django.setup()

        reset_error_cache()

        self.framework_id = '1234'
        Scheduler.objects.initialize_scheduler()
        # Prevent message handler tasks from scheduling
        Scheduler.objects.update(num_message_handlers=0)
        self._client = MagicMock()

        scheduler_mgr.sync_with_database()
        scheduler_mgr.update_from_mesos(framework_id=self.framework_id)
        resource_mgr.clear()
        job_exe_mgr.clear()

        self.agent_1 = Agent('agent_1', 'host_1')
        self.agent_2 = Agent('agent_2', 'host_2')
        self.agent_3 = Agent('agent_3', 'host_2')
        node_mgr.clear()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Ignore initial cleanup, health check, and image pull tasks
        for scheduler_node in node_mgr.get_nodes():
            scheduler_node._last_health_task = now()
            scheduler_node._initial_cleanup_completed()
            scheduler_node._is_image_pulled = True
            scheduler_node._update_state()
            # Remember the ID of the node backing agent_1 so its model can be loaded below
            if scheduler_node.agent_id == 'agent_1':
                self.node_1_id = scheduler_node.id
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        self.node_1 = Node.objects.get(id=self.node_1_id)

        # Ignore system tasks
        system_task_mgr._is_db_update_completed = True

        self.queue_1 = queue_test_utils.create_queue(
            cpus_required=4.0,
            mem_required=1024.0,
            disk_in_required=100.0,
            disk_out_required=200.0,
            disk_total_required=300.0,
        )
        self.queue_2 = queue_test_utils.create_queue(
            cpus_required=8.0,
            mem_required=512.0,
            disk_in_required=400.0,
            disk_out_required=45.0,
            disk_total_required=445.0,
        )
        # A queue entry whose resource requirements are given as an explicit NodeResources object
        large_resources = NodeResources([Cpus(125.0), Mem(12048.0), Disk(12048.0)])
        self.queue_large = queue_test_utils.create_queue(resources=large_resources)

        job_type_mgr.sync_with_database()
Exemple #4
0
    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`

        Syncs the scheduler, job type, workspace and node managers with the database, refreshes the
        cleanup manager's nodes, syncs resources with the Mesos master, hands finished (canceled)
        job executions to the cleanup manager, and syncs secrets when a secrets backend is configured
        """

        scheduler_mgr.sync_with_database()
        job_type_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        mesos_master = scheduler_mgr.mesos_address
        resource_mgr.sync_with_mesos(mesos_master.hostname, mesos_master.port)

        # Handle canceled job executions
        # NOTE: job_exe_mgr.sync_with_database() is called exactly once per pass and its return
        # value is always consumed. A second call whose result is discarded would drop any finished
        # executions it reported, so they would never reach the cleanup manager.
        for finished_job_exe in job_exe_mgr.sync_with_database():
            cleanup_mgr.add_job_execution(finished_job_exe)

        # Secrets are only synced when a secrets backend URL is configured
        if settings.SECRETS_URL:
            secrets_mgr.sync_with_backend()
Exemple #5
0
    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`

        Syncs the scheduler, job type, workspace and node managers with the database, refreshes the
        cleanup manager's nodes, syncs resources with the Mesos master, asks the driver to kill the
        tasks reported by the job execution sync, and syncs secrets when a secrets backend is
        configured
        """

        scheduler_mgr.sync_with_database()
        job_type_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        master_address = scheduler_mgr.mesos_address
        resource_mgr.sync_with_mesos(master_address.hostname, master_address.port)

        # Kill running tasks for canceled job executions
        canceled_tasks = job_exe_mgr.sync_with_database()
        for canceled_task in canceled_tasks:
            # The driver expects a protobuf TaskID, so wrap the task's ID before killing it
            task_id_pb = mesos_pb2.TaskID()
            task_id_pb.value = canceled_task.id
            logger.info('Killing task %s', canceled_task.id)
            self._driver.killTask(task_id_pb)

        # Secrets are only synced when a secrets backend URL is configured
        if not settings.SECRETS_URL:
            return
        secrets_mgr.sync_with_backend()