Example #1
0
    def test_job_type_limit(self):
        """Tests that perform_scheduling() honors a job type's max_scheduled limit"""
        Queue.objects.all().delete()

        # Seed job type that allows at most 4 scheduled jobs
        limited_job_type = job_test_utils.create_seed_job_type()
        limited_job_type.max_scheduled = 4
        limited_job_type.save()

        already_running = job_test_utils.create_running_job_exe(
            agent_id=self.agent_1.agent_id, job_type=limited_job_type, node=self.node_1)

        # Queue six jobs of the limited type (more than the limit of 4)
        for _ in range(6):
            queue_test_utils.create_queue(job_type=limited_job_type)
        job_type_mgr.sync_with_database()

        # One job of this type is already running
        job_exe_mgr.schedule_job_exes([already_running], [])

        # Note that the first offer carries 0.0 CPUs
        first_offer = ResourceOffer(
            'offer_1', self.agent_1.agent_id, self.framework_id,
            NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
        second_offer = ResourceOffer(
            'offer_2', self.agent_2.agent_id, self.framework_id,
            NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
        resource_mgr.add_new_offers([first_offer, second_offer])

        scheduled_count = SchedulingManager().perform_scheduling(self._client, now())

        # One is already running, so only 3 more fit under the limit of 4
        self.assertEqual(scheduled_count, 3)
Example #2
0
    def test_canceled_queue_model(self):
        """Tests calling perform_scheduling() after one of the queue models has been canceled"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Cancel the first queued job before scheduling runs
        self.queue_1.is_canceled = True
        self.queue_1.save()

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # Only the non-canceled queued job execution is scheduled
        self.assertEqual(scheduled_count, 1)

        # Both jobs get a job execution model (queue_1 canceled, queue_2 running) and
        # neither queue model remains
        queue_1_exes = JobExecution.objects.filter(job_id=self.queue_1.job_id)
        queue_2_exes = JobExecution.objects.filter(job_id=self.queue_2.job_id)
        leftover_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(queue_1_exes.count(), 1)
        self.assertEqual(queue_2_exes.count(), 1)
        self.assertEqual(leftover_queues.count(), 0)

        # Job execution manager should have a message for the canceled job execution
        has_end_message = any(msg.type == 'create_job_exe_ends' for msg in job_exe_mgr.get_messages())
        self.assertTrue(has_end_message)
Example #3
0
    def test_successful_schedule(self):
        """Tests the normal path of perform_scheduling() with two resource offers"""
        small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                    NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
        big_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                  NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
        resource_mgr.add_new_offers([small_offer, big_offer])

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # The two smaller queued job executions are scheduled
        self.assertEqual(scheduled_count, 2)

        # Job execution models exist for queue_1 and queue_2, but not for queue_large
        queue_1_exe_count = JobExecution.objects.filter(job_id=self.queue_1.job_id).count()
        self.assertEqual(queue_1_exe_count, 1)
        queue_2_exe_count = JobExecution.objects.filter(job_id=self.queue_2.job_id).count()
        self.assertEqual(queue_2_exe_count, 1)
        large_exe_count = JobExecution.objects.filter(job_id=self.queue_large.job_id).count()
        self.assertEqual(large_exe_count, 0)

        # Queue models for the scheduled jobs have been deleted
        scheduled_queue_ids = [self.queue_1.id, self.queue_2.id]
        self.assertEqual(Queue.objects.filter(id__in=scheduled_queue_ids).count(), 0)
Example #4
0
    def test_missing_workspace(self):
        """Tests calling perform_scheduling() when the workspace of the queued jobs is unknown to the scheduler"""

        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Point the output of both queued jobs at the workspace 'my_workspace'
        for queue_id in (self.queue_1.id, self.queue_2.id):
            queue = Queue.objects.get(id=queue_id)
            exe_config = queue.get_execution_configuration()
            exe_config.set_output_workspaces({'my_output': 'my_workspace'})
            queue.configuration = exe_config.get_dict()
            queue.save()

        manager = SchedulingManager()

        # Make the workspace manager report no workspaces while scheduling runs
        with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
            mock_get_workspaces.return_value = {}
            scheduled_count = manager.perform_scheduling(self._client, now())

        # Nothing should be scheduled: no job executions created, both queue models still present
        self.assertEqual(scheduled_count, 0)
        for job_id in (self.queue_1.job_id, self.queue_2.job_id):
            self.assertEqual(JobExecution.objects.filter(job_id=job_id).count(), 0)
        remaining_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(remaining_queues.count(), 2)
Example #5
0
    def test_paused_job_type(self):
        """Tests calling perform_scheduling() while the job type of one queued job is paused"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Pause the job type of queue_1 and let the job type manager pick up the change
        self.queue_1.job_type.is_paused = True
        self.queue_1.job_type.save()
        job_type_mgr.sync_with_database()

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # Only the queued job execution whose job type is not paused gets scheduled
        self.assertEqual(scheduled_count, 1)
        paused_exe_count = JobExecution.objects.filter(job_id=self.queue_1.job_id).count()
        self.assertEqual(paused_exe_count, 0)
        active_exe_count = JobExecution.objects.filter(job_id=self.queue_2.job_id).count()
        self.assertEqual(active_exe_count, 1)

        # Exactly one of the two queue models is left
        remaining_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(remaining_queues.count(), 1)
Example #6
0
    def test_schedule_system_tasks(self):
        """Tests calling perform_scheduling() when only system tasks are expected to be scheduled"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Empty the queue so that no queued jobs compete for the offers
        Queue.objects.all().delete()

        # Set us up to schedule a database update task
        system_task_mgr._is_db_update_completed = False

        # Set us up to schedule 2 message handler tasks
        Scheduler.objects.update(num_message_handlers=2)
        scheduler_mgr.sync_with_database()

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # Database update task plus 2 message handler tasks
        self.assertEqual(scheduled_count, 3)
Example #7
0
    def test_missing_job_types(self):
        """Tests calling perform_scheduling() when the job types of the queued jobs are unknown to the scheduler"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        manager = SchedulingManager()

        # Make the job type manager report no job types while scheduling runs
        with patch('scheduler.scheduling.manager.job_type_mgr.get_job_types') as mock_get_job_types:
            mock_get_job_types.return_value = {}
            scheduled_count = manager.perform_scheduling(self._client, now())

        # Nothing should be scheduled: no job executions created, both queue models still present
        self.assertEqual(scheduled_count, 0)
        for job_id in (self.queue_1.job_id, self.queue_2.job_id):
            self.assertEqual(JobExecution.objects.filter(job_id=job_id).count(), 0)
        remaining_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(remaining_queues.count(), 2)
Example #8
0
    def test_paused_scheduler(self):
        """Tests that perform_scheduling() schedules nothing while the scheduler is paused"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Pause the scheduler in the database and refresh the in-memory managers
        Scheduler.objects.update(is_paused=True)
        scheduler_mgr.sync_with_database()
        # Updates nodes with paused scheduler
        node_mgr.sync_with_database(scheduler_mgr.config)
        # Original author's note: make sure system tasks don't get scheduled
        # NOTE(review): confirm how this flag interacts with a paused scheduler
        system_task_mgr._is_db_update_completed = False

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # No tasks, no job executions, and both queue models are still present
        self.assertEqual(scheduled_count, 0)
        for job_id in (self.queue_1.job_id, self.queue_2.job_id):
            self.assertEqual(JobExecution.objects.filter(job_id=job_id).count(), 0)
        remaining_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(remaining_queues.count(), 2)
Example #9
0
    def test_node_with_new_agent_id(self):
        """Tests successfully calling perform_scheduling() when a node gets a new agent ID"""
        # Host 2 loses agent_2 and comes back registered as agent_3
        node_mgr.lost_node(self.agent_2)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # The only offer comes from the new agent ID
        new_agent_offer = ResourceOffer('offer', self.agent_3.agent_id, self.framework_id,
                                        NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
        resource_mgr.add_new_offers([new_agent_offer])

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # Schedule both queued job executions
        self.assertEqual(scheduled_count, 2)

        # Check that created tasks have the correct agent ID
        client_calls = self._client.method_calls
        # One for checking for driver and second for task launch
        self.assertEqual(2, len(client_calls))
        # Get tasks off the 2nd call (index 1): second positional argument of that call
        launch_call = client_calls[1]
        launched_tasks = launch_call[1][1]
        for task in launched_tasks:
            self.assertEqual(self.agent_3.agent_id, task['agent_id']['value'])
Example #10
0
class SchedulingThread(BaseSchedulerThread):
    """Scheduler background thread that hands each execution over to a :class:`SchedulingManager`"""

    def __init__(self, client):
        """Constructor

        :param client: The Mesos scheduler client
        :type client: :class:`mesoshttp.client.MesosClient`
        """

        super(SchedulingThread, self).__init__('Scheduling', THROTTLE, WARN_THRESHOLD)
        self._client = client
        self._manager = SchedulingManager()

    @property
    def client(self):
        """Returns the Mesos scheduler client used for scheduling

        :returns: The client
        :rtype: :class:`mesoshttp.client.MesosClient`
        """

        return self._client

    @client.setter
    def client(self, value):
        """Sets the Mesos scheduler client used for scheduling

        :param value: The client
        :type value: :class:`mesoshttp.client.MesosClient`
        """

        self._client = value

    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`
        """

        logger.debug('Entering %s _execute...', __name__)

        # Run one scheduling pass with the current client and the current time
        manager = self._manager
        manager.perform_scheduling(self._client, now())
Example #11
0
class SchedulingThread(BaseSchedulerThread):
    """Scheduler background thread that hands each execution over to a :class:`SchedulingManager`"""

    def __init__(self, driver):
        """Constructor

        :param driver: The Mesos scheduler driver
        :type driver: :class:`mesos_api.mesos.SchedulerDriver`
        """

        super(SchedulingThread, self).__init__(
            'Scheduling', THROTTLE, WARN_THRESHOLD)
        self._driver = driver
        self._manager = SchedulingManager()

    @property
    def driver(self):
        """Returns the Mesos scheduler driver used for scheduling

        :returns: The driver
        :rtype: :class:`mesos_api.mesos.SchedulerDriver`
        """

        return self._driver

    @driver.setter
    def driver(self, value):
        """Sets the Mesos scheduler driver used for scheduling

        :param value: The driver
        :type value: :class:`mesos_api.mesos.SchedulerDriver`
        """

        self._driver = value

    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`
        """

        # Run one scheduling pass with the current driver and the current time
        manager = self._manager
        manager.perform_scheduling(self._driver, now())
Example #12
0
    def test_no_default_workspace(self, mock_taskinfo):
        """Tests calling perform_scheduling() when a queued job has output data but no output workspace defined"""
        mock_taskinfo.return_value = MagicMock()

        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now()),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now()),
        ])

        # Add output data to the first queued job:
        # output data + no workspace defined = fail
        # NOTE(review): this mutates the object returned by get_job_interface(); confirm that the
        # change is visible to the scheduler
        job_with_output = Queue.objects.get(id=self.queue_1.id)
        job_with_output.get_job_interface().definition['output_data'] = [{'name': 'my_output', 'type': 'file'}]
        exe_config = job_with_output.get_execution_configuration()
        job_with_output.configuration = exe_config.get_dict()
        job_with_output.save()

        # No output data + no workspace = pass
        job_without_output = Queue.objects.get(id=self.queue_2.id)
        exe_config = job_without_output.get_execution_configuration()
        job_without_output.configuration = exe_config.get_dict()
        job_without_output.save()

        manager = SchedulingManager()

        # Set a workspace on the manager
        # NOTE(review): the mocked value is a flat dict of workspace fields; confirm that this is the
        # shape get_workspaces() is expected to return
        workspace_data = {
            'name': 'my_workspace',
            'title': 'My Workspace',
            'description': 'My workspaces',
            'is_active': True,
            'json_config': {'version': '1.0', 'broker': {'type': 'host', 'host_path': '/host/path'}},
        }
        with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
            mock_get_workspaces.return_value = workspace_data
            scheduled_count = manager.perform_scheduling(self._driver, now())

        # Only queue_2 should be scheduled
        self.assertEqual(scheduled_count, 1)
        queue_1_exe_count = JobExecution.objects.filter(job_id=self.queue_1.job_id).count()
        self.assertEqual(queue_1_exe_count, 0)
        queue_2_exe_count = JobExecution.objects.filter(job_id=self.queue_2.job_id).count()
        self.assertEqual(queue_2_exe_count, 1)
        remaining_queues = Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id])
        self.assertEqual(remaining_queues.count(), 1)
Example #13
0
    def test_node_with_new_agent_id(self, mock_taskinfo):
        """Tests successfully calling perform_scheduling() when a node gets a new agent ID"""
        mock_taskinfo.return_value = MagicMock()

        # Host 2 loses agent_2 and comes back registered as agent_3
        node_mgr.lost_node(self.agent_2)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # The only offer comes from the new agent ID
        new_agent_offer = ResourceOffer('offer', self.agent_3.agent_id, self.framework_id,
                                        NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now())
        resource_mgr.add_new_offers([new_agent_offer])

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._driver, now())

        # Schedule both queued job executions
        self.assertEqual(scheduled_count, 2)

        # Check that created tasks have the correct agent ID
        driver_calls = self._driver.method_calls
        self.assertEqual(1, len(driver_calls))
        # Tasks are the second positional argument of the single recorded driver call
        launch_call = driver_calls[0]
        launched_tasks = launch_call[1][1]
        for task in launched_tasks:
            self.assertEqual(self.agent_3.agent_id, task.slave_id.value)