Ejemplo n.º 1
0
    def setUp(self):
        """Builds the scheduler, node, queue and job type state used by each test.

        Registers two agents (one per host), syncs the node manager against a patched slave listing, marks the
        initial cleanup as completed on every node, creates two queued entries and finally creates the scheduling
        thread under test.
        """

        django.setup()

        Scheduler.objects.initialize_scheduler()
        self._driver = MagicMock()

        scheduler_mgr.sync_with_database()
        offer_mgr.clear()

        # One agent per host
        self.node_agent_1 = 'agent_1'
        self.node_agent_2 = 'agent_2'
        agent_ids = [self.node_agent_1, self.node_agent_2]
        self.slave_infos = [SlaveInfo(hostname, slave_id=agent_id)
                            for hostname, agent_id in zip(('host_1', 'host_2'), agent_ids)]

        node_mgr.clear()
        node_mgr.register_agent_ids(agent_ids)
        # The slave lookup is patched so that the sync sees exactly the two slaves defined above
        with patch('scheduler.node.manager.api.get_slaves', return_value=self.slave_infos):
            node_mgr.sync_with_database('master_host', 5050)

        # Ignore initial cleanup tasks
        for registered_node in node_mgr.get_nodes():
            registered_node.initial_cleanup_completed()

        queue_1_resources = {
            'cpus_required': 4.0,
            'mem_required': 1024.0,
            'disk_in_required': 100.0,
            'disk_out_required': 200.0,
            'disk_total_required': 300.0,
        }
        queue_2_resources = {
            'cpus_required': 8.0,
            'mem_required': 512.0,
            'disk_in_required': 400.0,
            'disk_out_required': 45.0,
            'disk_total_required': 445.0,
        }
        self.queue_1 = queue_test_utils.create_queue(**queue_1_resources)
        self.queue_2 = queue_test_utils.create_queue(**queue_2_resources)
        job_type_mgr.sync_with_database()

        self._scheduling_thread = SchedulingThread(self._driver, '123')
Ejemplo n.º 2
0
    def _perform_sync(self):
        """Performs the sync with the database

        Syncs the scheduler, job type and workspace managers (in that order), then syncs the node manager using the
        Mesos master address held by the scheduler manager, and finally syncs the running job executions.
        """

        for manager in (scheduler_mgr, job_type_mgr, workspace_mgr):
            manager.sync_with_database()

        # The master address is read only after the scheduler manager has been synced
        master_address = scheduler_mgr.mesos_address
        master_host, master_port = master_address.hostname, master_address.port
        node_mgr.sync_with_database(master_host, master_port)

        self._sync_running_job_executions()
Ejemplo n.º 3
0
    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`

        Syncs the scheduler, job type, workspace and node managers with the database, passes the current nodes to the
        cleanup manager, syncs resources with the Mesos master when its address is available, kills the tasks
        returned by the job execution sync, and syncs secrets when a secrets URL is configured.
        """

        scheduler_mgr.sync_with_database()
        job_type_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        mesos_master = scheduler_mgr.mesos_address
        # Guard against an unset master address so that the remaining sync steps (task killing, secrets) still run
        # instead of the whole pass failing on an attribute access.
        # NOTE(review): presumably the address is None until the master is known - confirm
        if mesos_master:
            resource_mgr.sync_with_mesos(mesos_master.hostname, mesos_master.port)

        # Kill running tasks for canceled job executions
        for task_to_kill in job_exe_mgr.sync_with_database():
            pb_task_to_kill = mesos_pb2.TaskID()
            pb_task_to_kill.value = task_to_kill.id
            logger.info('Killing task %s', task_to_kill.id)
            self._driver.killTask(pb_task_to_kill)

        if settings.SECRETS_URL:
            secrets_mgr.sync_with_backend()
Ejemplo n.º 4
0
    def _execute(self):
        """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`

        Syncs the scheduler, job type, workspace and node managers with the database, passes the current nodes to the
        cleanup manager, syncs resources with the Mesos master when its address is available, hands the job
        executions returned by the job execution sync to the cleanup manager, and syncs secrets when a secrets URL is
        configured.
        """

        scheduler_mgr.sync_with_database()
        job_type_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        mesos_master = scheduler_mgr.mesos_address
        if mesos_master:
            resource_mgr.sync_with_mesos(mesos_master.hostname,
                                         mesos_master.port)

        # Handle canceled job executions
        # The job execution manager is synced exactly once per pass and its result is always consumed: an earlier,
        # additional sync call whose return value was thrown away would drop any finished job executions it
        # returned before they could be handed to the cleanup manager.
        for finished_job_exe in job_exe_mgr.sync_with_database():
            cleanup_mgr.add_job_execution(finished_job_exe)

        if settings.SECRETS_URL:
            secrets_mgr.sync_with_backend()
Ejemplo n.º 5
0
    def setUp(self):
        """Builds the scheduler, node, queue and job type state used by each test.

        Registers two agents (one per host), syncs the node manager against a patched slave listing, primes every
        node so that initial cleanup and health check tasks are skipped, creates two queued entries and finally
        creates the scheduling thread under test.
        """

        django.setup()

        Scheduler.objects.initialize_scheduler()
        self._driver = MagicMock()

        scheduler_mgr.sync_with_database()
        offer_mgr.clear()

        # One agent per host
        self.node_agent_1 = 'agent_1'
        self.node_agent_2 = 'agent_2'
        agent_ids = [self.node_agent_1, self.node_agent_2]
        self.slave_infos = [SlaveInfo(hostname, slave_id=agent_id)
                            for hostname, agent_id in zip(('host_1', 'host_2'), agent_ids)]

        node_mgr.clear()
        node_mgr.register_agent_ids(agent_ids)
        # The slave lookup is patched so that the sync sees exactly the two slaves defined above
        with patch('scheduler.node.manager.api.get_slaves', return_value=self.slave_infos):
            node_mgr.sync_with_database('master_host', 5050)

        # Ignore initial cleanup tasks and health check tasks
        for registered_node in node_mgr.get_nodes():
            # NOTE(review): the attribute is spelled `_last_heath_task` (not `health`); presumably this matches
            # the node class - confirm before renaming
            registered_node._last_heath_task = now()
            registered_node._initial_cleanup_completed()
            registered_node._update_state()

        queue_1_resources = {
            'cpus_required': 4.0,
            'mem_required': 1024.0,
            'disk_in_required': 100.0,
            'disk_out_required': 200.0,
            'disk_total_required': 300.0,
        }
        queue_2_resources = {
            'cpus_required': 8.0,
            'mem_required': 512.0,
            'disk_in_required': 400.0,
            'disk_out_required': 45.0,
            'disk_total_required': 445.0,
        }
        self.queue_1 = queue_test_utils.create_queue(**queue_1_resources)
        self.queue_2 = queue_test_utils.create_queue(**queue_2_resources)
        job_type_mgr.sync_with_database()

        self._scheduling_thread = SchedulingThread(self._driver, '123')
Ejemplo n.º 6
0
    def test_node_with_new_agent_id(self):
        """Tests successfully calling perform_scheduling() when a node gets a new agent ID"""

        # Host 2 gets new agent ID of agent_3
        # NOTE(review): the agent object itself is passed to lost_node() here; confirm whether the manager expects
        # the agent or its agent_id
        node_mgr.lost_node(self.agent_2)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        new_agent_id = self.agent_3.agent_id
        offered_resources = NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)])
        new_agent_offer = ResourceOffer('offer', new_agent_id, self.framework_id, offered_resources, now(), None)
        resource_mgr.add_new_offers([new_agent_offer])

        manager = SchedulingManager()
        scheduled_count = manager.perform_scheduling(self._client, now())

        # Both queued job executions should be scheduled
        self.assertEqual(scheduled_count, 2)

        # Check that created tasks have the correct agent ID
        client_calls = self._client.method_calls
        # One call for checking for the driver and a second one for the task launch
        self.assertEqual(2, len(client_calls))
        # Get the tasks off the 2nd call (index 1): element 1 of a call holds its positional arguments, and the
        # tasks are read from positional argument 1
        launch_call = client_calls[1]
        launch_args = launch_call[1]
        launched_tasks = launch_args[1]
        for launched_task in launched_tasks:
            self.assertEqual(self.agent_3.agent_id, launched_task['agent_id']['value'])
Ejemplo n.º 7
0
    def test_generate_nodes_status(self):
        """Tests the _generate_nodes_status method

        Checks the reported status in three situations: no nodes registered, ten nodes registered, and four of the
        ten nodes lost.
        """

        # Setup nodes
        from scheduler.node.manager import node_mgr
        node_mgr.clear()

        # No nodes registered at all
        expected_no_nodes = {
            'OK': False,
            'detail': {'msg': 'No nodes reported'},
            'errors': [{'NODES_OFFLINE': 'No nodes reported.'}],
            'warnings': [],
        }
        self.assertDictEqual(dependency_mgr._generate_nodes_status(), expected_no_nodes)

        # All ten agents registered
        all_agents = [
            self.agent_1, self.agent_2, self.agent_3, self.agent_4, self.agent_5,
            self.agent_6, self.agent_7, self.agent_8, self.agent_9, self.agent_10,
        ]
        node_mgr.register_agents(all_agents)
        node_mgr.sync_with_database(scheduler_mgr.config)

        registered_nodes = node_mgr.get_nodes()
        self.assertEqual(len(registered_nodes), 10)

        expected_all_online = {
            'OK': True,
            'detail': {'msg': 'Enough nodes are online to function.'},
            'errors': [],
            'warnings': [],
        }
        self.assertDictEqual(dependency_mgr._generate_nodes_status(), expected_all_online)

        # Four of the ten nodes are lost
        for lost_agent in (self.agent_1, self.agent_2, self.agent_3, self.agent_4):
            node_mgr.lost_node(lost_agent.agent_id)

        expected_degraded = {
            'OK': False,
            'detail': {u'msg': u'Over a third of nodes are in an error state'},
            'errors': [{'NODES_ERRORED': 'Over a third of the nodes are offline or degraded.'}],
            'warnings': [{u'NODES_OFFLINE': u'4 nodes are offline'}],
        }
        self.assertDictEqual(dependency_mgr._generate_nodes_status(), expected_degraded)