Ejemplo n.º 1
0
    def test_lost_node(self):
        """Tests that accepted queued and running job executions are dropped once their node is lost"""

        # Both offers come from the same agent, which is the one reported lost below
        offers = [
            ResourceOffer('offer_1', self.node_agent, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(offers)
        offer_mgr.update_nodes([self.node, self.paused_node])
        offer_mgr.ready_new_offers()

        # A queued job execution is accepted
        queued_exe = QueuedJobExecution(self.queue_1)
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.ACCEPTED)

        # The next task of a running job execution is accepted as well
        running_exe = RunningJobExecution(self.running_job_exe_2)
        self.assertEqual(offer_mgr.consider_next_task(running_exe), OfferManager.ACCEPTED)

        # Once the node is lost, no offers with accepted job executions may be returned
        offer_mgr.lost_node(self.node_agent)
        remaining_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        self.assertEqual(len(remaining_offers), 0)
Ejemplo n.º 2
0
    def setUp(self):
        """Creates the managers, nodes and queue entries, then builds the scheduling thread"""
        django.setup()

        Scheduler.objects.initialize_scheduler()

        # Collaborators that are handed to the scheduling thread at the end of this method
        self._driver = MagicMock()
        self._job_exe_manager = RunningJobExecutionManager()
        self._job_type_manager = JobTypeManager()
        self._node_manager = NodeManager()
        self._offer_manager = OfferManager()
        self._scheduler_manager = SchedulerManager()
        self._workspace_manager = WorkspaceManager()

        self._scheduler_manager.sync_with_database()

        # Two agents, each with a node record and a matching SlaveInfo
        self.node_agent_1, self.node_agent_2 = 'agent_1', 'agent_2'
        self.node_1 = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent_1)
        self.node_2 = node_test_utils.create_node(hostname='host_2', slave_id=self.node_agent_2)
        self.slave_infos = [
            SlaveInfo('host_1', slave_id=self.node_agent_1),
            SlaveInfo('host_2', slave_id=self.node_agent_2),
        ]
        self._node_manager.add_agent_ids([self.node_agent_1, self.node_agent_2])

        # get_slaves is patched to return the SlaveInfo objects built above while the node manager syncs
        with patch('scheduler.sync.node_manager.api.get_slaves', return_value=self.slave_infos):
            self._node_manager.sync_with_database('master_host', 5050)

        self.queue_1 = queue_test_utils.create_queue(
            cpus_required=4.0, mem_required=1024.0,
            disk_in_required=100.0, disk_out_required=200.0, disk_total_required=300.0)
        self.queue_2 = queue_test_utils.create_queue(
            cpus_required=8.0, mem_required=512.0,
            disk_in_required=400.0, disk_out_required=45.0, disk_total_required=445.0)
        self._job_type_manager.sync_with_database()

        # NOTE(review): '123' is presumably the framework ID - confirm against SchedulingThread's signature
        self._scheduling_thread = SchedulingThread(
            self._driver, '123', self._job_exe_manager, self._job_type_manager, self._node_manager,
            self._offer_manager, self._scheduler_manager, self._workspace_manager)
Ejemplo n.º 3
0
    def __init__(self):
        """Constructor
        """

        # Driver, framework and master details are left unset (None) by the constructor
        self._driver = self._framework_id = None
        self._master_hostname = self._master_port = None

        # One fresh instance of each manager
        self._job_exe_manager = RunningJobExecutionManager()
        self._job_type_manager = JobTypeManager()
        self._node_manager = NodeManager()
        self._offer_manager = OfferManager()
        self._scheduler_manager = SchedulerManager()
        self._workspace_manager = WorkspaceManager()

        # No threads are created here; the three thread attributes start out as None
        self._db_sync_thread = self._recon_thread = self._scheduling_thread = None
Ejemplo n.º 4
0
    def test_no_ready_offers(self):
        """Tests that job executions are turned down while the added offers have not been readied"""

        offers = [
            ResourceOffer('offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        # Offers are only added: neither update_nodes() nor ready_new_offers() is called
        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(offers)

        queued_exe = QueuedJobExecution(self.queue_1)
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.NO_NODES_AVAILABLE)

        running_exe = RunningJobExecution(self.running_job_exe_1)
        self.assertEqual(offer_mgr.consider_next_task(running_exe), OfferManager.NODE_OFFLINE)
Ejemplo n.º 5
0
    def test_high_disk(self):
        """Tests that a queued job execution requiring too much disk is rejected"""

        offers = [
            ResourceOffer('offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(offers)
        offer_mgr.update_nodes([self.node, self.paused_node])
        offer_mgr.ready_new_offers()

        # The high-disk queue entry must be refused for lack of disk
        queued_exe = QueuedJobExecution(self.queue_high_disk)
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.NOT_ENOUGH_DISK)

        # Nothing was accepted, so no offers with accepted job executions may be returned
        accepted_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        self.assertEqual(len(accepted_offers), 0)
Ejemplo n.º 6
0
    def test_all_offers_paused(self):
        """Tests that a queued job execution is rejected when every offer belongs to a paused node"""

        # Both offers are tied to the paused node's agent
        offers = [
            ResourceOffer('offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent_paused, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(offers)
        offer_mgr.update_nodes([self.paused_node])
        offer_mgr.ready_new_offers()

        queued_exe = QueuedJobExecution(self.queue_1)
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.NO_NODES_AVAILABLE)

        # Nothing was accepted, so no offers with accepted job executions may be returned
        accepted_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        self.assertEqual(len(accepted_offers), 0)
Ejemplo n.º 7
0
    def test_offers_with_no_nodes(self):
        """Tests that job executions are turned down when offers are readied without any nodes being updated"""

        offers = [
            ResourceOffer('offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        # ready_new_offers() is invoked without a preceding update_nodes() call
        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(offers)
        offer_mgr.ready_new_offers()

        queued_exe = QueuedJobExecution(self.queue_1)
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.NO_NODES_AVAILABLE)

        running_exe = RunningJobExecution(self.running_job_exe_1)
        self.assertEqual(offer_mgr.consider_next_task(running_exe), OfferManager.NODE_NOT_READY)
Ejemplo n.º 8
0
    def __init__(self, executor):
        """Constructor

        :param executor: The executor to use for launching tasks
        :type executor: :class:`mesos_pb2.ExecutorInfo`
        """

        self._executor = executor

        # Driver, framework and master details are left unset (None) by the constructor
        self._driver = self._framework_id = None
        self._master_hostname = self._master_port = None

        # One fresh instance of each manager
        self._job_exe_manager = RunningJobExecutionManager()
        self._job_type_manager = JobTypeManager()
        self._node_manager = NodeManager()
        self._offer_manager = OfferManager()
        self._scheduler_manager = SchedulerManager()

        # No threads are created here; the three thread attributes start out as None
        self._db_sync_thread = self._recon_thread = self._scheduling_thread = None
Ejemplo n.º 9
0
    def test_lost_node_that_comes_back(self):
        """Tests that when a lost node comes back, it can schedule tasks again"""

        old_offers = [
            ResourceOffer('offer_1', self.node_agent, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
            ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
        ]

        offer_mgr = OfferManager()
        offer_mgr.add_new_offers(old_offers)
        offer_mgr.update_nodes([self.node])
        offer_mgr.ready_new_offers()

        # The node is lost and then reports back under a different agent ID
        offer_mgr.lost_node(self.node_agent)
        replacement_agent = 'i_am_a_new_node_agent'
        self.node.update_from_mesos(agent_id=replacement_agent)

        queued_exe = QueuedJobExecution(self.queue_1)

        # The old agent's offers should be gone, so the job execution must not be scheduled
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.NO_NODES_AVAILABLE)

        # A fresh offer arrives for the new agent ID
        new_offer = ResourceOffer('offer_3', replacement_agent, NodeResources(cpus=35.0, mem=3048.0, disk=3048.0))
        offer_mgr.add_new_offers([new_offer])
        offer_mgr.update_nodes([self.node])
        offer_mgr.ready_new_offers()

        # With the new offer readied, the same job execution should now be accepted
        self.assertEqual(offer_mgr.consider_new_job_exe(queued_exe), OfferManager.ACCEPTED)
        accepted_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        self.assertEqual(len(accepted_offers), 1)