Esempio n. 1
0
    def test_job_type_limit(self):
        """Tests calling perform_scheduling() with a job type limit"""
        Queue.objects.all().delete()
        job_type_with_limit = job_test_utils.create_seed_job_type()
        job_type_with_limit.max_scheduled = 4
        job_type_with_limit.save()
        running_job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_1.agent_id,
                                                                  job_type=job_type_with_limit, node=self.node_1)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        job_type_mgr.sync_with_database()
        # One job of this type is already running
        job_exe_mgr.schedule_job_exes([running_job_exe_1], [])

        offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
        offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
        resource_mgr.add_new_offers([offer_1, offer_2])

        scheduling_manager = SchedulingManager()
        num_tasks = scheduling_manager.perform_scheduling(self._client, now())
        self.assertEqual(num_tasks, 3)  # One is already running, should only be able to schedule 3 more
Esempio n. 2
0
    def _schedule_new_job_exes(self, framework_id, nodes, job_types, job_type_limits, job_type_resources, workspaces):
        """Schedules new job executions from the queue and adds them to the appropriate node

        :param framework_id: The scheduling framework ID
        :type framework_id: string
        :param nodes: The dict of scheduling nodes stored by node ID where every node has fulfilled all waiting tasks
        :type nodes: dict
        :param job_types: The dict of job type models stored by job type ID
        :type job_types: dict
        :param job_type_limits: The dict of job type IDs mapping to job type limits
        :type job_type_limits: dict
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :param workspaces: A dict of all workspaces stored by name
        :type workspaces: dict
        :returns: The number of new job executions that were scheduled
        :rtype: int
        """

        # Can only use nodes that are ready for new job executions
        available_nodes = {}  # {Node ID: SchedulingNode}
        for node in nodes.values():
            if node.is_ready_for_new_job:
                available_nodes[node.node_id] = node

        try:
            scheduled_job_exes = self._process_queue(available_nodes, job_types, job_type_limits, job_type_resources,
                                                     workspaces)
            running_job_exes = self._process_scheduled_job_executions(framework_id, scheduled_job_exes, job_types,
                                                                      workspaces)
            all_running_job_exes = []
            for node_id in running_job_exes:
                all_running_job_exes.extend(running_job_exes[node_id])
            job_exe_mgr.schedule_job_exes(all_running_job_exes, create_running_job_messages(all_running_job_exes))
            node_ids = set()
            job_exe_count = 0
            scheduled_resources = NodeResources()
            for node_id in running_job_exes:
                if node_id in nodes:
                    nodes[node_id].add_scheduled_job_exes(running_job_exes[node_id])
                    for running_job_exe in running_job_exes[node_id]:
                        first_task = running_job_exe.next_task()
                        if first_task:
                            node_ids.add(node_id)
                            scheduled_resources.add(first_task.get_resources())
                            job_exe_count += 1
                else:
                    logger.error('Scheduled jobs on an unknown node')
            if job_exe_count:
                logger.info('Scheduled %d new job(s) with %s on %d node(s)', job_exe_count, scheduled_resources,
                            len(node_ids))
        except DatabaseError:
            logger.exception('Error occurred while scheduling new jobs from the queue')
            job_exe_count = 0
            for node in available_nodes.values():
                node.reset_new_job_exes()

        return job_exe_count
Esempio n. 3
0
    def test_job_type_limit(self, mock_taskinfo):
        """Tests running the scheduling thread with a job type limit"""
        mock_taskinfo.return_value = MagicMock()

        Queue.objects.all().delete()
        job_type_with_limit = job_test_utils.create_job_type()
        job_type_with_limit.max_scheduled = 4
        job_type_with_limit.save()
        job_exe_1 = job_test_utils.create_job_exe(job_type=job_type_with_limit,
                                                  status='RUNNING')
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        job_type_mgr.sync_with_database()
        # One job of this type is already running
        job_exe_mgr.schedule_job_exes([RunningJobExecution(job_exe_1)])

        offer_1 = ResourceOffer(
            'offer_1', self.node_agent_1,
            NodeResources(cpus=200.0, mem=102400.0, disk=102400.0))
        offer_2 = ResourceOffer(
            'offer_2', self.node_agent_2,
            NodeResources(cpus=200.0, mem=204800.0, disk=204800.0))
        offer_mgr.add_new_offers([offer_1, offer_2])

        # Ignore Docker pull tasks
        for node in node_mgr.get_nodes():
            node._is_image_pulled = True

        # Ignore cleanup tasks
        for node in node_mgr.get_nodes():
            node._initial_cleanup_completed()
            node._update_state()

        num_tasks = self._scheduling_thread._perform_scheduling()
        self.assertEqual(
            num_tasks, 3
        )  # One is already running, should only be able to schedule 3 more
Esempio n. 4
0
    def _schedule_accepted_tasks(self):
        """Schedules all of the tasks that have been accepted

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        when = now()
        tasks = []
        tasks_to_launch = {}  # {Node ID: [Mesos Tasks]}
        queued_job_exes_to_schedule = []
        node_offers_list = offer_mgr.pop_offers_with_accepted_job_exes()
        for node_offers in node_offers_list:
            mesos_tasks = []
            tasks_to_launch[node_offers.node.id] = mesos_tasks
            # Add cleanup tasks
            for task in node_offers.get_accepted_tasks():
                tasks.append(task)
                mesos_tasks.append(create_mesos_task(task))
            # Start next task for already running job executions that were accepted
            for running_job_exe in node_offers.get_accepted_running_job_exes():
                task = running_job_exe.start_next_task()
                if task:
                    tasks.append(task)
                    mesos_tasks.append(create_mesos_task(task))
            # Gather up queued job executions that were accepted
            for queued_job_exe in node_offers.get_accepted_new_job_exes():
                queued_job_exes_to_schedule.append(queued_job_exe)

        try:
            # Schedule queued job executions and start their first tasks
            workspaces = workspace_mgr.get_workspaces()
            scheduled_job_exes = self._schedule_queued_job_executions(queued_job_exes_to_schedule, workspaces)
            job_exe_mgr.schedule_job_exes(scheduled_job_exes)
            for scheduled_job_exe in scheduled_job_exes:
                task = scheduled_job_exe.start_next_task()
                if task:
                    tasks.append(task)
                    tasks_to_launch[scheduled_job_exe.node_id].append(create_mesos_task(task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Launch tasks on Mesos
        task_mgr.launch_tasks(tasks, when)
        total_num_tasks = 0
        total_num_nodes = 0
        for node_offers in node_offers_list:
            task_list = tasks_to_launch[node_offers.node.id]
            num_tasks = len(task_list)
            total_num_tasks += num_tasks
            if num_tasks:
                total_num_nodes += 1
            mesos_offer_ids = []
            for offer_id in node_offers.offer_ids:
                mesos_offer_id = mesos_pb2.OfferID()
                mesos_offer_id.value = offer_id
                mesos_offer_ids.append(mesos_offer_id)
            self._driver.launchTasks(mesos_offer_ids, task_list)
        if total_num_tasks:
            logger.info('Launched %i Mesos task(s) on %i node(s)', total_num_tasks, total_num_nodes)
        return total_num_tasks