コード例 #1
0
ファイル: test_messaging_service.py プロジェクト: sau29/scale
    def test_get_tasks_to_kill(self):
        """Tests that get_tasks_to_kill() returns only the surplus message handler tasks"""

        # Launch an initial pool sized for 5 message handlers
        scheduler_mgr.config.num_message_handlers = 5
        service = MessagingService()
        launched_tasks = service.get_tasks_to_schedule()
        task_mgr.launch_tasks(launched_tasks, now())

        # Dropping the desired count to 3 should leave 2 surplus tasks to kill
        scheduler_mgr.config.num_message_handlers = 3
        surplus_tasks = service.get_tasks_to_kill()
        self.assertEqual(len(surplus_tasks), 2)

        # Report each surplus task as KILLED to both the task manager and the service
        for surplus_task in surplus_tasks:
            killed_update = job_test_utils.create_task_status_update(
                surplus_task.id,
                surplus_task.agent_id,
                TaskStatusUpdate.KILLED,
                now(),
            )
            task_mgr.handle_task_update(killed_update)
            service.handle_task_update(killed_update)
        self.assertEqual(service.get_actual_task_count(), 3)

        # Raising the desired count to 10 means nothing is surplus any more
        scheduler_mgr.config.num_message_handlers = 10
        surplus_tasks = service.get_tasks_to_kill()
        self.assertEqual(len(surplus_tasks), 0)
コード例 #2
0
ファイル: test_messaging_service.py プロジェクト: sau29/scale
    def test_get_tasks_to_schedule(self):
        """Tests that get_tasks_to_schedule() only returns tasks while below the desired count"""

        # Configure 5 desired message handlers before creating the service
        scheduler_mgr.config.num_message_handlers = 5
        service = MessagingService()

        # Nothing is running yet, so all 5 tasks should be offered for scheduling
        pending_tasks = service.get_tasks_to_schedule()
        self.assertEqual(len(pending_tasks), 5)

        # Once launched, the service should report 5 actual tasks
        task_mgr.launch_tasks(pending_tasks, now())
        self.assertEqual(service.get_actual_task_count(), 5)

        # With the desired count reduced to 3 there is nothing new to schedule
        scheduler_mgr.config.num_message_handlers = 3
        pending_tasks = service.get_tasks_to_schedule()
        self.assertEqual(len(pending_tasks), 0)
コード例 #3
0
ファイル: test_messaging_service.py プロジェクト: sau29/scale
    def test_handle_task_update(self):
        """Tests that handle_task_update() drops a failed task so a replacement gets scheduled"""

        # Launch an initial pool sized for 5 message handlers
        scheduler_mgr.config.num_message_handlers = 5
        service = MessagingService()
        launched_tasks = service.get_tasks_to_schedule()
        task_mgr.launch_tasks(launched_tasks, now())

        # Report the first launched task as FAILED to both the task manager and the service
        failed_task = launched_tasks[0]
        failed_update = job_test_utils.create_task_status_update(
            failed_task.id,
            failed_task.agent_id,
            TaskStatusUpdate.FAILED,
            now(),
        )
        task_mgr.handle_task_update(failed_update)
        service.handle_task_update(failed_update)
        self.assertEqual(service.get_actual_task_count(), 4)

        # Exactly one replacement task should now be offered for scheduling
        replacement_tasks = service.get_tasks_to_schedule()
        self.assertEqual(len(replacement_tasks), 1)
コード例 #4
0
ファイル: manager.py プロジェクト: kaydoh/scale
    def _launch_tasks(self, client, nodes):
        """Starts the tasks allocated on the given nodes and submits them to Mesos

        All allocated tasks are first registered with the task manager, then each node's allocated offers and tasks
        are passed to the client together. An error raised by the client for one node is logged and does not stop
        the remaining nodes from being processed; that node's tasks and offers are still included in the returned
        counts.

        :param client: The Mesos scheduler client
        :type client: :class:`mesoshttp.client.MesosClient`
        :param nodes: The dict of all scheduling nodes stored by node ID
        :type nodes: dict
        :returns: The number of tasks that were launched and the number of offers accepted
        :rtype: tuple
        """

        start_time = now()

        # Start each node's job execution tasks, then hand every allocated task to the task manager in one call
        tasks_to_register = []
        for sched_node in nodes.values():
            sched_node.start_job_exe_tasks()
            tasks_to_register.extend(sched_node.allocated_tasks)
        task_mgr.launch_tasks(tasks_to_register, start_time)

        # Running totals used for the summary log message and the return value
        nodes_with_tasks = 0
        nodes_with_offers = 0
        offers_accepted = 0
        tasks_launched = 0
        offered_resources = NodeResources()
        launched_resources = NodeResources()

        # Submit each node's offers and tasks to Mesos
        for sched_node in nodes.values():
            node_mesos_offers = []
            node_mesos_tasks = []

            for allocated_offer in sched_node.allocated_offers:
                offers_accepted += 1
                offered_resources.add(allocated_offer.resources)
                node_mesos_offers.append(allocated_offer.mesos_offer)

            node_tasks = sched_node.allocated_tasks
            for allocated_task in node_tasks:
                launched_resources.add(allocated_task.get_resources())
                node_mesos_tasks.append(create_mesos_task(allocated_task))

            node_task_count = len(node_tasks)
            tasks_launched += node_task_count
            if node_task_count:
                nodes_with_tasks += 1

            # Nodes without any allocated offers are counted above but nothing is sent to the client for them
            if not mesos_offers_present(node_mesos_offers) if False else not node_mesos_offers:
                continue
            nodes_with_offers += 1
            try:
                client.combine_offers(node_mesos_offers, node_mesos_tasks)
            except Exception:
                logger.exception(
                    'Error occurred while launching tasks on node %s',
                    sched_node.hostname)

        # Log the elapsed time, escalating to a warning when it exceeds the threshold
        elapsed = now() - start_time
        timing_msg = 'Launching tasks took %.3f seconds'
        log_timing = logger.warning if elapsed > LAUNCH_TASK_WARN_THRESHOLD else logger.debug
        log_timing(timing_msg, elapsed.total_seconds())

        # Declined resources = everything that was offered minus what the launched tasks consume
        declined_resources = NodeResources()
        declined_resources.add(offered_resources)
        declined_resources.subtract(launched_resources)
        if offers_accepted:
            logger.info(
                'Accepted %d offer(s) from %d node(s), launched %d task(s) with %s on %d node(s), declined %s',
                offers_accepted, nodes_with_offers, tasks_launched,
                launched_resources, nodes_with_tasks, declined_resources)
        return tasks_launched, offers_accepted
コード例 #5
0
ファイル: schedule.py プロジェクト: mnjstwins/scale
    def _schedule_accepted_tasks(self):
        """Schedules every accepted task and job execution, then launches the resulting tasks on Mesos

        Accepted tasks and the next tasks of accepted running job executions are collected per node. Accepted queued
        job executions are then scheduled and their first tasks added. If scheduling the queued job executions
        raises an OperationalError, the error is logged and the tasks collected so far are still launched.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        launch_time = now()
        all_tasks = []
        mesos_tasks_by_node = {}  # {Node ID: [Mesos Tasks]}
        accepted_new_job_exes = []
        accepted_node_offers = offer_mgr.pop_offers_with_accepted_job_exes()

        for node_offers in accepted_node_offers:
            node_mesos_tasks = []
            mesos_tasks_by_node[node_offers.node.id] = node_mesos_tasks

            # Tasks accepted directly on the offers (cleanup tasks)
            for accepted_task in node_offers.get_accepted_tasks():
                all_tasks.append(accepted_task)
                node_mesos_tasks.append(create_mesos_task(accepted_task))

            # Accepted job executions that are already running advance to their next task, if they have one
            for running_exe in node_offers.get_accepted_running_job_exes():
                next_task = running_exe.start_next_task()
                if not next_task:
                    continue
                all_tasks.append(next_task)
                node_mesos_tasks.append(create_mesos_task(next_task))

            # Accepted queued job executions are collected here and scheduled together below
            accepted_new_job_exes.extend(node_offers.get_accepted_new_job_exes())

        try:
            # Schedule the queued job executions and start the first task of each
            workspaces = workspace_mgr.get_workspaces()
            newly_scheduled = self._schedule_queued_job_executions(accepted_new_job_exes, workspaces)
            job_exe_mgr.schedule_job_exes(newly_scheduled)
            for scheduled_exe in newly_scheduled:
                first_task = scheduled_exe.start_next_task()
                if not first_task:
                    continue
                all_tasks.append(first_task)
                mesos_tasks_by_node[scheduled_exe.node_id].append(create_mesos_task(first_task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Register the tasks with the task manager, then launch them on Mesos node by node
        task_mgr.launch_tasks(all_tasks, launch_time)
        launched_task_count = 0
        launched_node_count = 0
        for node_offers in accepted_node_offers:
            node_mesos_tasks = mesos_tasks_by_node[node_offers.node.id]
            node_task_count = len(node_mesos_tasks)
            launched_task_count += node_task_count
            if node_task_count:
                launched_node_count += 1

            # Wrap each raw offer ID in the Mesos protobuf message the driver is given
            offer_id_messages = []
            for raw_offer_id in node_offers.offer_ids:
                offer_id_message = mesos_pb2.OfferID()
                offer_id_message.value = raw_offer_id
                offer_id_messages.append(offer_id_message)

            # Called for every node, even when its task list is empty
            self._driver.launchTasks(offer_id_messages, node_mesos_tasks)

        if launched_task_count:
            logger.info('Launched %i Mesos task(s) on %i node(s)', launched_task_count, launched_node_count)
        return launched_task_count