コード例 #1
0
ファイル: schedule.py プロジェクト: ngageoint/scale
    def _schedule_accepted_tasks(self):
        """Launches every accepted task in Mesos, issuing one launch call per node with accepted offers

        Tasks are gathered from three sources: the tasks already accepted on each node's offers, the next task of each
        accepted running job execution, and the first task of each queued job execution that gets scheduled here. If
        scheduling the queued job executions raises an OperationalError, the error is logged and whatever tasks were
        gathered up to that point are still launched.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        def to_mesos_offer_id(offer_id):
            """Wraps a raw offer ID value in a Mesos OfferID message"""
            wrapped = mesos_pb2.OfferID()
            wrapped.value = offer_id
            return wrapped

        launch_time = now()
        mesos_tasks_by_node = {}  # {Node ID: [Mesos Tasks]}
        new_job_exes = []
        accepted_node_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        for node_offers in accepted_node_offers:
            node_tasks = []
            mesos_tasks_by_node[node_offers.node.id] = node_tasks

            # Tasks accepted directly on the offers (cleanup tasks)
            for accepted_task in node_offers.get_accepted_tasks():
                accepted_task.launch(launch_time)
                node_tasks.append(create_mesos_task(accepted_task))

            # Accepted job executions that are already running move on to their next task, if they have one
            for running_exe in node_offers.get_accepted_running_job_exes():
                next_task = running_exe.start_next_task()
                if not next_task:
                    continue
                next_task.launch(launch_time)
                node_tasks.append(create_mesos_task(next_task))

            # Accepted queued job executions are collected here and scheduled together below
            new_job_exes.extend(node_offers.get_accepted_new_job_exes())

        try:
            # Schedule the queued job executions, register them as running, and start their first tasks
            workspaces = workspace_mgr.get_workspaces()
            scheduled = self._schedule_queued_job_executions(new_job_exes, workspaces)
            running_job_mgr.add_job_exes(scheduled)
            for job_exe in scheduled:
                first_task = job_exe.start_next_task()
                if not first_task:
                    continue
                first_task.launch(launch_time)
                mesos_tasks_by_node[job_exe.node_id].append(create_mesos_task(first_task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Hand the gathered tasks to the Mesos driver, node by node
        launched_tasks = 0
        busy_nodes = 0
        for node_offers in accepted_node_offers:
            node_tasks = mesos_tasks_by_node[node_offers.node.id]
            if node_tasks:
                launched_tasks += len(node_tasks)
                busy_nodes += 1
            offer_ids = [to_mesos_offer_id(offer_id) for offer_id in node_offers.offer_ids]
            # NOTE(review): launchTasks is called even when the node has no tasks - presumably so the driver can
            # release the unused offers; confirm against the Mesos driver documentation
            self._driver.launchTasks(offer_ids, node_tasks)

        if launched_tasks:
            logger.info('Launched %i Mesos task(s) on %i node(s)', launched_tasks, busy_nodes)
        return launched_tasks
コード例 #2
0
    def _schedule_accepted_tasks(self):
        """Launches every accepted task in Mesos, issuing one launch call per node with accepted offers

        Tasks are gathered from the next task of each accepted running job execution and from the first task of each
        queued job execution that gets scheduled here. If scheduling the queued job executions raises an
        OperationalError, the error is logged and whatever tasks were gathered up to that point are still launched.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        def to_mesos_offer_id(offer_id):
            """Wraps a raw offer ID value in a Mesos OfferID message"""
            wrapped = mesos_pb2.OfferID()
            wrapped.value = offer_id
            return wrapped

        mesos_tasks_by_node = {}  # {Node ID: [Mesos Tasks]}
        new_job_exes = []
        accepted_node_offers = self._offer_manager.pop_offers_with_accepted_job_exes()
        for node_offers in accepted_node_offers:
            node_tasks = []
            mesos_tasks_by_node[node_offers.node.id] = node_tasks

            # Accepted job executions that are already running move on to their next task, if they have one
            for running_exe in node_offers.get_accepted_running_job_exes():
                next_task = running_exe.start_next_task()
                if not next_task:
                    continue
                node_tasks.append(create_mesos_task(next_task))

            # Accepted queued job executions are collected here and scheduled together below
            new_job_exes.extend(node_offers.get_accepted_new_job_exes())

        try:
            # Schedule the queued job executions, register them with the job execution manager, and start their
            # first tasks
            workspaces = self._workspace_manager.get_workspaces()
            scheduled = self._schedule_queued_job_executions(new_job_exes, workspaces)
            self._job_exe_manager.add_job_exes(scheduled)
            for job_exe in scheduled:
                first_task = job_exe.start_next_task()
                if not first_task:
                    continue
                mesos_tasks_by_node[job_exe.node_id].append(create_mesos_task(first_task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Hand the gathered tasks to the Mesos driver, node by node
        launched_tasks = 0
        busy_nodes = 0
        for node_offers in accepted_node_offers:
            node_tasks = mesos_tasks_by_node[node_offers.node.id]
            if node_tasks:
                launched_tasks += len(node_tasks)
                busy_nodes += 1
            offer_ids = [to_mesos_offer_id(offer_id) for offer_id in node_offers.offer_ids]
            # NOTE(review): launchTasks is called even when the node has no tasks - presumably so the driver can
            # release the unused offers; confirm against the Mesos driver documentation
            self._driver.launchTasks(offer_ids, node_tasks)

        if launched_tasks:
            logger.info('Launched %i Mesos task(s) on %i node(s)', launched_tasks, busy_nodes)
        return launched_tasks
コード例 #3
0
ファイル: manager.py プロジェクト: kaydoh/scale
    def _launch_tasks(self, client, nodes):
        """Starts the allocated tasks on every given node and submits them to Mesos with the node's allocated offers

        The tasks are first started on each node and registered with the task manager, then each node that has
        allocated offers gets a single combine_offers call on the client. A failure in that call is logged and does not
        stop the remaining nodes from being processed. The elapsed time is logged as a warning when it exceeds
        LAUNCH_TASK_WARN_THRESHOLD and at debug level otherwise.

        :param client: The Mesos scheduler client
        :type client: :class:`mesoshttp.client.MesosClient`
        :param nodes: The dict of all scheduling nodes stored by node ID
        :type nodes: dict
        :returns: The number of tasks that were launched and the number of offers accepted
        :rtype: tuple
        """

        started = now()

        # Start the job execution tasks on each node and hand all allocated tasks to the task manager
        tasks_to_launch = []
        for node in nodes.values():
            node.start_job_exe_tasks()
            tasks_to_launch.extend(node.allocated_tasks)
        task_mgr.launch_tasks(tasks_to_launch, started)

        # Submit each node's offers and tasks to Mesos while accumulating the totals for the summary log
        nodes_with_tasks = 0
        nodes_with_offers = 0
        offers_accepted = 0
        tasks_launched = 0
        offered_resources = NodeResources()
        used_resources = NodeResources()
        for node in nodes.values():
            node_mesos_offers = []
            for offer in node.allocated_offers:
                offered_resources.add(offer.resources)
                node_mesos_offers.append(offer.mesos_offer)
            offers_accepted += len(node_mesos_offers)

            node_mesos_tasks = []
            node_tasks = node.allocated_tasks
            for task in node_tasks:
                used_resources.add(task.get_resources())
                node_mesos_tasks.append(create_mesos_task(task))
            node_task_count = len(node_tasks)
            tasks_launched += node_task_count
            if node_task_count:
                nodes_with_tasks += 1

            # Nothing is sent to Mesos for a node without allocated offers
            if not node_mesos_offers:
                continue
            nodes_with_offers += 1
            try:
                client.combine_offers(node_mesos_offers, node_mesos_tasks)
            except Exception:
                logger.exception('Error occurred while launching tasks on node %s', node.hostname)

        # Report how long the launch took, escalating to a warning when it exceeded the threshold
        elapsed = now() - started
        log_duration = logger.warning if elapsed > LAUNCH_TASK_WARN_THRESHOLD else logger.debug
        log_duration('Launching tasks took %.3f seconds', elapsed.total_seconds())

        # Whatever was offered but not consumed by the launched tasks is reported as declined
        unused_resources = NodeResources()
        unused_resources.add(offered_resources)
        unused_resources.subtract(used_resources)
        if offers_accepted:
            logger.info(
                'Accepted %d offer(s) from %d node(s), launched %d task(s) with %s on %d node(s), declined %s',
                offers_accepted, nodes_with_offers, tasks_launched, used_resources, nodes_with_tasks,
                unused_resources)
        return tasks_launched, offers_accepted